mirror of
https://github.com/kennethreitz/kjvstudy.org.git
synced 2026-06-05 23:00:16 +00:00
b21d0cd50b
- Add 52 Matthew verses (Beatitudes, Lord's Prayer, Great Commission, etc.) - Add 38 John verses (I am statements, Upper Room, High Priestly Prayer) - Add 19 new Psalms verses (Psalm 1, 23, 27, 34, 37, 46, 91, 103, 119, 127, 133, 139) - Add scripts/check_top_verses.py to track coverage of top 536 most-searched verses - Add scripts/migrate_commentary.py to safely merge commentary into per-book files Coverage improved: 61.8% of top verses now have commentary. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
202 lines
6.2 KiB
Python
202 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Migrate commentary from verse_commentary.json (root) to per-book files.
|
|
|
|
Only migrates verses that DON'T already exist in the per-book files.
|
|
Duplicates are never overwritten; they are copied to
verse_commentary_duplicates.json for review, and the source file itself is
left unmodified.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Paths: both are anchored two directory levels above this script file.
_REPO_ROOT = Path(__file__).parent.parent
# Combined commentary file that verses are migrated from.
ROOT_FILE = _REPO_ROOT / "verse_commentary.json"
# Directory holding one JSON commentary file per book.
DATA_DIR = _REPO_ROOT.joinpath("kjvstudy_org", "data", "verse_commentary")
|
|
|
|
|
|
def slugify(book: str) -> str:
    """Return a filesystem-friendly file stem for a book name.

    The name is lower-cased, each run of characters outside ``a-z0-9`` is
    turned into an underscore, and leading/trailing underscores are removed.
    Falls back to ``"book"`` when nothing usable is left.
    """
    import re

    lowered = book.lower()
    underscored = re.sub(r"[^a-z0-9]+", "_", lowered)
    collapsed = re.sub(r"_+", "_", underscored)
    trimmed = collapsed.strip("_")
    if trimmed:
        return trimmed
    return "book"
|
|
|
|
|
|
def load_root_commentary():
    """Read ROOT_FILE and return its content as a list of book entries.

    Returns:
        The parsed JSON as-is when it is a list. When it is an object, one
        ``{"book": key, "commentary": value}`` dict per key. An empty list
        when the file does not exist (a notice is printed) or when the JSON
        is neither a list nor an object.
    """
    if not ROOT_FILE.exists():
        print(f"Source file not found: {ROOT_FILE}")
        return []

    with open(ROOT_FILE, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)

    # Array format is already in the shape the caller expects.
    if isinstance(payload, list):
        return payload

    if not isinstance(payload, dict):
        return []

    # Object format: keys are book names, values are their commentary.
    entries = []
    for book, chapters in payload.items():
        entries.append({"book": book, "commentary": chapters})
    return entries
|
|
|
|
|
|
def load_book_file(book_name: str) -> dict:
    """Return the parsed per-book commentary file for *book_name*.

    The file is looked up in DATA_DIR under the slugified book name. When it
    does not exist, a fresh ``{"book": book_name, "commentary": {}}`` dict is
    returned instead.
    """
    target = DATA_DIR / f"{slugify(book_name)}.json"

    if target.exists():
        with open(target, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    return {"book": book_name, "commentary": {}}
|
|
|
|
|
|
def save_book_file(book_name: str, data: dict):
    """Write *data* as the per-book commentary file for *book_name*.

    The file lives in DATA_DIR under the slugified book name and is written
    as UTF-8 JSON with a two-space indent and non-ASCII characters kept
    as-is. Any existing file is overwritten.

    Args:
        book_name: Book name used to derive the file name.
        data: JSON-serializable content to store.
    """
    slug = slugify(book_name)
    book_file = DATA_DIR / f"{slug}.json"

    # load_book_file() tolerates a missing per-book file, so this can be
    # reached before the data directory exists; create it rather than
    # failing with FileNotFoundError on open().
    book_file.parent.mkdir(parents=True, exist_ok=True)

    with open(book_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    print(f" Saved: {book_file.name}")
|
|
|
|
|
|
def _merge_missing_verses(existing_commentary: dict, new_commentary: dict) -> tuple:
    """Copy verses absent from *existing_commentary* into it, in place.

    Returns a ``(migrated_count, duplicate_refs)`` tuple. *duplicate_refs*
    lists ``"chapter:verse"`` strings for verses that were already present
    and were therefore left untouched.
    """
    migrated_count = 0
    duplicate_refs = []

    for chapter_str, verses in new_commentary.items():
        if not isinstance(verses, dict):
            continue

        # Looked up once per chapter. A chapter missing from the target
        # starts as a detached dict and is attached only once a verse is
        # actually migrated, so no empty chapters are created.
        existing_chapter = existing_commentary.get(chapter_str, {})

        for verse_str, entry in verses.items():
            if verse_str in existing_chapter:
                # Duplicate - don't overwrite
                duplicate_refs.append(f"{chapter_str}:{verse_str}")
            else:
                # New verse - migrate it
                existing_chapter[verse_str] = entry
                existing_commentary[chapter_str] = existing_chapter
                migrated_count += 1

    return migrated_count, duplicate_refs


def _print_duplicate_report(duplicates_by_book: dict) -> None:
    """Print up to ten duplicate verse references per book."""
    print("DUPLICATES BY BOOK (for review):")
    print("-" * 70)
    for book, refs in duplicates_by_book.items():
        print(f"\n{book}:")
        for ref in refs[:10]:  # Show first 10
            print(f" {ref}")
        if len(refs) > 10:
            print(f" ... and {len(refs) - 10} more")


def _collect_duplicate_entries(source_data: list, duplicates_by_book: dict) -> list:
    """Build book entries holding only the source verses flagged as duplicates."""
    duplicates_data = []

    for book_entry in source_data:
        if not isinstance(book_entry, dict):
            continue

        book_name = book_entry.get("book")
        if book_name not in duplicates_by_book:
            continue

        commentary = book_entry.get("commentary", {})
        if not isinstance(commentary, dict):
            continue

        dup_refs = set(duplicates_by_book[book_name])
        dup_commentary = {}

        for chapter_str, verses in commentary.items():
            if not isinstance(verses, dict):
                continue
            for verse_str, entry in verses.items():
                if f"{chapter_str}:{verse_str}" in dup_refs:
                    dup_commentary.setdefault(chapter_str, {})[verse_str] = entry

        if dup_commentary:
            duplicates_data.append({
                "book": book_name,
                "commentary": dup_commentary
            })

    return duplicates_data


def main():
    """Migrate verses from the root commentary file into the per-book files.

    For every book entry in the source, verses that are missing from the
    matching per-book file are added and the file is saved; verses that
    already exist are never overwritten. A summary is printed, and the
    source versions of all skipped (duplicate) verses are written to
    ``verse_commentary_duplicates.json`` next to the source file for review.
    The source file itself is not modified.
    """
    print("=" * 70)
    print("COMMENTARY MIGRATION")
    print("=" * 70)
    print()
    print(f"Source: {ROOT_FILE}")
    print(f"Target: {DATA_DIR}")
    print()

    # Load source data
    source_data = load_root_commentary()
    if not source_data:
        print("No source data to migrate.")
        return

    print(f"Found {len(source_data)} book entries in source file")
    print()

    # Track statistics
    total_migrated = 0
    total_duplicates = 0
    duplicates_by_book = {}

    # Process each book
    for book_entry in source_data:
        # A malformed entry must not abort the run after earlier books have
        # already been written; skip it visibly instead.
        if not isinstance(book_entry, dict):
            print("Skipping malformed source entry (expected an object)")
            print()
            continue

        book_name = book_entry.get("book")
        new_commentary = book_entry.get("commentary", {})

        if not book_name or not new_commentary:
            continue

        if not isinstance(new_commentary, dict):
            print(f"Skipping {book_name}: commentary is not an object")
            print()
            continue

        print(f"Processing: {book_name}")

        # Load existing book file
        existing_data = load_book_file(book_name)
        existing_commentary = existing_data.get("commentary", {})

        migrated_count, duplicate_verses = _merge_missing_verses(
            existing_commentary, new_commentary
        )
        duplicate_count = len(duplicate_verses)

        # Save updated book file if we migrated anything
        if migrated_count > 0:
            existing_data["commentary"] = existing_commentary
            save_book_file(book_name, existing_data)

        print(f" Migrated: {migrated_count} verses")
        print(f" Duplicates (skipped): {duplicate_count} verses")

        if duplicate_verses:
            # Accumulate rather than assign: the source may list the same
            # book more than once, and earlier duplicates must not be lost.
            duplicates_by_book.setdefault(book_name, []).extend(duplicate_verses)

        total_migrated += migrated_count
        total_duplicates += duplicate_count
        print()

    # Summary
    print("=" * 70)
    print("MIGRATION SUMMARY")
    print("=" * 70)
    print(f"Total verses migrated: {total_migrated}")
    print(f"Total duplicates skipped: {total_duplicates}")
    print()

    if duplicates_by_book:
        _print_duplicate_report(duplicates_by_book)

    # Create a new file with only duplicates for review
    if total_duplicates > 0:
        print()
        print("Creating duplicates file for review...")
        duplicates_data = _collect_duplicate_entries(source_data, duplicates_by_book)

        if duplicates_data:
            dup_file = Path(__file__).parent.parent / "verse_commentary_duplicates.json"
            with open(dup_file, 'w', encoding='utf-8') as f:
                json.dump(duplicates_data, f, ensure_ascii=False, indent=2)
            print(f"Saved duplicates to: {dup_file}")

    print()
    print("Done! Review duplicates if any, then delete source file.")
|
|
|
|
|
|
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|