Files
kjvstudy.org/scripts/migrate_commentary.py
T
kennethreitz b21d0cd50b Add commentary for top searched verses (Matthew, John, Psalms)
- Add 52 Matthew verses (Beatitudes, Lord's Prayer, Great Commission, etc.)
- Add 38 John verses (I am statements, Upper Room, High Priestly Prayer)
- Add 19 new Psalms verses (Psalm 1, 23, 27, 34, 37, 46, 91, 103, 119, 127, 133, 139)
- Add scripts/check_top_verses.py to track coverage of top 536 most-searched verses
- Add scripts/migrate_commentary.py to safely merge commentary into per-book files

Coverage improved: 61.8% of top verses now have commentary.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-02 11:23:50 -05:00

202 lines
6.2 KiB
Python

#!/usr/bin/env python3
"""
Migrate commentary from verse_commentary.json (root) to per-book files.
Only migrates verses that DON'T already exist in the per-book files.
Verses that already exist are never overwritten: they stay in the source file
and are also written to verse_commentary_duplicates.json for review.
"""
import json
import sys
from pathlib import Path
# Paths. Both are resolved from the parent of this script's directory.
# ROOT_FILE is the combined commentary file that verses are migrated from.
ROOT_FILE = Path(__file__).parent.parent / "verse_commentary.json"
# DATA_DIR holds the per-book JSON files that verses are migrated into.
DATA_DIR = Path(__file__).parent.parent / "kjvstudy_org" / "data" / "verse_commentary"
def slugify(book: str) -> str:
    """Turn a book name into a lowercase, underscore-separated file stem.

    Every run of characters outside ``a-z`` / ``0-9`` (after lowercasing)
    becomes a single underscore, and leading/trailing underscores are removed.

    Args:
        book: Human-readable book name, e.g. ``"1 Samuel"``.

    Returns:
        The slug (``"1_samuel"``), or ``"book"`` when nothing usable remains.
    """
    import re

    lowered = book.lower()
    underscored = re.sub(r"[^a-z0-9]+", "_", lowered)
    collapsed = re.sub(r"_+", "_", underscored)
    trimmed = collapsed.strip("_")
    if trimmed:
        return trimmed
    # Names with no ASCII letters or digits still need a usable file name.
    return "book"
def load_root_commentary():
    """Read ROOT_FILE and normalise its contents to a list of book entries.

    Returns:
        A list. A top-level JSON array is returned unchanged. A top-level
        JSON object is converted so that each key/value pair becomes
        ``{"book": key, "commentary": value}``. An empty list is returned
        when the file does not exist (a message is printed) or when the
        top-level value is neither an array nor an object.
    """
    if not ROOT_FILE.exists():
        print(f"Source file not found: {ROOT_FILE}")
        return []

    with open(ROOT_FILE, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)

    # Array format is already in the shape the migration expects.
    if isinstance(payload, list):
        return payload

    # Anything other than an object cannot be interpreted as commentary.
    if not isinstance(payload, dict):
        return []

    # Object format: keys are book names, values are their chapter maps.
    entries = []
    for name, chapter_map in payload.items():
        entries.append({"book": name, "commentary": chapter_map})
    return entries
def load_book_file(book_name: str) -> dict:
    """Return the parsed per-book commentary file for *book_name*.

    The file is looked up as ``DATA_DIR/<slug>.json``, where the slug comes
    from ``slugify(book_name)``.

    Args:
        book_name: Book name as it appears in the source data.

    Returns:
        The decoded JSON content of the file, or a fresh
        ``{"book": book_name, "commentary": {}}`` skeleton when the file
        does not exist.
    """
    path = DATA_DIR / f"{slugify(book_name)}.json"
    if path.exists():
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    # No file yet for this book: start from an empty commentary mapping.
    return {"book": book_name, "commentary": {}}
def save_book_file(book_name: str, data: dict):
    """Write *data* to the per-book commentary file for *book_name*.

    The file is ``DATA_DIR/<slug>.json``, where the slug comes from
    ``slugify(book_name)``. It is written as UTF-8 JSON with two-space
    indentation and non-ASCII characters left unescaped. The name of the
    written file is printed afterwards.

    Args:
        book_name: Book name used to derive the file name.
        data: JSON-serialisable content to store.
    """
    slug = slugify(book_name)
    book_file = DATA_DIR / f"{slug}.json"
    # The per-book directory may not exist yet (e.g. first migration run);
    # without this, open() below fails with FileNotFoundError.
    book_file.parent.mkdir(parents=True, exist_ok=True)
    with open(book_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f" Saved: {book_file.name}")
def _merge_book_commentary(existing_commentary: dict, new_commentary: dict) -> tuple:
    """Copy verses missing from *existing_commentary* out of *new_commentary*.

    *existing_commentary* is updated in place. A verse that is already
    present is never overwritten; it is collected and returned instead.
    Chapters whose value is not a dict are skipped.

    Returns:
        ``(migrated_count, duplicates)`` where *duplicates* maps
        chapter -> verse -> entry for every source verse that already existed.
    """
    migrated_count = 0
    duplicates = {}
    for chapter_str, verses in new_commentary.items():
        if not isinstance(verses, dict):
            continue
        for verse_str, entry in verses.items():
            if verse_str in existing_commentary.get(chapter_str, {}):
                # Duplicate - keep the existing text, remember the source one.
                duplicates.setdefault(chapter_str, {})[verse_str] = entry
            else:
                existing_commentary.setdefault(chapter_str, {})[verse_str] = entry
                migrated_count += 1
    return migrated_count, duplicates


def _print_duplicates_report(duplicates_by_book: dict) -> None:
    """Print up to ten skipped ``chapter:verse`` references per book."""
    print("DUPLICATES BY BOOK (for review):")
    print("-" * 70)
    for book, verses in duplicates_by_book.items():
        print(f"\n{book}:")
        for v in verses[:10]:  # Show first 10
            print(f" {v}")
        if len(verses) > 10:
            print(f" ... and {len(verses) - 10} more")


def _write_duplicates_file(duplicates_data: list) -> None:
    """Write the skipped source entries next to the root commentary file."""
    dup_file = Path(__file__).parent.parent / "verse_commentary_duplicates.json"
    with open(dup_file, 'w', encoding='utf-8') as f:
        json.dump(duplicates_data, f, ensure_ascii=False, indent=2)
    print(f"Saved duplicates to: {dup_file}")


def main():
    """Migrate verses from the root commentary file into the per-book files.

    For every book entry returned by ``load_root_commentary()``, verses that
    are not yet in the matching per-book file are added and the file is saved;
    verses that already exist are left untouched and counted as duplicates.
    A summary is printed, and the duplicate source entries are written to
    ``verse_commentary_duplicates.json`` for manual review. The source file
    itself is never modified.

    Duplicates are recorded during the merge pass itself, so a book that
    appears more than once in the source keeps all of its duplicates and the
    duplicates file only ever contains verses that were actually skipped.
    """
    print("=" * 70)
    print("COMMENTARY MIGRATION")
    print("=" * 70)
    print()
    print(f"Source: {ROOT_FILE}")
    print(f"Target: {DATA_DIR}")
    print()

    # Load source data
    source_data = load_root_commentary()
    if not source_data:
        print("No source data to migrate.")
        return
    print(f"Found {len(source_data)} book entries in source file")
    print()

    # Track statistics
    total_migrated = 0
    total_duplicates = 0
    duplicates_by_book = {}
    duplicates_data = []

    # Process each book
    for book_entry in source_data:
        # Malformed entries are skipped, like malformed chapters are below.
        if not isinstance(book_entry, dict):
            continue
        book_name = book_entry.get("book")
        new_commentary = book_entry.get("commentary", {})
        if not book_name or not new_commentary or not isinstance(new_commentary, dict):
            continue

        print(f"Processing: {book_name}")

        # Load existing book file
        existing_data = load_book_file(book_name)
        existing_commentary = existing_data.get("commentary", {})

        migrated_count, dup_commentary = _merge_book_commentary(
            existing_commentary, new_commentary
        )
        duplicate_verses = []
        for chapter_str, verses in dup_commentary.items():
            duplicate_verses.extend(f"{chapter_str}:{verse_str}" for verse_str in verses)
        duplicate_count = len(duplicate_verses)

        # Save updated book file if we migrated anything
        if migrated_count > 0:
            existing_data["commentary"] = existing_commentary
            save_book_file(book_name, existing_data)

        print(f" Migrated: {migrated_count} verses")
        print(f" Duplicates (skipped): {duplicate_count} verses")

        if duplicate_verses:
            # Extend rather than assign: the same book may occur in several
            # source entries and earlier duplicates must not be lost.
            duplicates_by_book.setdefault(book_name, []).extend(duplicate_verses)
            duplicates_data.append({
                "book": book_name,
                "commentary": dup_commentary,
            })

        total_migrated += migrated_count
        total_duplicates += duplicate_count
        print()

    # Summary
    print("=" * 70)
    print("MIGRATION SUMMARY")
    print("=" * 70)
    print(f"Total verses migrated: {total_migrated}")
    print(f"Total duplicates skipped: {total_duplicates}")
    print()

    if duplicates_by_book:
        _print_duplicates_report(duplicates_by_book)

    # Create a new file with only duplicates for review
    if total_duplicates > 0:
        print()
        print("Creating duplicates file for review...")
        if duplicates_data:
            _write_duplicates_file(duplicates_data)

    print()
    print("Done! Review duplicates if any, then delete source file.")
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()