mirror of
https://github.com/kennethreitz/kjvstudy.org.git
synced 2026-06-05 23:00:16 +00:00
195 lines
5.8 KiB
Python
195 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Analyze data coverage to identify gaps and areas for improvement.
|
|
"""
|
|
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from collections import defaultdict
|
|
|
|
# Add parent directory to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from kjvstudy_org.kjv import bible
|
|
|
|
|
|
def _book_from_reference(verse_ref):
    """Return the book name embedded in a commentary reference key.

    Accepts both ``Book:chapter:verse`` and ``Book chapter:verse`` style keys
    and keeps numbered book names such as ``1 John`` intact.
    NOTE(review): the real key format of verse_commentary.json is not visible
    from this script; both layouts are handled so the tally stays per-book
    either way.
    """
    # Everything before the first colon is either the bare book name or
    # "<book> <chapter>".
    head = verse_ref.split(':', 1)[0]
    name, _, last_word = head.rpartition(' ')
    # A trailing all-digit word is a chapter number, not part of the name.
    if name and last_word.isdigit():
        return name.rstrip()
    return head


def analyze_commentary_coverage():
    """Print a verse-commentary coverage report and return the entry count.

    Reads ``kjvstudy_org/data/verse_commentary.json`` (resolved against the
    current working directory), reports how many top-level entries it holds
    relative to the hard-coded verse total, and lists a per-book tally in
    descending order of entry count.

    Returns:
        int: Number of top-level entries in the commentary file.

    Raises:
        OSError: If the commentary file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print("=" * 80)
    print("VERSE COMMENTARY COVERAGE ANALYSIS")
    print("=" * 80)
    print()

    commentary_file = Path("kjvstudy_org/data/verse_commentary.json")
    # Explicit encoding: the platform default is locale-dependent and can
    # fail on non-ASCII commentary text.
    with open(commentary_file, 'r', encoding='utf-8') as f:
        commentary = json.load(f)

    total_verses = 31102
    verses_with_commentary = len(commentary)
    coverage_pct = (verses_with_commentary / total_verses) * 100

    print(f"Total verses in Bible: {total_verses:,}")
    print(f"Verses with commentary: {verses_with_commentary:,}")
    print(f"Coverage: {coverage_pct:.1f}%")
    print()

    # Tally entries per book (not per "book chapter").
    by_book = defaultdict(int)
    for verse_ref in commentary:
        by_book[_book_from_reference(verse_ref)] += 1

    print(f"Books with commentary ({len(by_book)} books):")
    for book, count in sorted(by_book.items(), key=lambda x: x[1], reverse=True):
        print(f" {book}: {count} verses")
    print()

    return verses_with_commentary
|
|
|
|
|
|
def analyze_cross_reference_coverage():
    """Print a cross-reference coverage report.

    Reads ``kjvstudy_org/data/cross_references.json`` (resolved against the
    current working directory). The file is treated as a mapping whose values
    are lists of cross-reference entries; an entry counts as "described" when
    it is a dict carrying a non-blank string under ``'note'``.

    Raises:
        OSError: If the cross-reference file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print("=" * 80)
    print("CROSS-REFERENCE COVERAGE ANALYSIS")
    print("=" * 80)
    print()

    xref_file = Path("kjvstudy_org/data/cross_references.json")
    # Explicit encoding: the platform default is locale-dependent.
    with open(xref_file, 'r', encoding='utf-8') as f:
        cross_refs = json.load(f)

    total_verses = 31102
    verses_with_xrefs = len(cross_refs)
    total_xrefs = sum(len(refs) for refs in cross_refs.values())
    avg_xrefs_per_verse = total_xrefs / verses_with_xrefs if verses_with_xrefs > 0 else 0

    # Count how many entries carry a usable description.
    xrefs_with_descriptions = 0
    for refs in cross_refs.values():
        for ref in refs:
            # Entries that are not dicts, or whose note is not a string, are
            # counted as undescribed instead of aborting the whole report.
            note = ref.get('note') if isinstance(ref, dict) else None
            if isinstance(note, str) and note.strip():
                xrefs_with_descriptions += 1
    # Every entry is either described or not, so the remainder is exact.
    xrefs_without_descriptions = total_xrefs - xrefs_with_descriptions

    coverage_pct = (verses_with_xrefs / total_verses) * 100
    description_pct = (xrefs_with_descriptions / total_xrefs) * 100 if total_xrefs > 0 else 0

    print(f"Total verses in Bible: {total_verses:,}")
    print(f"Verses with cross-references: {verses_with_xrefs:,}")
    print(f"Coverage: {coverage_pct:.1f}%")
    print()
    print(f"Total cross-reference entries: {total_xrefs:,}")
    print(f"Average per verse: {avg_xrefs_per_verse:.1f}")
    print()
    print(f"Cross-refs with descriptions: {xrefs_with_descriptions:,} ({description_pct:.1f}%)")
    print(f"Cross-refs without descriptions: {xrefs_without_descriptions:,}")
    print()
|
|
|
|
|
|
def analyze_word_studies():
    """Print a summary of the word-study data set.

    Reads ``kjvstudy_org/data/word_studies.json`` (resolved against the
    current working directory), which is treated as a mapping of word to an
    entry. Each entry is classified by whether it has a truthy ``'ot_term'``
    and/or ``'nt_term'``, and the first ten words in file order are listed as
    a sample.

    Raises:
        OSError: If the word-studies file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print("=" * 80)
    print("WORD STUDIES ANALYSIS")
    print("=" * 80)
    print()

    word_studies_file = Path("kjvstudy_org/data/word_studies.json")
    # Explicit encoding: the platform default is locale-dependent and would
    # break on any non-ASCII term stored in the file.
    with open(word_studies_file, 'r', encoding='utf-8') as f:
        word_studies = json.load(f)

    total_studies = len(word_studies)

    # Categorize by OT/NT coverage
    has_ot = 0
    has_nt = 0
    has_both = 0
    has_neither = 0

    for data in word_studies.values():
        # A malformed (non-dict) entry is treated as having no terms.
        entry = data if isinstance(data, dict) else {}
        ot = bool(entry.get('ot_term'))
        nt = bool(entry.get('nt_term'))

        if ot and nt:
            has_both += 1
        elif ot:
            has_ot += 1
        elif nt:
            has_nt += 1
        else:
            has_neither += 1

    print(f"Total word studies: {total_studies}")
    print(f" Both OT and NT: {has_both}")
    print(f" OT only: {has_ot}")
    print(f" NT only: {has_nt}")
    # Only shown when needed, so the breakdown always adds up to the total.
    if has_neither:
        print(f" No OT or NT term: {has_neither}")
    print()

    # Show sample words
    print("Sample words studied:")
    for word in list(word_studies)[:10]:
        print(f" - {word}")
    print()
|
|
|
|
|
|
def analyze_books_without_data():
    """Print the 15 books with the lowest commentary coverage.

    For every book returned by ``bible.get_books()`` the verse total is the
    sum of ``len(bible.get_verses_by_book_chapter(book, ch))`` over the
    chapters from ``bible.get_chapters_for_book(book)``. Commentary entries
    are attributed to a book when their key starts with the book name
    followed by ``':'`` or a space.

    Raises:
        OSError: If the commentary file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print("=" * 80)
    print("BOOKS WITH MINIMAL STUDY MATERIAL")
    print("=" * 80)
    print()

    books = bible.get_books()

    # Load commentary
    commentary_file = Path("kjvstudy_org/data/verse_commentary.json")
    # Explicit encoding: the platform default is locale-dependent.
    with open(commentary_file, 'r', encoding='utf-8') as f:
        commentary = json.load(f)

    # Count verses per book
    book_stats = {}

    for book in books:
        chapters = bible.get_chapters_for_book(book)
        total_verses = sum(len(bible.get_verses_by_book_chapter(book, ch)) for ch in chapters)

        # The separator is part of the prefix so that "John" does not also
        # claim keys belonging to a longer name beginning with "John".
        key_prefixes = (book + ':', book + ' ')
        commentary_count = sum(1 for ref in commentary if ref.startswith(key_prefixes))

        book_stats[book] = {
            'total_verses': total_verses,
            'commentary': commentary_count,
            # Guard against a book that reports no verses at all.
            'commentary_pct': (commentary_count / total_verses * 100) if total_verses > 0 else 0,
        }

    # Sort by lowest commentary percentage
    sorted_books = sorted(book_stats.items(), key=lambda x: x[1]['commentary_pct'])

    print("Books with lowest commentary coverage:")
    print()
    for book, stats in sorted_books[:15]:
        print(f"{book:20} {stats['total_verses']:4} verses, {stats['commentary']:4} commentary ({stats['commentary_pct']:.1f}%)")
    print()
|
|
|
|
|
|
def main():
    """Emit each coverage report in turn, then the closing recommendations."""
    reports = (
        analyze_commentary_coverage,
        analyze_cross_reference_coverage,
        analyze_word_studies,
        analyze_books_without_data,
    )
    for run_report in reports:
        run_report()

    rule = "=" * 80
    closing_lines = (
        rule,
        "RECOMMENDATIONS FOR IMPROVEMENT",
        rule,
        "",
        "1. Expand verse commentary to more books (currently 6-7% coverage)",
        "2. Add more word studies (currently 53 terms)",
        "3. Focus commentary on underserved books",
        "4. Consider adding thematic commentary (not just verse-by-verse)",
        "",
    )
    # One print per entry; an empty entry yields the same blank line as print().
    for line in closing_lines:
        print(line)


# Produce the report only when run as a script, not when imported.
if __name__ == "__main__":
    main()
|