# Matches a single HTML tag so commentary markup is not counted as words.
_STATS_HTML_TAG_RE = re.compile(r'<[^>]+>')


def _stats_load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)


def _stats_ratio(numerator, denominator, scale=1):
    """Return ``numerator / denominator * scale`` rounded to one decimal.

    Returns 0 when *denominator* is 0 so that an empty data set yields a
    zero statistic instead of raising ``ZeroDivisionError``.
    """
    if not denominator:
        return 0
    return round((numerator / denominator) * scale, 1)


def _stats_word_count(markup):
    """Count whitespace-separated words in *markup* after removing HTML tags."""
    return len(_STATS_HTML_TAG_RE.sub('', markup or '').split())


def _stats_commentary(commentary_dir):
    """Return ``(file_count, verse_count, word_count)`` for verse commentary.

    Each ``*.json`` file is read for a ``commentary`` mapping of
    chapter -> verse -> entry; the ``analysis`` and ``historical`` fields of
    every entry contribute to the word count.
    """
    files = list(commentary_dir.glob('*.json'))  # glob once, reuse for count
    verses = 0
    words = 0
    for path in files:
        commentary = _stats_load_json(path).get('commentary', {})
        for chapter in commentary.values():
            for verse_data in chapter.values():
                verses += 1
                words += _stats_word_count(verse_data.get('analysis', ''))
                words += _stats_word_count(verse_data.get('historical', ''))
    return len(files), verses, words


def _stats_cross_references(cross_ref_dir):
    """Return ``(file_count, verses_with_refs, total_refs)`` for cross-references.

    Each ``*.json`` file is treated as a mapping whose keys are counted as
    verses and whose values are counted by length as references.
    """
    files = list(cross_ref_dir.glob('*.json'))  # glob once, reuse for count
    verses_with_refs = 0
    total_refs = 0
    for path in files:
        data = _stats_load_json(path)
        verses_with_refs += len(data)
        total_refs += sum(len(verse_refs) for verse_refs in data.values())
    return len(files), verses_with_refs, total_refs


def _stats_json_count(directory):
    """Return the number of ``*.json`` files directly inside *directory*."""
    return sum(1 for _ in directory.glob('*.json'))


def _stats_entry_count(path):
    """Return ``len()`` of the JSON document at *path*, or 0 if it is missing."""
    return len(_stats_load_json(path)) if path.exists() else 0


@lru_cache(maxsize=1)
def _get_site_stats():
    """Generate comprehensive site statistics.

    Cached since data rarely changes. Because of the cache, every call
    returns the same dict object; callers should treat it as read-only.

    Returns:
        A dict with the sections ``bible``, ``commentary``,
        ``cross_references``, ``red_letter``, ``study_resources``,
        ``language_tools`` and ``data``. Ratio and percentage fields are
        rounded to one decimal and are 0 when their denominator is 0.
    """
    data_dir = FilePath(__file__).parent.parent / "data"

    # Bible statistics
    total_verses = bible.get_verse_count()
    total_books = len(bible.get_books())
    total_chapters = len(bible.get_chapters())
    total_words = sum(len(verse.text.split()) for verse in bible.iter_verses())

    # Verse commentary and cross-reference statistics
    (verse_commentary_files,
     total_commentary_verses,
     total_commentary_words) = _stats_commentary(data_dir / 'verse_commentary')
    (cross_reference_files,
     verses_with_cross_refs,
     total_cross_refs) = _stats_cross_references(data_dir / 'cross_references')

    # Red letter statistics
    red_letter_data = _stats_load_json(data_dir / 'red_letter_verses.json')
    total_red_letter_verses = len(red_letter_data['verses'])

    # Interlinear data size (0 when the archive is absent)
    interlinear_file = data_dir / 'interlinear.json.gz'
    interlinear_size_mb = (
        interlinear_file.stat().st_size / 1024 / 1024
        if interlinear_file.exists() else 0
    )

    # Total size of every regular file under the data directory
    total_data_size = sum(
        f.stat().st_size for f in data_dir.glob('**/*') if f.is_file()
    )
    total_data_size_mb = total_data_size / 1024 / 1024

    # Book abbreviations and biographies
    bible_metadata = _stats_load_json(data_dir / 'bible_metadata.json')
    total_abbreviations = len(bible_metadata.get('book_abbreviations', {}))
    bio_data = _stats_load_json(data_dir / 'biographies.json')
    total_biographies = len(bio_data.get('biographies', {}))

    # Strong's concordance: each lexicon is optional and counts as 0 if absent
    strongs_dir = data_dir / 'strongs'
    total_hebrew_entries = _stats_entry_count(strongs_dir / 'hebrew.json')
    total_greek_entries = _stats_entry_count(strongs_dir / 'greek.json')

    return {
        'bible': {
            'total_verses': total_verses,
            'total_books': total_books,
            'ot_books': len(OT_BOOKS),
            'nt_books': len(NT_BOOKS),
            'total_chapters': total_chapters,
            'total_words': total_words,
            'avg_words_per_verse': _stats_ratio(total_words, total_verses),
            'avg_verses_per_chapter': _stats_ratio(total_verses, total_chapters),
        },
        'commentary': {
            'files': verse_commentary_files,
            'verses_covered': total_commentary_verses,
            'total_words': total_commentary_words,
            'avg_words_per_verse': _stats_ratio(total_commentary_words, total_commentary_verses),
            'coverage_percent': _stats_ratio(total_commentary_verses, total_verses, 100),
        },
        'cross_references': {
            'files': cross_reference_files,
            'verses_with_refs': verses_with_cross_refs,
            'total_references': total_cross_refs,
            'avg_refs_per_verse': _stats_ratio(total_cross_refs, verses_with_cross_refs),
            'coverage_percent': _stats_ratio(verses_with_cross_refs, total_verses, 100),
        },
        'red_letter': {
            'total_verses': total_red_letter_verses,
            'percent_of_bible': _stats_ratio(total_red_letter_verses, total_verses, 100),
        },
        'study_resources': {
            'study_guides': _stats_json_count(data_dir / 'study_guides'),
            'topics': _stats_json_count(data_dir / 'topics'),
            'resources': _stats_json_count(data_dir / 'resources'),
            'stories': _stats_json_count(data_dir / 'stories'),
            'biographies': total_biographies,
            'reading_plans': _stats_json_count(data_dir / 'reading_plans'),
        },
        'language_tools': {
            'hebrew_entries': total_hebrew_entries,
            'greek_entries': total_greek_entries,
            'total_strongs': total_hebrew_entries + total_greek_entries,
            'interlinear_size_mb': round(interlinear_size_mb, 1),
        },
        'data': {
            'total_json_files': sum(1 for _ in data_dir.glob('**/*.json')),
            'total_size_mb': round(total_data_size_mb, 1),
            'book_abbreviations': total_abbreviations,
        }
    }


@router.get(
    "/stats",
    summary="Site statistics",
    description="Comprehensive statistics about KJV Study - Bible data, commentary, cross-references, and study resources.",
    responses={
        200: {
            "description": "Site statistics",
            "content": {
                "application/json": {
                    "example": {
                        "bible": {
                            "total_verses": 31102,
                            "total_books": 66,
                            "ot_books": 39,
                            "nt_books": 27,
                            "total_chapters": 1189,
                            "total_words": 789629,
                            "avg_words_per_verse": 25.4,
                            "avg_verses_per_chapter": 26.2
                        },
                        "commentary": {
                            "files": 66,
                            "verses_covered": 31102,
                            "total_words": 5000000,
                            "avg_words_per_verse": 160.7,
                            "coverage_percent": 100.0
                        }
                    }
                }
            }
        }
    }
)
async def api_stats():
    """Get comprehensive site statistics.

    Returns the cached result of ``_get_site_stats()``; only the first call
    performs the file scanning.
    """
    return _get_site_stats()
"family_tree" in data["endpoints"] assert "family_tree_stats" in data["endpoints"] assert "biography" in data["endpoints"] + + +class TestStatsEndpoint: + """Tests for the /api/stats endpoint""" + + def test_stats_endpoint_returns_200(self, client): + """Test /api/stats returns 200""" + response = client.get("/api/stats") + assert response.status_code == 200 + + def test_stats_has_bible_section(self, client): + """Test stats includes bible statistics""" + response = client.get("/api/stats") + data = response.json() + assert "bible" in data + assert data["bible"]["total_verses"] == 31102 + assert data["bible"]["total_books"] == 66 + assert data["bible"]["ot_books"] == 39 + assert data["bible"]["nt_books"] == 27 + + def test_stats_has_commentary_section(self, client): + """Test stats includes commentary statistics""" + response = client.get("/api/stats") + data = response.json() + assert "commentary" in data + assert "files" in data["commentary"] + assert "verses_covered" in data["commentary"] + assert "coverage_percent" in data["commentary"] + + def test_stats_has_cross_references_section(self, client): + """Test stats includes cross-references statistics""" + response = client.get("/api/stats") + data = response.json() + assert "cross_references" in data + assert "total_references" in data["cross_references"] + assert "coverage_percent" in data["cross_references"] + + def test_stats_has_red_letter_section(self, client): + """Test stats includes red letter statistics""" + response = client.get("/api/stats") + data = response.json() + assert "red_letter" in data + assert "total_verses" in data["red_letter"] + assert "percent_of_bible" in data["red_letter"] + + def test_stats_has_study_resources_section(self, client): + """Test stats includes study resources statistics""" + response = client.get("/api/stats") + data = response.json() + assert "study_resources" in data + assert "study_guides" in data["study_resources"] + assert "topics" in data["study_resources"] + 
assert "biographies" in data["study_resources"] + + def test_stats_has_language_tools_section(self, client): + """Test stats includes language tools statistics""" + response = client.get("/api/stats") + data = response.json() + assert "language_tools" in data + assert "hebrew_entries" in data["language_tools"] + assert "greek_entries" in data["language_tools"] + assert "total_strongs" in data["language_tools"] + + def test_stats_has_data_section(self, client): + """Test stats includes data statistics""" + response = client.get("/api/stats") + data = response.json() + assert "data" in data + assert "total_json_files" in data["data"] + assert "total_size_mb" in data["data"]