Add /api/stats endpoint for site statistics

- Add cached _get_site_stats() function with comprehensive metrics
- Create GET /api/stats endpoint with OpenAPI documentation
- Include bible, commentary, cross-references, red letter, study resources, language tools, and data statistics
- Add 8 tests for the new endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-05 23:00:16 +00:00 · 2025-12-01 23:23:04 -05:00
parent 16a684d714
commit f436560701
2 changed files with 248 additions and 1 deletions
@@ -3,6 +3,7 @@ from typing import Optional, List
 from pydantic import BaseModel, Field
 import json
 import random
+import re
 from pathlib import Path as FilePath
 from functools import lru_cache

@@ -16,7 +17,7 @@ from ..cross_references import get_cross_references
 from ..reading_plans import get_plan, get_plan_summary
 from ..topics import get_all_topics, get_topic_with_text
 from ..interlinear_loader import get_interlinear_data, has_interlinear_data
-from ..utils.books import normalize_book_name, OT_BOOKS
+from ..utils.books import normalize_book_name, OT_BOOKS, NT_BOOKS
 from ..utils.search import perform_full_text_search
 from ..utils.helpers import get_daily_verse, create_slug, CHAPTER_EXPLANATIONS
 from ..utils.commentary_loader import load_commentary
@@ -2335,3 +2336,179 @@ async def api_search_strongs(
        "total": len(results),
        "results": results
    }
+
+
+@lru_cache(maxsize=1)
+def _get_site_stats():
+    """Build the site-statistics payload returned by the stats endpoint.
+
+    The result is memoised with ``lru_cache(maxsize=1)``, so the data
+    directory is scanned only on the first call in a process. Every caller
+    receives the same dict object and should treat it as read-only.
+
+    Returns:
+        dict: Sections ``bible``, ``commentary``, ``cross_references``,
+        ``red_letter``, ``study_resources``, ``language_tools`` and ``data``,
+        each mapping metric names to numbers. Ratios whose denominator is
+        zero are reported as ``0`` instead of raising.
+
+    Raises:
+        OSError: If ``red_letter_verses.json``, ``bible_metadata.json`` or
+            ``biographies.json`` cannot be opened, or if the ``strongs``
+            directory exists but ``hebrew.json`` / ``greek.json`` cannot.
+        json.JSONDecodeError: If a data file read here is not valid JSON.
+        KeyError: If ``red_letter_verses.json`` has no ``verses`` key.
+    """
+    data_dir = FilePath(__file__).parent.parent / "data"
+    # Compiled once here rather than looked up for every commentary verse.
+    tag_pattern = re.compile(r'<[^>]+>')
+
+    def load_json(path):
+        """Parse one JSON file, closing the handle as soon as it is read."""
+        # Explicit UTF-8 so parsing does not depend on the locale's default.
+        with open(path, encoding='utf-8') as handle:
+            return json.load(handle)
+
+    def json_files(subdir):
+        """List the ``*.json`` files directly inside ``data_dir / subdir``."""
+        return list((data_dir / subdir).glob('*.json'))
+
+    def word_count(html):
+        """Count whitespace-separated words after removing HTML tags."""
+        # ``or ''`` tolerates a key that is present but null.
+        return len(tag_pattern.sub('', html or '').split())
+
+    def average(total, count):
+        """Return ``total / count`` to one decimal, or 0 for an empty count."""
+        return round(total / count, 1) if count > 0 else 0
+
+    def percent(part, whole):
+        """Return ``part`` as a percentage of ``whole``, or 0 if ``whole`` is 0."""
+        return round((part / whole) * 100, 1) if whole > 0 else 0
+
+    # Bible statistics
+    total_verses = bible.get_verse_count()
+    total_books = len(bible.get_books())
+    total_chapters = len(bible.get_chapters())
+    total_words = sum(len(verse.text.split()) for verse in bible.iter_verses())
+    ot_books = len(OT_BOOKS)
+    nt_books = len(NT_BOOKS)
+
+    # Data file statistics (recursive, unlike the per-directory counts below)
+    total_json_files = sum(1 for _ in data_dir.glob('**/*.json'))
+
+    # Verse commentary statistics: the directory is listed once and the same
+    # listing is used for both the file count and the per-verse totals.
+    commentary_files = json_files('verse_commentary')
+    total_commentary_verses = 0
+    total_commentary_words = 0
+    for path in commentary_files:
+        commentary = load_json(path).get('commentary', {})
+        for chapter in commentary.values():
+            for verse_data in chapter.values():
+                total_commentary_verses += 1
+                total_commentary_words += word_count(verse_data.get('analysis', ''))
+                total_commentary_words += word_count(verse_data.get('historical', ''))
+
+    # Cross-reference statistics: each top-level entry counts as one verse
+    # with references; the length of its value is the number of references.
+    cross_ref_files = json_files('cross_references')
+    total_cross_refs = 0
+    verses_with_cross_refs = 0
+    for path in cross_ref_files:
+        refs_by_verse = load_json(path)
+        verses_with_cross_refs += len(refs_by_verse)
+        total_cross_refs += sum(len(refs) for refs in refs_by_verse.values())
+
+    # Red letter statistics
+    red_letter_data = load_json(data_dir / 'red_letter_verses.json')
+    total_red_letter_verses = len(red_letter_data['verses'])
+
+    # Study resources
+    study_guide_files = len(json_files('study_guides'))
+    topic_files = len(json_files('topics'))
+    resource_files = len(json_files('resources'))
+    story_files = len(json_files('stories'))
+
+    # Interlinear data size (0 when the archive is absent)
+    interlinear_file = data_dir / 'interlinear.json.gz'
+    interlinear_size_mb = interlinear_file.stat().st_size / 1024 / 1024 if interlinear_file.exists() else 0
+
+    # Total size of every regular file under the data directory
+    total_data_size = sum(f.stat().st_size for f in data_dir.glob('**/*') if f.is_file())
+    total_data_size_mb = total_data_size / 1024 / 1024
+
+    # Book abbreviations
+    bible_metadata = load_json(data_dir / 'bible_metadata.json')
+    total_abbreviations = len(bible_metadata.get('book_abbreviations', {}))
+
+    # Biographies
+    bio_data = load_json(data_dir / 'biographies.json')
+    total_biographies = len(bio_data.get('biographies', {}))
+
+    # Reading plans
+    reading_plan_files = len(json_files('reading_plans'))
+
+    # Strong's concordance (both counts are 0 when the directory is absent)
+    strongs_dir = data_dir / 'strongs'
+    if strongs_dir.exists():
+        total_hebrew_entries = len(load_json(strongs_dir / 'hebrew.json'))
+        total_greek_entries = len(load_json(strongs_dir / 'greek.json'))
+    else:
+        total_hebrew_entries = 0
+        total_greek_entries = 0
+
+    return {
+        'bible': {
+            'total_verses': total_verses,
+            'total_books': total_books,
+            'ot_books': ot_books,
+            'nt_books': nt_books,
+            'total_chapters': total_chapters,
+            'total_words': total_words,
+            'avg_words_per_verse': average(total_words, total_verses),
+            'avg_verses_per_chapter': average(total_verses, total_chapters),
+        },
+        'commentary': {
+            'files': len(commentary_files),
+            'verses_covered': total_commentary_verses,
+            'total_words': total_commentary_words,
+            'avg_words_per_verse': average(total_commentary_words, total_commentary_verses),
+            'coverage_percent': percent(total_commentary_verses, total_verses),
+        },
+        'cross_references': {
+            'files': len(cross_ref_files),
+            'verses_with_refs': verses_with_cross_refs,
+            'total_references': total_cross_refs,
+            'avg_refs_per_verse': average(total_cross_refs, verses_with_cross_refs),
+            'coverage_percent': percent(verses_with_cross_refs, total_verses),
+        },
+        'red_letter': {
+            'total_verses': total_red_letter_verses,
+            'percent_of_bible': percent(total_red_letter_verses, total_verses),
+        },
+        'study_resources': {
+            'study_guides': study_guide_files,
+            'topics': topic_files,
+            'resources': resource_files,
+            'stories': story_files,
+            'biographies': total_biographies,
+            'reading_plans': reading_plan_files,
+        },
+        'language_tools': {
+            'hebrew_entries': total_hebrew_entries,
+            'greek_entries': total_greek_entries,
+            'total_strongs': total_hebrew_entries + total_greek_entries,
+            'interlinear_size_mb': round(interlinear_size_mb, 1),
+        },
+        'data': {
+            'total_json_files': total_json_files,
+            'total_size_mb': round(total_data_size_mb, 1),
+            'book_abbreviations': total_abbreviations,
+        }
+    }
+
+
+# Sample payload attached to the 200 response of GET /stats in the OpenAPI schema.
+_API_STATS_EXAMPLE = {
+    "bible": {
+        "total_verses": 31102,
+        "total_books": 66,
+        "ot_books": 39,
+        "nt_books": 27,
+        "total_chapters": 1189,
+        "total_words": 789629,
+        "avg_words_per_verse": 25.4,
+        "avg_verses_per_chapter": 26.2,
+    },
+    "commentary": {
+        "files": 66,
+        "verses_covered": 31102,
+        "total_words": 5000000,
+        "avg_words_per_verse": 160.7,
+        "coverage_percent": 100.0,
+    },
+}
+
+
+@router.get(
+    "/stats",
+    summary="Site statistics",
+    description="Comprehensive statistics about KJV Study - Bible data, commentary, cross-references, and study resources.",
+    responses={
+        200: {
+            "description": "Site statistics",
+            "content": {"application/json": {"example": _API_STATS_EXAMPLE}},
+        },
+    },
+)
+async def api_stats():
+    """Return the memoised site-statistics dictionary as the response body."""
+    stats = _get_site_stats()
+    return stats
@@ -994,3 +994,73 @@ class TestFamilyTreeEndpoints:
        assert "family_tree" in data["endpoints"]
        assert "family_tree_stats" in data["endpoints"]
        assert "biography" in data["endpoints"]
+
+
+class TestStatsEndpoint:
+    """Checks the status code and payload shape of GET /api/stats."""
+
+    @staticmethod
+    def _fetch_stats(client):
+        """Request /api/stats and return the decoded JSON body."""
+        return client.get("/api/stats").json()
+
+    @staticmethod
+    def _assert_section_has(payload, section, keys):
+        """Assert ``section`` exists in ``payload`` and contains every key."""
+        assert section in payload
+        for key in keys:
+            assert key in payload[section]
+
+    def test_stats_endpoint_returns_200(self, client):
+        """The endpoint answers with HTTP 200."""
+        assert client.get("/api/stats").status_code == 200
+
+    def test_stats_has_bible_section(self, client):
+        """The bible section reports the expected verse and book counts."""
+        payload = self._fetch_stats(client)
+        assert "bible" in payload
+        expected = {
+            "total_verses": 31102,
+            "total_books": 66,
+            "ot_books": 39,
+            "nt_books": 27,
+        }
+        for key, value in expected.items():
+            assert payload["bible"][key] == value
+
+    def test_stats_has_commentary_section(self, client):
+        """The commentary section exposes file, verse and coverage figures."""
+        self._assert_section_has(
+            self._fetch_stats(client),
+            "commentary",
+            ("files", "verses_covered", "coverage_percent"),
+        )
+
+    def test_stats_has_cross_references_section(self, client):
+        """The cross_references section exposes totals and coverage."""
+        self._assert_section_has(
+            self._fetch_stats(client),
+            "cross_references",
+            ("total_references", "coverage_percent"),
+        )
+
+    def test_stats_has_red_letter_section(self, client):
+        """The red_letter section exposes its verse count and percentage."""
+        self._assert_section_has(
+            self._fetch_stats(client),
+            "red_letter",
+            ("total_verses", "percent_of_bible"),
+        )
+
+    def test_stats_has_study_resources_section(self, client):
+        """The study_resources section lists guides, topics and biographies."""
+        self._assert_section_has(
+            self._fetch_stats(client),
+            "study_resources",
+            ("study_guides", "topics", "biographies"),
+        )
+
+    def test_stats_has_language_tools_section(self, client):
+        """The language_tools section exposes the Strong's entry counts."""
+        self._assert_section_has(
+            self._fetch_stats(client),
+            "language_tools",
+            ("hebrew_entries", "greek_entries", "total_strongs"),
+        )
+
+    def test_stats_has_data_section(self, client):
+        """The data section exposes the JSON file count and total size."""
+        self._assert_section_has(
+            self._fetch_stats(client),
+            "data",
+            ("total_json_files", "total_size_mb"),
+        )