Add /api/stats endpoint for site statistics

- Add cached _get_site_stats() function with comprehensive metrics
- Create GET /api/stats endpoint with OpenAPI documentation
- Include bible, commentary, cross-references, red letter, study resources,
  language tools, and data statistics
- Add 8 tests for the new endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-01 23:23:04 -05:00
parent 16a684d714
commit f436560701
2 changed files with 248 additions and 1 deletions
+178 -1
View File
@@ -3,6 +3,7 @@ from typing import Optional, List
from pydantic import BaseModel, Field
import json
import random
import re
from pathlib import Path as FilePath
from functools import lru_cache
@@ -16,7 +17,7 @@ from ..cross_references import get_cross_references
from ..reading_plans import get_plan, get_plan_summary
from ..topics import get_all_topics, get_topic_with_text
from ..interlinear_loader import get_interlinear_data, has_interlinear_data
from ..utils.books import normalize_book_name, OT_BOOKS
from ..utils.books import normalize_book_name, OT_BOOKS, NT_BOOKS
from ..utils.search import perform_full_text_search
from ..utils.helpers import get_daily_verse, create_slug, CHAPTER_EXPLANATIONS
from ..utils.commentary_loader import load_commentary
@@ -2335,3 +2336,179 @@ async def api_search_strongs(
"total": len(results),
"results": results
}
# Matches HTML tags so commentary word counts cover visible text only.
_STATS_HTML_TAG_RE = re.compile(r'<[^>]+>')


def _stats_load_json(path):
    """Parse the JSON file at *path*, closing the file handle when done."""
    with open(path, encoding='utf-8') as handle:
        return json.load(handle)


def _stats_word_count(markup):
    """Count whitespace-separated words in *markup* after removing HTML tags."""
    # ``or ''`` tolerates a JSON null stored under the key.
    return len(_STATS_HTML_TAG_RE.sub('', markup or '').split())


def _stats_ratio(numerator, denominator, scale=1):
    """Return ``numerator / denominator * scale`` rounded to one decimal.

    Yields 0 when *denominator* is 0 so an empty data set cannot raise
    ZeroDivisionError.
    """
    if not denominator:
        return 0
    return round(numerator / denominator * scale, 1)


@lru_cache(maxsize=1)
def _get_site_stats():
    """Generate comprehensive site statistics.

    The result is cached with ``lru_cache`` because the underlying data
    rarely changes; every caller receives the same dict object, so it should
    be treated as read-only.

    Returns:
        A dict with the sections ``bible``, ``commentary``,
        ``cross_references``, ``red_letter``, ``study_resources``,
        ``language_tools`` and ``data``, each mapping metric names to numbers.

    Raises:
        OSError: If a required data file (``red_letter_verses.json``,
            ``bible_metadata.json``, ``biographies.json``, or the Strong's
            files when the ``strongs`` directory exists) cannot be opened.
        json.JSONDecodeError: If a data file does not contain valid JSON.
        KeyError: If ``red_letter_verses.json`` has no ``verses`` key.
    """
    data_dir = FilePath(__file__).parent.parent / "data"

    # Bible statistics
    total_verses = bible.get_verse_count()
    total_books = len(bible.get_books())
    total_chapters = len(bible.get_chapters())
    total_words = sum(len(verse.text.split()) for verse in bible.iter_verses())

    # Verse commentary statistics: one JSON file per glob match, each holding
    # chapter -> verse -> {analysis, historical} under the 'commentary' key.
    commentary_paths = list((data_dir / 'verse_commentary').glob('*.json'))
    total_commentary_verses = 0
    total_commentary_words = 0
    for path in commentary_paths:
        commentary = _stats_load_json(path).get('commentary', {})
        for chapter in commentary.values():
            for verse_data in chapter.values():
                total_commentary_verses += 1
                total_commentary_words += (
                    _stats_word_count(verse_data.get('analysis', ''))
                    + _stats_word_count(verse_data.get('historical', ''))
                )

    # Cross-reference statistics: each file maps a verse to its references.
    cross_reference_paths = list((data_dir / 'cross_references').glob('*.json'))
    total_cross_refs = 0
    verses_with_cross_refs = 0
    for path in cross_reference_paths:
        refs_by_verse = _stats_load_json(path)
        verses_with_cross_refs += len(refs_by_verse)
        total_cross_refs += sum(len(refs) for refs in refs_by_verse.values())

    # Red letter statistics
    red_letter_data = _stats_load_json(data_dir / 'red_letter_verses.json')
    total_red_letter_verses = len(red_letter_data['verses'])

    # Interlinear data size (0 when the archive is not present)
    interlinear_file = data_dir / 'interlinear.json.gz'
    interlinear_size_mb = (
        interlinear_file.stat().st_size / 1024 / 1024
        if interlinear_file.exists()
        else 0
    )

    # Whole data directory: JSON file count and total size on disk
    total_json_files = len(list(data_dir.glob('**/*.json')))
    total_data_size = sum(
        f.stat().st_size for f in data_dir.glob('**/*') if f.is_file()
    )
    total_data_size_mb = total_data_size / 1024 / 1024

    # Book abbreviations
    bible_metadata = _stats_load_json(data_dir / 'bible_metadata.json')
    total_abbreviations = len(bible_metadata.get('book_abbreviations', {}))

    # Biographies
    bio_data = _stats_load_json(data_dir / 'biographies.json')
    total_biographies = len(bio_data.get('biographies', {}))

    # Strong's concordance (both files are read only if the directory exists)
    strongs_dir = data_dir / 'strongs'
    if strongs_dir.exists():
        total_hebrew_entries = len(_stats_load_json(strongs_dir / 'hebrew.json'))
        total_greek_entries = len(_stats_load_json(strongs_dir / 'greek.json'))
    else:
        total_hebrew_entries = 0
        total_greek_entries = 0

    return {
        'bible': {
            'total_verses': total_verses,
            'total_books': total_books,
            'ot_books': len(OT_BOOKS),
            'nt_books': len(NT_BOOKS),
            'total_chapters': total_chapters,
            'total_words': total_words,
            'avg_words_per_verse': _stats_ratio(total_words, total_verses),
            'avg_verses_per_chapter': _stats_ratio(total_verses, total_chapters),
        },
        'commentary': {
            'files': len(commentary_paths),
            'verses_covered': total_commentary_verses,
            'total_words': total_commentary_words,
            'avg_words_per_verse': _stats_ratio(total_commentary_words, total_commentary_verses),
            'coverage_percent': _stats_ratio(total_commentary_verses, total_verses, 100),
        },
        'cross_references': {
            'files': len(cross_reference_paths),
            'verses_with_refs': verses_with_cross_refs,
            'total_references': total_cross_refs,
            'avg_refs_per_verse': _stats_ratio(total_cross_refs, verses_with_cross_refs),
            'coverage_percent': _stats_ratio(verses_with_cross_refs, total_verses, 100),
        },
        'red_letter': {
            'total_verses': total_red_letter_verses,
            'percent_of_bible': _stats_ratio(total_red_letter_verses, total_verses, 100),
        },
        'study_resources': {
            'study_guides': len(list((data_dir / 'study_guides').glob('*.json'))),
            'topics': len(list((data_dir / 'topics').glob('*.json'))),
            'resources': len(list((data_dir / 'resources').glob('*.json'))),
            'stories': len(list((data_dir / 'stories').glob('*.json'))),
            'biographies': total_biographies,
            'reading_plans': len(list((data_dir / 'reading_plans').glob('*.json'))),
        },
        'language_tools': {
            'hebrew_entries': total_hebrew_entries,
            'greek_entries': total_greek_entries,
            'total_strongs': total_hebrew_entries + total_greek_entries,
            'interlinear_size_mb': round(interlinear_size_mb, 1),
        },
        'data': {
            'total_json_files': total_json_files,
            'total_size_mb': round(total_data_size_mb, 1),
            'book_abbreviations': total_abbreviations,
        },
    }
# Sample payload attached to the 200 response of GET /stats. It illustrates
# only the "bible" and "commentary" sections of the real response.
_STATS_RESPONSE_EXAMPLE = {
    "bible": {
        "total_verses": 31102,
        "total_books": 66,
        "ot_books": 39,
        "nt_books": 27,
        "total_chapters": 1189,
        "total_words": 789629,
        "avg_words_per_verse": 25.4,
        "avg_verses_per_chapter": 26.2,
    },
    "commentary": {
        "files": 66,
        "verses_covered": 31102,
        "total_words": 5000000,
        "avg_words_per_verse": 160.7,
        "coverage_percent": 100.0,
    },
}


@router.get(
    "/stats",
    summary="Site statistics",
    description="Comprehensive statistics about KJV Study - Bible data, commentary, cross-references, and study resources.",
    responses={
        200: {
            "description": "Site statistics",
            "content": {
                "application/json": {"example": _STATS_RESPONSE_EXAMPLE},
            },
        },
    },
)
async def api_stats():
    """Return the site statistics dict produced by ``_get_site_stats()``."""
    stats = _get_site_stats()
    return stats
+70
View File
@@ -994,3 +994,73 @@ class TestFamilyTreeEndpoints:
assert "family_tree" in data["endpoints"]
assert "family_tree_stats" in data["endpoints"]
assert "biography" in data["endpoints"]
class TestStatsEndpoint:
    """Checks the status code and response sections of GET /api/stats."""

    def test_stats_endpoint_returns_200(self, client):
        """The endpoint answers with HTTP 200."""
        assert client.get("/api/stats").status_code == 200

    def test_stats_has_bible_section(self, client):
        """The bible section reports the expected verse and book counts."""
        payload = client.get("/api/stats").json()
        assert "bible" in payload
        expected_counts = {
            "total_verses": 31102,
            "total_books": 66,
            "ot_books": 39,
            "nt_books": 27,
        }
        for field, count in expected_counts.items():
            assert payload["bible"][field] == count

    def test_stats_has_commentary_section(self, client):
        """The commentary section exposes file, verse and coverage metrics."""
        payload = client.get("/api/stats").json()
        assert "commentary" in payload
        for field in ("files", "verses_covered", "coverage_percent"):
            assert field in payload["commentary"]

    def test_stats_has_cross_references_section(self, client):
        """The cross_references section exposes totals and coverage."""
        payload = client.get("/api/stats").json()
        assert "cross_references" in payload
        for field in ("total_references", "coverage_percent"):
            assert field in payload["cross_references"]

    def test_stats_has_red_letter_section(self, client):
        """The red_letter section exposes verse count and Bible percentage."""
        payload = client.get("/api/stats").json()
        assert "red_letter" in payload
        for field in ("total_verses", "percent_of_bible"):
            assert field in payload["red_letter"]

    def test_stats_has_study_resources_section(self, client):
        """The study_resources section lists guides, topics and biographies."""
        payload = client.get("/api/stats").json()
        assert "study_resources" in payload
        for field in ("study_guides", "topics", "biographies"):
            assert field in payload["study_resources"]

    def test_stats_has_language_tools_section(self, client):
        """The language_tools section exposes the Strong's entry counts."""
        payload = client.get("/api/stats").json()
        assert "language_tools" in payload
        for field in ("hebrew_entries", "greek_entries", "total_strongs"):
            assert field in payload["language_tools"]

    def test_stats_has_data_section(self, client):
        """The data section exposes JSON file count and total size."""
        payload = client.get("/api/stats").json()
        assert "data" in payload
        for field in ("total_json_files", "total_size_mb"):
            assert field in payload["data"]