#!/usr/bin/env python3
"""
Detect shallow/generic commentary entries that need improvement.

Flags entries that have:
- Generic templated questions ("How does X:Y deepen my understanding...")
- Boilerplate historical sections
- Missing Greek/Hebrew terms
- Very short analysis sections
- Templated analysis patterns
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path

# Patterns that indicate generic/shallow commentary
GENERIC_QUESTION_PATTERNS = [
    r"How does .+ deepen my understanding of the gospel",
    r"What specific action or attitude change does this verse call me to make",
    r"How can I more sacrificially love the people",
    r"How does this passage point to Christ and His redemptive work\?$",
]

GENERIC_HISTORICAL_PATTERNS = [
    r"Historical Setting: .+ was written around \d+ CE from",
    r"Occasion: Preparing for visit to Rome",
    r"The Greco-Roman world valued rhetoric, philosophy, and social status",
    r"First-century believers lived in a pluralistic, pagan society",
    r"Paul's instructions addressed both timeless theological truths and specific cultural situations",
]

GENERIC_ANALYSIS_PATTERNS = [
    r"This verse contributes to .+'s overall purpose in",
    r"The key themes of justification by faith, law and grace, Israel and the church are evident",
    r"Paul carefully explains the law's role: revealing sin and pointing to Christ",
    r"The Holy Spirit empowers believers for holiness and service",
    r"Christ is the center of Paul's theology and message",
    r"Paul's discussion of Israel's role in God's redemptive plan\.$",
    r"Paul's teaching on sanctification and life in the Spirit\.$",
]

# Good indicators (if none of a language's patterns match, flag the entry).
#
# NOTE(review): the first pattern of each list used to be a bare `[^<]+`,
# which matches any non-empty text and therefore made the "missing
# Greek/Hebrew" checks impossible to trigger. It looks like an emphasis-tag
# pattern whose tags were lost; it is restored here as <em>/<i> -- confirm
# the markup the commentary data actually uses for transliterated terms.
_EMPHASISED_TERM = r'<(?:em|i)>[^<]+</(?:em|i)>'

GOOD_INDICATORS = {
    # Explicit mention of the language, or any Greek letter (basic block
    # plus the polytonic "Greek Extended" block).
    'greek': [_EMPHASISED_TERM, r'[Gg]reek', r'[α-ωΑ-Ω\u1F00-\u1FFF]'],
    'hebrew': [_EMPHASISED_TERM, r'[Hh]ebrew', r'[\u0590-\u05FF]'],
}

# Analyses shorter than this many characters are reported as too short.
MIN_ANALYSIS_LENGTH = 400

# A verse with at least this many issues is reported as "severe".
SEVERE_THRESHOLD = 3

# Maximum number of severe verses printed in the report.
MAX_SHOWN = 50

# Normalised (lower-case, underscore-separated) New Testament book names.
# Every other book is treated as Old Testament.
NT_BOOKS = frozenset({
    'matthew', 'mark', 'luke', 'john', 'acts', 'romans',
    '1_corinthians', '2_corinthians', 'galatians', 'ephesians',
    'philippians', 'colossians', '1_thessalonians', '2_thessalonians',
    '1_timothy', '2_timothy', 'titus', 'philemon', 'hebrews', 'james',
    '1_peter', '2_peter', '1_john', '2_john', '3_john', 'jude',
    'revelation',
})


def _as_text(value) -> str:
    """Return *value* if it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ''


def _matches_any(patterns, text: str) -> bool:
    """Return True if any regex in *patterns* is found in *text*."""
    return any(re.search(pattern, text) for pattern in patterns)


def _iter_entries(filepath: Path):
    """Yield ``(book, chapter, verse, entry)`` for each checkable entry.

    The file is read as UTF-8 JSON. The book name comes from the top-level
    ``book`` key, falling back to a title-cased form of the file stem.
    Chapters that are not dicts, and entries that are not dicts or have no
    ``analysis`` key, are skipped. Nothing is yielded when the top-level
    value or its ``commentary`` value is not a dict.
    """
    # Explicit encoding: the locale default is not UTF-8 on every platform.
    with open(filepath, encoding='utf-8') as f:
        data = json.load(f)
    if not isinstance(data, dict):
        return

    book = data.get('book', filepath.stem.replace('_', ' ').title())
    commentary = data.get('commentary', {})
    if not isinstance(commentary, dict):
        return

    for chapter, verses in commentary.items():
        if not isinstance(verses, dict):
            continue
        for verse, entry in verses.items():
            if not isinstance(entry, dict) or 'analysis' not in entry:
                continue
            yield book, chapter, verse, entry


def check_entry(book: str, chapter: str, verse: str, entry: dict) -> list[str]:
    """Check a single commentary entry for quality issues.

    Args:
        book: Book name, e.g. ``"Romans"`` or ``"1 John"``.
        chapter: Chapter identifier used in the reference.
        verse: Verse identifier used in the reference.
        entry: Entry dict; the ``analysis``, ``historical`` and ``questions``
            keys are read. Missing, ``None`` or non-string text values are
            treated as empty, and non-string questions are ignored.

    Returns:
        A list of messages, each prefixed with ``"<book> <chapter>:<verse>: "``.
        The list is empty when no issue was found.
    """
    issues = []
    ref = f"{book} {chapter}:{verse}"

    analysis = _as_text(entry.get('analysis'))
    historical = _as_text(entry.get('historical'))
    questions = entry.get('questions') or []
    if isinstance(questions, str):
        # A lone string is one question, not a sequence of characters.
        questions = [questions]

    # Check for generic questions. Each templated question adds its own
    # issue, so several of them raise the verse's issue count.
    for question in questions:
        if isinstance(question, str) and _matches_any(GENERIC_QUESTION_PATTERNS, question):
            issues.append(f"{ref}: Generic question pattern detected")

    # Check for generic historical content (reported at most once)
    if _matches_any(GENERIC_HISTORICAL_PATTERNS, historical):
        issues.append(f"{ref}: Generic historical boilerplate detected")

    # Check for generic analysis patterns (reported at most once)
    if _matches_any(GENERIC_ANALYSIS_PATTERNS, analysis):
        issues.append(f"{ref}: Generic analysis pattern detected")

    # Check analysis length (too short is suspicious)
    if len(analysis) < MIN_ANALYSIS_LENGTH:
        issues.append(f"{ref}: Analysis too short ({len(analysis)} chars)")

    # Check for presence of original language terms: Greek for NT books,
    # Hebrew for everything else.
    book_key = book.lower().replace(' ', '_')
    if book_key in NT_BOOKS:
        if not _matches_any(GOOD_INDICATORS['greek'], analysis):
            issues.append(f"{ref}: Missing Greek terms (NT book)")
    elif not _matches_any(GOOD_INDICATORS['hebrew'], analysis):
        issues.append(f"{ref}: Missing Hebrew terms (OT book)")

    return issues


def scan_book(filepath: Path) -> list[str]:
    """Scan a single book's commentary file.

    Args:
        filepath: Path to a UTF-8 JSON commentary file.

    Returns:
        The concatenated ``check_entry`` messages for every entry in the file.
    """
    all_issues = []
    for book, chapter, verse, entry in _iter_entries(filepath):
        all_issues.extend(check_entry(book, chapter, verse, entry))
    return all_issues


def main():
    """Scan all commentary files and report issues.

    Reads ``*.json`` files from ``kjvstudy_org/data/verse_commentary``
    (relative to the current directory), prints a summary report, and
    optionally writes the severe verse references to the ``--export`` file.
    Exits with an error when no file was scanned, so that an empty or
    missing directory is not reported as "no issues".
    """
    parser = argparse.ArgumentParser(description='Detect shallow commentary')
    parser.add_argument('--worst', action='store_true',
                        help='Show only worst offenders (3+ issues)')
    parser.add_argument('--book', type=str, help='Check specific book only')
    parser.add_argument('--export', type=str, help='Export problem verses to file')
    args = parser.parse_args()

    commentary_dir = Path('kjvstudy_org/data/verse_commentary')
    book_filter = args.book.lower() if args.book else None

    # Track issues per verse
    verse_issues = {}  # ref -> list of issues (without the "ref: " prefix)
    files_scanned = 0

    for filepath in sorted(commentary_dir.glob('*.json')):
        # --book is a case-insensitive substring match on the file stem.
        if book_filter and book_filter not in filepath.stem.lower():
            continue
        files_scanned += 1

        for book, chapter, verse, entry in _iter_entries(filepath):
            issues = check_entry(book, chapter, verse, entry)
            if issues:
                ref = f"{book} {chapter}:{verse}"
                prefix = f"{ref}: "
                verse_issues[ref] = [issue.removeprefix(prefix) for issue in issues]

    if files_scanned == 0:
        target = f" matching --book {args.book!r}" if book_filter else ""
        sys.exit(f"No commentary files found in {commentary_dir}{target}")

    # Summary
    print(f"\n{'='*60}")
    print("SHALLOW COMMENTARY DETECTION REPORT")
    print(f"{'='*60}\n")

    if not verse_issues:
        print("✅ No issues detected! All commentary appears to be high quality.")
        return

    # Filter to worst offenders if requested
    if args.worst:
        verse_issues = {ref: found for ref, found in verse_issues.items()
                        if len(found) >= SEVERE_THRESHOLD}
        print(f"Showing only verses with {SEVERE_THRESHOLD}+ issues:\n")

    # Sort by number of issues (worst first)
    sorted_verses = sorted(verse_issues.items(), key=lambda item: -len(item[1]))
    severe_verses = [(ref, found) for ref, found in sorted_verses
                     if len(found) >= SEVERE_THRESHOLD]

    # Count by severity
    severe = len(severe_verses)
    moderate = sum(1 for found in verse_issues.values() if len(found) == 2)
    minor = sum(1 for found in verse_issues.values() if len(found) == 1)

    print("📊 Issue Summary:")
    print(f" 🔴 Severe ({SEVERE_THRESHOLD}+ issues): {severe} verses")
    print(f" 🟡 Moderate (2 issues): {moderate} verses")
    print(f" 🟢 Minor (1 issue): {minor} verses")
    print(f" Total: {len(verse_issues)} verses with issues\n")

    # Show worst offenders
    print("\n🔴 WORST OFFENDERS (need immediate attention):")
    print("-" * 60)

    for ref, found in severe_verses[:MAX_SHOWN]:
        print(f"\n{ref} ({len(found)} issues):")
        for issue in found:
            print(f" • {issue}")

    remaining = severe - MAX_SHOWN
    if remaining > 0:
        print(f"\n ... and {remaining} more severe cases")

    # Export if requested
    if args.export:
        with open(args.export, 'w', encoding='utf-8') as f:
            f.writelines(f"{ref}\n" for ref, _ in severe_verses)
        print(f"\n📁 Exported {severe} severe cases to {args.export}")

    print(f"\n{'='*60}")


if __name__ == '__main__':
    main()