diff --git a/scripts/detect_shallow_commentary.py b/scripts/detect_shallow_commentary.py
new file mode 100644
index 0000000..6667762
--- /dev/null
+++ b/scripts/detect_shallow_commentary.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python3
+"""
+Detect shallow/generic commentary entries that need improvement.
+
+Flags entries that have:
+- Generic templated questions ("How does X:Y deepen my understanding...")
+- Boilerplate historical sections
+- Missing Greek/Hebrew terms
+- Very short analysis sections
+- Templated analysis patterns
+"""
+
+import json
+import os
+import re
+from pathlib import Path
+
# Regexes that mark commentary text as generic/templated. Each list is applied
# with ``re.search`` to one field of a commentary entry.

# Applied to each study question of an entry.
GENERIC_QUESTION_PATTERNS = [
    r"How does .+ deepen my understanding of the gospel",
    r"What specific action or attitude change does this verse call me to make",
    r"How can I more sacrificially love the people",
    r"How does this passage point to Christ and His redemptive work\?$",
]

# Applied to the "historical" field of an entry.
GENERIC_HISTORICAL_PATTERNS = [
    r"Historical Setting: .+ was written around \d+ CE from",
    r"Occasion: Preparing for visit to Rome",
    r"The Greco-Roman world valued rhetoric, philosophy, and social status",
    r"First-century believers lived in a pluralistic, pagan society",
    r"Paul's instructions addressed both timeless theological truths and specific cultural situations",
]

# Applied to the "analysis" field of an entry.
GENERIC_ANALYSIS_PATTERNS = [
    r"This verse contributes to .+'s overall purpose in",
    r"The key themes of justification by faith, law and grace, Israel and the church are evident",
    r"Paul carefully explains the law's role: revealing sin and pointing to Christ",
    r"The Holy Spirit empowers believers for holiness and service",
    r"Christ is the center of Paul's theology and message",
    r"Paul's discussion of Israel's role in God's redemptive plan\.$",
    r"Paul's teaching on sanctification and life in the Spirit\.$",
]

# Good indicators (if none of a language's patterns match, flag the entry).
#
# The markup pattern used to be a bare ``[^<]+``, which matches any text that
# contains a character other than "<" and therefore made the "missing
# Greek/Hebrew" checks impossible to trigger.
# NOTE(review): assumes transliterated terms are wrapped in <em>/<i> markup --
# confirm against the commentary data and adjust the tag names if needed.
_EMPHASIZED_TERM = r'<(em|i)>[^<]+</\1>'

GOOD_INDICATORS = {
    'greek': [
        _EMPHASIZED_TERM,
        r'[Gg]reek',
        # Greek and Coptic block plus Greek Extended, so accented and
        # polytonic letters count as well as the plain alphabet.
        r'[\u0370-\u03FF\u1F00-\u1FFF]',
    ],
    'hebrew': [
        _EMPHASIZED_TERM,
        r'[Hh]ebrew',
        r'[\u0590-\u05FF]',
    ],
}
+
+
# New Testament books in the normalized form produced by
# ``book.lower().replace(' ', '_')``. Any book not listed here is treated as
# Old Testament. Built once at import time instead of on every call.
_NT_BOOKS = frozenset({
    'matthew', 'mark', 'luke', 'john', 'acts', 'romans',
    '1_corinthians', '2_corinthians', 'galatians', 'ephesians',
    'philippians', 'colossians', '1_thessalonians', '2_thessalonians',
    '1_timothy', '2_timothy', 'titus', 'philemon', 'hebrews',
    'james', '1_peter', '2_peter', '1_john', '2_john', '3_john',
    'jude', 'revelation',
})


def _as_text(value) -> str:
    """Return *value* if it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ''


def check_entry(book: str, chapter: str, verse: str, entry: dict,
                min_analysis_length: int = 400) -> list[str]:
    """Check a single commentary entry for quality issues.

    Args:
        book: Book name; lowercased with spaces replaced by underscores to
            decide whether the NT (Greek) or OT (Hebrew) check applies.
        chapter: Chapter identifier, used only to build the reference.
        verse: Verse identifier, used only to build the reference.
        entry: Commentary entry; the ``analysis``, ``historical`` and
            ``questions`` keys are read. Missing, ``None`` or non-string
            values are treated as empty rather than raising.
        min_analysis_length: Analyses shorter than this many characters are
            flagged as too short.

    Returns:
        A list of messages of the form ``"<book> <chapter>:<verse>: <issue>"``;
        empty when nothing was flagged.
    """
    analysis = _as_text(entry.get('analysis'))
    historical = _as_text(entry.get('historical'))

    questions = entry.get('questions') or []
    if isinstance(questions, str):
        # A lone question string would otherwise be iterated per character.
        questions = [questions]
    elif not isinstance(questions, (list, tuple)):
        questions = []

    ref = f"{book} {chapter}:{verse}"
    issues = []

    # One issue is recorded per matching question, so an entry with several
    # templated questions contributes several issues.
    # NOTE(review): confirm this weighting is intended; the historical and
    # analysis checks below report at most once per entry.
    for question in questions:
        if not isinstance(question, str):
            continue
        if any(re.search(pattern, question) for pattern in GENERIC_QUESTION_PATTERNS):
            issues.append(f"{ref}: Generic question pattern detected")

    # Check for generic historical content (reported once per entry)
    if any(re.search(pattern, historical) for pattern in GENERIC_HISTORICAL_PATTERNS):
        issues.append(f"{ref}: Generic historical boilerplate detected")

    # Check for generic analysis patterns (reported once per entry)
    if any(re.search(pattern, analysis) for pattern in GENERIC_ANALYSIS_PATTERNS):
        issues.append(f"{ref}: Generic analysis pattern detected")

    # Too-short analysis is suspicious.
    if len(analysis) < min_analysis_length:
        issues.append(f"{ref}: Analysis too short ({len(analysis)} chars)")

    # Original-language check: Greek for NT books, Hebrew for everything else.
    book_lower = book.lower().replace(' ', '_')
    if book_lower in _NT_BOOKS:
        if not any(re.search(p, analysis) for p in GOOD_INDICATORS['greek']):
            issues.append(f"{ref}: Missing Greek terms (NT book)")
    else:
        if not any(re.search(p, analysis) for p in GOOD_INDICATORS['hebrew']):
            issues.append(f"{ref}: Missing Hebrew terms (OT book)")

    return issues
+
+
def scan_book(filepath: Path) -> list[str]:
    """Scan a single book's commentary file.

    The file is parsed as JSON. The ``book`` key supplies the book name,
    falling back to a title-cased form of the file stem, and ``commentary``
    is walked as ``{chapter: {verse: entry}}``. Chapters that are not dicts,
    and entries that are not dicts or have no ``analysis`` key, are skipped.

    Args:
        filepath: Path to the book's JSON commentary file.

    Returns:
        The issue messages reported by ``check_entry`` for every checked
        entry, in file order. Empty when the JSON top level or the
        ``commentary`` value is not an object.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Read as UTF-8 explicitly so non-ASCII text does not depend on the
    # platform's default encoding.
    with open(filepath, encoding='utf-8') as f:
        data = json.load(f)

    if not isinstance(data, dict):
        return []

    book = data.get('book', filepath.stem.replace('_', ' ').title())
    commentary = data.get('commentary', {})
    if not isinstance(commentary, dict):
        return []

    all_issues = []
    for chapter, verses in commentary.items():
        if not isinstance(verses, dict):
            continue
        for verse, entry in verses.items():
            if not isinstance(entry, dict) or 'analysis' not in entry:
                continue
            all_issues.extend(check_entry(book, chapter, verse, entry))

    return all_issues
+
+
def main() -> None:
    """Scan all commentary files and report issues.

    Command-line options:
        --worst   Restrict the report to verses with 3+ issues.
        --book    Only scan files whose stem contains this text
                  (case-insensitive substring match).
        --export  Write the references of verses with 3+ issues to this
                  file, one per line.

    Exits with an error message if the commentary directory does not exist
    or no file was scanned, so that "nothing scanned" is not reported as
    "no issues detected".
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description='Detect shallow commentary')
    parser.add_argument('--worst', action='store_true', help='Show only worst offenders (3+ issues)')
    parser.add_argument('--book', type=str, help='Check specific book only')
    parser.add_argument('--export', type=str, help='Export problem verses to file')
    args = parser.parse_args()

    # Relative path: resolved against the current working directory.
    commentary_dir = Path('kjvstudy_org/data/verse_commentary')
    if not commentary_dir.is_dir():
        sys.exit(f"error: commentary directory not found: {commentary_dir}")

    severe_threshold = 3   # issues per verse that count as "severe"
    max_listed = 50        # cap on severe verses printed in detail

    # Track issues per verse: ref -> list of issue messages (ref prefix removed)
    verse_issues = {}
    scanned = 0

    for filepath in sorted(commentary_dir.glob('*.json')):
        if args.book and args.book.lower() not in filepath.stem.lower():
            continue
        scanned += 1
        # scan_book owns the file traversal; each message it returns has the
        # form "<ref>: <issue>", so split it back into its two parts.
        for issue in scan_book(filepath):
            ref, _, message = issue.partition(': ')
            verse_issues.setdefault(ref, []).append(message)

    if scanned == 0:
        sys.exit(f"error: no commentary files matched in {commentary_dir}")

    # Summary
    print(f"\n{'='*60}")
    print("SHALLOW COMMENTARY DETECTION REPORT")
    print(f"{'='*60}\n")

    if not verse_issues:
        print("✅ No issues detected! All commentary appears to be high quality.")
        return

    # Filter to worst offenders if requested
    if args.worst:
        verse_issues = {k: v for k, v in verse_issues.items() if len(v) >= severe_threshold}
        print("Showing only verses with 3+ issues:\n")

    # Sort by number of issues (worst first); the sort is stable, so ties
    # keep their scan order.
    sorted_verses = sorted(verse_issues.items(), key=lambda x: -len(x[1]))
    severe_cases = [(ref, issues) for ref, issues in sorted_verses
                    if len(issues) >= severe_threshold]

    # Count by severity
    severe = len(severe_cases)
    moderate = sum(1 for v in verse_issues.values() if len(v) == 2)
    minor = sum(1 for v in verse_issues.values() if len(v) == 1)

    print("📊 Issue Summary:")
    print(f" 🔴 Severe (3+ issues): {severe} verses")
    print(f" 🟡 Moderate (2 issues): {moderate} verses")
    print(f" 🟢 Minor (1 issue): {minor} verses")
    print(f" Total: {len(verse_issues)} verses with issues\n")

    # Show worst offenders, capped so the report stays readable
    print("\n🔴 WORST OFFENDERS (need immediate attention):")
    print("-" * 60)

    for ref, issues in severe_cases[:max_listed]:
        print(f"\n{ref} ({len(issues)} issues):")
        for issue in issues:
            print(f" • {issue}")

    remaining = severe - max_listed
    if remaining > 0:
        print(f"\n ... and {remaining} more severe cases")

    # Export if requested (every severe case, not only the ones printed)
    if args.export:
        with open(args.export, 'w', encoding='utf-8') as f:
            f.writelines(f"{ref}\n" for ref, _ in severe_cases)
        print(f"\n📁 Exported {severe} severe cases to {args.export}")

    print(f"\n{'='*60}")
+
+
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()