mirror of
https://github.com/kennethreitz/kjvstudy.org.git
synced 2026-06-05 23:00:16 +00:00
d56a2ea243
Fixed 48 verses where narrative introductions (like "Jesus answered them,") were incorrectly marked in red. Only Christ's actual spoken words should be red. Automatically fixed verses including: - John 8:34: "Jesus answered them," → now in black - John 8:19, 8:49, 8:54: "Jesus answered," → now in black - Matthew 11:25, 12:39, 15:3, 15:13, 17:22, 17:26, 21:30, 21:31, 25:12, 26:10 - Mark 12:29, Luke 4:4, John 5:17, 6:70, 7:16, 10:25, 10:32, 10:34, 13:8 - And 29 more verses across Matthew, Mark, Luke, and John Added script: scripts/fix_red_letter_narrative.py - Automatically extracts spoken words from narrative text - Uses regex patterns to identify narrative introductions - 48 verses fixed, 33 remaining for manual review 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
150 lines
5.9 KiB
Python
150 lines
5.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Fix red letter verses that incorrectly include narrative text.
|
|
|
|
This script finds verses marked as "full" that contain narrative introductions
|
|
(like "Jesus answered them,") and extracts only the actual spoken words.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from kjvstudy_org.kjv import Bible
|
|
|
|
# Common narrative patterns that should NOT be in red.
#
# Every regex has the same shape: group 1 is the narrative lead-in (up to and
# including the "," or ":" that closes it) and group 2 is whatever follows.
# The patterns are tried in order with re.IGNORECASE and the first match wins,
# so a general form such as "Jesus answered and said" already covers longer
# variants like "But Jesus answered and said" or ". And he answered and said";
# those shadowed variants are intentionally not listed separately.
NARRATIVE_PATTERNS = [
    # "Jesus answered and said unto them," -> extract after the comma
    r'^(.*?Jesus answered and said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus answered\s+(?:them|him|her|it)[,:])\s*(.+)$',
    r'^(.*?Jesus answered(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?And Jesus said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus saith(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Then said Jesus(?:\s+(?:to|unto)\s+(?:them|him|her))?[,:])\s*(.+)$',
    r'^(.*?And he said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    # \b keeps the bare pronoun forms from matching inside "she answered".
    r'^(.*?\bhe answered and said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?\bhe answered(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?But Jesus called(?:\s+(?:them|him|her))?(?:\s+unto\s+him)? and said[,:])\s*(.+)$',
    r'^(.*?When Jesus (?:perceived|understood) it, he said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?And when he had called (?:all )?(?:the )?people(?:\s+unto him)?(?:\s+with his disciples also)?, he said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus answereth again, and saith(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus called them(?:\s+unto him)? and said[,:])\s*(.+)$',
]
|
|
|
|
|
|
def extract_spoken_words(verse_text: str) -> str | None:
    """
    Extract only the spoken words from a verse, removing narrative introduction.

    The verse is tested against each regex in NARRATIVE_PATTERNS, in order and
    case-insensitively; the first pattern that matches decides the result.

    Args:
        verse_text: Full text of the verse.

    Returns:
        The text following the narrative lead-in with surrounding whitespace
        removed, or None if the verse is empty, no pattern matches, or nothing
        but whitespace follows the lead-in.
    """
    if not verse_text:
        return None

    for pattern in NARRATIVE_PATTERNS:
        match = re.match(pattern, verse_text, re.IGNORECASE)
        if match is None:
            continue
        # Group 2 is `(.+)`, which can capture bare trailing whitespace;
        # report that as "nothing extracted" instead of an empty string.
        return match.group(2).strip() or None

    return None
|
|
|
|
|
|
def main():
    """Rewrite 'full' red-letter entries that begin with a narrative lead-in.

    Loads red_letter_verses.json, looks up the text of every verse whose value
    is 'full', and for verses containing a narrative hint phrase replaces the
    value with the spoken words returned by extract_spoken_words(). Prints a
    summary, saves the JSON file when at least one verse changed, and writes
    the verses that could not be parsed to red_letter_manual_review.txt.
    """
    # Load the Bible and red letter data
    bible = Bible()
    data_path = Path(__file__).parent.parent / "kjvstudy_org" / "data" / "red_letter_verses.json"

    with open(data_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    verses = data.get("verses", {})

    # Find problematic verses
    fixes, no_match = _classify_full_verses(bible, verses)

    # Display findings
    _print_findings(fixes, no_match)

    # Apply fixes automatically
    print(f"\n{'='*80}")
    print(f"Applying fixes to {len(fixes)} verses...")

    for verse_key, _, spoken in fixes:
        verses[verse_key] = spoken

    # Save updated data; skip the rewrite when nothing changed so a no-op run
    # leaves the data file untouched.
    if fixes:
        with open(data_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n✓ Updated {len(fixes)} verses in {data_path}")

    if no_match:
        print(f"\n⚠ {len(no_match)} verses still need manual review")
        print("These verses may have complex narrative structures that couldn't be")
        print("automatically parsed. Please review them manually.")

        # Save list of verses needing manual review
        manual_path = Path(__file__).parent.parent / "scripts" / "red_letter_manual_review.txt"
        with open(manual_path, 'w', encoding='utf-8') as f:
            for verse_key, text in no_match:
                f.write(f"{verse_key}\n")
                f.write(f" {text}\n\n")
        print(f"\nSaved list to: {manual_path}")


# Lower-cased phrases that hint at a narrative lead-in. They are compared
# against the lower-cased verse so that a sentence-initial "He answered" or
# "He said" is not skipped (the extraction regexes are case-insensitive too).
_NARRATIVE_HINTS = (
    'jesus answered',
    'jesus said',
    'he answered',
    'he said',
    'jesus saith',
)


def _split_verse_key(verse_key):
    """Split a "Book Chapter:Verse" key into (book, chapter, verse) strings.

    Returns None when the key has no ':' or no space before the chapter.
    """
    book_chapter, colon, verse_num = verse_key.rpartition(':')
    book, space, chapter = book_chapter.rpartition(' ')
    if not colon or not space:
        return None
    return book, chapter, verse_num


def _classify_full_verses(bible, verses):
    """Sort 'full' verses with a narrative hint into fixable and unparsed lists.

    Returns (fixes, no_match): fixes holds (verse_key, text, spoken) tuples and
    no_match holds (verse_key, text) tuples.
    """
    fixes = []
    no_match = []

    for verse_key, value in verses.items():
        if value != 'full':
            continue

        parsed = _split_verse_key(verse_key)
        if parsed is None:
            continue
        book, chapter, verse_num = parsed

        # Best effort: a malformed number or a failed lookup is reported and
        # skipped so one bad key does not abort the whole run.
        try:
            text = bible.get_verse_text(book, int(chapter), int(verse_num))
        except Exception as e:
            print(f"Error processing {verse_key}: {e}")
            continue

        # NOTE(review): assumes get_verse_text returns a str for known verses;
        # anything else is reported rather than crashing on the checks below.
        if not isinstance(text, str):
            print(f"Error processing {verse_key}: no verse text returned")
            continue

        # Check if it has narrative introduction
        lowered = text.lower()
        if not any(hint in lowered for hint in _NARRATIVE_HINTS):
            continue

        spoken = extract_spoken_words(text)
        if spoken:
            fixes.append((verse_key, text, spoken))
        else:
            no_match.append((verse_key, text))

    return fixes, no_match


def _print_findings(fixes, no_match):
    """Print the totals plus a short preview of each list."""
    print(f"\n{'='*80}")
    print(f"Found {len(fixes)} verses that can be automatically fixed")
    print(f"Found {len(no_match)} verses that need manual review")
    print(f"{'='*80}\n")

    if fixes:
        print(f"\nVERSES TO FIX ({len(fixes)}):")
        print("="*80)
        for verse_key, original, spoken in fixes[:10]:
            print(f"\n{verse_key}")
            print(f" Original: {original}")
            print(f" Spoken: {spoken}")
        if len(fixes) > 10:
            print(f"\n... and {len(fixes) - 10} more")

    if no_match:
        print(f"\n\nVERSES NEEDING MANUAL REVIEW ({len(no_match)}):")
        print("="*80)
        for verse_key, text in no_match[:5]:
            print(f"\n{verse_key}")
            print(f" {text}")
        if len(no_match) > 5:
            print(f"\n... and {len(no_match) - 5} more")
|
|
|
|
|
|
# Run the fixer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|