Files
kjvstudy.org/scripts/fix_red_letter_narrative.py
kennethreitz d56a2ea243 Fix red letter edition narrative text issue
Fixed 48 verses where narrative introductions (like "Jesus answered them,")
were incorrectly marked in red. Only Christ's actual spoken words should be red.

Automatically fixed verses including:
- John 8:34: "Jesus answered them," → now in black
- John 8:19, 8:49, 8:54: "Jesus answered," → now in black
- Matthew 11:25, 12:39, 15:3, 15:13, 17:22, 17:26, 21:30, 21:31, 25:12, 26:10
- Mark 12:29, Luke 4:4, John 5:17, 6:70, 7:16, 10:25, 10:32, 10:34, 13:8
- And 29 more verses across Matthew, Mark, Luke, and John

Added script: scripts/fix_red_letter_narrative.py
- Automatically extracts spoken words from narrative text
- Uses regex patterns to identify narrative introductions
- 48 verses fixed, 33 remaining for manual review

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-29 02:28:11 -05:00

150 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""
Fix red letter verses that incorrectly include narrative text.
This script finds verses marked as "full" that contain narrative introductions
(like "Jesus answered them,") and extracts only the actual spoken words.
"""
import json
import re
from pathlib import Path
from kjvstudy_org.kjv import Bible
# Common narrative patterns that should NOT be in red.
#
# Every regex has exactly two capture groups:
#   group 1 - everything up to and including the speech introduction and its
#             terminating comma or colon (the leading ``.*?`` lets the
#             introduction start anywhere in the verse);
#   group 2 - the rest of the verse, i.e. the candidate spoken words.
#
# extract_spoken_words() tries these top to bottom with re.match and
# re.IGNORECASE and uses the first pattern that matches, so ORDER MATTERS:
# a more specific pattern only has an effect if it appears before any more
# general pattern that would also match the same text.
#
# NOTE(review): none of the phrases is anchored with a word boundary, so
# e.g. the "he answered" pattern also matches inside "she answered".
NARRATIVE_PATTERNS = [
    # "Jesus answered and said unto them," -> extract after the comma
    r'^(.*?Jesus answered and said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus answered\s+(?:them|him|her|it)[,:])\s*(.+)$',
    r'^(.*?Jesus answered(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?And Jesus said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus saith(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Then said Jesus(?:\s+(?:to|unto)\s+(?:them|him|her))?[,:])\s*(.+)$',
    r'^(.*?And he said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?he answered and said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?he answered(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    # NOTE(review): the next two appear unreachable - any text they match is
    # already matched by the earlier "he answered and said" / "Jesus answered
    # and said" patterns, whose lazy prefix absorbs the leading "But ".
    r'^(.*?But he answered and said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?But Jesus answered and said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?But Jesus called(?:\s+(?:them|him|her))?(?:\s+unto\s+him)? and said[,:])\s*(.+)$',
    r'^(.*?When Jesus (?:perceived|understood) it, he said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?And when he had called (?:all )?(?:the )?people(?:\s+unto him)?(?:\s+with his disciples also)?, he said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus answereth again, and saith(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    # NOTE(review): appears unreachable - the earlier generic "Jesus said"
    # pattern matches every verse this one matches.
    r'^(.*?And while they abode in Galilee, Jesus said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?Jesus called them(?:\s+unto him)? and said[,:])\s*(.+)$',
    # Handle cases with preceding dialogue
    # NOTE(review): both appear unreachable for the same reason - the earlier
    # "Jesus said" and "he answered and said" patterns already allow
    # arbitrary preceding text.
    r'^(.*?\. (?:And )?Jesus said(?:\s+unto\s+(?:them|him|her|it))?[,:])\s*(.+)$',
    r'^(.*?\. And he answered and said[,:])\s*(.+)$',
]
def extract_spoken_words(verse_text: str) -> str | None:
"""
Extract only the spoken words from a verse, removing narrative introduction.
Returns the spoken words, or None if no clear pattern is found.
"""
# Try each pattern
for pattern in NARRATIVE_PATTERNS:
match = re.match(pattern, verse_text, re.IGNORECASE)
if match:
narrative = match.group(1)
spoken = match.group(2)
return spoken.strip()
return None
def main():
    """
    Rewrite "full" red letter entries that begin with narrative text.

    Loads red_letter_verses.json and looks up every verse whose value is
    'full'. For verses containing a speech-introduction phrase, the entry is
    either replaced by the extracted spoken words or queued for manual
    review. A summary is printed, the JSON is saved back in place, and the
    manual-review list is written to scripts/red_letter_manual_review.txt.
    """
    bible = Bible()
    repo_root = Path(__file__).parent.parent
    data_path = repo_root / "kjvstudy_org" / "data" / "red_letter_verses.json"
    with open(data_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    verses = data.get("verses", {})

    # Cheap substring pre-filter applied before the regex extraction.
    intro_phrases = ['Jesus answered', 'Jesus said', 'he answered', 'he said', 'Jesus saith']

    fixes = []
    no_match = []
    for verse_key, value in verses.items():
        if value != 'full':
            continue
        # Keys are expected to look like "<book> <chapter>:<verse>"; anything
        # that does not split that way is skipped.
        book_chapter, colon, verse_num = verse_key.rpartition(':')
        if not colon:
            continue
        book, space, chapter = book_chapter.rpartition(' ')
        if not space:
            continue
        try:
            text = bible.get_verse_text(book, int(chapter), int(verse_num))
            if not any(phrase in text for phrase in intro_phrases):
                continue
            spoken = extract_spoken_words(text)
            if spoken:
                fixes.append((verse_key, text, spoken))
            else:
                no_match.append((verse_key, text))
        except Exception as e:
            print(f"Error processing {verse_key}: {e}")

    # Summary of what was found.
    print(f"\n{'='*80}")
    print(f"Found {len(fixes)} verses that can be automatically fixed")
    print(f"Found {len(no_match)} verses that need manual review")
    print(f"{'='*80}\n")

    if fixes:
        print(f"\nVERSES TO FIX ({len(fixes)}):")
        print("="*80)
        # Preview only the first ten fixes.
        for verse_key, original, spoken in fixes[:10]:
            print(f"\n{verse_key}")
            print(f" Original: {original}")
            print(f" Spoken: {spoken}")
        hidden_fixes = len(fixes) - 10
        if hidden_fixes > 0:
            print(f"\n... and {len(fixes) - 10} more")

    if no_match:
        print(f"\n\nVERSES NEEDING MANUAL REVIEW ({len(no_match)}):")
        print("="*80)
        # Preview only the first five unmatched verses.
        for verse_key, text in no_match[:5]:
            print(f"\n{verse_key}")
            print(f" {text}")
        hidden_unmatched = len(no_match) - 5
        if hidden_unmatched > 0:
            print(f"\n... and {len(no_match) - 5} more")

    # Replace each 'full' marker with the extracted spoken words and save.
    print(f"\n{'='*80}")
    print(f"Applying fixes to {len(fixes)} verses...")
    verses.update((verse_key, spoken) for verse_key, _, spoken in fixes)
    with open(data_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"\n✓ Updated {len(fixes)} verses in {data_path}")

    if not no_match:
        return

    print(f"\n{len(no_match)} verses still need manual review")
    print("These verses may have complex narrative structures that couldn't be")
    print("automatically parsed. Please review them manually.")
    # Persist the full manual-review list next to the scripts.
    manual_path = repo_root / "scripts" / "red_letter_manual_review.txt"
    with open(manual_path, 'w', encoding='utf-8') as f:
        for verse_key, text in no_match:
            f.write(f"{verse_key}\n")
            f.write(f" {text}\n\n")
    print(f"\nSaved list to: {manual_path}")
# Run the fixer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()