Restore validate_data.py script

Accidentally deleted in previous cleanup. Required by test_data_validation.py. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-06-05 14:50:17 +00:00 · 2025-12-09 10:49:47 -05:00
parent 5cc61774ba
commit 6a2212d078
1 changed files with 799 additions and 0 deletions
@@ -0,0 +1,799 @@
+#!/usr/bin/env python3
+"""
+Validate JSON data files using Pydantic models.
+
+This script validates all data files in kjvstudy_org/data/ using Pydantic models
+for type safety and validation. Pydantic provides better error messages and
+integrates naturally with FastAPI.
+
+Usage:
+    python scripts/validate_data.py              # Validate all files in MODEL_MAPPING
+    python scripts/validate_data.py --file bible_metadata.json  # Validate specific file
+    python scripts/validate_data.py --verbose    # Show detailed output
+    python scripts/validate_data.py --books      # Validate book introduction files
+    python scripts/validate_data.py --generate-schemas  # Generate JSON schemas
+
+Requirements:
+    pip install pydantic (already installed with FastAPI)
+"""
+
+import json
+import sys
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional
+
+try:
+    from pydantic import BaseModel, RootModel, Field, field_validator, ValidationError
+except ImportError:
+    print("Error: pydantic package not found")
+    print("Install with: pip install pydantic")
+    sys.exit(1)
+
+# Path to data directory, resolved relative to this script's location
+# (<script dir>/../kjvstudy_org/data) rather than the current working directory.
+DATA_DIR = Path(__file__).parent.parent / "kjvstudy_org" / "data"
+# Directory that generate_json_schemas() creates and writes *.schema.json into.
+SCHEMAS_DIR = DATA_DIR / "schemas"
+
+
+# ============================================================================
+# Pydantic Models for Data Validation
+# ============================================================================
+
+class BibleMetadata(BaseModel):
+    """Schema for bible_metadata.json"""
+    # Exactly 39 Old Testament and 27 New Testament names are required
+    # (min_length == max_length); the abbreviation map must be non-empty.
+    old_testament_books: List[str] = Field(..., min_length=39, max_length=39)
+    new_testament_books: List[str] = Field(..., min_length=27, max_length=27)
+    book_abbreviations: Dict[str, str] = Field(..., min_length=1)
+
+    @field_validator('old_testament_books', 'new_testament_books')
+    @classmethod
+    def check_unique_books(cls, v):
+        """Reject a testament list that names the same book more than once."""
+        distinct_names = set(v)
+        if len(distinct_names) == len(v):
+            return v
+        raise ValueError("Duplicate book names found")
+
+
+class WordStudy(BaseModel):
+    """Schema for individual word study entry"""
+    # Every field is optional and defaults to None; when a value is supplied it
+    # must be a string of at least one character (min_length=1).
+    # NOTE(review): the ot_/nt_ prefixes presumably mean Old/New Testament --
+    # confirm against word_studies.json.
+    ot_term: Optional[str] = Field(None, min_length=1)
+    ot_transliteration: Optional[str] = Field(None, min_length=1)
+    ot_meaning: Optional[str] = Field(None, min_length=1)
+    ot_note: Optional[str] = Field(None, min_length=1)
+    nt_term: Optional[str] = Field(None, min_length=1)
+    nt_transliteration: Optional[str] = Field(None, min_length=1)
+    nt_meaning: Optional[str] = Field(None, min_length=1)
+    nt_note: Optional[str] = Field(None, min_length=1)
+
+
+class WordStudies(RootModel[Dict[str, WordStudy]]):
+    """Schema for word_studies.json"""
+    # Root model: the whole JSON document is a mapping of string key to a
+    # WordStudy entry (the key is presumably the word being studied -- verify).
+    root: Dict[str, WordStudy]
+
+
+class CatalogEntry(BaseModel):
+    """Schema for study guide catalog entry"""
+    # title/description must be non-empty; slug is restricted to lowercase
+    # ASCII letters and hyphens by the Field pattern.
+    title: str = Field(..., min_length=1)
+    description: str = Field(..., min_length=1)
+    slug: str = Field(..., pattern=r'^[a-z-]+$')
+    verses: List[str]
+
+    @field_validator('verses')
+    @classmethod
+    def check_verse_format(cls, v):
+        """Require every reference to look like 'Book C:V' or 'Book C:V-V'.
+
+        Raises:
+            ValueError: On the first reference that does not match.
+        """
+        import re
+        pattern = re.compile(r'^[A-Za-z0-9 ]+ \d+:\d+(-\d+)?$')
+        for verse in v:
+            # fullmatch, not match: with match() the trailing `$` also accepts
+            # a reference followed by a newline (e.g. "John 3:16\n").
+            if not pattern.fullmatch(verse):
+                raise ValueError(f"Invalid verse reference format: {verse}")
+        return v
+
+
+class StudySection(BaseModel):
+    """Schema for study guide section"""
+    # title and content must be non-empty; verses may be an empty list.
+    title: str = Field(..., min_length=1)
+    verses: List[str]
+    content: str = Field(..., min_length=1)
+
+    @field_validator('verses')
+    @classmethod
+    def check_verse_format(cls, v):
+        """Require every reference to look like 'Book C:V' or 'Book C:V-V'.
+
+        Raises:
+            ValueError: On the first reference that does not match.
+        """
+        import re
+        pattern = re.compile(r'^[A-Za-z0-9 ]+ \d+:\d+(-\d+)?$')
+        for verse in v:
+            # fullmatch, not match: with match() the trailing `$` also accepts
+            # a reference followed by a newline (e.g. "John 3:16\n").
+            if not pattern.fullmatch(verse):
+                raise ValueError(f"Invalid verse reference format: {verse}")
+        return v
+
+
+class GuideContent(BaseModel):
+    """Schema for study guide content"""
+    # A guide needs a non-empty title and description and at least one section.
+    title: str = Field(..., min_length=1)
+    description: str = Field(..., min_length=1)
+    sections: List[StudySection] = Field(..., min_length=1)
+
+
+class StudyGuideFile(BaseModel):
+    """Schema for a single study guide file"""
+    # Only `content` is required; catalog_entry and category default to None.
+    content: GuideContent
+    catalog_entry: Optional[CatalogEntry] = None
+    category: Optional[str] = None
+
+
+class TopicsFile(BaseModel):
+    """Schema for a single topics file"""
+    # This is a regular BaseModel with a field named `root`, not a RootModel.
+    # validate_topics_directory() wraps the parsed file as TopicsFile(root=data),
+    # so the file itself must be a JSON object whose values are objects.
+    root: Dict[str, dict]
+
+
+class ReadingPlanFile(BaseModel):
+    """Schema for a single reading plan file"""
+    # validate_reading_plans_directory() wraps the parsed file as
+    # ReadingPlanFile(plan=data): each top-level key maps to a list of objects
+    # with string keys and arbitrary values.
+    plan: Dict[str, List[Dict[str, object]]]
+
+
+class VerseCommentaryEntry(BaseModel):
+    """Schema for verse commentary entry"""
+    # All three fields are required: two non-empty strings and a list with at
+    # least one question.
+    analysis: str = Field(..., min_length=1)
+    historical: str = Field(..., min_length=1)
+    questions: List[str] = Field(..., min_length=1)
+
+
+class VerseCommentaryBook(BaseModel):
+    """Schema for a single verse commentary book file"""
+    # commentary is a two-level mapping: chapter key -> verse key -> entry.
+    book: str = Field(..., min_length=1)
+    commentary: Dict[str, Dict[str, VerseCommentaryEntry]] = Field(..., min_length=1)
+
+    @field_validator('commentary')
+    @classmethod
+    def check_numeric_keys(cls, v):
+        """Require digit-only chapter/verse keys and non-empty chapters.
+
+        Raises:
+            ValueError: On the first offending chapter or verse encountered.
+        """
+        for chapter, chapter_verses in v.items():
+            if not str(chapter).isdigit():
+                raise ValueError(f"Invalid chapter key: {chapter}")
+
+            has_entries = isinstance(chapter_verses, dict) and len(chapter_verses) != 0
+            if not has_entries:
+                raise ValueError(f"Chapter {chapter} must contain verse entries")
+
+            for verse_number, commentary_entry in chapter_verses.items():
+                if not str(verse_number).isdigit():
+                    raise ValueError(f"Invalid verse key: {verse_number}")
+                if isinstance(commentary_entry, (dict, BaseModel)):
+                    continue
+                raise ValueError(f"Verse {chapter}:{verse_number} must be an object")
+        return v
+
+
+class Devotional(BaseModel):
+    """Schema for verse devotional content"""
+    # Length bounds are in characters: title 1-100, theme 1-50, and minimum
+    # lengths of 10/20/10/10 for opening/meditation/application/prayer.
+    title: str = Field(..., min_length=1, max_length=100)
+    theme: str = Field(..., min_length=1, max_length=50)
+    opening: str = Field(..., min_length=10)
+    meditation: str = Field(..., min_length=20)
+    application: str = Field(..., min_length=10)
+    prayer: str = Field(..., min_length=10)
+
+    @field_validator('prayer')
+    @classmethod
+    def prayer_ends_with_amen(cls, v):
+        # Surrounding whitespace is ignored for the check only; the value is
+        # returned unmodified. The match is case-sensitive and needs the period.
+        if not v.strip().endswith('Amen.'):
+            raise ValueError("Prayer must end with 'Amen.'")
+        return v
+
+
+class FeaturedVerse(BaseModel):
+    """Schema for individual featured verse with optional devotional"""
+    # chapter and verse are 1-based (ge=1); devotional may be omitted.
+    book: str = Field(..., min_length=1)
+    chapter: int = Field(..., ge=1)
+    verse: int = Field(..., ge=1)
+    devotional: Optional[Devotional] = None
+
+
+class FeaturedVerses(BaseModel):
+    """Schema for featured_verses.json - 365 verses with devotionals"""
+    # NOTE: only "at least one verse" is enforced; neither the count of 365 nor
+    # devotional coverage mentioned in the docstring is checked.
+    verses: List[FeaturedVerse] = Field(..., min_length=1)
+
+    @field_validator('verses')
+    @classmethod
+    def check_devotional_coverage(cls, v):
+        """Accept the verse list unchanged.
+
+        Missing devotionals are deliberately not an error. The previous body
+        counted them and then discarded the count, so that dead computation
+        has been removed; the hook is kept so a real coverage check can be
+        added here later.
+        """
+        return v
+
+
+class RedLetterVerses(BaseModel):
+    """Schema for red_letter_verses.json"""
+    # verses maps a reference such as "John 3:16" to either the marker "full"
+    # or some other non-empty string.
+    description: str = Field(..., min_length=1)
+    note: str = Field(..., min_length=1)
+    verses: Dict[str, str] = Field(..., min_length=1)
+
+    @field_validator('verses')
+    @classmethod
+    def check_verses(cls, v):
+        """Check reference keys and reject empty values.
+
+        Raises:
+            ValueError: On the first malformed key or empty value.
+        """
+        import re
+        # Validate verse reference format
+        pattern = re.compile(r"^[A-Za-z0-9 ']+ \d+:\d+$")
+        for key, value in v.items():
+            # fullmatch, not match: with match() the trailing `$` also accepts
+            # a key that ends in a newline.
+            if not pattern.fullmatch(key):
+                raise ValueError(f"Invalid verse reference key: {key}")
+            # Values are already validated as str by the Dict[str, str]
+            # annotation, and "full" is itself non-empty, so the only
+            # remaining invalid value is the empty string.
+            if not value:
+                raise ValueError(f"Invalid value for {key}: must be 'full' or a non-empty string")
+        return v
+
+
+class ResourceSlugs(BaseModel):
+    """Schema for resource_slugs.json"""
+    # One required list of slugs per resource category.
+    study_guides: List[str]
+    angels: List[str]
+    prophets: List[str]
+    names_of_god: List[str]
+    parables: List[str]
+    covenants: List[str]
+    apostles: List[str]
+    women: List[str]
+    festivals: List[str]
+    fruits_of_spirit: List[str]
+
+    @field_validator('*')
+    @classmethod
+    def check_slugs(cls, v):
+        """Applied to every field: slugs must be unique and well-formed.
+
+        Uniqueness is checked per list, not across categories.
+
+        Raises:
+            ValueError: If the list has duplicates or a slug contains anything
+                other than lowercase ASCII letters and hyphens.
+        """
+        # Check for duplicates
+        if len(v) != len(set(v)):
+            raise ValueError("Duplicate slugs found")
+        # Check slug format
+        import re
+        pattern = re.compile(r'^[a-z-]+$')
+        for slug in v:
+            # fullmatch, not match: with match() the trailing `$` also accepts
+            # a slug that ends in a newline.
+            if not pattern.fullmatch(slug):
+                raise ValueError(f"Invalid slug format: {slug}")
+        return v
+
+
+class PoetryBookData(BaseModel):
+    """Schema for individual book poetry data"""
+    is_poetry: bool = Field(..., description="Whether the entire book is poetry")
+    poetry_chapters: List[int] | str = Field(..., description="List of chapter numbers that are poetry, or 'all'")
+    stanza_breaks: Dict[str, List[int]] = Field(..., description="Map of chapter number to list of verse numbers with stanza breaks")
+
+    @field_validator('poetry_chapters')
+    @classmethod
+    def check_chapters_sorted(cls, v):
+        """Accept the literal 'all' or a strictly increasing chapter list.
+
+        Raises:
+            ValueError: If a string other than 'all' is given, or the list is
+                unsorted or contains duplicates.
+        """
+        if isinstance(v, str):
+            # Allow "all" as a special value for entirely poetry books.
+            # Other strings used to fall through to the sorted() comparison
+            # and were rejected with a misleading "must be sorted" message.
+            if v != "all":
+                raise ValueError("poetry_chapters must be a list of chapter numbers or 'all'")
+            return v
+        if v != sorted(v):
+            raise ValueError("poetry_chapters must be sorted")
+        if len(v) != len(set(v)):
+            raise ValueError("Duplicate chapter numbers found")
+        return v
+
+    @field_validator('stanza_breaks')
+    @classmethod
+    def check_stanza_breaks(cls, v):
+        """Require digit-only chapter keys and sorted, unique verse lists.
+
+        Raises:
+            ValueError: On the first chapter that violates a rule.
+        """
+        for chapter_key, verses in v.items():
+            if not chapter_key.isdigit():
+                raise ValueError(f"Invalid chapter key: {chapter_key}")
+            if verses != sorted(verses):
+                raise ValueError(f"Stanza breaks for chapter {chapter_key} must be sorted")
+            if len(verses) != len(set(verses)):
+                raise ValueError(f"Duplicate verse numbers in chapter {chapter_key}")
+        return v
+
+
+class PoetryFormatting(BaseModel):
+    """Schema for poetry_formatting.json"""
+    books: Dict[str, PoetryBookData] = Field(..., min_length=1)
+
+    @field_validator('books')
+    @classmethod
+    def check_valid_books(cls, v):
+        """Reject any key that is not one of the 66 recognised book names."""
+        # Poetry is not limited to the poetic books (Psalms, Prophets, NT
+        # hymns, ...), so any canonical book is allowed as a key.
+        old_testament = {
+            'Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy',
+            'Joshua', 'Judges', 'Ruth', '1 Samuel', '2 Samuel', '1 Kings', '2 Kings',
+            '1 Chronicles', '2 Chronicles', 'Ezra', 'Nehemiah', 'Esther',
+            'Job', 'Psalms', 'Proverbs', 'Ecclesiastes', 'Song of Solomon',
+            'Isaiah', 'Jeremiah', 'Lamentations', 'Ezekiel', 'Daniel',
+            'Hosea', 'Joel', 'Amos', 'Obadiah', 'Jonah', 'Micah', 'Nahum',
+            'Habakkuk', 'Zephaniah', 'Haggai', 'Zechariah', 'Malachi',
+        }
+        new_testament = {
+            'Matthew', 'Mark', 'Luke', 'John', 'Acts',
+            'Romans', '1 Corinthians', '2 Corinthians', 'Galatians', 'Ephesians',
+            'Philippians', 'Colossians', '1 Thessalonians', '2 Thessalonians',
+            '1 Timothy', '2 Timothy', 'Titus', 'Philemon', 'Hebrews',
+            'James', '1 Peter', '2 Peter', '1 John', '2 John', '3 John', 'Jude', 'Revelation',
+        }
+        recognised = old_testament | new_testament
+        # Report the first unknown key in the mapping's own order.
+        unknown = next((name for name in v if name not in recognised), None)
+        if unknown is not None:
+            raise ValueError(f"Invalid book name: {unknown}")
+        return v
+
+
+class OutlineSection(BaseModel):
+    """Schema for book outline section"""
+    # All fields are required non-empty strings. `chapters` is a string, not a
+    # number (presumably a range such as "1-11" -- verify against the data).
+    section: str = Field(..., min_length=1)
+    chapters: str = Field(..., min_length=1)
+    description: str = Field(..., min_length=1)
+
+
+class KeyTheme(BaseModel):
+    """Schema for book key theme"""
+    # Both fields are required non-empty strings.
+    theme: str = Field(..., min_length=1)
+    description: str = Field(..., min_length=1)
+
+
+class KeyVerse(BaseModel):
+    """Schema for book key verse"""
+    # Both fields are required non-empty strings; the reference format itself
+    # is not validated here.
+    reference: str = Field(..., min_length=1)
+    text: str = Field(..., min_length=1)
+
+
+class BookIntroduction(BaseModel):
+    """Schema for individual book introduction file"""
+    # Used by validate_book_file() for each *.json under DATA_DIR/books.
+    name: str = Field(..., min_length=1)
+    abbreviation: str = Field(..., min_length=1)
+    # Must be exactly "Old Testament" or "New Testament".
+    testament: str = Field(..., pattern=r'^(Old Testament|New Testament)$')
+    # position is bounded to 1..66; chapters only has a lower bound of 1.
+    position: int = Field(..., ge=1, le=66)
+    chapters: int = Field(..., ge=1)
+    category: str = Field(..., min_length=1)
+    author: str = Field(..., min_length=1)
+    date_written: str = Field(..., min_length=1)
+    introduction: str = Field(..., min_length=1)
+    # Each of the three lists needs at least one element.
+    outline: List[OutlineSection] = Field(..., min_length=1)
+    key_themes: List[KeyTheme] = Field(..., min_length=1)
+    key_verses: List[KeyVerse] = Field(..., min_length=1)
+    # Optional; unlike the other strings it has no minimum length.
+    christ_in_book: Optional[str] = None
+
+
+# ============================================================================
+# Validation Logic
+# ============================================================================
+
+# Mapping of data files to their Pydantic models.
+# Keys ending in ".json" are single files under DATA_DIR. The other keys
+# ("study_guides", "verse_commentary", "topics", "reading_plans") are
+# directory names that validate_file() hands to per-directory validators.
+# generate_json_schemas() also iterates this mapping to emit one schema per key.
+MODEL_MAPPING = {
+    "bible_metadata.json": BibleMetadata,
+    "word_studies.json": WordStudies,
+    "study_guides": StudyGuideFile,
+    "verse_commentary": VerseCommentaryBook,
+    "topics": TopicsFile,
+    "reading_plans": ReadingPlanFile,
+    "featured_verses.json": FeaturedVerses,
+    "red_letter_verses.json": RedLetterVerses,
+    "resource_slugs.json": ResourceSlugs,
+    "poetry_formatting.json": PoetryFormatting,
+}
+
+
+def load_json(file_path: Path) -> Tuple[Optional[dict], Optional[str]]:
+    """Load a UTF-8 JSON file without raising.
+
+    Args:
+        file_path: Path of the file to read.
+
+    Returns:
+        ``(data, None)`` on success, where ``data`` is whatever the document
+        decodes to (normally a dict for the files handled here), or
+        ``(None, message)`` when the file cannot be read or parsed. The
+        message starts with "JSON syntax error: " for malformed JSON and
+        "Error loading file: " for anything else.
+    """
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return json.load(f), None
+    except json.JSONDecodeError as e:
+        return None, f"JSON syntax error: {e}"
+    except Exception as e:
+        # Deliberately broad: this is a reporting boundary, so I/O and
+        # decoding problems become a message instead of aborting the run.
+        return None, f"Error loading file: {e}"
+
+
+def validate_file(data_file: str, verbose: bool = False) -> bool:
+    """Validate a single data file using its Pydantic model.
+
+    Args:
+        data_file: A MODEL_MAPPING key. The directory keys ("verse_commentary",
+            "study_guides", "topics", "reading_plans") are delegated to their
+            per-directory validators; any other name is resolved relative to
+            DATA_DIR.
+        verbose: Also print the model name and file size on success, the error
+            context on failure, and a notice for files without a model.
+
+    Returns:
+        True if the file is valid or has no model (skipped); False if it is
+        missing, unreadable, not valid JSON, or fails validation.
+    """
+    directory_validators = {
+        "verse_commentary": validate_verse_commentary_directory,
+        "study_guides": validate_study_guides_directory,
+        "topics": validate_topics_directory,
+        "reading_plans": validate_reading_plans_directory,
+    }
+    directory_validator = directory_validators.get(data_file)
+    if directory_validator is not None:
+        return directory_validator(verbose)
+
+    if data_file not in MODEL_MAPPING:
+        if verbose:
+            print(f"⚠️  {data_file}: No validation model defined (skipped)")
+        return True
+
+    model_class = MODEL_MAPPING[data_file]
+    data_path = DATA_DIR / data_file
+
+    # Check if file exists
+    if not data_path.exists():
+        print(f"❌ {data_file}: File not found at {data_path}")
+        return False
+
+    # Load data file
+    data, error = load_json(data_path)
+    if error:
+        print(f"❌ {data_file}: {error}")
+        return False
+
+    # Validate using Pydantic model
+    try:
+        # model_validate() handles both RootModel and BaseModel subclasses.
+        # Unlike model_class(**data), it reports a top-level value of the
+        # wrong JSON type (list, null, ...) as a ValidationError instead of
+        # raising TypeError.
+        model_class.model_validate(data)
+
+        print(f"✅ {data_file}: Valid")
+        if verbose:
+            print(f"   Model: {model_class.__name__}")
+            print(f"   Size: {data_path.stat().st_size:,} bytes")
+        return True
+
+    except ValidationError as e:
+        print(f"❌ {data_file}: Validation failed")
+        for error_detail in e.errors():
+            # loc is empty when the document root itself is the problem.
+            location = " -> ".join(str(loc) for loc in error_detail['loc']) or "(root)"
+            print(f"   {location}: {error_detail['msg']}")
+            if verbose and 'ctx' in error_detail:
+                print(f"   Context: {error_detail['ctx']}")
+        return False
+
+    except Exception as e:
+        print(f"❌ {data_file}: Unexpected error")
+        print(f"   Error: {str(e)}")
+        return False
+
+
+def _validate_directory(dir_name: str, validate_entry, verbose: bool = False) -> bool:
+    """Validate every *.json file directly inside DATA_DIR/<dir_name>.
+
+    Shared implementation for the four per-directory validators below.
+
+    Args:
+        dir_name: Sub-directory of DATA_DIR; also used as the label in output.
+        validate_entry: Callable taking the parsed JSON of one file and
+            raising (ValidationError or any other exception) if it is invalid.
+        verbose: Print a line for every valid file, not only for failures.
+
+    Returns:
+        True when no file failed (including when the directory holds no JSON
+        files at all); False when the directory is missing or any file failed.
+    """
+    dir_path = DATA_DIR / dir_name
+    if not dir_path.exists():
+        print(f"❌ {dir_name}: Directory not found at {dir_path}")
+        return False
+
+    passed = 0
+    failed = 0
+
+    # Sorted so the report order is stable between runs.
+    for file_path in sorted(dir_path.glob("*.json")):
+        data, error = load_json(file_path)
+        if error:
+            print(f"❌ {file_path.name}: {error}")
+            failed += 1
+            continue
+
+        try:
+            validate_entry(data)
+        except ValidationError as e:
+            print(f"❌ {file_path.name}: Validation failed")
+            for error_detail in e.errors():
+                # loc is empty when the document root itself is the problem.
+                location = " -> ".join(str(loc) for loc in error_detail['loc']) or "(root)"
+                print(f"   {location}: {error_detail['msg']}")
+            failed += 1
+        except Exception as e:
+            print(f"❌ {file_path.name}: Unexpected error")
+            print(f"   Error: {str(e)}")
+            failed += 1
+        else:
+            if verbose:
+                print(f"✅ {file_path.name}: Valid")
+            passed += 1
+
+    if failed == 0:
+        print(f"✅ {dir_name}: Valid ({passed} files)")
+        return True
+
+    print(f"❌ {dir_name}: {failed} files failed validation")
+    return False
+
+
+def _validate_reading_plan(data) -> None:
+    """Validate the parsed content of one reading plan file."""
+    # Each file has one key with the plan id mapping to the plan object
+    if len(data) != 1:
+        raise ValueError("Reading plan file must contain exactly one plan")
+    ReadingPlanFile(plan=data)
+
+
+def validate_verse_commentary_directory(verbose: bool = False) -> bool:
+    """Validate all per-book verse commentary files.
+
+    Returns True only if every *.json in DATA_DIR/verse_commentary validates
+    against VerseCommentaryBook.
+    """
+    # model_validate() (rather than VerseCommentaryBook(**data)) so that a
+    # non-object document is reported as a validation failure.
+    return _validate_directory("verse_commentary", VerseCommentaryBook.model_validate, verbose)
+
+
+def validate_study_guides_directory(verbose: bool = False) -> bool:
+    """Validate per-guide study guide files.
+
+    Returns True only if every *.json in DATA_DIR/study_guides validates
+    against StudyGuideFile.
+    """
+    return _validate_directory("study_guides", StudyGuideFile.model_validate, verbose)
+
+
+def validate_topics_directory(verbose: bool = False) -> bool:
+    """Validate per-topic files.
+
+    Each file's whole content is wrapped as the `root` field of TopicsFile.
+    """
+    return _validate_directory("topics", lambda data: TopicsFile(root=data), verbose)
+
+
+def validate_reading_plans_directory(verbose: bool = False) -> bool:
+    """Validate per-plan reading plan files.
+
+    Each file must contain exactly one top-level key and validate as the
+    `plan` field of ReadingPlanFile.
+    """
+    return _validate_directory("reading_plans", _validate_reading_plan, verbose)
+
+
+def validate_all(verbose: bool = False) -> Tuple[int, int]:
+    """Run validate_file() for every MODEL_MAPPING key, in sorted order.
+
+    Returns:
+        A ``(passed, failed)`` pair counting mapping entries. A directory
+        entry counts once, however many files it contains.
+    """
+    rule = "=" * 60
+    print(rule)
+    print("Validating JSON data files with Pydantic models")
+    print(rule)
+    print()
+
+    tally = {True: 0, False: 0}
+    for entry_name in sorted(MODEL_MAPPING):
+        outcome = bool(validate_file(entry_name, verbose))
+        tally[outcome] += 1
+        if verbose:
+            # Blank line between entries keeps the detailed output readable.
+            print()
+
+    return tally[True], tally[False]
+
+
+def validate_book_file(book_file: Path, verbose: bool = False) -> bool:
+    """Validate a single book JSON file using BookIntroduction model.
+
+    Args:
+        book_file: Path of the book introduction file.
+        verbose: Also print the file size on success.
+
+    Returns:
+        True if the file loads and validates, False otherwise. Every outcome
+        is reported on stdout.
+    """
+    # Load data file
+    data, error = load_json(book_file)
+    if error:
+        print(f"❌ {book_file.name}: {error}")
+        return False
+
+    # Validate using Pydantic model
+    try:
+        # model_validate() rather than BookIntroduction(**data): a non-object
+        # document then fails with a ValidationError instead of a TypeError.
+        BookIntroduction.model_validate(data)
+        print(f"✅ {book_file.name}: Valid")
+        if verbose:
+            print(f"   Size: {book_file.stat().st_size:,} bytes")
+        return True
+
+    except ValidationError as e:
+        print(f"❌ {book_file.name}: Validation failed")
+        for error_detail in e.errors():
+            # loc is empty when the document root itself is the problem.
+            location = " -> ".join(str(loc) for loc in error_detail['loc']) or "(root)"
+            print(f"   {location}: {error_detail['msg']}")
+        return False
+
+    except Exception as e:
+        print(f"❌ {book_file.name}: Unexpected error")
+        print(f"   Error: {str(e)}")
+        return False
+
+
+def validate_all_books(verbose: bool = False) -> Tuple[int, int]:
+    """Validate all 66 book introduction files. Returns (passed, failed) counts.
+
+    Every *.json file directly inside DATA_DIR/books is validated with
+    validate_book_file(). The number of files found is not checked against 66.
+
+    Returns:
+        ``(passed, failed)``. A missing books directory is reported as
+        ``(0, 1)`` so that callers treating ``failed == 0`` as success (as
+        main() does) do not exit successfully when nothing could be validated.
+    """
+    passed = 0
+    failed = 0
+
+    books_dir = DATA_DIR / "books"
+    if not books_dir.exists():
+        print(f"❌ Books directory not found: {books_dir}")
+        return 0, 1
+
+    print("=" * 60)
+    print("Validating 66 book introduction files")
+    print("=" * 60)
+    print()
+
+    # Sorted so the report order is stable between runs.
+    for book_file in sorted(books_dir.glob("*.json")):
+        if validate_book_file(book_file, verbose):
+            passed += 1
+        else:
+            failed += 1
+        if verbose:
+            print()
+
+    return passed, failed
+
+
+def _write_schema(model_class, schema_file: str, title: str) -> None:
+    """Write the JSON Schema of `model_class` to SCHEMAS_DIR/<schema_file>."""
+    # Generate JSON Schema from Pydantic model
+    schema = model_class.model_json_schema()
+
+    # Add metadata
+    schema['$id'] = f"https://kjvstudy.org/schemas/{schema_file}"
+    schema['title'] = title
+
+    # ensure_ascii=False keeps any non-ASCII text readable in the output.
+    with open(SCHEMAS_DIR / schema_file, 'w', encoding='utf-8') as f:
+        json.dump(schema, f, indent=2, ensure_ascii=False)
+
+
+def generate_json_schemas() -> int:
+    """Generate JSON Schema files from Pydantic models.
+
+    Writes one ``<name>.schema.json`` per MODEL_MAPPING entry plus
+    ``book_introduction.schema.json`` into SCHEMAS_DIR, creating the directory
+    if needed. A failure for one schema is printed and does not stop the rest.
+
+    Returns:
+        The number of schemas that could not be generated (0 on full success).
+    """
+    print("=" * 60)
+    print("Generating JSON Schema files from Pydantic models")
+    print("=" * 60)
+    print()
+
+    # parents=True so generation also works if DATA_DIR is absent.
+    SCHEMAS_DIR.mkdir(parents=True, exist_ok=True)
+
+    # (schema file name, model, title) for the main data files ...
+    targets = []
+    for data_file, model_class in MODEL_MAPPING.items():
+        schema_file = data_file.replace('.json', '.schema.json') if data_file.endswith('.json') else f"{data_file}.schema.json"
+        targets.append((schema_file, model_class, model_class.__doc__ or model_class.__name__))
+    # ... and for the book introduction files, which are not in MODEL_MAPPING.
+    targets.append((
+        "book_introduction.schema.json",
+        BookIntroduction,
+        "Schema for individual book introduction files",
+    ))
+
+    failures = 0
+    for schema_file, model_class, title in targets:
+        try:
+            _write_schema(model_class, schema_file, title)
+            print(f"✅ Generated {schema_file}")
+        except Exception as e:
+            print(f"❌ Failed to generate {schema_file}: {e}")
+            failures += 1
+
+    print()
+    print(f"Schemas written to: {SCHEMAS_DIR}")
+    return failures
+
+
+def main():
+    """Main entry point.
+
+    Parses the command line and always terminates through sys.exit(). Modes
+    are checked in this order and only the first that applies runs:
+    --generate-schemas, --books, --file FILE, then "validate everything in
+    MODEL_MAPPING". The exit status is 0 on success and 1 on any failure.
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Validate JSON data files with Pydantic models",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python scripts/validate_data.py                    # Validate all files
+  python scripts/validate_data.py -f bible_metadata.json  # Validate one file
+  python scripts/validate_data.py --verbose          # Show details
+  python scripts/validate_data.py --generate-schemas # Generate JSON schemas
+        """
+    )
+    parser.add_argument(
+        '-f', '--file',
+        help='Validate specific file only',
+        metavar='FILE'
+    )
+    parser.add_argument(
+        '-v', '--verbose',
+        action='store_true',
+        help='Show detailed output'
+    )
+    parser.add_argument(
+        '--generate-schemas',
+        action='store_true',
+        help='Generate JSON Schema files from Pydantic models'
+    )
+    parser.add_argument(
+        '--books',
+        action='store_true',
+        help='Validate all 66 book introduction files'
+    )
+
+    args = parser.parse_args()
+
+    # Generate schemas if requested
+    if args.generate_schemas:
+        # Honour a failure count if the generator reports one; a None return
+        # is treated as success, so this was previously an unconditional 0.
+        schema_failures = generate_json_schemas()
+        sys.exit(1 if schema_failures else 0)
+
+    # Validate a single named file
+    if args.file and not args.books:
+        success = validate_file(args.file, args.verbose)
+        sys.exit(0 if success else 1)
+
+    # Validate books if requested, otherwise everything in MODEL_MAPPING
+    if args.books:
+        passed, failed = validate_all_books(args.verbose)
+    else:
+        passed, failed = validate_all(args.verbose)
+
+    print()
+    print("=" * 60)
+    print(f"Results: {passed} passed, {failed} failed")
+    print("=" * 60)
+
+    sys.exit(0 if failed == 0 else 1)
+
+
+if __name__ == "__main__":
+    main()