From c226c4bbbf2a0222f848531111371f383d090bf3 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz
Date: Thu, 27 Nov 2025 18:42:20 -0500
Subject: [PATCH] Add JSON schema validation for 66 book introduction files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds comprehensive validation for all book introduction files
in data/books/ using Pydantic models and JSON Schema.

Changes:
- Added BookIntroduction Pydantic model with nested models:
  - OutlineSection: Validates book outline sections
  - KeyTheme: Validates key themes with descriptions
  - KeyVerse: Validates key verses with references and text
- Added validate_all_books() function to validate all 66 book files
- Added validate_book_file() helper function
- Added --books CLI flag to validate book files separately
- Generated book_introduction.schema.json JSON Schema file
- Fixed KeyVerse model field name from 'verse' to 'reference'
- Added 4 new tests to validate book directory and all 66 books

All 66 book files now validate successfully against the schema.
Test suite updated: 268 tests passing (added 4 book validation tests).

🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../schemas/book_introduction.schema.json | 175 ++++++++++++++++++ scripts/validate_data.py | 92 ++++++++- tests/test_data_validation.py | 39 +++- 3 files changed, 304 insertions(+), 2 deletions(-) create mode 100644 kjvstudy_org/data/schemas/book_introduction.schema.json diff --git a/kjvstudy_org/data/schemas/book_introduction.schema.json b/kjvstudy_org/data/schemas/book_introduction.schema.json new file mode 100644 index 0000000..6d86e19 --- /dev/null +++ b/kjvstudy_org/data/schemas/book_introduction.schema.json @@ -0,0 +1,175 @@ +{ + "$defs": { + "KeyTheme": { + "description": "Schema for book key theme", + "properties": { + "theme": { + "minLength": 1, + "title": "Theme", + "type": "string" + }, + "description": { + "minLength": 1, + "title": "Description", + "type": "string" + } + }, + "required": [ + "theme", + "description" + ], + "title": "KeyTheme", + "type": "object" + }, + "KeyVerse": { + "description": "Schema for book key verse", + "properties": { + "reference": { + "minLength": 1, + "title": "Reference", + "type": "string" + }, + "text": { + "minLength": 1, + "title": "Text", + "type": "string" + } + }, + "required": [ + "reference", + "text" + ], + "title": "KeyVerse", + "type": "object" + }, + "OutlineSection": { + "description": "Schema for book outline section", + "properties": { + "section": { + "minLength": 1, + "title": "Section", + "type": "string" + }, + "chapters": { + "minLength": 1, + "title": "Chapters", + "type": "string" + }, + "description": { + "minLength": 1, + "title": "Description", + "type": "string" + } + }, + "required": [ + "section", + "chapters", + "description" + ], + "title": "OutlineSection", + "type": "object" + } + }, + "description": "Schema for individual book introduction file", + "properties": { + "name": { + "minLength": 1, + "title": "Name", + "type": "string" + }, + "abbreviation": { + "minLength": 1, + "title": 
"Abbreviation", + "type": "string" + }, + "testament": { + "pattern": "^(Old Testament|New Testament)$", + "title": "Testament", + "type": "string" + }, + "position": { + "maximum": 66, + "minimum": 1, + "title": "Position", + "type": "integer" + }, + "chapters": { + "minimum": 1, + "title": "Chapters", + "type": "integer" + }, + "category": { + "minLength": 1, + "title": "Category", + "type": "string" + }, + "author": { + "minLength": 1, + "title": "Author", + "type": "string" + }, + "date_written": { + "minLength": 1, + "title": "Date Written", + "type": "string" + }, + "introduction": { + "minLength": 1, + "title": "Introduction", + "type": "string" + }, + "outline": { + "items": { + "$ref": "#/$defs/OutlineSection" + }, + "minItems": 1, + "title": "Outline", + "type": "array" + }, + "key_themes": { + "items": { + "$ref": "#/$defs/KeyTheme" + }, + "minItems": 1, + "title": "Key Themes", + "type": "array" + }, + "key_verses": { + "items": { + "$ref": "#/$defs/KeyVerse" + }, + "minItems": 1, + "title": "Key Verses", + "type": "array" + }, + "christ_in_book": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Christ In Book" + } + }, + "required": [ + "name", + "abbreviation", + "testament", + "position", + "chapters", + "category", + "author", + "date_written", + "introduction", + "outline", + "key_themes", + "key_verses" + ], + "title": "Schema for individual book introduction files", + "type": "object", + "$id": "https://kjvstudy.org/schemas/book_introduction.schema.json" +} \ No newline at end of file diff --git a/scripts/validate_data.py b/scripts/validate_data.py index c47e854..f82c5aa 100755 --- a/scripts/validate_data.py +++ b/scripts/validate_data.py @@ -194,7 +194,7 @@ class KeyTheme(BaseModel): class KeyVerse(BaseModel): """Schema for book key verse""" - verse: str = Field(..., min_length=1) + reference: str = Field(..., min_length=1) text: str = Field(..., min_length=1) @@ -313,6 +313,62 @@ def 
validate_all(verbose: bool = False) -> Tuple[int, int]: return passed, failed +def validate_book_file(book_file: Path, verbose: bool = False) -> bool: + """Validate a single book JSON file using BookIntroduction model.""" + # Load data file + data, error = load_json(book_file) + if error: + print(f"❌ {book_file.name}: {error}") + return False + + # Validate using Pydantic model + try: + BookIntroduction(**data) + print(f"✅ {book_file.name}: Valid") + if verbose: + print(f" Size: {book_file.stat().st_size:,} bytes") + return True + + except ValidationError as e: + print(f"❌ {book_file.name}: Validation failed") + for error_detail in e.errors(): + location = " -> ".join(str(loc) for loc in error_detail['loc']) + print(f" {location}: {error_detail['msg']}") + return False + + except Exception as e: + print(f"❌ {book_file.name}: Unexpected error") + print(f" Error: {str(e)}") + return False + + +def validate_all_books(verbose: bool = False) -> Tuple[int, int]: + """Validate all 66 book introduction files. 
Returns (passed, failed) counts.""" + passed = 0 + failed = 0 + + books_dir = DATA_DIR / "books" + if not books_dir.exists(): + print(f"❌ Books directory not found: {books_dir}") + return 0, 0 + + print("=" * 60) + print("Validating 66 book introduction files") + print("=" * 60) + print() + + book_files = sorted(books_dir.glob("*.json")) + for book_file in book_files: + if validate_book_file(book_file, verbose): + passed += 1 + else: + failed += 1 + if verbose: + print() + + return passed, failed + + def generate_json_schemas(): """Generate JSON Schema files from Pydantic models.""" print("=" * 60) @@ -322,6 +378,7 @@ def generate_json_schemas(): SCHEMAS_DIR.mkdir(exist_ok=True) + # Generate schemas for main data files for data_file, model_class in MODEL_MAPPING.items(): schema_file = data_file.replace('.json', '.schema.json') schema_path = SCHEMAS_DIR / schema_file @@ -343,6 +400,23 @@ def generate_json_schemas(): except Exception as e: print(f"❌ Failed to generate {schema_file}: {e}") + # Generate schema for book introduction files + try: + schema_file = "book_introduction.schema.json" + schema_path = SCHEMAS_DIR / schema_file + + schema = BookIntroduction.model_json_schema() + schema['$id'] = f"https://kjvstudy.org/schemas/{schema_file}" + schema['title'] = "Schema for individual book introduction files" + + with open(schema_path, 'w', encoding='utf-8') as f: + json.dump(schema, f, indent=2, ensure_ascii=False) + + print(f"✅ Generated {schema_file}") + + except Exception as e: + print(f"❌ Failed to generate {schema_file}: {e}") + print() print(f"Schemas written to: {SCHEMAS_DIR}") @@ -377,6 +451,11 @@ Examples: action='store_true', help='Generate JSON Schema files from Pydantic models' ) + parser.add_argument( + '--books', + action='store_true', + help='Validate all 66 book introduction files' + ) args = parser.parse_args() @@ -385,6 +464,17 @@ Examples: generate_json_schemas() sys.exit(0) + # Validate books if requested + if args.books: + passed, failed = 
validate_all_books(args.verbose) + + print() + print("=" * 60) + print(f"Results: {passed} passed, {failed} failed") + print("=" * 60) + + sys.exit(0 if failed == 0 else 1) + # Validate specific file or all files if args.file: success = validate_file(args.file, args.verbose) diff --git a/tests/test_data_validation.py b/tests/test_data_validation.py index 1bbe85d..a169230 100644 --- a/tests/test_data_validation.py +++ b/tests/test_data_validation.py @@ -12,7 +12,7 @@ from pathlib import Path import sys sys.path.insert(0, str(Path(__file__).parent.parent)) -from scripts.validate_data import validate_file, MODEL_MAPPING +from scripts.validate_data import validate_file, validate_book_file, MODEL_MAPPING, DATA_DIR class TestDataValidation: @@ -46,3 +46,40 @@ class TestDataValidation: def test_resource_slugs_structure(self): """Test that resource_slugs.json has correct structure.""" assert validate_file("resource_slugs.json") + + +class TestBookValidation: + """Test validation of 66 book introduction files.""" + + def test_books_directory_exists(self): + """Test that the books directory exists.""" + books_dir = DATA_DIR / "books" + assert books_dir.exists(), f"Books directory not found: {books_dir}" + + def test_all_66_books_exist(self): + """Test that all 66 book files exist.""" + books_dir = DATA_DIR / "books" + book_files = list(books_dir.glob("*.json")) + assert len(book_files) == 66, f"Expected 66 book files, found {len(book_files)}" + + def test_validate_all_book_files(self): + """Test that all 66 book files validate against BookIntroduction schema.""" + books_dir = DATA_DIR / "books" + book_files = sorted(books_dir.glob("*.json")) + + failed_books = [] + for book_file in book_files: + if not validate_book_file(book_file): + failed_books.append(book_file.name) + + assert len(failed_books) == 0, f"The following books failed validation: {', '.join(failed_books)}" + + def test_sample_books(self): + """Test specific book files to ensure proper validation.""" + 
books_to_test = ["genesis.json", "john.json", "revelation.json"] + books_dir = DATA_DIR / "books" + + for book_name in books_to_test: + book_file = books_dir / book_name + assert book_file.exists(), f"{book_name} not found" + assert validate_book_file(book_file), f"{book_name} failed validation"