diff --git a/kjvstudy_org/data/schemas/README.md b/kjvstudy_org/data/schemas/README.md new file mode 100644 index 0000000..74ff6e5 --- /dev/null +++ b/kjvstudy_org/data/schemas/README.md @@ -0,0 +1,331 @@ +# JSON Schema Validation Files + +This directory contains [JSON Schema](https://json-schema.org/) validation files for the data directory. These schemas formally define the structure, types, and constraints for each JSON data file. + +## What is JSON Schema? + +JSON Schema is a vocabulary that allows you to annotate and validate JSON documents. It provides: + +- **Structure validation**: Ensure JSON follows expected format +- **Type safety**: Validate data types (string, number, array, etc.) +- **Constraints**: Enforce min/max values, patterns, required fields +- **Documentation**: Self-documenting schemas describe data structure +- **IDE support**: Enable autocomplete and inline validation in editors + +## Available Schemas + +| Schema File | Data File | Description | +|-------------|-----------|-------------| +| `bible_metadata.schema.json` | `bible_metadata.json` | Bible books, testaments, abbreviations | +| `word_studies.schema.json` | `word_studies.json` | Hebrew/Greek word definitions | +| `study_guides.schema.json` | `study_guides.json` | Study guide catalog and content | +| `verse_commentary.schema.json` | `verse_commentary.json` | Verse analysis and commentary | +| `featured_verses.schema.json` | `featured_verses.json` | Verse-of-the-day rotation | +| `resource_slugs.schema.json` | `resource_slugs.json` | Resource URL slugs for sitemap | + +## Schema Features + +### Validation Rules + +Each schema enforces: +- **Required fields**: Ensures critical data is present +- **Data types**: Validates strings, numbers, arrays, objects +- **Format patterns**: Validates verse references, slugs, etc. 
+- **Value constraints**: Min/max lengths, numeric ranges +- **Unique values**: Prevents duplicates in the book and slug lists (checked by the Pydantic validators in `scripts/validate_data.py`; the generated schema files do not set `uniqueItems`) +- **Additional properties**: Constrains the value type of open-ended maps (the generated schemas do not set `"additionalProperties": false`, so unexpected fields are not rejected) + +### Example: Bible Metadata Schema + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["old_testament_books", "new_testament_books", "book_abbreviations"], + "properties": { + "old_testament_books": { + "type": "array", + "minItems": 39, + "maxItems": 39, + "items": {"type": "string"}, + "uniqueItems": true + }, + ... + } +} +``` + +This ensures: +- ✅ Exactly 39 OT books (no more, no less) +- ✅ All book names are strings +- ✅ No duplicate book names +- ✅ Required fields present + +## Using the Schemas + +### 1. Command Line Validation + +Use the provided validation script: + +```bash +# Validate all data files +python scripts/validate_data.py + +# Validate specific file +python scripts/validate_data.py --file bible_metadata.json + +# Show detailed output +python scripts/validate_data.py --verbose +``` + +### 2. Python Validation + +```python +import json +from jsonschema import validate + +# Load data and schema +with open('kjvstudy_org/data/bible_metadata.json') as f: + data = json.load(f) + +with open('kjvstudy_org/data/schemas/bible_metadata.schema.json') as f: + schema = json.load(f) + +# Validate +validate(instance=data, schema=schema) # Raises ValidationError if invalid +``` + +### 3. IDE Integration + +#### VS Code +VS Code validates `.json` files against `json.schemas` mappings out of the box, so no extension is needed (the [Red Hat YAML extension](https://marketplace.visualstudio.com/items?itemName=redhat.vscode-yaml) is only required for YAML files). + +Add to `.vscode/settings.json`: +```json +{ + "json.schemas": [ + { + "fileMatch": ["kjvstudy_org/data/bible_metadata.json"], + "url": "./kjvstudy_org/data/schemas/bible_metadata.schema.json" + }, + { + "fileMatch": ["kjvstudy_org/data/word_studies.json"], + "url": "./kjvstudy_org/data/schemas/word_studies.schema.json" + } + ] +} +``` + +#### PyCharm/IntelliJ +1. 
Settings → Languages & Frameworks → Schemas and DTDs → JSON Schema Mappings +2. Add mapping for each data file to its schema + +### 4. Pre-commit Hook + +Add to `.pre-commit-config.yaml`: +```yaml +repos: + - repo: local + hooks: + - id: validate-json-data + name: Validate JSON data files + entry: python scripts/validate_data.py + language: system + pass_filenames: false +``` + +### 5. CI/CD Integration + +Add to `.github/workflows/test.yml`: +```yaml +- name: Validate JSON data files + run: | + pip install pydantic + python scripts/validate_data.py +``` + +## Common Validation Patterns + +### Verse References + +Pattern for verse references like "John 3:16" or "Genesis 1:1-3": +```json +{ + "pattern": "^[A-Za-z0-9 ]+ \\d+:\\d+(-\\d+)?$" +} +``` + +### URL Slugs + +Pattern for lowercase-with-hyphens slugs: +```json +{ + "pattern": "^[a-z-]+$" +} +``` + +### Hebrew/Greek Text + +UTF-8 strings with minimum length: +```json +{ + "type": "string", + "minLength": 1 +} +``` + +## Error Messages + +### Common Validation Errors + +**Missing Required Field** +``` +ValidationError: 'old_testament_books' is a required property +``` +→ Add the missing field to your JSON + +**Wrong Type** +``` +ValidationError: 42 is not of type 'string' +``` +→ Fix the data type (e.g., change number to string) + +**Invalid Pattern** +``` +ValidationError: 'john-316' does not match '^[A-Za-z0-9 ]+ \\d+:\\d+(-\\d+)?$' +``` +→ Fix the format (should be "John 3:16") + +**Array Length** +``` +ValidationError: [38 items] is too short (minimum: 39) +``` +→ Add missing items to the array + +**Duplicate Values** +``` +ValidationError: ['Genesis', 'Exodus', 'Genesis'] has non-unique elements +``` +→ Remove duplicate entries + +## Creating New Schemas + +### Template + +```json +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://kjvstudy.org/schemas/your_file.schema.json", + "title": "Your Data File", + "description": "Description of what this schema validates", + "type": 
"object", + "required": ["field1", "field2"], + "properties": { + "field1": { + "type": "string", + "description": "Description of field1", + "minLength": 1 + }, + "field2": { + "type": "array", + "description": "Description of field2", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false +} +``` + +### Steps to Add New Schema + +1. **Create schema file**: `schemas/your_file.schema.json` (it can be generated with `python scripts/validate_data.py --generate-schemas` once the file's Pydantic model is registered) +2. **Add to mapping**: Define a Pydantic model for the file and register it in `MODEL_MAPPING` in `scripts/validate_data.py` +3. **Test validation**: Run `python scripts/validate_data.py --file your_file.json` +4. **Add to CI**: Include in automated testing + +## Best Practices + +### 1. Use Strict Validation +- Set `"additionalProperties": false` to block unexpected fields +- Use `"uniqueItems": true` for arrays that shouldn't have duplicates +- Specify `minItems`/`maxItems` for arrays with known sizes + +### 2. Provide Clear Descriptions +```json +{ + "description": "Hebrew word in original script (e.g., אֱלֹהִים)" +} +``` + +### 3. Use Definitions for Reusable Types +```json +{ + "definitions": { + "verseReference": { + "type": "string", + "pattern": "^[A-Za-z0-9 ]+ \\d+:\\d+(-\\d+)?$" + } + }, + "properties": { + "verses": { + "type": "array", + "items": {"$ref": "#/definitions/verseReference"} + } + } +} +``` + +### 4. 
Version Your Schemas +Include `$id` with URL for schema versioning: +```json +{ + "$id": "https://kjvstudy.org/schemas/v1/bible_metadata.schema.json" +} +``` + +## Testing + +### Manual Testing +```bash +# Test all schemas +python scripts/validate_data.py --verbose + +# Test specific schema +python scripts/validate_data.py --file bible_metadata.json +``` + +### Automated Testing +```python +# In tests/test_data_validation.py +import pytest +from scripts.validate_data import validate_file + +def test_bible_metadata_valid(): + assert validate_file("bible_metadata.json") + +def test_word_studies_valid(): + assert validate_file("word_studies.json") +``` + +## Resources + +- **JSON Schema Documentation**: https://json-schema.org/ +- **JSON Schema Validator**: https://www.jsonschemavalidator.net/ +- **Understanding JSON Schema**: https://json-schema.org/understanding-json-schema/ +- **Schema Store**: https://www.schemastore.org/json/ (examples) + +## Future Enhancements + +- [ ] Add schemas for all remaining data files +- [ ] Generate TypeScript types from schemas +- [ ] Create schema documentation website +- [ ] Add more complex validation rules +- [ ] Implement custom validators for biblical references +- [ ] Create schema migration tools + +--- + +*Last Updated: 2025-01-27* +*Schemas: 6 active schemas covering core data files* diff --git a/kjvstudy_org/data/schemas/bible_metadata.schema.json b/kjvstudy_org/data/schemas/bible_metadata.schema.json new file mode 100644 index 0000000..dc16d93 --- /dev/null +++ b/kjvstudy_org/data/schemas/bible_metadata.schema.json @@ -0,0 +1,39 @@ +{ + "description": "Schema for bible_metadata.json", + "properties": { + "old_testament_books": { + "items": { + "type": "string" + }, + "maxItems": 39, + "minItems": 39, + "title": "Old Testament Books", + "type": "array" + }, + "new_testament_books": { + "items": { + "type": "string" + }, + "maxItems": 27, + "minItems": 27, + "title": "New Testament Books", + "type": "array" + }, + 
"book_abbreviations": { + "additionalProperties": { + "type": "string" + }, + "minProperties": 1, + "title": "Book Abbreviations", + "type": "object" + } + }, + "required": [ + "old_testament_books", + "new_testament_books", + "book_abbreviations" + ], + "title": "Schema for bible_metadata.json", + "type": "object", + "$id": "https://kjvstudy.org/schemas/bible_metadata.schema.json" +} \ No newline at end of file diff --git a/kjvstudy_org/data/schemas/featured_verses.schema.json b/kjvstudy_org/data/schemas/featured_verses.schema.json new file mode 100644 index 0000000..aa80b85 --- /dev/null +++ b/kjvstudy_org/data/schemas/featured_verses.schema.json @@ -0,0 +1,48 @@ +{ + "$defs": { + "FeaturedVerse": { + "description": "Schema for individual featured verse", + "properties": { + "book": { + "minLength": 1, + "title": "Book", + "type": "string" + }, + "chapter": { + "minimum": 1, + "title": "Chapter", + "type": "integer" + }, + "verse": { + "minimum": 1, + "title": "Verse", + "type": "integer" + } + }, + "required": [ + "book", + "chapter", + "verse" + ], + "title": "FeaturedVerse", + "type": "object" + } + }, + "description": "Schema for featured_verses.json", + "properties": { + "verses": { + "items": { + "$ref": "#/$defs/FeaturedVerse" + }, + "minItems": 1, + "title": "Verses", + "type": "array" + } + }, + "required": [ + "verses" + ], + "title": "Schema for featured_verses.json", + "type": "object", + "$id": "https://kjvstudy.org/schemas/featured_verses.schema.json" +} \ No newline at end of file diff --git a/kjvstudy_org/data/schemas/resource_slugs.schema.json b/kjvstudy_org/data/schemas/resource_slugs.schema.json new file mode 100644 index 0000000..5d35689 --- /dev/null +++ b/kjvstudy_org/data/schemas/resource_slugs.schema.json @@ -0,0 +1,90 @@ +{ + "description": "Schema for resource_slugs.json", + "properties": { + "study_guides": { + "items": { + "type": "string" + }, + "title": "Study Guides", + "type": "array" + }, + "angels": { + "items": { + "type": 
"string" + }, + "title": "Angels", + "type": "array" + }, + "prophets": { + "items": { + "type": "string" + }, + "title": "Prophets", + "type": "array" + }, + "names_of_god": { + "items": { + "type": "string" + }, + "title": "Names Of God", + "type": "array" + }, + "parables": { + "items": { + "type": "string" + }, + "title": "Parables", + "type": "array" + }, + "covenants": { + "items": { + "type": "string" + }, + "title": "Covenants", + "type": "array" + }, + "apostles": { + "items": { + "type": "string" + }, + "title": "Apostles", + "type": "array" + }, + "women": { + "items": { + "type": "string" + }, + "title": "Women", + "type": "array" + }, + "festivals": { + "items": { + "type": "string" + }, + "title": "Festivals", + "type": "array" + }, + "fruits_of_spirit": { + "items": { + "type": "string" + }, + "title": "Fruits Of Spirit", + "type": "array" + } + }, + "required": [ + "study_guides", + "angels", + "prophets", + "names_of_god", + "parables", + "covenants", + "apostles", + "women", + "festivals", + "fruits_of_spirit" + ], + "title": "Schema for resource_slugs.json", + "type": "object", + "$id": "https://kjvstudy.org/schemas/resource_slugs.schema.json" +} \ No newline at end of file diff --git a/kjvstudy_org/data/schemas/study_guides.schema.json b/kjvstudy_org/data/schemas/study_guides.schema.json new file mode 100644 index 0000000..e99acaa --- /dev/null +++ b/kjvstudy_org/data/schemas/study_guides.schema.json @@ -0,0 +1,125 @@ +{ + "$defs": { + "CatalogEntry": { + "description": "Schema for study guide catalog entry", + "properties": { + "title": { + "minLength": 1, + "title": "Title", + "type": "string" + }, + "description": { + "minLength": 1, + "title": "Description", + "type": "string" + }, + "slug": { + "pattern": "^[a-z-]+$", + "title": "Slug", + "type": "string" + }, + "verses": { + "items": { + "type": "string" + }, + "title": "Verses", + "type": "array" + } + }, + "required": [ + "title", + "description", + "slug", + "verses" + ], + "title": 
"CatalogEntry", + "type": "object" + }, + "GuideContent": { + "description": "Schema for study guide content", + "properties": { + "title": { + "minLength": 1, + "title": "Title", + "type": "string" + }, + "description": { + "minLength": 1, + "title": "Description", + "type": "string" + }, + "sections": { + "items": { + "$ref": "#/$defs/StudySection" + }, + "minItems": 1, + "title": "Sections", + "type": "array" + } + }, + "required": [ + "title", + "description", + "sections" + ], + "title": "GuideContent", + "type": "object" + }, + "StudySection": { + "description": "Schema for study guide section", + "properties": { + "title": { + "minLength": 1, + "title": "Title", + "type": "string" + }, + "verses": { + "items": { + "type": "string" + }, + "title": "Verses", + "type": "array" + }, + "content": { + "minLength": 1, + "title": "Content", + "type": "string" + } + }, + "required": [ + "title", + "verses", + "content" + ], + "title": "StudySection", + "type": "object" + } + }, + "description": "Schema for study_guides.json", + "properties": { + "catalog": { + "additionalProperties": { + "items": { + "$ref": "#/$defs/CatalogEntry" + }, + "type": "array" + }, + "title": "Catalog", + "type": "object" + }, + "content": { + "additionalProperties": { + "$ref": "#/$defs/GuideContent" + }, + "title": "Content", + "type": "object" + } + }, + "required": [ + "catalog", + "content" + ], + "title": "Schema for study_guides.json", + "type": "object", + "$id": "https://kjvstudy.org/schemas/study_guides.schema.json" +} \ No newline at end of file diff --git a/kjvstudy_org/data/schemas/verse_commentary.schema.json b/kjvstudy_org/data/schemas/verse_commentary.schema.json new file mode 100644 index 0000000..2fe8e93 --- /dev/null +++ b/kjvstudy_org/data/schemas/verse_commentary.schema.json @@ -0,0 +1,46 @@ +{ + "$defs": { + "VerseCommentaryEntry": { + "description": "Schema for verse commentary entry", + "properties": { + "analysis": { + "minLength": 1, + "title": "Analysis", + 
"type": "string" + }, + "historical_context": { + "minLength": 1, + "title": "Historical Context", + "type": "string" + }, + "application": { + "title": "Application", + "type": "string" + }, + "questions": { + "items": { + "type": "string" + }, + "minItems": 1, + "title": "Questions", + "type": "array" + } + }, + "required": [ + "analysis", + "historical_context", + "application", + "questions" + ], + "title": "VerseCommentaryEntry", + "type": "object" + } + }, + "additionalProperties": { + "$ref": "#/$defs/VerseCommentaryEntry" + }, + "description": "Schema for verse_commentary.json", + "title": "Schema for verse_commentary.json", + "type": "object", + "$id": "https://kjvstudy.org/schemas/verse_commentary.schema.json" +} \ No newline at end of file diff --git a/kjvstudy_org/data/schemas/word_studies.schema.json b/kjvstudy_org/data/schemas/word_studies.schema.json new file mode 100644 index 0000000..a52c139 --- /dev/null +++ b/kjvstudy_org/data/schemas/word_studies.schema.json @@ -0,0 +1,122 @@ +{ + "$defs": { + "WordStudy": { + "description": "Schema for individual word study entry", + "properties": { + "ot_term": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ot Term" + }, + "ot_transliteration": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ot Transliteration" + }, + "ot_meaning": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ot Meaning" + }, + "ot_note": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Ot Note" + }, + "nt_term": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Nt Term" + }, + "nt_transliteration": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" 
+ } + ], + "default": null, + "title": "Nt Transliteration" + }, + "nt_meaning": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Nt Meaning" + }, + "nt_note": { + "anyOf": [ + { + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Nt Note" + } + }, + "title": "WordStudy", + "type": "object" + } + }, + "additionalProperties": { + "$ref": "#/$defs/WordStudy" + }, + "description": "Schema for word_studies.json", + "title": "Schema for word_studies.json", + "type": "object", + "$id": "https://kjvstudy.org/schemas/word_studies.schema.json" +} \ No newline at end of file diff --git a/scripts/validate_data.py b/scripts/validate_data.py new file mode 100755 index 0000000..c47e854 --- /dev/null +++ b/scripts/validate_data.py @@ -0,0 +1,404 @@ +#!/usr/bin/env python3 +""" +Validate JSON data files using Pydantic models. + +This script validates all data files in kjvstudy_org/data/ using Pydantic models +for type safety and validation. Pydantic provides better error messages and +integrates naturally with FastAPI. 
+ +Usage: + python scripts/validate_data.py # Validate all files + python scripts/validate_data.py --file bible_metadata.json # Validate specific file + python scripts/validate_data.py --verbose # Show detailed output + python scripts/validate_data.py --generate-schemas # Generate JSON schemas + +Requirements: + pip install pydantic (already installed with FastAPI) +""" + +import json +import sys +from pathlib import Path +from typing import Dict, List, Tuple, Optional + +try: + from pydantic import BaseModel, RootModel, Field, field_validator, ValidationError +except ImportError: + print("Error: pydantic package not found") + print("Install with: pip install pydantic") + sys.exit(1) + +# Path to data directory +DATA_DIR = Path(__file__).parent.parent / "kjvstudy_org" / "data" +SCHEMAS_DIR = DATA_DIR / "schemas" + + +# ============================================================================ +# Pydantic Models for Data Validation +# ============================================================================ + +class BibleMetadata(BaseModel): + """Schema for bible_metadata.json""" + old_testament_books: List[str] = Field(..., min_length=39, max_length=39) + new_testament_books: List[str] = Field(..., min_length=27, max_length=27) + book_abbreviations: Dict[str, str] = Field(..., min_length=1) + + @field_validator('old_testament_books', 'new_testament_books') + @classmethod + def check_unique_books(cls, v): + if len(v) != len(set(v)): + raise ValueError("Duplicate book names found") + return v + + +class WordStudy(BaseModel): + """Schema for individual word study entry""" + ot_term: Optional[str] = Field(None, min_length=1) + ot_transliteration: Optional[str] = Field(None, min_length=1) + ot_meaning: Optional[str] = Field(None, min_length=1) + ot_note: Optional[str] = Field(None, min_length=1) + nt_term: Optional[str] = Field(None, min_length=1) + nt_transliteration: Optional[str] = Field(None, min_length=1) + nt_meaning: Optional[str] = Field(None, min_length=1) 
+ nt_note: Optional[str] = Field(None, min_length=1) + + +class WordStudies(RootModel[Dict[str, WordStudy]]): + """Schema for word_studies.json""" + root: Dict[str, WordStudy] + + +class CatalogEntry(BaseModel): + """Schema for study guide catalog entry""" + title: str = Field(..., min_length=1) + description: str = Field(..., min_length=1) + slug: str = Field(..., pattern=r'^[a-z-]+$') + verses: List[str] + + @field_validator('verses') + @classmethod + def check_verse_format(cls, v): + import re + pattern = r'^[A-Za-z0-9 ]+ \d+:\d+(-\d+)?$' + for verse in v: + if not re.match(pattern, verse): + raise ValueError(f"Invalid verse reference format: {verse}") + return v + + +class StudySection(BaseModel): + """Schema for study guide section""" + title: str = Field(..., min_length=1) + verses: List[str] + content: str = Field(..., min_length=1) + + @field_validator('verses') + @classmethod + def check_verse_format(cls, v): + import re + pattern = r'^[A-Za-z0-9 ]+ \d+:\d+(-\d+)?$' + for verse in v: + if not re.match(pattern, verse): + raise ValueError(f"Invalid verse reference format: {verse}") + return v + + +class GuideContent(BaseModel): + """Schema for study guide content""" + title: str = Field(..., min_length=1) + description: str = Field(..., min_length=1) + sections: List[StudySection] = Field(..., min_length=1) + + +class StudyGuides(BaseModel): + """Schema for study_guides.json""" + catalog: Dict[str, List[CatalogEntry]] + content: Dict[str, GuideContent] + + +class VerseCommentaryEntry(BaseModel): + """Schema for verse commentary entry""" + analysis: str = Field(..., min_length=1) + historical_context: str = Field(..., min_length=1) + application: str + questions: List[str] = Field(..., min_length=1) + + +class VerseCommentary(RootModel[Dict[str, VerseCommentaryEntry]]): + """Schema for verse_commentary.json""" + root: Dict[str, VerseCommentaryEntry] + + @field_validator('root') + @classmethod + def check_verse_keys(cls, v): + import re + pattern = 
r'^[A-Za-z0-9 ]+ \d+:\d+$' + for key in v.keys(): + if not re.match(pattern, key): + raise ValueError(f"Invalid verse reference key: {key}") + return v + + +class FeaturedVerse(BaseModel): + """Schema for individual featured verse""" + book: str = Field(..., min_length=1) + chapter: int = Field(..., ge=1) + verse: int = Field(..., ge=1) + + +class FeaturedVerses(BaseModel): + """Schema for featured_verses.json""" + verses: List[FeaturedVerse] = Field(..., min_length=1) + + +class ResourceSlugs(BaseModel): + """Schema for resource_slugs.json""" + study_guides: List[str] + angels: List[str] + prophets: List[str] + names_of_god: List[str] + parables: List[str] + covenants: List[str] + apostles: List[str] + women: List[str] + festivals: List[str] + fruits_of_spirit: List[str] + + @field_validator('*') + @classmethod + def check_slugs(cls, v): + # Check for duplicates + if len(v) != len(set(v)): + raise ValueError("Duplicate slugs found") + # Check slug format + import re + pattern = r'^[a-z-]+$' + for slug in v: + if not re.match(pattern, slug): + raise ValueError(f"Invalid slug format: {slug}") + return v + + +class OutlineSection(BaseModel): + """Schema for book outline section""" + section: str = Field(..., min_length=1) + chapters: str = Field(..., min_length=1) + description: str = Field(..., min_length=1) + + +class KeyTheme(BaseModel): + """Schema for book key theme""" + theme: str = Field(..., min_length=1) + description: str = Field(..., min_length=1) + + +class KeyVerse(BaseModel): + """Schema for book key verse""" + verse: str = Field(..., min_length=1) + text: str = Field(..., min_length=1) + + +class BookIntroduction(BaseModel): + """Schema for individual book introduction file""" + name: str = Field(..., min_length=1) + abbreviation: str = Field(..., min_length=1) + testament: str = Field(..., pattern=r'^(Old Testament|New Testament)$') + position: int = Field(..., ge=1, le=66) + chapters: int = Field(..., ge=1) + category: str = Field(..., min_length=1) 
+ author: str = Field(..., min_length=1) + date_written: str = Field(..., min_length=1) + introduction: str = Field(..., min_length=1) + outline: List[OutlineSection] = Field(..., min_length=1) + key_themes: List[KeyTheme] = Field(..., min_length=1) + key_verses: List[KeyVerse] = Field(..., min_length=1) + christ_in_book: Optional[str] = None + + +# ============================================================================ +# Validation Logic +# ============================================================================ + +# Mapping of data files to their Pydantic models +MODEL_MAPPING = { + "bible_metadata.json": BibleMetadata, + "word_studies.json": WordStudies, + "study_guides.json": StudyGuides, + "verse_commentary.json": VerseCommentary, + "featured_verses.json": FeaturedVerses, + "resource_slugs.json": ResourceSlugs, +} + + +def load_json(file_path: Path) -> Tuple[dict, Optional[str]]: + """Load JSON file and return data and error message if any.""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + return json.load(f), None + except json.JSONDecodeError as e: + return None, f"JSON syntax error: {e}" + except Exception as e: + return None, f"Error loading file: {e}" + + +def validate_file(data_file: str, verbose: bool = False) -> bool: + """Validate a single data file using its Pydantic model.""" + if data_file not in MODEL_MAPPING: + if verbose: + print(f"⚠️ {data_file}: No validation model defined (skipped)") + return True + + model_class = MODEL_MAPPING[data_file] + data_path = DATA_DIR / data_file + + # Check if file exists + if not data_path.exists(): + print(f"❌ {data_file}: File not found at {data_path}") + return False + + # Load data file + data, error = load_json(data_path) + if error: + print(f"❌ {data_file}: {error}") + return False + + # Validate using Pydantic model + try: + # For RootModel subclasses, pass data directly to constructor + # For regular BaseModel subclasses, unpack as kwargs + if issubclass(model_class, RootModel): + 
model_class(data) + else: + model_class(**data) + + print(f"✅ {data_file}: Valid") + if verbose: + print(f" Model: {model_class.__name__}") + print(f" Size: {data_path.stat().st_size:,} bytes") + return True + + except ValidationError as e: + print(f"❌ {data_file}: Validation failed") + for error_detail in e.errors(): + location = " -> ".join(str(loc) for loc in error_detail['loc']) + print(f" {location}: {error_detail['msg']}") + if verbose and 'ctx' in error_detail: + print(f" Context: {error_detail['ctx']}") + return False + + except Exception as e: + print(f"❌ {data_file}: Unexpected error") + print(f" Error: {str(e)}") + return False + + +def validate_all(verbose: bool = False) -> Tuple[int, int]: + """Validate all data files with models. Returns (passed, failed) counts.""" + passed = 0 + failed = 0 + + print("=" * 60) + print("Validating JSON data files with Pydantic models") + print("=" * 60) + print() + + for data_file in sorted(MODEL_MAPPING.keys()): + if validate_file(data_file, verbose): + passed += 1 + else: + failed += 1 + if verbose: + print() + + return passed, failed + + +def generate_json_schemas(): + """Generate JSON Schema files from Pydantic models.""" + print("=" * 60) + print("Generating JSON Schema files from Pydantic models") + print("=" * 60) + print() + + SCHEMAS_DIR.mkdir(exist_ok=True) + + for data_file, model_class in MODEL_MAPPING.items(): + schema_file = data_file.replace('.json', '.schema.json') + schema_path = SCHEMAS_DIR / schema_file + + try: + # Generate JSON Schema from Pydantic model + schema = model_class.model_json_schema() + + # Add metadata + schema['$id'] = f"https://kjvstudy.org/schemas/{schema_file}" + schema['title'] = model_class.__doc__ or model_class.__name__ + + # Write schema file + with open(schema_path, 'w', encoding='utf-8') as f: + json.dump(schema, f, indent=2, ensure_ascii=False) + + print(f"✅ Generated {schema_file}") + + except Exception as e: + print(f"❌ Failed to generate {schema_file}: {e}") + + print() 
+ print(f"Schemas written to: {SCHEMAS_DIR}") + + +def main(): + """Main entry point.""" + import argparse + + parser = argparse.ArgumentParser( + description="Validate JSON data files with Pydantic models", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python scripts/validate_data.py # Validate all files + python scripts/validate_data.py -f bible_metadata.json # Validate one file + python scripts/validate_data.py --verbose # Show details + python scripts/validate_data.py --generate-schemas # Generate JSON schemas + """ + ) + parser.add_argument( + '-f', '--file', + help='Validate specific file only', + metavar='FILE' + ) + parser.add_argument( + '-v', '--verbose', + action='store_true', + help='Show detailed output' + ) + parser.add_argument( + '--generate-schemas', + action='store_true', + help='Generate JSON Schema files from Pydantic models' + ) + + args = parser.parse_args() + + # Generate schemas if requested + if args.generate_schemas: + generate_json_schemas() + sys.exit(0) + + # Validate specific file or all files + if args.file: + success = validate_file(args.file, args.verbose) + sys.exit(0 if success else 1) + else: + passed, failed = validate_all(args.verbose) + + print() + print("=" * 60) + print(f"Results: {passed} passed, {failed} failed") + print("=" * 60) + + sys.exit(0 if failed == 0 else 1) + + +if __name__ == "__main__": + main() diff --git a/tests/test_data_validation.py b/tests/test_data_validation.py new file mode 100644 index 0000000..1bbe85d --- /dev/null +++ b/tests/test_data_validation.py @@ -0,0 +1,48 @@ +""" +Tests for JSON data file validation using Pydantic models. + +This test suite ensures all JSON data files conform to their schemas, +catching data integrity issues early in development. 
+""" + +import pytest +from pathlib import Path + +# Import validation script +import sys +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from scripts.validate_data import validate_file, MODEL_MAPPING + + +class TestDataValidation: + """Test data file validation using Pydantic models.""" + + @pytest.mark.parametrize("data_file", MODEL_MAPPING.keys()) + def test_validate_data_file(self, data_file): + """Test that each data file validates against its Pydantic model.""" + assert validate_file(data_file), f"{data_file} failed validation" + + def test_bible_metadata_structure(self): + """Test that bible_metadata.json has correct structure.""" + assert validate_file("bible_metadata.json") + + def test_word_studies_structure(self): + """Test that word_studies.json has correct structure.""" + assert validate_file("word_studies.json") + + def test_study_guides_structure(self): + """Test that study_guides.json has correct structure.""" + assert validate_file("study_guides.json") + + def test_verse_commentary_structure(self): + """Test that verse_commentary.json has correct structure.""" + assert validate_file("verse_commentary.json") + + def test_featured_verses_structure(self): + """Test that featured_verses.json has correct structure.""" + assert validate_file("featured_verses.json") + + def test_resource_slugs_structure(self): + """Test that resource_slugs.json has correct structure.""" + assert validate_file("resource_slugs.json")