Random Verse
+Picking a random verse…
+diff --git a/Dockerfile b/Dockerfile index fcb22ea..2f18af8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -49,5 +49,9 @@ COPY . . # Build search index at image build time for fast searches RUN python3 -c "from kjvstudy_org.utils.search_index import init_search_index; init_search_index()" -# Run uvicorn directly (no nginx sidecar) -CMD ["sh", "-c", "uv run uvicorn kjvstudy_org.server:app --host ${HOST:-0.0.0.0} --port ${PORT:-8000} --workers ${WORKERS:-1} --proxy-headers"] +# Run with gunicorn + uvicorn workers for production resilience: +# --max-requests: recycle workers after N requests (prevents memory leaks) +# --max-requests-jitter: stagger recycling so workers don't all restart at once +# --timeout: kill workers that hang for >60s +# --graceful-timeout: give workers 10s to finish after SIGTERM +CMD ["sh", "-c", "uv run gunicorn kjvstudy_org.server:app --worker-class uvicorn.workers.UvicornWorker --bind ${HOST:-0.0.0.0}:${PORT:-8000} --workers ${WORKERS:-2} --max-requests 2000 --max-requests-jitter 500 --timeout 60 --graceful-timeout 10 --proxy-protocol --forwarded-allow-ips='*' --access-logfile -"] diff --git a/Dockerfile.static b/Dockerfile.static new file mode 100644 index 0000000..b81e306 --- /dev/null +++ b/Dockerfile.static @@ -0,0 +1,89 @@ +# ============================================================================= +# Stage 1: Builder — install deps, generate static HTML pages +# ============================================================================= +FROM python:3.13 AS builder + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy + +WORKDIR /app + +COPY pyproject.toml uv.lock ./ +RUN uv sync --frozen --no-install-project --no-dev + +COPY . . 
+ +# Build search index (needed by app startup) +RUN uv run python3 -c "from kjvstudy_org.utils.search_index import init_search_index; init_search_index()" + +# Generate static HTML pages (~50K files, no PDFs or API JSON) +RUN uv run python scripts/generate_static_site.py --output /app/dist --workers 4 + +# ============================================================================= +# Stage 2: Runtime — nginx for static files + FastAPI sidecar for dynamic routes +# ============================================================================= +FROM python:3.13-slim + +# Install nginx + runtime deps for WeasyPrint (PDF generation in sidecar) +RUN apt-get update && apt-get install -y --no-install-recommends \ + nginx \ + curl \ + libpango-1.0-0 \ + libharfbuzz0b \ + libpangoft2-1.0-0 \ + libffi8 \ + libgdk-pixbuf-2.0-0 \ + shared-mime-info \ + fonts-dejavu-core \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv + +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONPATH="/app" \ + PATH="/app/.venv/bin:$PATH" + +WORKDIR /app + +# Copy virtualenv from builder +COPY --from=builder /app/.venv /app/.venv + +# Copy application code (needed by the sidecar) +COPY --from=builder /app/kjvstudy_org /app/kjvstudy_org +COPY --from=builder /app/scripts/search_api.py /app/scripts/search_api.py + +# Copy pre-rendered static site +COPY --from=builder /app/dist /app/dist + +# Copy nginx config +COPY nginx.conf /etc/nginx/nginx.conf + +# Entrypoint: start FastAPI sidecar + nginx +COPY <<'ENTRY' /app/start.sh +#!/bin/sh +set -e + +# Start the FastAPI sidecar in the background +# It handles: search, API, PDFs, OG images, and any uncached pages +python3 /app/scripts/search_api.py & +SIDECAR_PID=$! 
+ +# Wait briefly for sidecar to be ready +sleep 1 + +# Start nginx in the foreground +exec nginx -g 'daemon off;' +ENTRY +RUN chmod +x /app/start.sh + +EXPOSE 8000 + +HEALTHCHECK --interval=15s --timeout=5s --start-period=10s \ + CMD curl -f http://localhost:8000/health || exit 1 + +CMD ["/app/start.sh"] diff --git a/fly.toml b/fly.toml index 23ce4f3..113f96c 100644 --- a/fly.toml +++ b/fly.toml @@ -10,6 +10,7 @@ primary_region = 'iad' strategy = "bluegreen" [build] +dockerfile = "Dockerfile.static" [http_service] internal_port = 8000 @@ -45,5 +46,5 @@ PYTHONDONTWRITEBYTECODE = "1" # Lazy-load interlinear data to reduce memory usage PRELOAD_INTERLINEAR = "false" -# Number of Uvicorn workers -WORKERS = "2" +# Sidecar workers (gunicorn) +SIDECAR_WORKERS = "1" diff --git a/kjvstudy_org/server.py b/kjvstudy_org/server.py index 71fb150..f4c35bb 100644 --- a/kjvstudy_org/server.py +++ b/kjvstudy_org/server.py @@ -3,6 +3,7 @@ import json import os import re import random +import time from contextlib import asynccontextmanager from datetime import datetime, timedelta from pathlib import Path as PathLib @@ -236,6 +237,70 @@ class BotLoggerMiddleware(BaseHTTPMiddleware): return response +# Rate limiting middleware — per-IP request throttle +class RateLimitMiddleware(BaseHTTPMiddleware): + """Simple in-memory per-IP rate limiter using a token bucket.""" + + def __init__(self, app, requests_per_second: float = 10.0): + super().__init__(app) + self.rate = requests_per_second + # {ip: (token_count, last_refill_time)} + self._buckets: dict[str, tuple[float, float]] = {} + self._max_tokens = requests_per_second * 5 # burst allowance + + async def dispatch(self, request: Request, call_next): + # Skip rate limiting for health checks + if request.url.path == "/health": + return await call_next(request) + + ip = request.client.host if request.client else "unknown" + now = time.monotonic() + + tokens, last = self._buckets.get(ip, (self._max_tokens, now)) + elapsed = now - last +
tokens = min(self._max_tokens, tokens + elapsed * self.rate) + + if tokens < 1.0: + return JSONResponse( + {"detail": "Too many requests"}, + status_code=429, + headers={"Retry-After": "1"}, + ) + + self._buckets[ip] = (tokens - 1.0, now) + + # Cleanup — once more than 5000 IPs are tracked, evict entries idle for over 60 seconds + if len(self._buckets) > 5000: + cutoff = now - 60 + self._buckets = { + k: (t, ts) for k, (t, ts) in self._buckets.items() if ts > cutoff + } + + return await call_next(request) + + +# Request timeout middleware — kill requests that take too long +class TimeoutMiddleware(BaseHTTPMiddleware): + """Cancel requests that exceed a time limit.""" + + def __init__(self, app, timeout_seconds: float = 30.0): + super().__init__(app) + self.timeout = timeout_seconds + + async def dispatch(self, request: Request, call_next): + import asyncio + try: + return await asyncio.wait_for( + call_next(request), + timeout=self.timeout, + ) + except asyncio.TimeoutError: + return JSONResponse( + {"detail": "Request timeout"}, + status_code=504, + ) + + # Add GZip compression middleware (compress responses > 500 bytes) app.add_middleware(GZipMiddleware, minimum_size=500) @@ -245,6 +310,12 @@ app.add_middleware(CacheControlMiddleware) # Add bot logging middleware app.add_middleware(BotLoggerMiddleware) +# Add rate limiting (10 req/s per IP, burst of 50) +app.add_middleware(RateLimitMiddleware, requests_per_second=10.0) + +# Add request timeout (30 seconds max, 60 for PDFs handled by route-level timeout) +app.add_middleware(TimeoutMiddleware, timeout_seconds=30.0) + # Set up Jinja2 templates and static files current_dir = PathLib(__file__).parent diff --git a/nginx.conf b/nginx.conf new file mode 100644 index 0000000..a10eb09 --- /dev/null +++ b/nginx.conf @@ -0,0 +1,184 @@ +worker_processes auto; +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; +
+ log_format main '$remote_addr - [$time_local] "$request" $status $body_bytes_sent "$http_user_agent"'; + access_log /var/log/nginx/access.log main; + + sendfile on; + keepalive_timeout 65; + + # Gzip + gzip on; + gzip_vary on; + gzip_proxied any; + gzip_comp_level 6; + gzip_min_length 500; + gzip_types text/plain text/css text/xml text/javascript + application/json application/javascript application/xml + application/rss+xml image/svg+xml; + + # Upstream: FastAPI sidecar for dynamic routes + upstream sidecar { + server 127.0.0.1:8001; + } + + server { + listen 8000; + server_name _; + root /app/dist; + + # Security headers + add_header X-Content-Type-Options nosniff always; + add_header X-Frame-Options SAMEORIGIN always; + + # ----------------------------------------------------------- + # Dynamic routes — proxy to FastAPI sidecar + # ----------------------------------------------------------- + + # Search (dynamic query results) + location = /search { + proxy_pass http://sidecar; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # All API endpoints + location /api/ { + proxy_pass http://sidecar; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # PDF generation (on-demand) + location ~ /pdf$ { + proxy_pass http://sidecar; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 30s; + } + + # Verse of the day redirect (needs server-side date logic) + location = /verse-of-the-day { + proxy_pass http://sidecar; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For 
$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # OG images (dynamically generated) + location /og/ { + proxy_pass http://sidecar; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Family tree search (dynamic query) + location = /family-tree/search { + proxy_pass http://sidecar; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Family tree SVG (dynamically rendered) + location = /family-tree/lineage.svg { + proxy_pass http://sidecar; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # OpenAPI docs + location /api/docs { + proxy_pass http://sidecar; + proxy_set_header Host $host; + } + location /api/redoc { + proxy_pass http://sidecar; + proxy_set_header Host $host; + } + location /api/openapi.json { + proxy_pass http://sidecar; + proxy_set_header Host $host; + } + + # ----------------------------------------------------------- + # Health check — static (no sidecar dependency) + # ----------------------------------------------------------- + location = /health { + default_type application/json; + return 200 '{"status":"healthy","service":"kjv-study"}'; + } + + # ----------------------------------------------------------- + # Static assets — aggressive caching + # ----------------------------------------------------------- + location /static/ { + expires 1y; + add_header Cache-Control "public, immutable"; + try_files $uri =404; + } + + # ----------------------------------------------------------- + # Robots / sitemaps + # ----------------------------------------------------------- + location = /robots.txt { + 
default_type text/plain; + expires 1d; + } + location ~ ^/sitemap.*\.xml$ { + default_type application/xml; + expires 1d; + } + + # Random verse list JSON + location = /random-verse-list.json { + default_type application/json; + expires 7d; + } + + # ----------------------------------------------------------- + # Default — serve pre-rendered HTML with clean URLs + # ----------------------------------------------------------- + location / { + try_files $uri $uri/index.html $uri/ @sidecar; + expires 7d; + add_header Cache-Control "public"; + } + + # Fallback: if no static file exists, proxy to sidecar + location @sidecar { + proxy_pass http://sidecar; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Custom 404 + error_page 404 /404.html; + location = /404.html { + internal; + } + } +} diff --git a/pyproject.toml b/pyproject.toml index e6e8ac3..8b66f1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ requires-python = ">=3.13" dependencies = [ "fastapi[standard]>=0.115.12", "ged4py>=0.5.2", + "gunicorn>=24.1.1", "mistune>=3.0.2", "parse>=1.20.2", "python-gedcom>=1.0.0", diff --git a/scripts/generate_static_site.py b/scripts/generate_static_site.py new file mode 100644 index 0000000..def39d5 --- /dev/null +++ b/scripts/generate_static_site.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +"""Static site generator for kjvstudy.org. + +Pre-renders high-traffic HTML pages (~1,300) using FastAPI's TestClient. +Everything else is served by the FastAPI sidecar at runtime. 
+""" + +import argparse +import json +import shutil +import sys +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path + +PROJECT_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) + + +def create_app(): + from kjvstudy_org.server import app + return app + + +def get_test_client(app): + from fastapi.testclient import TestClient + return TestClient(app, raise_server_exceptions=False) + + +def enumerate_urls(): + """Enumerate high-traffic HTML pages only (~1,300 URLs). + + Covers: homepage, book listing, 66 book pages, ~1,189 chapter pages, + and a handful of top-level resource/about pages. + """ + from kjvstudy_org.kjv import bible + + urls = [ + "/", + "/books", + "/resources", + "/about", + "/about/stats", + "/about/cross-references", + "/about/accessibility", + "/about/commentary", + "/topics", + "/reading-plans", + "/study-guides", + "/stories", + "/stories/kids", + "/strongs", + "/strongs/hebrew", + "/strongs/greek", + "/interlinear", + "/family-tree", + "/biblical-timeline", + "/biblical-maps", + "/red-letter", + "/stars", + ] + + # 66 book pages + ~1,189 chapter pages + for book in bible.get_books(): + urls.append(f"/book/{book}") + for chapter in bible.get_chapters_for_book(book): + urls.append(f"/book/{book}/chapter/{chapter}") + + return urls + + +def url_to_filepath(output_dir: Path, url: str) -> Path: + path = url.strip("/") + if path == "": + return output_dir / "index.html" + return output_dir / path / "index.html" + + +def render_url(client, output_dir: Path, url: str) -> tuple[str, bool, str]: + try: + filepath = url_to_filepath(output_dir, url) + response = client.get(url) + + if response.status_code >= 400: + return (url, False, f"HTTP {response.status_code}") + + if response.status_code in (301, 302, 307, 308): + location = response.headers.get("location", "/") + redirect_html = ( + f'
' + f'' + f'' + f'Redirecting...' + ) + filepath.parent.mkdir(parents=True, exist_ok=True) + filepath.write_text(redirect_html, encoding="utf-8") + return (url, True, "redirect") + + filepath.parent.mkdir(parents=True, exist_ok=True) + filepath.write_bytes(response.content) + return (url, True, "ok") + + except Exception as e: + return (url, False, str(e)[:200]) + + +def generate_random_verse_page(output_dir: Path): + """Generate /random-verse as a client-side JS page + verse-list JSON.""" + from kjvstudy_org.kjv import bible + + verse_urls = [] + for book in bible.get_books(): + for chapter in bible.get_chapters_for_book(book): + for v in bible.get_verses_by_book_chapter(book, chapter): + verse_urls.append(f"/book/{book}/chapter/{chapter}/verse/{v.verse}") + + json_path = output_dir / "random-verse-list.json" + json_path.write_text(json.dumps(verse_urls), encoding="utf-8") + + html = """ + + + + +Picking a random verse…
+