Add static site generation with nginx + hardened FastAPI sidecar

Pre-render ~1,277 high-traffic HTML pages (homepage, books, chapters)
at build time and serve them directly via nginx. All other routes
(verses, search, API, PDFs, Strong's) fall through to a FastAPI
sidecar. If the sidecar crashes, nginx continues serving static
pages and health checks.

Also harden the FastAPI app against the memory/crash issues:
- Switch from bare uvicorn to gunicorn with uvicorn workers
- Add --max-requests worker recycling to prevent memory leaks
- Add --timeout to kill hung workers
- Add per-IP rate limiting middleware (10 req/s, burst of 50)
- Add request timeout middleware (30s max per request)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-31 21:59:34 -05:00
parent c0825705d2
commit 2ab3dfa142
9 changed files with 662 additions and 4 deletions
+6 -2
View File
@@ -49,5 +49,9 @@ COPY . .
# Build search index at image build time for fast searches
RUN python3 -c "from kjvstudy_org.utils.search_index import init_search_index; init_search_index()"
# Run uvicorn directly (no nginx sidecar)
CMD ["sh", "-c", "uv run uvicorn kjvstudy_org.server:app --host ${HOST:-0.0.0.0} --port ${PORT:-8000} --workers ${WORKERS:-1} --proxy-headers"]
# Run with gunicorn + uvicorn workers for production resilience:
#   --max-requests: recycle workers after N requests (prevents memory leaks)
#   --max-requests-jitter: stagger recycling so workers don't all restart at once
#   --timeout: kill workers that hang for >60s
#   --graceful-timeout: give workers 10s to finish after SIGTERM
#   --forwarded-allow-ips: which peers may supply X-Forwarded-* headers
# Note: gunicorn's --proxy-protocol is intentionally NOT passed. It enables
# PROXY-protocol detection on the listening socket and is not the counterpart
# of uvicorn's --proxy-headers flag used by the previous command.
# `exec` replaces the wrapper shell so signals are delivered to the server
# command instead of stopping at `sh`.
CMD ["sh", "-c", "exec uv run gunicorn kjvstudy_org.server:app --worker-class uvicorn.workers.UvicornWorker --bind ${HOST:-0.0.0.0}:${PORT:-8000} --workers ${WORKERS:-2} --max-requests 2000 --max-requests-jitter 500 --timeout 60 --graceful-timeout 10 --forwarded-allow-ips='*' --access-logfile -"]
+89
View File
@@ -0,0 +1,89 @@
# =============================================================================
# Stage 1: Builder — install deps, generate static HTML pages
# =============================================================================
FROM python:3.13 AS builder

COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy

WORKDIR /app

# Install dependencies first so this layer is reused when only source changes
COPY pyproject.toml uv.lock ./
RUN uv sync --frozen --no-install-project --no-dev

COPY . .

# Build search index (needed by app startup)
RUN uv run python3 -c "from kjvstudy_org.utils.search_index import init_search_index; init_search_index()"

# Generate static HTML pages (~1,300 pages: homepage, books, chapters and a few
# top-level pages; no verse pages, PDFs or API JSON)
RUN uv run python scripts/generate_static_site.py --output /app/dist --workers 4

# =============================================================================
# Stage 2: Runtime — nginx for static files + FastAPI sidecar for dynamic routes
# =============================================================================
FROM python:3.13-slim

# Install nginx + runtime deps for WeasyPrint (PDF generation in sidecar)
RUN apt-get update && apt-get install -y --no-install-recommends \
    nginx \
    curl \
    libpango-1.0-0 \
    libharfbuzz0b \
    libpangoft2-1.0-0 \
    libffi8 \
    libgdk-pixbuf-2.0-0 \
    shared-mime-info \
    fonts-dejavu-core \
    && rm -rf /var/lib/apt/lists/*

COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH="/app" \
    PATH="/app/.venv/bin:$PATH"

WORKDIR /app

# Copy virtualenv from builder
COPY --from=builder /app/.venv /app/.venv

# Copy application code (needed by the sidecar)
COPY --from=builder /app/kjvstudy_org /app/kjvstudy_org
COPY --from=builder /app/scripts/search_api.py /app/scripts/search_api.py

# Copy pre-rendered static site
COPY --from=builder /app/dist /app/dist

# Copy nginx config
COPY nginx.conf /etc/nginx/nginx.conf

# Entrypoint: supervise the FastAPI sidecar + run nginx
COPY <<'ENTRY' /app/start.sh
#!/bin/sh
set -e

# Run the FastAPI sidecar in the background and restart it whenever it exits.
# It handles: search, API, PDFs, OG images, and any uncached pages.
# /health is answered by nginx itself, so without this loop a sidecar that
# exited would leave every dynamic route failing while the container still
# reports healthy.
(
    while true; do
        python3 /app/scripts/search_api.py || true
        echo "sidecar exited; restarting in 2s" >&2
        sleep 2
    done
) &

# Wait briefly for sidecar to be ready
sleep 1

# Start nginx in the foreground
exec nginx -g 'daemon off;'
ENTRY
RUN chmod +x /app/start.sh

EXPOSE 8000

HEALTHCHECK --interval=15s --timeout=5s --start-period=10s \
    CMD curl -f http://localhost:8000/health || exit 1

CMD ["/app/start.sh"]
+3 -2
View File
@@ -10,6 +10,7 @@ primary_region = 'iad'
strategy = "bluegreen"
[build]
dockerfile = "Dockerfile.static"
[http_service]
internal_port = 8000
@@ -45,5 +46,5 @@ PYTHONDONTWRITEBYTECODE = "1"
# Lazy-load interlinear data to reduce memory usage
PRELOAD_INTERLINEAR = "false"
# Number of Uvicorn workers
WORKERS = "2"
# Sidecar workers (gunicorn)
SIDECAR_WORKERS = "1"
+71
View File
@@ -3,6 +3,7 @@ import json
import os
import re
import random
import time
from contextlib import asynccontextmanager
from datetime import datetime, timedelta
from pathlib import Path as PathLib
@@ -236,6 +237,70 @@ class BotLoggerMiddleware(BaseHTTPMiddleware):
return response
# Rate limiting middleware — per-IP request throttle
class RateLimitMiddleware(BaseHTTPMiddleware):
    """In-memory per-IP rate limiter implemented as a token bucket.

    Each client IP owns a bucket holding at most ``requests_per_second * 5``
    tokens (the burst allowance). Tokens refill continuously at
    ``requests_per_second`` and every request that is let through spends one.
    A request that finds less than one token in its bucket is answered with
    HTTP 429 and ``Retry-After: 1`` without being passed downstream.

    Requests for ``/health`` are never limited. The buckets live in a plain
    dict on this instance, so they are not shared between processes.

    Args:
        app: The ASGI application being wrapped.
        requests_per_second: Sustained per-IP rate, i.e. the refill rate.
    """

    # Idle buckets are only evicted once more than this many IPs are tracked.
    _MAX_TRACKED_IPS = 5000
    # A sweep drops buckets not touched for this many seconds. A bucket refills
    # completely in 5 seconds (capacity is 5x the rate), so a bucket this old
    # is equivalent to a brand-new one and dropping it loses nothing.
    _STALE_AFTER_SECONDS = 60.0
    # Lower bound on the time between sweeps. Without it, a table that stays
    # above the size threshold would be rebuilt in full on every request.
    _MIN_SWEEP_INTERVAL_SECONDS = 1.0

    def __init__(self, app, requests_per_second: float = 10.0):
        super().__init__(app)
        self.rate = requests_per_second
        # {ip: (token_count, last_refill_time)}
        self._buckets: dict[str, tuple[float, float]] = {}
        self._max_tokens = requests_per_second * 5  # burst allowance
        self._last_sweep = time.monotonic()

    async def dispatch(self, request: Request, call_next):
        """Spend one token for the client's IP or reject the request with 429."""
        # Skip rate limiting for health checks
        if request.url.path == "/health":
            return await call_next(request)

        # Requests without client info all share the "unknown" bucket.
        ip = request.client.host if request.client else "unknown"
        now = time.monotonic()

        # An unseen IP starts with a full bucket.
        tokens, last = self._buckets.get(ip, (self._max_tokens, now))
        tokens = min(self._max_tokens, tokens + (now - last) * self.rate)

        if tokens < 1.0:
            # The stored bucket is left untouched; the next request recomputes
            # the refill from the same timestamp.
            return JSONResponse(
                {"detail": "Too many requests"},
                status_code=429,
                headers={"Retry-After": "1"},
            )

        self._buckets[ip] = (tokens - 1.0, now)
        self._evict_stale(now)
        return await call_next(request)

    def _evict_stale(self, now: float) -> None:
        """Drop idle buckets when the table is oversized, at most once per interval."""
        if len(self._buckets) <= self._MAX_TRACKED_IPS:
            return
        if now - self._last_sweep < self._MIN_SWEEP_INTERVAL_SECONDS:
            return
        self._last_sweep = now
        cutoff = now - self._STALE_AFTER_SECONDS
        self._buckets = {
            ip: bucket for ip, bucket in self._buckets.items() if bucket[1] > cutoff
        }
# Request timeout middleware — kill requests that take too long
class TimeoutMiddleware(BaseHTTPMiddleware):
    """Answer with HTTP 504 when downstream handling exceeds a time budget.

    Args:
        app: The ASGI application being wrapped.
        timeout_seconds: Longest time, in seconds, to wait for ``call_next``.
    """

    def __init__(self, app, timeout_seconds: float = 30.0):
        super().__init__(app)
        self.timeout = timeout_seconds

    async def dispatch(self, request: Request, call_next):
        """Await the downstream response, substituting a 504 JSON body on timeout."""
        import asyncio

        downstream = call_next(request)
        try:
            # wait_for cancels the awaited call once the budget is used up.
            response = await asyncio.wait_for(downstream, timeout=self.timeout)
        except asyncio.TimeoutError:
            response = JSONResponse(
                {"detail": "Request timeout"},
                status_code=504,
            )
        return response
# Add GZip compression middleware (compress responses > 500 bytes)
app.add_middleware(GZipMiddleware, minimum_size=500)
@@ -245,6 +310,12 @@ app.add_middleware(CacheControlMiddleware)
# Add bot logging middleware
app.add_middleware(BotLoggerMiddleware)
# Add rate limiting (10 req/s per IP, burst of 50)
app.add_middleware(RateLimitMiddleware, requests_per_second=10.0)
# Add request timeout (30 seconds max, 60 for PDFs handled by route-level timeout)
app.add_middleware(TimeoutMiddleware, timeout_seconds=30.0)
# Set up Jinja2 templates and static files
current_dir = PathLib(__file__).parent
+184
View File
@@ -0,0 +1,184 @@
worker_processes auto;
# Log to the process's stderr/stdout rather than to files inside the container,
# where nothing rotates them and the platform's log collection cannot see them.
error_log stderr warn;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    log_format main '$remote_addr - [$time_local] "$request" $status $body_bytes_sent "$http_user_agent"';
    access_log /dev/stdout main;

    sendfile on;
    keepalive_timeout 65;

    # Gzip
    gzip on;
    gzip_vary on;
    gzip_proxied any;
    gzip_comp_level 6;
    gzip_min_length 500;
    gzip_types text/plain text/css text/xml text/javascript
               application/json application/javascript application/xml
               application/rss+xml image/svg+xml;

    # Upstream: FastAPI sidecar for dynamic routes
    upstream sidecar {
        server 127.0.0.1:8001;
    }

    server {
        listen 8000;
        server_name _;
        root /app/dist;

        # Security headers.
        # add_header is inherited by a location only when that location has no
        # add_header of its own, so locations that set Cache-Control below
        # repeat these two lines.
        add_header X-Content-Type-Options nosniff always;
        add_header X-Frame-Options SAMEORIGIN always;

        # -----------------------------------------------------------
        # Dynamic routes — proxy to FastAPI sidecar
        # -----------------------------------------------------------

        # Search (dynamic query results)
        location = /search {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # All API endpoints
        location /api/ {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # PDF generation (on-demand)
        location ~ /pdf$ {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
            proxy_read_timeout 30s;
        }

        # Verse of the day redirect (needs server-side date logic)
        location = /verse-of-the-day {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # OG images (dynamically generated)
        location /og/ {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # Family tree search (dynamic query)
        location = /family-tree/search {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # Family tree SVG (dynamically rendered)
        location = /family-tree/lineage.svg {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # OpenAPI docs.
        # These prefixes are longer than /api/ and therefore win over it, so
        # they carry the same forwarded headers as the other proxied locations.
        location /api/docs {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        location /api/redoc {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        location /api/openapi.json {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # -----------------------------------------------------------
        # Health check — static (no sidecar dependency)
        # -----------------------------------------------------------
        location = /health {
            default_type application/json;
            return 200 '{"status":"healthy","service":"kjv-study"}';
        }

        # -----------------------------------------------------------
        # Static assets — aggressive caching
        # -----------------------------------------------------------
        location /static/ {
            expires 1y;
            add_header Cache-Control "public, immutable";
            # Repeated: the add_header above cancels server-level inheritance.
            add_header X-Content-Type-Options nosniff always;
            add_header X-Frame-Options SAMEORIGIN always;
            try_files $uri =404;
        }

        # -----------------------------------------------------------
        # Robots / sitemaps
        # The generator only writes these files when the app answers 200 at
        # build time, so fall back to the sidecar when one is missing.
        # -----------------------------------------------------------
        location = /robots.txt {
            default_type text/plain;
            expires 1d;
            try_files $uri @sidecar;
        }

        location ~ ^/sitemap.*\.xml$ {
            default_type application/xml;
            expires 1d;
            try_files $uri @sidecar;
        }

        # Random verse list JSON
        location = /random-verse-list.json {
            default_type application/json;
            expires 7d;
        }

        # -----------------------------------------------------------
        # Default — serve pre-rendered HTML with clean URLs
        # -----------------------------------------------------------
        location / {
            try_files $uri $uri/index.html $uri/ @sidecar;
            expires 7d;
            add_header Cache-Control "public";
            # Repeated: the add_header above cancels server-level inheritance.
            add_header X-Content-Type-Options nosniff always;
            add_header X-Frame-Options SAMEORIGIN always;
        }

        # Fallback: if no static file exists, proxy to sidecar
        location @sidecar {
            proxy_pass http://sidecar;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;
        }

        # Custom 404
        error_page 404 /404.html;
        location = /404.html {
            internal;
        }
    }
}
+1
View File
@@ -7,6 +7,7 @@ requires-python = ">=3.13"
dependencies = [
"fastapi[standard]>=0.115.12",
"ged4py>=0.5.2",
"gunicorn>=24.1.1",
"mistune>=3.0.2",
"parse>=1.20.2",
"python-gedcom>=1.0.0",
+251
View File
@@ -0,0 +1,251 @@
#!/usr/bin/env python3
"""Static site generator for kjvstudy.org.
Pre-renders high-traffic HTML pages (~1,300) using FastAPI's TestClient.
Everything else is served by the FastAPI sidecar at runtime.
"""
import argparse
import json
import shutil
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
def create_app():
    """Return the FastAPI application object defined in ``kjvstudy_org.server``.

    The server module is imported inside the function, i.e. only when the
    function is actually called.
    """
    import kjvstudy_org.server as server_module

    return server_module.app
def get_test_client(app):
    """Wrap *app* in a FastAPI ``TestClient`` for in-process requests.

    The client is created with ``raise_server_exceptions=False``.
    """
    from fastapi import testclient

    client = testclient.TestClient(app, raise_server_exceptions=False)
    return client
def enumerate_urls():
    """Return the URL paths to pre-render, as a list of strings.

    The list starts with 22 fixed top-level pages (homepage, book listing,
    resource/about pages, ...). It is followed, for every book reported by
    ``bible.get_books()``, by the book page and then one page per chapter
    reported by ``bible.get_chapters_for_book(book)`` (~1,300 URLs in total).
    """
    from kjvstudy_org.kjv import bible

    top_level_pages = [
        "/",
        "/books",
        "/resources",
        "/about",
        "/about/stats",
        "/about/cross-references",
        "/about/accessibility",
        "/about/commentary",
        "/topics",
        "/reading-plans",
        "/study-guides",
        "/stories",
        "/stories/kids",
        "/strongs",
        "/strongs/hebrew",
        "/strongs/greek",
        "/interlinear",
        "/family-tree",
        "/biblical-timeline",
        "/biblical-maps",
        "/red-letter",
        "/stars",
    ]

    # 66 book pages + ~1,189 chapter pages, each book followed by its chapters
    scripture_pages = []
    for book in bible.get_books():
        scripture_pages.append(f"/book/{book}")
        scripture_pages.extend(
            f"/book/{book}/chapter/{chapter}"
            for chapter in bible.get_chapters_for_book(book)
        )

    return top_level_pages + scripture_pages
def url_to_filepath(output_dir: Path, url: str) -> Path:
    """Map a URL path to the ``index.html`` file that should hold its content.

    Leading and trailing slashes are removed from *url*. The site root maps to
    ``<output_dir>/index.html``; any other path maps to
    ``<output_dir>/<path>/index.html``.
    """
    relative = url.strip("/")
    if relative:
        return output_dir.joinpath(relative, "index.html")
    return output_dir.joinpath("index.html")
def render_url(client, output_dir: Path, url: str) -> tuple[str, bool, str]:
    """Fetch *url* through *client* and write the result below *output_dir*.

    Args:
        client: Object with a ``get(url)`` method returning a response that
            exposes ``status_code``, ``headers`` and ``content``.
        output_dir: Root directory of the generated site.
        url: URL path to render.

    Returns:
        A ``(url, success, message)`` tuple. ``message`` is ``"ok"`` for a
        rendered page, ``"redirect"`` when a redirect stub was written,
        ``"HTTP <code>"`` for a status of 400 or above, or the first 200
        characters of the exception text if anything raised.
    """
    import html

    try:
        filepath = url_to_filepath(output_dir, url)
        response = client.get(url)
        status = response.status_code

        if status >= 400:
            return (url, False, f"HTTP {status}")

        filepath.parent.mkdir(parents=True, exist_ok=True)

        # NOTE(review): Starlette's TestClient follows redirects by default, so
        # this branch is presumably only reached with a client configured not
        # to follow them — confirm.
        if status in (301, 302, 307, 308):
            # The header value ends up inside HTML attributes: escape it so
            # that '&' or '"' in the target cannot break out of the markup.
            location = html.escape(response.headers.get("location", "/"), quote=True)
            redirect_html = (
                f'<!DOCTYPE html><html><head>'
                f'<meta http-equiv="refresh" content="0;url={location}">'
                f'<link rel="canonical" href="{location}">'
                f'</head><body><a href="{location}">Redirecting...</a></body></html>'
            )
            filepath.write_text(redirect_html, encoding="utf-8")
            return (url, True, "redirect")

        filepath.write_bytes(response.content)
        return (url, True, "ok")
    except Exception as e:
        # Best effort: one failing page is reported to the caller instead of
        # aborting the whole run.
        return (url, False, str(e)[:200])
def generate_random_verse_page(output_dir: Path):
    """Write the client-side ``/random-verse`` page and the verse list it reads.

    Two files are created below *output_dir*:

    * ``random-verse-list.json`` — a JSON array with one verse URL per verse
      returned by ``bible.get_verses_by_book_chapter`` for every book/chapter.
    * ``random-verse/index.html`` — a page whose script fetches that JSON and
      redirects to a random entry, or to John 3:16 if the fetch fails.
    """
    from kjvstudy_org.kjv import bible

    verse_urls = [
        f"/book/{book}/chapter/{chapter}/verse/{v.verse}"
        for book in bible.get_books()
        for chapter in bible.get_chapters_for_book(book)
        for v in bible.get_verses_by_book_chapter(book, chapter)
    ]

    (output_dir / "random-verse-list.json").write_text(
        json.dumps(verse_urls), encoding="utf-8"
    )

    html = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>Random Verse - KJV Study</title>
<link rel="stylesheet" href="/static/tufte.css"/>
<link rel="stylesheet" href="/static/style.css"/>
</head>
<body>
<article>
<h1>Random Verse</h1>
<p style="font-style:italic;color:#666">Picking a random verse&hellip;</p>
</article>
<script>
fetch('/random-verse-list.json')
.then(function(r){return r.json()})
.then(function(vs){window.location.replace(vs[Math.floor(Math.random()*vs.length)])})
.catch(function(){window.location.replace('/book/John/chapter/3/verse/16')});
</script>
</body>
</html>"""

    page_dir = output_dir / "random-verse"
    page_dir.mkdir(parents=True, exist_ok=True)
    (page_dir / "index.html").write_text(html, encoding="utf-8")

    print(f" Random verse page + {len(verse_urls)} verse list written")
def generate_utility_files(client, output_dir: Path):
    """Fetch robots.txt and the sitemap files from the app and store them.

    Each file is requested through *client* and, on HTTP 200, written
    byte-for-byte into *output_dir* under the same name. Any other status is
    reported on stdout and the file is skipped, so a missing file is visible
    in the build log instead of only showing up later as a 404.

    Args:
        client: Object with a ``get(url)`` method returning a response that
            exposes ``status_code`` and ``content``.
        output_dir: Existing directory that receives the files.
    """
    utility_files = (
        ("/robots.txt", "robots.txt"),
        ("/sitemap.xml", "sitemap.xml"),
        ("/sitemap-main.xml", "sitemap-main.xml"),
        ("/sitemap-verses.xml", "sitemap-verses.xml"),
    )
    for url, filename in utility_files:
        resp = client.get(url)
        if resp.status_code != 200:
            print(f" WARNING: skipped {filename} (HTTP {resp.status_code})")
            continue
        (output_dir / filename).write_bytes(resp.content)
        print(f" {filename}")
def copy_static_assets(output_dir: Path):
    """Copy the package's ``static`` directory to ``<output_dir>/static``.

    An existing destination is deleted first. Entries named
    ``search_index.db`` or ``scofield_commentary.json`` are left out of the
    copy, at any depth.
    """
    excluded = {"search_index.db", "scofield_commentary.json"}

    def _skip_excluded(_directory, names):
        # copytree ignore callback: return the entries of this directory to omit.
        return [name for name in names if name in excluded]

    source = PROJECT_ROOT / "kjvstudy_org" / "static"
    target = output_dir / "static"
    if target.exists():
        shutil.rmtree(target)
    shutil.copytree(source, target, ignore=_skip_excluded)
    print(f" Static assets copied to {target}")
def main():
    """Command-line entry point: render the static site into the output directory.

    Options:
        --output/-o: Output directory (default ``dist``). It is deleted and
            recreated before rendering.
        --workers/-w: Number of threads rendering pages in parallel (default 4).
        --dry-run: Only enumerate and count the URLs; nothing is written.

    Steps: create the app and a test client, enumerate URLs, copy static
    assets, write utility files and the random-verse page, then render every
    URL on a thread pool while printing progress and a final summary with up
    to 20 errors.

    NOTE(review): pages that fail to render are only reported; they do not
    change the exit status of the script.
    """
    parser = argparse.ArgumentParser(description="Generate static HTML site for kjvstudy.org")
    parser.add_argument("--output", "-o", default="dist", help="Output directory")
    parser.add_argument("--workers", "-w", type=int, default=4, help="Parallel workers")
    parser.add_argument("--dry-run", action="store_true", help="Enumerate URLs without rendering")
    args = parser.parse_args()

    output_dir = Path(args.output).resolve()

    print("Static site generator for kjvstudy.org")
    print(f"Output: {output_dir}")
    print(f"Workers: {args.workers}")
    print()

    print("Initializing FastAPI app...")
    app = create_app()

    # The context manager guarantees the client is closed on every exit path,
    # including the early return of a dry run and any exception.
    with get_test_client(app) as client:
        print("Enumerating URLs...")
        all_urls = enumerate_urls()
        total = len(all_urls)
        print(f" Total: {total} HTML pages")

        if args.dry_run:
            print("\nDry run — not rendering.")
            return

        # Start from an empty output directory.
        if output_dir.exists():
            shutil.rmtree(output_dir)
        output_dir.mkdir(parents=True)

        print("\nCopying static assets...")
        copy_static_assets(output_dir)

        print("\nGenerating utility files...")
        generate_utility_files(client, output_dir)

        print("\nGenerating random verse page...")
        generate_random_verse_page(output_dir)

        print(f"\nRendering {total} HTML pages...")
        start = time.time()
        ok = 0
        fail = 0
        errors = []

        with ThreadPoolExecutor(max_workers=args.workers) as pool:
            futures = [pool.submit(render_url, client, output_dir, u) for u in all_urls]
            for done, future in enumerate(as_completed(futures), start=1):
                url, success, msg = future.result()
                if success:
                    ok += 1
                else:
                    fail += 1
                    errors.append((url, msg))

                # Progress line every 200 pages and once more at the very end.
                if done % 200 == 0 or done == total:
                    elapsed = time.time() - start
                    rate = done / elapsed if elapsed > 0 else 0
                    print(f" [{done}/{total}] {rate:.0f}/sec errors={fail}")

        elapsed = time.time() - start
        print(f"\nDone in {elapsed:.1f}s")
        print(f" Success: {ok}")
        print(f" Errors: {fail}")

        if errors:
            print("\nErrors:")
            for url, msg in errors[:20]:
                print(f" {url}: {msg}")
if __name__ == "__main__":
main()
+43
View File
@@ -0,0 +1,43 @@
#!/usr/bin/env python3
"""FastAPI sidecar for kjvstudy.org static site deployment.
Handles routes that can't be pre-rendered as static HTML:
- /search and /api/search (dynamic query)
- /api/* (JSON API endpoints)
- /*/pdf (on-demand PDF generation)
- /random-verse (server-side redirect fallback)
- /verse-of-the-day (redirect to today's date)
- /og/* (dynamic OG images)
Runs on port 8001, proxied by nginx.
Uses gunicorn with uvicorn workers for resilience.
"""
import os
import subprocess
import sys
from pathlib import Path
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
from kjvstudy_org.server import app # noqa: E402, F401
if __name__ == "__main__":
    # SIDECAR_PORT / SIDECAR_WORKERS override the listening port and the
    # number of gunicorn workers.
    port = int(os.getenv("SIDECAR_PORT", "8001"))
    workers = int(os.getenv("SIDECAR_WORKERS", "1"))

    gunicorn_argv = [
        sys.executable, "-m", "gunicorn",
        "kjvstudy_org.server:app",
        "--worker-class", "uvicorn.workers.UvicornWorker",
        "--bind", f"0.0.0.0:{port}",
        "--workers", str(workers),
        "--max-requests", "1000",
        "--max-requests-jitter", "200",
        "--timeout", "60",
        "--graceful-timeout", "10",
        "--forwarded-allow-ips", "*",
        "--log-level", "warning",
        "--access-logfile", "-",
    ]

    # Replace this launcher with gunicorn instead of running it as a child.
    # The launcher has already imported the application above; exec releases
    # that copy rather than keeping it resident next to the workers. It also
    # makes gunicorn own this PID, so signals sent to the launcher reach
    # gunicorn and gunicorn's exit status becomes the script's exit status.
    sys.stdout.flush()
    sys.stderr.flush()
    os.execv(sys.executable, gunicorn_argv)
Generated
+14
View File
@@ -387,6 +387,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/57/bb/60f6f5162110ae5364af560c04e5aabdde8e5b8a43872cac3437333a5500/ged4py-0.5.2-py3-none-any.whl", hash = "sha256:33d24a4b06431c2b8931b430071e39421e7788b41b720b66dc2f35b8d825f521", size = 29920, upload-time = "2025-03-19T23:16:19.505Z" },
]
[[package]]
name = "gunicorn"
version = "24.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "packaging" },
]
sdist = { url = "https://files.pythonhosted.org/packages/78/0a/10739c03537ec5b131a867bf94df2e412b437696c7e5d26970e2198a80d2/gunicorn-24.1.1.tar.gz", hash = "sha256:f006d110e5cb3102859b4f5cd48335dbd9cc28d0d27cd24ddbdafa6c60929408", size = 287567, upload-time = "2026-01-24T01:15:31.359Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/96/90/cfe637677916fc6f53cd2b05d5746e249f683e1fa14c9e745a88c66f7290/gunicorn-24.1.1-py3-none-any.whl", hash = "sha256:757f6b621fc4f7581a90600b2cd9df593461f06a41d7259cb9b94499dc4095a8", size = 114920, upload-time = "2026-01-24T01:15:29.656Z" },
]
[[package]]
name = "h11"
version = "0.16.0"
@@ -476,6 +488,7 @@ source = { editable = "." }
dependencies = [
{ name = "fastapi", extra = ["standard"] },
{ name = "ged4py" },
{ name = "gunicorn" },
{ name = "mistune" },
{ name = "parse" },
{ name = "python-gedcom" },
@@ -501,6 +514,7 @@ dev = [
requires-dist = [
{ name = "fastapi", extras = ["standard"], specifier = ">=0.115.12" },
{ name = "ged4py", specifier = ">=0.5.2" },
{ name = "gunicorn", specifier = ">=24.1.1" },
{ name = "mistune", specifier = ">=3.0.2" },
{ name = "parse", specifier = ">=1.20.2" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.5" },