Files
kennethreitz fd970be0ba Upgrade to Responder 3.12; thumbnails, smart 404s, and a faster site
- Responder 3.10-3.12: auto ETags, request size limits, timeouts,
  Prometheus metrics at /metrics; essay release count is now seven
- Gallery thumbnails via /thumb route (24MB pages drop to ~400KB)
- Dark-mode flash eliminated; theme applies before first paint
- 404 pages suggest the closest-matching content
- Long-lived caching for static assets; lazy-loaded content images
- Skip link and keyboard-accessible nav dropdowns
- Per-essay OG image in Article structured data
- Homepage: trim the quest log clause

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 00:40:27 -04:00

227 lines
8.4 KiB
Python

"""Markdown processing core module."""
import re
from html import escape as html_escape
from urllib.parse import quote as url_quote
import mistune
import yaml
from ..utils.svg_icons import generate_unique_svg_icon
from ..utils.content import (
calculate_reading_time,
extract_tags_from_content,
find_series_posts,
)
def _process_oembed(html):
"""Replace bare URLs in paragraphs with client-side oEmbed placeholders."""
def _replace_link(match):
url = match.group(1)
# SoundCloud: render inline, no discovery needed.
if re.match(r"https?://(?:www\.)?soundcloud\.com/.+", url):
encoded = url_quote(url, safe="")
return (
f'<iframe width="100%" height="20" scrolling="no" frameborder="no"'
f' src="https://w.soundcloud.com/player/?url={encoded}'
f'&color=%23333333&auto_play=false&hide_related=true'
f'&show_comments=false&show_user=false&show_reposts=false'
f'&show_teaser=false&show_artwork=false&visual=false"></iframe>'
)
# Everything else: render a placeholder for client-side oEmbed.
safe_url = html_escape(url, quote=True)
return (
f'<div class="oembed-placeholder" data-oembed-url="{safe_url}">'
f'<a href="{safe_url}" target="_blank" rel="noopener">{safe_url}</a>'
f'</div>'
)
# Match any <p> containing only a single bare <a> link (any https URL).
return re.sub(
r'<p><a href="(https?://[^"]+)">[^<]+</a></p>',
_replace_link,
html,
)
class TufteMarkdownRenderer:
    """Markdown-to-HTML renderer for Tufte-style pages, backed by Mistune."""

    def __init__(self):
        """Build the Mistune markdown callable and store it on ``self.markdown``."""
        enabled_plugins = [
            "strikethrough",
            "footnotes",
            "table",
            "task_lists",
            "def_list",
            "url",
        ]
        # escape=False is passed so raw HTML in the source is not escaped.
        self.markdown = mistune.create_markdown(
            escape=False, plugins=enabled_plugins
        )

    def render(self, content):
        """Convert markdown text to HTML.

        The stripped input is rendered, link-only paragraphs are swapped for
        embed markup, and every ``<img`` tag that has no ``loading=``
        attribute gains ``loading="lazy" decoding="async"``.
        """
        rendered = _process_oembed(self.markdown(content.strip()))
        # Native lazy loading for content images; tags that already declare
        # a loading attribute are skipped by the negative lookahead.
        return re.sub(
            r"<img(?![^>]*\bloading=)",
            '<img loading="lazy" decoding="async"',
            rendered,
        )
def _split_yaml_front_matter(content):
    """Split a leading ``---`` delimited YAML block from *content*.

    Returns:
        A ``(metadata, body)`` tuple. ``metadata`` is always a dict. When the
        front matter cannot be parsed, the text is returned untouched with
        empty metadata.
    """
    match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
    if not match:
        return {}, content
    try:
        parsed = yaml.safe_load(match.group(1))
    except Exception:
        # Deliberate best effort: unparsable front matter is left in the body
        # rather than failing the page. ``Exception`` (not a bare ``except``)
        # so KeyboardInterrupt/SystemExit still propagate; wider than
        # yaml.YAMLError because constructors can raise e.g. ValueError.
        return {}, content
    body = content[match.end():]
    # Front matter that parses to a scalar or list cannot serve as metadata.
    if not isinstance(parsed, dict):
        return {}, body
    return parsed, body


def _normalize_italic_date(date_text):
    """Return the display form of an italic date line, or ``None`` to skip it.

    * A date starting with "January" that contains "2025" is treated as a
      placeholder and yields ``None``.
    * "January YYYY" for any other year is shortened to "YYYY".
    * Everything else (e.g. "August 2025", "2019") is returned unchanged.
    """
    lowered = date_text.lower()
    if lowered.startswith("january") and "2025" in date_text:
        return None
    january_match = re.match(r"^january\s+(\d{4})$", lowered)
    if january_match:
        return january_match.group(1)
    return date_text


def render_markdown_file(file_path):
    """Render a markdown file to HTML with metadata extraction.

    Args:
        file_path: Path to the markdown file (``pathlib.Path`` or ``str``).

    Returns:
        A dict with the keys ``content``, ``title``, ``metadata``,
        ``reading_time``, ``word_count``, ``tags``, ``series_posts``,
        ``series_name`` and ``unique_icon``. If anything raises, a reduced
        dict with only ``content`` (an error paragraph), ``title`` ("Error")
        and ``metadata`` (empty) is returned instead; no exception escapes.
    """
    # Function-scope import: pathlib is not among the module-level imports.
    from pathlib import Path

    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Accept str paths as well; .stem and .name are needed below.
        path = Path(file_path)

        # Extract YAML front matter if it exists
        metadata, content = _split_yaml_front_matter(content)

        # Use the first line that starts with "# " as the H1 and drop it from
        # the body to avoid rendering the title twice.
        # NOTE(review): this also matches a "# ..." line inside a fenced code
        # block when no real H1 precedes it; confirm whether that matters.
        first_h1 = None
        h1_match = re.search(r"^# (.+?)$", content, re.MULTILINE)
        if h1_match:
            first_h1 = h1_match.group(1).strip()
            content = content[: h1_match.start()] + content[h1_match.end():]

        # Extract date from italic date pattern (e.g., "*August 2025*").
        # Only month/year or bare-year lines match, not quotes or long text.
        date_match = re.search(
            r"^\*([A-Za-z]+ \d{4}|\d{4})\*\s*$", content, re.MULTILINE
        )
        if date_match and not metadata.get("date"):
            display_date = _normalize_italic_date(date_match.group(1).strip())
            if display_date is not None:
                metadata["date"] = display_date
            # Remove the date line from content.
            # NOTE(review): nesting reconstructed from a flattened listing;
            # confirm the removal belongs inside this branch.
            content = content[: date_match.start()] + content[date_match.end():]

        # Create renderer and process content
        renderer = TufteMarkdownRenderer()
        html_content = renderer.render(content)

        # Add anchor IDs to headings using post-processing on HTML
        html_content = add_heading_anchor_ids(html_content)

        # Poetry (detected from the file path): turn single line breaks
        # inside paragraphs into <br> tags.
        if file_path and "poetry" in str(file_path):
            html_content = re.sub(
                r"<p>(.*?)</p>",
                lambda m: "<p>" + m.group(1).replace("\n", "<br>\n") + "</p>",
                html_content,
                flags=re.DOTALL,
            )

        # Add a class to content headings to prevent conflicts with page
        # headers. A literal "<h2>" replacement never matches headings that
        # have already received an id attribute above, so match any h1-h6
        # opening tag that does not yet declare a class.
        html_content = re.sub(
            r"<(h[1-6])(?=[\s>])(?![^>]*\bclass\s*=)",
            r'<\1 class="content-header"',
            html_content,
        )

        # Use the first h1 as title if available, otherwise fall back to
        # metadata or the filename
        if first_h1:
            title = first_h1
        elif "title" in metadata:
            title = metadata["title"]
        else:
            title = path.stem.replace("-", " ").replace("_", " ").title()

        # Calculate reading time
        reading_time, word_count = calculate_reading_time(html_content)

        # Extract tags
        tags = extract_tags_from_content(html_content, metadata, file_path)

        # Find series posts if this post is part of a series
        series_posts = find_series_posts(metadata, file_path)

        # Generate unique icon for this content; index.md files (directory
        # pages) get a folder icon instead.
        if path.name == "index.md":
            from ..utils.content import generate_folder_icon

            unique_icon = generate_folder_icon(title, size=32)
        else:
            unique_icon = generate_unique_svg_icon(title, size=32)

        return {
            "content": html_content,
            "title": title,
            "metadata": metadata,
            "reading_time": reading_time,
            "word_count": word_count,
            "tags": tags,
            "series_posts": series_posts,
            "series_name": metadata.get("series"),
            "unique_icon": unique_icon,
        }
    except Exception as e:
        # Top-level boundary: report the failure as page content. The message
        # is escaped because it is interpolated into HTML.
        return {
            "content": f"<p>Error reading file: {html_escape(str(e), quote=False)}</p>",
            "title": "Error",
            "metadata": {},
        }
def add_heading_anchor_ids(html_content):
    """Add anchor IDs to headings for navigation.

    Each ``<h1>``-``<h6>`` element gets an ``id`` derived from its text:
    inline tags are stripped, the text is lowercased, characters other than
    word characters, whitespace and dashes are dropped, and runs of
    whitespace/dashes collapse to a single dash. Existing attributes are
    kept after the new ``id``.

    Headings are left unchanged when they already have an ``id`` attribute
    or when no usable ID can be derived (e.g. punctuation-only text).

    Args:
        html_content: HTML string to process.

    Returns:
        The HTML with ``id`` attributes added to headings.
    """

    def replace_heading(match):
        tag = match.group(1)  # h1, h2, etc.
        attrs = match.group(2) or ""  # existing attributes (e.g. class), if any
        inner = match.group(3)

        # Never emit a second id attribute.
        if re.search(r"(?<![\w-])id\s*=", attrs, re.IGNORECASE):
            return match.group(0)

        # Generate anchor ID from heading text (remove HTML tags first)
        clean_text = re.sub(r"<[^>]+>", "", inner)
        anchor_id = re.sub(r"[^\w\s-]", "", clean_text.lower())
        anchor_id = re.sub(r"[\s-]+", "-", anchor_id).strip("-")

        # An empty id attribute is invalid HTML; skip such headings.
        if not anchor_id:
            return match.group(0)

        return f'<{tag} id="{anchor_id}"{attrs}>{inner}</{tag}>'

    # Match h1-h6 elements, including ones whose content has inline markup.
    # The backreference makes the closing tag match the opening tag.
    return re.sub(
        r"<(h[1-6])(\s+[^>]*)?>(.*?)</\1>",
        replace_heading,
        html_content,
        flags=re.DOTALL,
    )