Files
kennethreitz.org/tuftecms/core/cache.py
T
2026-04-06 17:23:07 -04:00

627 lines
23 KiB
Python

"""Cache management for TufteCMS."""
import re
from functools import lru_cache
from pathlib import Path
# Root of the site's content tree. The path is relative, so it is resolved
# against the process's current working directory.
DATA_DIR = Path("data")
# Simple in-memory cache: maps a cache name (e.g. "blog_posts", "sidenotes")
# to the result dict built by the matching get_*_cache() function below.
_cache_store = {}
def clear_cache():
    """Reset every cache held by this module.

    Empties the shared ``_cache_store`` dict first, then invalidates the
    ``lru_cache`` wrapper around each ``get_*_cache`` builder so that the next
    call to any of them rebuilds its data from disk.
    """
    _cache_store.clear()
    cached_builders = (
        get_blog_cache,
        get_sidenotes_cache,
        get_outlines_cache,
        get_quotes_cache,
        get_connections_cache,
        get_terms_cache,
        get_themes_cache,
    )
    for cached_builder in cached_builders:
        cached_builder.cache_clear()
@lru_cache(maxsize=1)
def get_blog_cache():
    """Build (or fetch) the index of essay posts.

    Every ``*.md`` file directly under ``DATA_DIR / "essays"`` except
    ``index.md`` is rendered, dated and summarised. Essays for which no date
    can be determined are left out. A file that fails to process is reported
    on stdout and skipped, so one bad essay cannot break the whole index.

    The result is memoised twice: by ``lru_cache`` and under
    ``_cache_store["blog_posts"]``; ``clear_cache()`` resets both.

    Returns:
        dict: ``{"posts": [...], "stats": {"total_posts": int}}`` where
        ``posts`` is sorted newest first and each post is a dict with the keys
        ``title``, ``path``, ``url``, ``file_path``, ``pub_date``,
        ``date_str``, ``excerpt``, ``description``, ``content``,
        ``word_count``, ``category`` and ``unique_icon``.
    """
    if "blog_posts" in _cache_store:
        return _cache_store["blog_posts"]

    blog_posts = []
    essays_dir = DATA_DIR / "essays"
    if essays_dir.exists():
        # glob() yields files in arbitrary order; sorting first makes the
        # relative order of same-day posts reproducible (the date sort below
        # is stable).
        for file_path in sorted(essays_dir.glob("*.md")):
            if file_path.name == "index.md":
                continue
            try:
                # Import here to avoid circular imports
                from ..core.markdown import render_markdown_file
                from ..utils.content import extract_intelligent_date

                # Render the markdown file to get metadata
                content_data = render_markdown_file(file_path)
                date_obj = extract_intelligent_date(file_path, content_data)
                if not date_obj:
                    # Undated essays are not listed.
                    continue

                # Decode explicitly as UTF-8 instead of relying on the
                # platform's default encoding.
                raw_content = file_path.read_text(encoding="utf-8")
                excerpt = extract_excerpt(raw_content)
                # Plain text (formatting removed) for the search index
                clean_content = clean_content_for_search(raw_content)
                essay_url = f"/essays/{file_path.stem}"

                blog_posts.append(
                    {
                        "title": content_data["title"],
                        "path": essay_url,
                        "url": essay_url,
                        "file_path": str(file_path),
                        "pub_date": date_obj,
                        "date_str": date_obj.strftime("%Y-%m-%d"),
                        "excerpt": excerpt,
                        "description": excerpt,
                        "content": clean_content,
                        "word_count": content_data.get("word_count", 0),
                        "category": "essays",
                        "unique_icon": content_data.get("unique_icon"),
                    }
                )
            except Exception as e:
                # Best effort: report the failure and carry on with the rest.
                print(f"Error processing {file_path}: {e}")
                continue

    # Sort by date (newest first)
    blog_posts.sort(key=lambda post: post["pub_date"], reverse=True)

    result = {"posts": blog_posts, "stats": {"total_posts": len(blog_posts)}}
    _cache_store["blog_posts"] = result
    return result
def clean_content_for_search(content):
    """Reduce raw Markdown to plain single-line text for search indexing.

    Formatting is removed while the readable text is kept: front matter, the
    title line, ``*Month YYYY*`` date lines, HTML tags and images are dropped;
    code fences, inline code and links are unwrapped to their text; the
    emphasis/heading characters ``*``, ``_`` and ``#`` are deleted; finally
    all whitespace runs (including newlines) collapse to single spaces.

    Args:
        content: Raw Markdown source of a document.

    Returns:
        str: The cleaned text, stripped, containing no newlines.
    """
    # Remove front matter (only when it opens the document)
    content = re.sub(r"^---\s*\n.*?\n---\s*\n", "", content, flags=re.DOTALL)
    # Remove the title: only the FIRST "# " line. Later "# " lines (extra
    # headings, "# comment" lines inside code samples) stay searchable.
    content = re.sub(r"^# .+?$", "", content, count=1, flags=re.MULTILINE)
    # Remove date lines such as "*January 2024*"
    content = re.sub(r"^\*[A-Za-z]+ \d{4}\*\s*$", "", content, flags=re.MULTILINE)
    # Remove ALL HTML tags (including sidenotes) but keep the text content
    content = re.sub(r"<[^>]+>", "", content)
    # Unwrap fenced code blocks but keep their content. The info string after
    # the opening fence may be any text without backticks ("c++", "python3",
    # "Python"), not just lowercase letters.
    content = re.sub(r"```[^\n`]*\n(.*?)\n```", r"\1", content, flags=re.DOTALL)
    # Remove inline code markers but keep content
    content = re.sub(r"`([^`]+)`", r"\1", content)
    # Remove images (must run before links, which share the bracket syntax)
    content = re.sub(r"!\[[^\]]*\]\([^)]*\)", "", content)
    # Remove links but keep their text
    content = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", content)
    # Remove markdown formatting chars but keep text
    content = re.sub(r"[*_#]", "", content)
    # Collapse every whitespace run, newlines included, to one space
    content = re.sub(r"\s+", " ", content)
    return content.strip()
def extract_excerpt(content, max_words=50):
    """Return a short plain-text excerpt of a Markdown document.

    The document is cleaned with ``clean_content_for_search`` and the first
    line longer than 20 characters is cut to at most ``max_words`` words.
    Because the cleaner collapses all whitespace to single spaces, that
    "line" is currently the whole cleaned document.

    Args:
        content: Raw Markdown source.
        max_words: Maximum number of words to keep.

    Returns:
        str: The excerpt, with ``"..."`` appended only when words were
        actually cut off, or ``""`` if there is no line longer than 20
        characters.
    """
    # Use the same cleaning function for consistency
    clean_content = clean_content_for_search(content)

    for line in clean_content.split("\n"):
        line = line.strip()
        if len(line) <= 20:  # Skip empty and very short lines
            continue
        words = line.split()
        excerpt = " ".join(words[:max_words])
        # Mark truncation only when something was really dropped; a text of
        # exactly max_words words is complete and gets no ellipsis.
        if len(words) > max_words:
            excerpt += "..."
        return excerpt
    return ""
@lru_cache(maxsize=1)
def get_sidenotes_cache():
    """Build (or fetch) the sidenotes found in every rendered essay.

    Each ``*.md`` file directly under ``DATA_DIR / "essays"`` except
    ``index.md`` is rendered to HTML and scanned for a margin-toggle
    ``<input>`` followed by a ``<span class="sidenote">``. A file that fails
    to process is reported on stdout and skipped.

    The result is memoised by ``lru_cache`` and under
    ``_cache_store["sidenotes"]``.

    Returns:
        dict: ``{"sidenotes": {file path: [entry, ...]}, "stats":
        {"total_sidenotes": int, "total_articles": int}}``. Each entry has
        the keys ``text`` (tags removed), ``html``, ``id``, ``title`` and
        ``url``. ``total_articles`` counts files with at least one
        non-empty sidenote.
    """
    try:
        return _cache_store["sidenotes"]
    except KeyError:
        pass

    # Pattern matches the full sidenote structure: input + span
    sidenote_pattern = r'<input[^>]*id="([^"]*)"[^>]*class="margin-toggle"[^>]*/>.*?<span class="sidenote">(.*?)</span>'
    notes_by_file = {}
    note_count = 0
    article_count = 0

    essays_dir = DATA_DIR / "essays"
    markdown_files = essays_dir.glob("*.md") if essays_dir.exists() else ()
    for file_path in markdown_files:
        if file_path.name == "index.md":
            continue
        try:
            # Import here to avoid circular imports
            from ..core.markdown import render_markdown_file

            rendered = render_markdown_file(file_path)
            matches = re.findall(sidenote_pattern, rendered["content"], re.DOTALL)
            if not matches:
                continue

            # The file gets an entry as soon as the pattern matched, even if
            # every sidenote later turns out to be empty.
            entries = []
            notes_by_file[str(file_path)] = entries
            for note_id, note_html in matches:
                note_text = re.sub(r"<[^>]+>", "", note_html).strip()
                if not note_text:
                    continue
                entries.append(
                    {
                        "text": note_text,
                        "html": note_html.strip(),
                        "id": note_id,
                        "title": rendered["title"],
                        "url": f"/essays/{file_path.stem}",
                    }
                )
                note_count += 1
            if entries:
                article_count += 1
        except Exception as e:
            print(f"Error processing sidenotes in {file_path}: {e}")

    result = {
        "sidenotes": notes_by_file,
        "stats": {"total_sidenotes": note_count, "total_articles": article_count},
    }
    _cache_store["sidenotes"] = result
    return result
@lru_cache(maxsize=1)
def get_outlines_cache():
    """Build (or fetch) the heading outline of every rendered essay.

    Each ``*.md`` file directly under ``DATA_DIR / "essays"`` except
    ``index.md`` is rendered to HTML and scanned for ``<h1>``..``<h6>``
    elements. Headings whose text is empty or starts with ``"fn:"`` are
    ignored. A file that fails to process is reported on stdout and skipped.

    The result is memoised by ``lru_cache`` and under
    ``_cache_store["outlines"]``.

    Returns:
        dict: ``{"outlines": {file path: [entry, ...]}, "stats":
        {"total_headings": int, "total_articles": int}}``. Each entry has
        the keys ``level`` (int 1-6), ``text`` (tags removed), ``html``,
        ``title`` and ``url``.
    """
    try:
        return _cache_store["outlines"]
    except KeyError:
        pass

    # Whole heading element plus its level digit
    heading_pattern = r"(<h([1-6])[^>]*>.*?</h[1-6]>)"
    # Just the inner content of one heading element
    inner_pattern = r"<h[1-6][^>]*>(.*?)</h[1-6]>"
    outlines_by_file = {}
    heading_count = 0
    article_count = 0

    essays_dir = DATA_DIR / "essays"
    markdown_files = essays_dir.glob("*.md") if essays_dir.exists() else ()
    for file_path in markdown_files:
        if file_path.name == "index.md":
            continue
        try:
            # Import here to avoid circular imports
            from ..core.markdown import render_markdown_file

            rendered = render_markdown_file(file_path)
            found = re.findall(heading_pattern, rendered["content"])
            if not found:
                continue

            # The file gets an entry as soon as any heading matched, even if
            # all of them are filtered out below.
            entries = []
            outlines_by_file[str(file_path)] = entries
            for full_tag, level in found:
                inner_match = re.search(inner_pattern, full_tag)
                if not inner_match:
                    continue
                heading_text = re.sub(r"<[^>]+>", "", inner_match.group(1)).strip()
                if not heading_text or heading_text.startswith("fn:"):
                    continue
                entries.append(
                    {
                        "level": int(level),
                        "text": heading_text,
                        "html": full_tag.strip(),
                        "title": rendered["title"],
                        "url": f"/essays/{file_path.stem}",
                    }
                )
                heading_count += 1
            if entries:
                article_count += 1
        except Exception as e:
            print(f"Error processing outlines in {file_path}: {e}")

    result = {
        "outlines": outlines_by_file,
        "stats": {"total_headings": heading_count, "total_articles": article_count},
    }
    _cache_store["outlines"] = result
    return result
@lru_cache(maxsize=1)
def get_quotes_cache():
    """Build (or fetch) the block quotes found in every rendered essay.

    Each ``*.md`` file directly under ``DATA_DIR / "essays"`` except
    ``index.md`` is rendered to HTML and scanned for ``<blockquote>``
    elements. A file that fails to process is reported on stdout and skipped.

    The result is memoised by ``lru_cache`` and under
    ``_cache_store["quotes"]``.

    Returns:
        dict: ``{"quotes": {file path: [entry, ...]}, "stats":
        {"total_quotes": int, "total_articles": int}}``. Each entry has the
        keys ``text`` (tags removed), ``html``, ``title`` and ``url``.
    """
    try:
        return _cache_store["quotes"]
    except KeyError:
        pass

    quote_pattern = r"<blockquote[^>]*>(.*?)</blockquote>"
    quotes_by_file = {}
    quote_count = 0
    article_count = 0

    essays_dir = DATA_DIR / "essays"
    markdown_files = essays_dir.glob("*.md") if essays_dir.exists() else ()
    for file_path in markdown_files:
        if file_path.name == "index.md":
            continue
        try:
            # Import here to avoid circular imports
            from ..core.markdown import render_markdown_file

            rendered = render_markdown_file(file_path)
            found = re.findall(quote_pattern, rendered["content"], re.DOTALL)
            if not found:
                continue

            # The file gets an entry as soon as any blockquote matched, even
            # if all of them are empty once tags are removed.
            entries = []
            quotes_by_file[str(file_path)] = entries
            for quote_html in found:
                quote_text = re.sub(r"<[^>]+>", "", quote_html).strip()
                if not quote_text:
                    continue
                entries.append(
                    {
                        "text": quote_text,
                        "html": quote_html.strip(),
                        "title": rendered["title"],
                        "url": f"/essays/{file_path.stem}",
                    }
                )
                quote_count += 1
            if entries:
                article_count += 1
        except Exception as e:
            print(f"Error processing quotes in {file_path}: {e}")

    result = {
        "quotes": quotes_by_file,
        "stats": {"total_quotes": quote_count, "total_articles": article_count},
    }
    _cache_store["quotes"] = result
    return result
@lru_cache(maxsize=1)
def get_connections_cache():
    """Build (or fetch) the graph of internal links between essays.

    The raw Markdown of each ``*.md`` file directly under
    ``DATA_DIR / "essays"`` except ``index.md`` is scanned for inline links
    whose target starts with a single ``/``. Image embeds (``![alt](/path)``)
    and protocol-relative targets (``//host/...``) are not cross-references
    and are ignored. An unreadable file is reported on stdout and skipped.

    The result is memoised by ``lru_cache`` and under
    ``_cache_store["connections"]``.

    Returns:
        dict: with the keys

        * ``outgoing``: ``{source file path: [{"text", "url",
          "target_file"}, ...]}``; only files with at least one internal
          link appear.
        * ``incoming``: ``{target url: [{"text", "source_file", "context",
          "source_url"}, ...]}``.
        * ``stats``: ``total_outgoing``, ``total_incoming``,
          ``total_connections`` (their sum) and ``total_articles`` (number
          of files in ``outgoing``). Every link is recorded once in each
          direction, so the two totals are always equal.
    """
    if "connections" in _cache_store:
        return _cache_store["connections"]

    # [text](/target) -- the (?<!!) lookbehind skips image embeds.
    link_pattern = re.compile(r"(?<!!)\[([^\]]+)\]\((/[^)]+)\)")
    connections_outgoing = {}
    connections_incoming = {}
    total_links = 0

    essays_dir = DATA_DIR / "essays"
    if essays_dir.exists():
        # Sorted so the order of entries in the result is reproducible.
        for file_path in sorted(essays_dir.glob("*.md")):
            if file_path.name == "index.md":
                continue
            try:
                # Decode explicitly as UTF-8 instead of relying on the
                # platform's default encoding.
                raw_content = file_path.read_text(encoding="utf-8")
            except (OSError, UnicodeError) as e:
                print(f"Error processing connections in {file_path}: {e}")
                continue

            file_key = str(file_path)
            source_url = f"/essays/{file_path.stem}"
            outgoing_links = []
            for link_text, target_url in link_pattern.findall(raw_content):
                # "//host/path" is a protocol-relative external URL.
                if target_url.startswith("//"):
                    continue
                outgoing_links.append(
                    {
                        "text": link_text,
                        "url": target_url,
                        "target_file": target_url,
                    }
                )
                # Track the same link from the target's point of view
                connections_incoming.setdefault(target_url, []).append(
                    {
                        "text": link_text,
                        "source_file": file_key,
                        "context": link_text,
                        "source_url": source_url,
                    }
                )
                total_links += 1

            # Record the file only if it really links somewhere, so that
            # "total_articles" is not inflated by empty lists.
            if outgoing_links:
                connections_outgoing[file_key] = outgoing_links

    result = {
        "outgoing": connections_outgoing,
        "incoming": connections_incoming,
        "stats": {
            "total_outgoing": total_links,
            "total_incoming": total_links,
            "total_connections": total_links + total_links,
            "total_articles": len(connections_outgoing),
        },
    }
    _cache_store["connections"] = result
    return result
@lru_cache(maxsize=1)
def get_terms_cache():
    """Build (or fetch) the index of terms shared between essays.

    A "term" is a capitalised word of at least four ASCII letters found in
    the raw Markdown of an essay, excluding a small stop-word list. Only
    terms that occur in two or more essays are kept. Each ``*.md`` file
    directly under ``DATA_DIR / "essays"`` except ``index.md`` is scanned; a
    file that fails to process is reported on stdout and skipped.

    The result is memoised by ``lru_cache`` and under
    ``_cache_store["terms"]``.

    Returns:
        dict: ``{"terms": {term: {"articles": [{"url", "title", "count"},
        ...], "total_count": int, "article_count": int}}, "stats":
        {"total_terms": int, "total_references": int}}``. Terms are inserted
        in sorted order.
    """
    if "terms" in _cache_store:
        return _cache_store["terms"]

    # Built once; set membership replaces a per-word list scan.
    stop_words = frozenset(
        {
            "This",
            "That",
            "They",
            "When",
            "Where",
            "What",
            "Which",
            "HTTP",
            "HTML",
            "JSON",
            "API",
        }
    )
    # Capitalised words of 4+ letters, so no separate length check is needed.
    word_pattern = re.compile(r"\b[A-Z][a-zA-Z]{3,}\b")
    # term -> {file path -> article entry}, in first-seen file order
    term_articles = {}

    essays_dir = DATA_DIR / "essays"
    if essays_dir.exists():
        # Sorted so the article order inside each term is reproducible.
        for essay_path in sorted(essays_dir.glob("*.md")):
            if essay_path.name == "index.md":
                continue
            try:
                # Import here to avoid circular imports
                from ..core.markdown import render_markdown_file

                content_data = render_markdown_file(essay_path)
                # Decode explicitly as UTF-8 instead of relying on the
                # platform's default encoding.
                raw_content = essay_path.read_text(encoding="utf-8")
                article_title = content_data["title"]
                file_key = str(essay_path)
                essay_url = f"/essays/{essay_path.stem}"

                # Each distinct term is recorded once per essay, so the
                # per-article "count" is always 1.
                # NOTE(review): if real occurrence counts are wanted, count
                # the matches (e.g. collections.Counter) instead of
                # de-duplicating them -- confirm with the template authors.
                for word in set(word_pattern.findall(raw_content)) - stop_words:
                    term_articles.setdefault(word, {})[file_key] = {
                        "url": essay_url,
                        "title": article_title,
                        "count": 1,
                    }
            except Exception as e:
                print(f"Error processing terms in {essay_path}: {e}")
                continue

    final_terms = {}
    total_occurrences = 0
    for term in sorted(term_articles):
        articles = list(term_articles[term].values())
        # Keep only terms that appear in multiple files
        if len(articles) < 2:
            continue
        term_total = sum(article["count"] for article in articles)
        final_terms[term] = {
            "articles": articles,
            "total_count": term_total,
            "article_count": len(articles),
        }
        total_occurrences += term_total

    result = {
        "terms": final_terms,
        "stats": {
            "total_terms": len(final_terms),
            "total_references": total_occurrences,
        },
    }
    _cache_store["terms"] = result
    return result
@lru_cache(maxsize=1)
def get_themes_cache():
    """Build (or fetch) the mapping from recurring themes to essays.

    The lower-cased raw Markdown of each ``*.md`` file directly under
    ``DATA_DIR / "essays"`` except ``index.md`` is searched with one regular
    expression per theme; every matching theme lists the essay. A literal
    space in a theme pattern also matches a hyphen. A file that fails to
    process is reported on stdout and skipped.

    The result is memoised by ``lru_cache`` and under
    ``_cache_store["themes"]``.

    Returns:
        dict: ``{"themes": {theme name: {"description": str, "articles":
        [{"title", "url", "date", "unique_icon"}, ...]}}, "stats":
        {"total_themes": int, "total_occurrences": int}}``. ``date`` is
        ``YYYY-MM-DD`` or ``""`` when no date could be determined.
    """
    if "themes" in _cache_store:
        return _cache_store["themes"]

    theme_patterns = {
        r"algorithm eats": ("The Algorithm Eats", "How engagement optimization systematically consumes human virtue, language, love, and time."),
        r"for humans.*philosophy|http for humans|for humans™|\"for humans\"": ("For Humans", "Technology should serve human mental models, not force humans to adapt to machine logic."),
        r"api design|sensible defaults|cognitive overhead": ("API Design", "Interfaces that match how people actually think."),
        r"open.source.*communit|communit.*open.source|maintainer.*project|maintainer.*open": ("Open Source", "Building in public, maintaining for strangers, and what happens when you stop."),
        r"schizoaffective.*disorder|bipolar.*disorder|manic episode|psychotic.*episode": ("Lived Experience", "First-person accounts of living with serious mental illness in tech."),
        r"plurality|plural self|system 777|dissociative identity": ("Plurality", "Multiple aspects of one consciousness, working together."),
        r"narcissi|gaslighting|emotional.abuse|love.bombing": ("Manipulation & Abuse", "Recognizing exploitation patterns at personal and systemic scale."),
        r"\blumina\b|ai personalit|digital goddess": ("AI Personalities", "What happens when you treat AI as a creative partner with its own voice."),
        r"meditation.*practice|yoga.*practice|vedic|jnana yoga|contemplative.*practice": ("Contemplative Practice", "Programming as meditation. Debugging as self-inquiry."),
        r"burnout|nothing left to give|burned out": ("Burnout", "The cost of building things that matter, and what comes after."),
        r"\bleica\b|photograph.*street|street.*photograph": ("Photography", "Exploring the material world in an electronic universe."),
        r"study bible|prayer.*faith|scripture.*verse|digital study bible": ("Faith", "Building technology downstream of prayer."),
        r"import requests|requests library|pip install requests": ("Requests", "The HTTP library and the philosophy behind it."),
    }
    # Compile once instead of once per essay. Each literal space is widened
    # to "[- ]" so that e.g. "burned out" also matches "burned-out".
    compiled_themes = [
        (re.compile(pattern.replace(" ", r"[- ]")), theme_name, description)
        for pattern, (theme_name, description) in theme_patterns.items()
    ]

    themes_data = {}
    total_themes = 0

    essays_dir = DATA_DIR / "essays"
    if essays_dir.exists():
        # Sorted so the article order inside each theme is reproducible.
        for file_path in sorted(essays_dir.glob("*.md")):
            if file_path.name == "index.md":
                continue
            try:
                # Import here to avoid circular imports
                from ..core.markdown import render_markdown_file
                from ..utils.content import extract_intelligent_date

                content_data = render_markdown_file(file_path)
                # Decode explicitly as UTF-8 instead of relying on the
                # platform's default encoding.
                raw_content = file_path.read_text(encoding="utf-8")
                content_lower = raw_content.lower()

                # Resolved lazily, at most once per essay, on the first
                # theme that matches.
                date_str = None
                for regex, theme_name, description in compiled_themes:
                    if not regex.search(content_lower):
                        continue
                    if date_str is None:
                        date_obj = extract_intelligent_date(file_path, content_data)
                        date_str = date_obj.strftime("%Y-%m-%d") if date_obj else ""
                    theme = themes_data.setdefault(
                        theme_name, {"description": description, "articles": []}
                    )
                    theme["articles"].append(
                        {
                            "title": content_data["title"],
                            "url": f"/essays/{file_path.stem}",
                            "date": date_str,
                            "unique_icon": content_data.get("unique_icon"),
                        }
                    )
                    total_themes += 1
            except Exception as e:
                print(f"Error processing themes in {file_path}: {e}")
                continue

    result = {
        "themes": themes_data,
        "stats": {
            "total_themes": len(themes_data),
            "total_occurrences": total_themes,
        },
    }
    _cache_store["themes"] = result
    return result
def clear_all_caches():
    """Clear the in-memory cache store and all LRU caches, then say so.

    The clearing itself is delegated to ``clear_cache()``, which empties
    ``_cache_store`` and invalidates each ``get_*_cache`` builder; this
    variant additionally prints a confirmation line to stdout.
    """
    clear_cache()
    print("🧹 All caches cleared!")
class CacheManager:
    """Minimal key/value cache backed by a per-instance dict."""

    def __init__(self):
        """Start with an empty cache."""
        self._cache = dict()

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if absent."""
        try:
            return self._cache[key]
        except KeyError:
            return None

    def set(self, key, value):
        """Store ``value`` under ``key``, replacing any previous value."""
        self._cache.update({key: value})

    def clear(self):
        """Remove every entry from this cache."""
        self._cache.clear()