# Source: kennethreitz.org/tuftedoc.py
# Snapshot: 2025-05-19 18:32:23 -04:00 (1037 lines, 36 KiB, Python)

import logging
import os
import pathlib
import random
import difflib
import re
import shutil
import tempfile
import zipfile
import xml.etree.ElementTree as ET
import json
import yaml
from contextlib import asynccontextmanager
from datetime import datetime
from typing import List, Optional, Tuple, Dict, Any, Union
import background
import boto3
import mistune
from fastapi import FastAPI, HTTPException, Request, Query
from fastapi.responses import FileResponse, HTMLResponse, Response, RedirectResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
from PIL.ExifTags import TAGS
from pydantic import BaseModel, Field
# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Configuration
# Both roots are resolved once at import time, relative to the working
# directory the process was started from.
MARKDOWN_DIR = pathlib.Path("./data").resolve()
STATIC_DIR = pathlib.Path("./static").resolve()

# Name of the S3 bucket to pull archives from; None when the variable is unset.
BUCKET_NAME = os.environ.get("BUCKET_NAME")
# Object keys of the zip archives to fetch from that bucket.
S3_ZIP_FILES = ["photos.zip"]

logger.info(f"MARKDOWN_DIR set to: {MARKDOWN_DIR}")
logger.info(f"STATIC_DIR set to: {STATIC_DIR}")
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: start the S3 sync on startup, log on shutdown.

    The sync function is decorated with ``@background.task``, so this call is
    presumably non-blocking and the app starts serving before the archives
    are unpacked -- TODO confirm against the ``background`` package.
    """
    # --- startup ---
    logger.info("Starting up...")
    download_and_extract_s3_zips()

    yield  # the application serves requests while suspended here

    # --- shutdown ---
    logger.info("Shutting down...")
# FastAPI setup
app = FastAPI(
    title="Tufte Markdown Browser API",
    description="API for browsing and rendering markdown content using Tailwind CSS and tuftedoc",
    version="2.0.0",
    lifespan=lifespan,
    docs_url="/api/docs",
    redoc_url="/api/redoc",
)

# Add CORS middleware
# Credentials are disabled while every origin is allowed: the CORS spec does
# not permit a wildcard origin on credentialed requests, and the FastAPI docs
# require explicit origins whenever allow_credentials=True. To support
# cookies/auth cross-origin, replace "*" with the trusted origins first.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For development; in production, specify domains
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)


def _format_datetime(value):
    """Jinja ``datetime`` filter: render *value* as ``YYYY-MM-DD HH:MM:SS``.

    Accepts a POSIX timestamp (int/float) or any object with ``strftime``.
    Strings are returned unchanged, because ``Metadata.date`` is declared as
    ``str`` and would otherwise raise ``AttributeError`` inside a template.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, (int, float)):
        value = datetime.fromtimestamp(value)
    return value.strftime("%Y-%m-%d %H:%M:%S")


# Jinja2 setup
templates = Jinja2Templates(directory="templates")
templates.env.filters["datetime"] = _format_datetime

# Raw assets and raw content files are served directly from these prefixes.
app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
app.mount("/data", StaticFiles(directory=str(MARKDOWN_DIR)), name="data")

# Mistune setup
# escape=False lets raw HTML embedded in the markdown pass through unescaped.
markdown = mistune.create_markdown(
    plugins=["table", "url", "strikethrough", "footnotes"], escape=False, hard_wrap=True
)
# Models
class Metadata(BaseModel):
    """Metadata extracted from frontmatter in markdown files."""

    # Every field is optional: a document may declare any subset of these keys.
    # The docstring above is kept verbatim because pydantic publishes it as the
    # model description in the generated schema.

    # Core descriptive fields
    title: Optional[str] = Field(default=None, description="Title from the document frontmatter")
    date: Optional[str] = Field(default=None, description="Date of publication")
    author: Optional[str] = Field(default=None, description="Author of the document")
    tags: Optional[List[str]] = Field(default=None, description="Tags associated with the document")
    description: Optional[str] = Field(default=None, description="Brief description of the content")

    # Presentation and publication controls
    featured_image: Optional[str] = Field(default=None, description="Path to a featured image")
    draft: Optional[bool] = Field(default=False, description="Whether the content is a draft")
    layout: Optional[str] = Field(default=None, description="Layout template to use")

    # Any frontmatter keys not listed above
    extra: Optional[Dict[str, Any]] = Field(default=None, description="Additional custom metadata")
# One entry (file or directory) in a directory listing.
# Documented with comments rather than a docstring so the generated schema
# description stays exactly as it was.
class FileInfo(BaseModel):
    # Identity and location
    name: str = Field(default=..., description="Name of the file or directory")
    url: str = Field(default=..., description="URL path to the file or directory")
    title: Optional[str] = Field(
        default=None, description="Title of the file or directory, if available"
    )
    slug: str = Field(default=..., description="Slug/basename of the file or directory")

    # Filesystem timestamps (seconds since the epoch)
    ctime: float = Field(default=..., description="Creation time of the file or directory")
    mtime: float = Field(default=..., description="Modification time of the file or directory")

    # Kind of entry
    is_dir: bool = Field(default=..., description="Whether the item is a directory")
    is_image: bool = Field(default=False, description="Whether the item is an image")

    # Optional enrichments
    exif_data: Optional[dict] = Field(default=None, description="EXIF data for image files")
    metadata: Optional[Metadata] = Field(default=None, description="Frontmatter metadata if available")
    summary: Optional[str] = Field(default=None, description="Brief summary of the content")
# One step of the navigation trail from the content root to the current page.
# Documented with comments rather than a docstring so the generated schema
# description stays exactly as it was.
class Breadcrumb(BaseModel):
    name: str = Field(default=..., description="Name of the breadcrumb")
    url: str = Field(default=..., description="URL path of the breadcrumb")
    title: Optional[str] = Field(
        default=None, description="Title of the breadcrumb, if available"
    )
# Helper functions
@background.task
def download_and_extract_s3_zips():
    """Download each archive in ``S3_ZIP_FILES`` and unpack it into ``MARKDOWN_DIR``.

    An archive is skipped when ``BUCKET_NAME`` is unset or when the folder
    named after the archive (text before the first ".") already exists and is
    non-empty. Download and extraction failures are logged and do not stop
    the remaining archives. Any ``__MACOSX`` folder left by the extraction is
    removed afterwards.
    """
    s3 = boto3.client("s3")

    for zip_file in S3_ZIP_FILES:
        if not BUCKET_NAME:
            logger.error("BUCKET_NAME is not set. Skipping download.")
            continue

        extracted_folder = MARKDOWN_DIR / zip_file.split(".")[0]
        if extracted_folder.exists() and any(extracted_folder.iterdir()):
            logger.info(
                f"Folder {extracted_folder} already exists and is not empty. Skipping download."
            )
            continue

        logger.info(f"Downloading {zip_file} from S3 bucket: {BUCKET_NAME}")
        # The context manager owns the temp file's lifetime. Deleting it by
        # hand as well makes the context exit raise FileNotFoundError on
        # Python < 3.12, so no manual unlink is done here.
        with tempfile.NamedTemporaryFile(delete=True) as temp_file:
            try:
                s3.download_fileobj(BUCKET_NAME, zip_file, temp_file)
            except Exception as e:
                logger.error(f"Failed to download {zip_file}: {str(e)}")
                continue

            # Push buffered bytes out and rewind so the archive is read in
            # full from the same handle instead of being reopened by path.
            temp_file.flush()
            temp_file.seek(0)

            logger.info(f"Extracting {zip_file} to {MARKDOWN_DIR}")
            try:
                with zipfile.ZipFile(temp_file, "r") as zip_ref:
                    zip_ref.extractall(MARKDOWN_DIR)
            except Exception as e:
                logger.error(f"Failed to extract {zip_file}: {str(e)}")

        # Remove the __MACOSX folder.
        macos_folder = MARKDOWN_DIR / "__MACOSX"
        if macos_folder.exists():
            shutil.rmtree(macos_folder)

    logger.info("S3 zip downloads and extractions completed.")
def find_similar_path(path: str, threshold: float = 0.6) -> Optional[str]:
    """Return the content path whose final component best resembles *path*'s.

    Only the basename is compared. Candidates are every file or directory
    under ``MARKDOWN_DIR`` whose basename length is within one character of
    the target's. The result is relative to ``MARKDOWN_DIR``, or ``None``
    when nothing scores at least *threshold*.
    """
    wanted = os.path.basename(path)

    # Basename -> first relative path carrying it, in traversal order, so a
    # matched name maps back to the same path a linear scan would return.
    first_path_for: Dict[str, str] = {}
    for entry in MARKDOWN_DIR.glob("**/*"):
        if not (entry.is_file() or entry.is_dir()):
            continue
        relative = str(entry.relative_to(MARKDOWN_DIR))
        basename = os.path.basename(relative)
        # Keep only names whose length is within +/-1 of the target's.
        if abs(len(basename) - len(wanted)) > 1:
            continue
        first_path_for.setdefault(basename, relative)

    if not first_path_for:
        return None

    best = difflib.get_close_matches(
        wanted, list(first_path_for), n=1, cutoff=threshold
    )
    if not best:
        return None
    return first_path_for[best[0]]
def title_case(s: str) -> str:
    """Turn a slug into display text: '-' and '_' become spaces, each word is capitalized.

    ``str.capitalize`` lowercases the rest of each word, and runs of
    whitespace collapse to single spaces.
    """
    spaced = s.translate(str.maketrans("-_", "  "))
    return " ".join(map(str.capitalize, spaced.split()))
def extract_frontmatter(markdown_content: str) -> Tuple[Optional[Dict[str, Any]], str]:
    """Split YAML frontmatter from the markdown body.

    Frontmatter is a block at the very start of the document, opened and
    closed by a line containing only ``---``. LF and CRLF line endings are
    both accepted, and the closing fence may be the last line of the file.

    Returns:
        ``(frontmatter, body)``. ``frontmatter`` is the parsed mapping, or
        ``None`` when there is no frontmatter, the YAML fails to parse, or it
        parses to something other than a mapping. ``body`` is the content
        after the closing fence, or the whole input when no frontmatter block
        is present.
    """
    frontmatter_match = re.match(
        r"---[ \t]*\r?\n(.*?)\r?\n---[ \t]*(?:\r?\n|\Z)(.*)",
        markdown_content,
        re.DOTALL,
    )
    if not frontmatter_match:
        # No frontmatter found
        return None, markdown_content

    frontmatter_yaml = frontmatter_match.group(1)
    # Everything after the first closing fence. This is used on the error
    # path as well, so later '---' rules in the body are never touched.
    content = frontmatter_match.group(2)

    try:
        frontmatter = yaml.safe_load(frontmatter_yaml)
    except Exception as e:
        logger.error(f"Error parsing frontmatter: {e}")
        return None, content

    # A scalar or list is valid YAML but not usable as metadata; callers
    # expect a mapping or None.
    if not isinstance(frontmatter, dict):
        return None, content
    return frontmatter, content
def parse_metadata(frontmatter: Optional[Dict[str, Any]]) -> Optional[Metadata]:
    """Build a ``Metadata`` object from parsed frontmatter.

    YAML types its scalars itself (an unquoted ``date: 2024-01-01`` becomes a
    date object, ``title: 2024`` an int, ``tags: foo`` a bare string). Those
    values are normalised to the ``str`` / ``List[str]`` shapes the model
    declares so that ordinary frontmatter does not fail validation.

    Returns:
        ``None`` when *frontmatter* is empty or not a mapping; otherwise a
        ``Metadata`` whose ``extra`` holds every key that is not a known field.
    """
    if not frontmatter or not isinstance(frontmatter, dict):
        return None

    def _text(value: Any) -> Optional[str]:
        # Keep None and strings as they are; stringify any other scalar.
        return value if value is None or isinstance(value, str) else str(value)

    raw_tags = frontmatter.get("tags", [])
    if raw_tags is None:
        tags = None
    elif isinstance(raw_tags, (list, tuple, set)):
        tags = [str(tag) for tag in raw_tags]
    else:
        # A single scalar such as `tags: python`
        tags = [str(raw_tags)]

    # Extract known fields
    metadata_dict = {
        "title": _text(frontmatter.get("title")),
        "date": _text(frontmatter.get("date")),
        "author": _text(frontmatter.get("author")),
        "tags": tags,
        "description": _text(frontmatter.get("description")),
        "featured_image": _text(frontmatter.get("featured_image")),
        "draft": frontmatter.get("draft", False),
        "layout": _text(frontmatter.get("layout")),
    }

    # Copy any extra fields to the extra dict (keys stringified: YAML allows
    # non-string keys, the model does not).
    extra_fields = {
        str(key): value
        for key, value in frontmatter.items()
        if key not in metadata_dict
    }
    if extra_fields:
        metadata_dict["extra"] = extra_fields

    return Metadata(**metadata_dict)
def get_h1_from_markdown(file_path: pathlib.Path) -> Optional[str]:
    """Return the text of the first ``# `` heading in a markdown file.

    Frontmatter is skipped before searching. Returns ``None`` when the file
    has no level-1 heading or cannot be read as UTF-8; the read error is
    logged rather than raised, matching ``get_markdown_metadata``, so one bad
    file does not abort a whole directory listing.
    """
    try:
        with file_path.open("r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        logger.error(f"Error reading heading from {file_path}: {e}")
        return None

    # Extract frontmatter and content
    _, content_without_frontmatter = extract_frontmatter(content)

    # Look for the first heading. Only spaces/tabs may separate '#' from the
    # text, so a bare '#' line cannot adopt the following line as its title.
    match = re.search(r"^#[ \t]+(.+)$", content_without_frontmatter, re.MULTILINE)
    # strip() drops trailing whitespace, including the '\r' of CRLF files.
    return match.group(1).strip() if match else None
def get_markdown_metadata(file_path: pathlib.Path) -> Optional[Metadata]:
    """Read *file_path* and return its frontmatter as ``Metadata``.

    Best effort: any failure (unreadable file, invalid metadata) is logged
    and reported as ``None``.
    """
    try:
        raw_text = file_path.read_text(encoding="utf-8")
        parsed_frontmatter, _body = extract_frontmatter(raw_text)
        return parse_metadata(parsed_frontmatter)
    except Exception as e:
        logger.error(f"Error reading metadata from {file_path}: {e}")
        return None
def generate_summary(markdown_content: str, max_length: int = 150) -> str:
    """Generate a plain-text summary from markdown content.

    Frontmatter, headings, fenced and inline code, images, link targets and
    emphasis markers are removed, and whitespace is collapsed. Text longer
    than *max_length* is cut and suffixed with ``...``.
    """
    # Remove frontmatter if present
    _, content = extract_frontmatter(markdown_content)

    # Strip fenced code blocks first so nothing inside them is interpreted
    content = re.sub(r"```.*?```", "", content, flags=re.DOTALL)
    # Strip headers
    content = re.sub(r"^#{1,6}\s+.*$", "", content, flags=re.MULTILINE)
    # Strip images BEFORE links: the link pattern also matches the
    # "[alt](url)" tail of "![alt](url)" and would leave "!alt" behind.
    content = re.sub(r"!\[.*?\]\(.*?\)", "", content)
    # Strip links, keeping the link text
    content = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", content)
    # Strip inline code
    content = re.sub(r"`[^`]+`", "", content)
    # Strip bold/italic
    content = re.sub(r"\*\*|\*|__|\b_\b", "", content)
    # Clean up whitespace
    content = re.sub(r"\s+", " ", content).strip()

    # Truncate to max_length; clamp at zero so a tiny max_length cannot turn
    # into a negative slice that keeps almost the whole text.
    if len(content) > max_length:
        content = content[: max(max_length - 3, 0)] + "..."
    return content
def get_directory_title(dir_path: pathlib.Path) -> str:
    """Return a display title for a directory.

    Uses the first H1 of the directory's ``index.md`` when there is one;
    otherwise (no index file, or an index without an H1) falls back to the
    title-cased directory name, so the result is always a string.
    """
    index_file = dir_path / "index.md"
    if index_file.exists():
        heading = get_h1_from_markdown(index_file)
        if heading:
            return heading
    return title_case(dir_path.name)
def get_clean_url(path: str) -> str:
    """
    Get a clean URL for a path relative to the content root.
    - For markdown files: remove the .md extension
    - For directories: ensure they end with a trailing slash
    - Leading slashes on *path* are ignored, and the root itself maps to "/"
    """
    # A leading "/" would make `MARKDOWN_DIR / path` an absolute path that
    # ignores MARKDOWN_DIR, breaking the directory check and doubling the
    # slash in the result.
    relative = path.lstrip("/")
    if not relative:
        return "/"

    if relative.endswith(".md"):
        return "/" + relative[: -len(".md")]

    # Check if it's a directory path by checking the filesystem
    if not relative.endswith("/") and (MARKDOWN_DIR / relative).is_dir():
        return "/" + relative + "/"
    return "/" + relative
def get_file_creation_date(file_path: pathlib.Path) -> datetime:
    """Return the file's ``st_ctime`` as a naive local ``datetime``.

    NOTE(review): per the Python ``os.stat_result`` docs, ``st_ctime`` is the
    last metadata-change time on Unix rather than a true creation time --
    confirm this is acceptable for the pages that display it.
    """
    stat_result = file_path.stat()
    return datetime.fromtimestamp(stat_result.st_ctime)
def is_image(file_path: pathlib.Path) -> bool:
    """Tell whether *file_path* has a recognised image extension (case-insensitive).

    Only the suffix is inspected; the file does not need to exist.
    """
    extension = file_path.suffix.lower()
    return extension in {".jpg", ".jpeg", ".png", ".gif"}
def get_exif_data(image_path: pathlib.Path) -> dict:
    """Return a small, display-oriented subset of an image's EXIF data.

    The result maps six fixed labels to the matching EXIF value, or to
    ``"Unknown"`` when the tag is absent. An empty dict is returned when the
    image has no EXIF data or anything goes wrong (the error is logged).

    NOTE(review): relies on Pillow's private ``_getexif()``; image classes
    that lack it will land in the error branch -- confirm that is intended.
    """
    # (label shown to the user, EXIF tag name), in output order
    wanted_fields = (
        ("Camera", "Model"),
        ("Lens", "LensModel"),
        ("Aperture", "FNumber"),
        ("Shutter Speed", "ExposureTime"),
        ("ISO", "ISOSpeedRatings"),
        ("Focal Length", "FocalLength"),
    )
    try:
        with Image.open(image_path) as img:
            raw_exif = img._getexif()
        if raw_exif:
            # Translate numeric tag ids to names; unknown ids stay as they are.
            by_name = {TAGS.get(tag_id, tag_id): value for tag_id, value in raw_exif.items()}
            return {label: by_name.get(tag_name, "Unknown") for label, tag_name in wanted_fields}
    except Exception as e:
        logger.error(f"Error reading EXIF data: {e}")
    return {}
def process_directory(full_path: pathlib.Path) -> Tuple[List[FileInfo], Optional[str], Optional[Metadata]]:
    """Build the listing for a directory plus its rendered ``index.md``.

    Returns:
        ``(items, index_content, index_metadata)``. ``items`` lists the
        directories first (sorted by display name), then the files (newest
        modification time first). Dot-entries, ``node_modules`` and
        ``index.md`` are never listed, and draft files are listed only when
        the ``DEBUG`` environment variable is set. The last two values are
        ``None`` when the directory has no ``index.md``.
    """
    logger.info(f"Processing directory: {full_path}")
    show_drafts = bool(os.environ.get("DEBUG"))

    # Directories and files are collected separately because they are
    # ordered by different keys.
    folder_entries: List[FileInfo] = []
    file_entries: List[FileInfo] = []

    for entry in full_path.iterdir():
        entry_name = entry.name
        # The startswith(".") test also covers ".DS_Store" and ".git".
        if entry_name.startswith(".") or entry_name in ("node_modules", "index.md"):
            continue

        is_markdown = entry.suffix == ".md"
        image_flag = is_image(entry)
        exif = get_exif_data(entry) if image_flag else None
        meta = get_markdown_metadata(entry) if is_markdown else None

        # Summary is best-effort: a failure leaves it as None.
        excerpt = None
        if is_markdown:
            try:
                excerpt = generate_summary(entry.read_text(encoding="utf-8"))
            except Exception as e:
                logger.error(f"Error generating summary for {entry}: {e}")

        # Title priority: frontmatter title, then directory title / first H1.
        if meta and meta.title:
            heading = meta.title
        elif entry.is_dir():
            heading = get_directory_title(entry)
        elif is_markdown:
            heading = get_h1_from_markdown(entry)
        else:
            heading = None

        # Directory URLs always end with a trailing slash.
        link = get_clean_url(str(entry.relative_to(MARKDOWN_DIR)))
        if entry.is_dir() and not link.endswith("/"):
            link += "/"

        info = FileInfo(
            name=title_case(entry_name),
            url=link,
            slug=entry_name,
            ctime=os.path.getctime(entry),
            mtime=os.path.getmtime(entry),
            is_dir=entry.is_dir(),
            title=heading,
            is_image=image_flag,
            exif_data=exif,
            metadata=meta,
            summary=excerpt,
        )

        if entry.is_dir():
            folder_entries.append(info)
        elif not (meta and meta.draft and not show_drafts):
            # Drafts are hidden unless DEBUG is set.
            file_entries.append(info)

    folder_entries.sort(key=lambda info: info.name.lower())
    # Default file order is newest first; callers may re-sort afterwards.
    file_entries.sort(key=lambda info: info.mtime, reverse=True)
    all_items = folder_entries + file_entries

    # Process index file
    index_content = None
    index_metadata = None
    index_file = full_path / "index.md"
    if index_file.exists():
        index_content, index_metadata = process_markdown_file_with_metadata(index_file)

    logger.info(f"Processed {len(all_items)} items in directory: {full_path}")
    return all_items, index_content, index_metadata
def process_markdown_file(file_path: pathlib.Path) -> str:
    """Render a markdown file to HTML, ignoring its frontmatter.

    Never raises: on any failure the error is logged and an HTML paragraph
    describing it is returned instead.
    """
    logger.info(f"Processing markdown file: {file_path}")
    try:
        source_text = file_path.read_text(encoding="utf-8")
        # Only the body after the frontmatter is rendered.
        body = extract_frontmatter(source_text)[1]
        return markdown(body)
    except Exception as e:
        logger.error(f"Error processing markdown file {file_path}: {e}")
        return f"<p>Error processing markdown file: {str(e)}</p>"
def process_markdown_file_with_metadata(file_path: pathlib.Path) -> Tuple[str, Optional[Metadata]]:
    """Render a markdown file to HTML and parse its frontmatter.

    Returns:
        ``(html, metadata)``. Never raises: on any failure, including
        metadata validation, the error is logged and
        ``(error paragraph, None)`` is returned.
    """
    logger.info(f"Processing markdown file with metadata: {file_path}")
    try:
        source_text = file_path.read_text(encoding="utf-8")
        raw_frontmatter, body = extract_frontmatter(source_text)
        # Metadata is parsed before rendering, so a metadata failure
        # short-circuits into the error branch below.
        parsed_metadata = parse_metadata(raw_frontmatter)
        rendered = markdown(body)
        return rendered, parsed_metadata
    except Exception as e:
        logger.error(f"Error processing markdown file with metadata {file_path}: {e}")
        return f"<p>Error processing markdown file: {str(e)}</p>", None
def generate_breadcrumbs(path: str) -> List[Breadcrumb]:
    """Build one breadcrumb per component of *path*, root-most first.

    Each crumb carries the title-cased component as its name, the clean URL
    of the path so far, and a title taken from the directory (via
    ``get_directory_title``) or from the markdown file's first H1. Other
    kinds of path get no title.
    """
    trail: List[Breadcrumb] = []
    cursor = MARKDOWN_DIR
    for segment in pathlib.Path(path).parts:
        cursor = cursor / segment
        display_name = title_case(segment)
        link = get_clean_url(str(cursor.relative_to(MARKDOWN_DIR)))

        if cursor.is_dir():
            heading = get_directory_title(cursor)
        elif cursor.suffix == ".md":
            heading = get_h1_from_markdown(cursor)
        else:
            heading = None

        trail.append(Breadcrumb(name=display_name, url=link, title=heading))
    return trail
def generate_title_from_breadcrumbs(breadcrumbs: List[Breadcrumb]) -> str:
    """Join the breadcrumbs into a page title such as ``"Essays > My Post"``.

    Each crumb contributes its title, or its name when the title is empty.
    """
    labels = []
    for crumb in breadcrumbs:
        labels.append(crumb.title if crumb.title else crumb.name)
    return " > ".join(labels)
def generate_directory_tree(directory_path: pathlib.Path, max_depth: int = 3, current_depth: int = 0) -> List[Dict[str, Any]]:
    """
    Generate a recursive directory tree structure.

    Args:
        directory_path: The directory to list.
        max_depth: Deepest recursion level that is still listed; the call
            returns an empty list once current_depth exceeds it.
        current_depth: Recursion level of this call (0 for the first call).

    Returns:
        A list of node dicts with keys name, title, path, url, is_dir and
        has_index, plus "children" for directories that have any. Directories
        come first, then files, each ordered by lower-cased name. If an error
        occurs while listing, it is logged and the nodes collected so far are
        returned.
    """
    if current_depth > max_depth:
        return []

    nodes: List[Dict[str, Any]] = []
    try:
        ordered_entries = sorted(
            directory_path.iterdir(),
            key=lambda entry: (not entry.is_dir(), entry.name.lower()),
        )
        for entry in ordered_entries:
            # Skip hidden entries and tooling directories.
            if entry.name.startswith(".") or entry.name in ["__pycache__", "node_modules"]:
                continue

            entry_is_dir = entry.is_dir()
            entry_is_markdown = entry.suffix == ".md"
            meta = get_markdown_metadata(entry) if entry_is_markdown else None

            # Drafts are hidden unless DEBUG is set.
            if meta and meta.draft and not os.environ.get("DEBUG"):
                continue

            # Title priority: frontmatter title, first H1, formatted name.
            label = None
            if meta and meta.title:
                label = meta.title
            elif entry_is_markdown:
                label = get_h1_from_markdown(entry)
            if not label:
                label = title_case(entry.name)

            node: Dict[str, Any] = {
                "name": entry.name,
                "title": label,
                "path": str(entry.relative_to(MARKDOWN_DIR)),
                "url": get_clean_url(str(entry.relative_to(MARKDOWN_DIR))),
                "is_dir": entry_is_dir,
                "has_index": (entry / "index.md").exists() if entry_is_dir else False,
            }

            if entry_is_dir:
                subtree = generate_directory_tree(entry, max_depth, current_depth + 1)
                # "children" is only present when the subtree is non-empty.
                if subtree:
                    node["children"] = subtree

            nodes.append(node)
    except Exception as e:
        logger.error(f"Error generating directory tree for {directory_path}: {e}")
    return nodes
class XMLResponse(Response):
    """Response whose default media type is ``application/xml``."""

    media_type = "application/xml"

    def __init__(self, content: str, *args, **kwargs):
        # Pass content positionally: combining `content=content` with *args
        # raised "got multiple values for argument 'content'" as soon as a
        # caller supplied any further positional argument (e.g. a status code).
        super().__init__(content, *args, **kwargs)
def clean_url(url: str) -> str:
    """Collapse repeated slashes into one, leaving the ``//`` that follows a colon (as in ``http://``)."""
    repeated_slashes = re.compile(r"(?<!:)//+")
    return repeated_slashes.sub("/", url)
def generate_sitemap(base_url: str) -> str:
    """Build the sitemap XML for everything under ``MARKDOWN_DIR``.

    The root URL is listed first, followed by every directory and markdown
    file. Dot-entries and ``index.md`` files are skipped, and other file
    types are not listed. Entries are visited in name order so the output is
    stable between requests.

    Args:
        base_url: Absolute site URL that every ``<loc>`` is prefixed with.

    Returns:
        The ``<urlset>`` document as a string (no XML declaration).
    """
    urlset = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    site_root = base_url.rstrip("/")

    def add_url(
        loc: str,
        lastmod: Optional[datetime] = None,
        changefreq: Optional[str] = None,
        priority: Optional[float] = None,
    ):
        """Append one <url> element; optional children are omitted when unset."""
        url = ET.SubElement(urlset, "url")
        ET.SubElement(url, "loc").text = clean_url(f"{site_root}/{loc.lstrip('/')}")
        if lastmod:
            ET.SubElement(url, "lastmod").text = lastmod.strftime("%Y-%m-%d")
        if changefreq:
            ET.SubElement(url, "changefreq").text = changefreq
        # Compare against None so an explicit priority of 0.0 is still written.
        if priority is not None:
            ET.SubElement(url, "priority").text = str(priority)

    def traverse_directory(path: pathlib.Path, relative_path: str = ""):
        """Recursively add *path*'s directories and markdown files."""
        for item in sorted(path.iterdir(), key=lambda entry: entry.name):
            if item.name.startswith(".") or item.name == "index.md":
                continue

            # Keep the path relative (no leading "/"): get_clean_url joins it
            # onto MARKDOWN_DIR to detect directories, and a leading slash
            # would turn that join into an unrelated absolute path, losing
            # the trailing slash on every directory URL.
            item_relative_path = f"{relative_path}/{item.name}".lstrip("/")
            clean_item_path = clean_url(get_clean_url(item_relative_path))

            if item.is_dir():
                add_url(
                    clean_item_path,
                    lastmod=datetime.fromtimestamp(item.stat().st_mtime),
                    changefreq="weekly",
                    priority=0.8,
                )
                traverse_directory(item, item_relative_path)
            elif item.suffix == ".md":
                add_url(
                    clean_item_path,
                    lastmod=datetime.fromtimestamp(item.stat().st_mtime),
                    changefreq="monthly",
                    priority=0.6,
                )

    add_url("/", lastmod=datetime.now(), changefreq="daily", priority=1.0)
    traverse_directory(MARKDOWN_DIR)
    return ET.tostring(urlset, encoding="unicode", method="xml")
@app.get("/sitemap.xml", response_class=XMLResponse)
async def sitemap(request: Request):
    """Serve the XML sitemap, with absolute URLs based on the request's base URL."""
    xml_body = generate_sitemap(str(request.base_url))
    return XMLResponse(content=xml_body, media_type="application/xml")
# API endpoints
@app.get("/api/content/{path:path}", response_model=Dict[str, Any], tags=["API"])
async def get_content_api(
    request: Request,
    path: str = "",
    format: str = Query("json", description="Response format (json or html)"),
    tree: bool = Query(False, description="Return directory tree structure")
):
    """
    Get the content and metadata for a specific path.
    - For directories: returns list of items and index content if available
    - For files: returns the file content and metadata
    - With tree=true: returns a recursive directory tree structure

    Responses: 404 when the path does not exist or points outside the content
    root, 400 for an existing file that is not markdown, 500 with the error
    text on unexpected failures.
    """
    logger.info(f"API request for path: {path}, format={format}, tree={tree}")
    try:
        # Normalise lexically (symlinks inside the content root keep working)
        # and refuse anything that resolves outside it. An absolute or
        # "../" path would otherwise read arbitrary directories and .md files.
        full_path = pathlib.Path(os.path.normpath(MARKDOWN_DIR / path))
        try:
            full_path.relative_to(MARKDOWN_DIR)
        except ValueError:
            return JSONResponse(
                status_code=404,
                content={"error": f"Path not found: {path}"}
            )

        # Handle file extensions
        if not full_path.exists():
            # Appending ".md" keeps dotted names intact ("v1.0" -> "v1.0.md");
            # with_suffix() would rewrite that to "v1.md". It is still tried
            # second so URLs that relied on the old behaviour keep resolving.
            md_candidates = (
                full_path.parent / f"{full_path.name}.md",
                full_path.with_suffix(".md"),
            )
            existing = next((c for c in md_candidates if c.exists()), None)
            if existing is None:
                return JSONResponse(
                    status_code=404,
                    content={"error": f"Path not found: {path}"}
                )
            full_path = existing

        # Process directory
        if full_path.is_dir():
            all_items, content, metadata = process_directory(full_path)
            result = {
                "type": "directory",
                "items": [item.dict() for item in all_items],
                "index_content": content,
                "metadata": metadata.dict() if metadata else None,
                "path": path,
            }
            # Generate tree structure if requested
            if tree:
                result["tree"] = generate_directory_tree(full_path)
        # Process file
        elif full_path.suffix == ".md":
            content, metadata = process_markdown_file_with_metadata(full_path)
            result = {
                "type": "file",
                "content": content,
                "metadata": metadata.dict() if metadata else None,
                "path": path,
                "last_modified": datetime.fromtimestamp(full_path.stat().st_mtime).isoformat(),
            }
        # Other file types
        else:
            return JSONResponse(
                status_code=400,
                content={"error": f"Unsupported file type: {full_path.suffix}"}
            )

        if format.lower() == "html":
            # Return HTML response (empty body for a directory without index.md)
            return HTMLResponse(content=content if content else "")
        # Return JSON response
        return result
    except Exception as e:
        logger.error(f"API error: {str(e)}")
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
@app.get("/api/search", response_model=Dict[str, Any], tags=["API"])
async def search_content(
    q: str = Query(..., description="Search query"),
    path: str = Query("", description="Path to limit search to"),
    tags: Optional[List[str]] = Query(None, description="Filter by tags")
):
    """Search content in the repository.

    A markdown file matches when the query occurs, case-insensitively, in its
    body or its frontmatter title. Hidden files and drafts (unless ``DEBUG``
    is set) are skipped. When *tags* is given, only files carrying at least
    one of those tags are returned. Results whose title starts with the query
    come first. A *path* outside the content root yields no results.
    """
    logger.info(f"Search request: q={q}, path={path}, tags={tags}")
    results = []
    needle = q.lower()

    # Normalise lexically and keep the search inside the content root; an
    # absolute or "../" path would otherwise glob arbitrary parts of the
    # filesystem.
    search_path = pathlib.Path(os.path.normpath(MARKDOWN_DIR / path)) if path else MARKDOWN_DIR
    try:
        search_path.relative_to(MARKDOWN_DIR)
        candidates = search_path.glob("**/*.md")
    except ValueError:
        logger.warning(f"Search path outside content root ignored: {path}")
        candidates = ()

    # Simple search implementation - could be improved with a real search engine
    for item in candidates:
        try:
            relative = item.relative_to(MARKDOWN_DIR)
            # Skip hidden files and directories. Only the parts below the
            # content root count, so a dot-directory above it does not hide
            # everything.
            if any(part.startswith(".") for part in relative.parts):
                continue

            with item.open("r", encoding="utf-8") as f:
                content = f.read()

            # Get metadata
            frontmatter, content_text = extract_frontmatter(content)
            metadata = parse_metadata(frontmatter)

            # Skip drafts
            if metadata and metadata.draft and not os.environ.get("DEBUG"):
                continue

            # Filter by tags if specified. A file with no tags cannot satisfy
            # a tag filter, so it is excluded as well.
            file_tags = metadata.tags if metadata and metadata.tags else []
            if tags and not any(tag in file_tags for tag in tags):
                continue

            # Check for matches in content or title
            meta_title = metadata.title if metadata and metadata.title else None
            if needle in content_text.lower() or (meta_title and needle in meta_title.lower()):
                title = meta_title or get_h1_from_markdown(item)
                summary = generate_summary(content)
                results.append({
                    "title": title or item.stem,
                    "path": get_clean_url(str(relative)),
                    "summary": summary,
                    "tags": file_tags,
                    "date": metadata.date if metadata and metadata.date else None,
                })
        except Exception as e:
            logger.error(f"Error processing file {item} during search: {e}")

    # Sort results by relevance (could be improved)
    results.sort(key=lambda x: 0 if x.get("title", "").lower().startswith(needle) else 1)

    return {
        "query": q,
        "path": path,
        "tags": tags,
        "count": len(results),
        "results": results
    }
@app.get("/api/tags", response_model=Dict[str, Any], tags=["API"])
async def get_all_tags():
    """Get all tags used in the content with counts.

    Hidden files are ignored, and so are drafts unless ``DEBUG`` is set,
    matching the search endpoint and the directory listings. Tags are
    returned most-used first.
    """
    tags_count: Dict[str, int] = {}

    # Collect all tags
    for item in MARKDOWN_DIR.glob("**/*.md"):
        try:
            # Skip hidden files. Only the parts below the content root count,
            # so a dot-directory above it does not hide everything.
            relative_parts = item.relative_to(MARKDOWN_DIR).parts
            if any(part.startswith(".") for part in relative_parts):
                continue

            metadata = get_markdown_metadata(item)
            if not metadata:
                continue
            # Drafts are not listed anywhere else, so their tags are not
            # counted either.
            if metadata.draft and not os.environ.get("DEBUG"):
                continue

            for tag in metadata.tags or []:
                tags_count[tag] = tags_count.get(tag, 0) + 1
        except Exception as e:
            logger.error(f"Error processing tags for file {item}: {e}")

    # Sort tags by count
    sorted_tags = [{"name": tag, "count": count} for tag, count in tags_count.items()]
    sorted_tags.sort(key=lambda x: x["count"], reverse=True)

    return {
        "count": len(sorted_tags),
        "tags": sorted_tags
    }
@app.get("/api/tree", response_model=Dict[str, Any], tags=["API"])
async def get_directory_tree_api(
    path: str = Query("", description="Base path for the tree"),
    max_depth: int = Query(3, description="Maximum depth to traverse"),
):
    """
    Get a recursive directory tree structure starting from the specified path.
    - Returns a hierarchical tree of directories and files
    - Includes metadata like titles and URLs
    - Helps construct file browsers and navigation menus

    Responds with 404 when the path is not an existing directory and with 500
    (carrying the error text) on unexpected failures.
    """
    logger.info(f"Tree request for path: {path}, max_depth: {max_depth}")
    try:
        base_dir = MARKDOWN_DIR / path
        is_existing_dir = base_dir.exists() and base_dir.is_dir()
        if not is_existing_dir:
            return JSONResponse(
                status_code=404,
                content={"error": f"Directory not found: {path}"}
            )
        return {
            "path": path,
            "tree": generate_directory_tree(base_dir, max_depth=max_depth),
        }
    except Exception as e:
        logger.error(f"Error generating tree: {str(e)}")
        return JSONResponse(
            status_code=500,
            content={"error": str(e)}
        )
@app.get("/mindmap", response_class=HTMLResponse, include_in_schema=False)
async def mindmap(request: Request):
    """Renders the mind map visualization of the website."""
    logger.info("Rendering mindmap view")
    # The template receives no page content and no file listing.
    context = {
        "request": request,
        "title": "Mind Map - Kenneth Reitz",
        "content": None,
        "files": None,
    }
    return templates.TemplateResponse("mindmap.html", context)
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
@app.get("/{path:path}", response_class=HTMLResponse, include_in_schema=False)
async def browse(
    request: Request,
    path: str = "",
    sort: str = Query(None, description="Sort order for files"),
    tag: str = Query(None, description="Filter by tag")
):
    """Render the HTML page (or serve the image) for a content path.

    Resolution order:
      1. Paths that leave the content root are answered with 404.
      2. A directory requested without a trailing slash is redirected (301)
         to the slash form, keeping the sort/tag query.
      3. A missing path is retried as "<path>.md", then as a ".jpg"/".jpeg"
         image; failing that, the closest-named path is redirected to (301),
         or 404 is raised.
      4. Directories render a listing (optionally tag-filtered and sorted);
         images are returned as files; anything else is rendered as markdown.

    Template choice: "index.html" for directories and "post.html" for files,
    "photo_browser.html" when a listing holds more than five images, the
    frontmatter ``layout`` when set, and for the site root "post.html" with
    ``index.md`` content or "directory.html" when there is no ``index.md``.
    """
    from urllib.parse import urlencode

    logger.info(f"Browsing path: {path}")

    # Normalise lexically (symlinks inside the content root keep working) and
    # refuse anything outside it; an absolute or "../" path would otherwise
    # serve arbitrary files from the host.
    full_path = pathlib.Path(os.path.normpath(MARKDOWN_DIR / path))
    try:
        full_path.relative_to(MARKDOWN_DIR)
    except ValueError:
        logger.warning(f"Rejected path outside content root: {path}")
        raise HTTPException(status_code=404, detail="Item not found") from None

    # Check if it's a directory and the URL doesn't end with '/'
    if full_path.is_dir() and path and not path.endswith('/'):
        # Preserve query parameters in the redirect, URL-encoded so values
        # containing '&', '#' or spaces survive intact.
        query_params = {key: value for key, value in (("sort", sort), ("tag", tag)) if value}
        query_string = f"?{urlencode(query_params)}" if query_params else ""
        redirect_url = f"/{path}/{query_string}"
        logger.info(f"Redirecting directory without trailing slash: {path} to {redirect_url}")
        return RedirectResponse(url=redirect_url, status_code=301)

    if not full_path.exists():
        # Appending ".md" keeps dotted names intact ("v1.0" -> "v1.0.md");
        # with_suffix() is still tried second for URLs that relied on it.
        md_candidates = (
            full_path.parent / f"{full_path.name}.md",
            full_path.with_suffix(".md"),
        )
        image_candidates = (full_path.with_suffix(".jpg"), full_path.with_suffix(".jpeg"))
        md_match = next((c for c in md_candidates if c.exists()), None)
        # The candidate must actually exist: is_image() only looks at the
        # suffix, so testing it on a ".jpg" path is always true and used to
        # make the not-found handling below unreachable.
        image_match = next((c for c in image_candidates if c.is_file()), None)

        if md_match is not None:
            full_path = md_match
        elif image_match is not None:
            return FileResponse(image_match, media_type="image/jpeg")
        else:
            logger.warning(f"Path not found: {full_path}")
            similar_path = find_similar_path(path)
            if similar_path:
                return RedirectResponse(url=f"/{similar_path}", status_code=301)
            raise HTTPException(status_code=404, detail="Item not found")

    metadata = None
    template_name = "index.html"

    if full_path.is_dir():
        all_items, content, metadata = process_directory(full_path)
        date_created = None

        # Apply tag filter if specified (directories never match a tag)
        if tag:
            all_items = [
                item for item in all_items
                if not item.is_dir and item.metadata and item.metadata.tags and tag in item.metadata.tags
            ]

        # Apply sort if specified; unknown values keep the default order
        if sort and all_items:
            if sort == "name":
                all_items.sort(key=lambda x: x.name.lower())
            elif sort == "name-desc":
                all_items.sort(key=lambda x: x.name.lower(), reverse=True)
            elif sort == "date":
                all_items.sort(key=lambda x: x.mtime)
            elif sort == "date-desc":
                all_items.sort(key=lambda x: x.mtime, reverse=True)
    else:
        try:
            if is_image(full_path):
                # No media_type: FileResponse infers it from the file name,
                # so PNG and GIF files are no longer labelled image/jpeg.
                return FileResponse(full_path)
            content, metadata = process_markdown_file_with_metadata(full_path)
            all_items = None
            date_created = get_file_creation_date(full_path)
            # Use post template for markdown files
            template_name = "post.html"
        except UnicodeDecodeError:
            # NOTE(review): process_markdown_file_with_metadata catches every
            # exception itself, so this branch looks unreachable today --
            # confirm before relying on it for binary downloads.
            logger.error(f"UnicodeDecodeError when processing file: {full_path}")
            return FileResponse(full_path, filename=full_path.name)

    breadcrumbs = generate_breadcrumbs(path)

    # Use metadata title if available, otherwise use breadcrumbs
    if metadata and metadata.title:
        page_title = metadata.title
    else:
        page_title = generate_title_from_breadcrumbs(breadcrumbs)

    has_images = any(item.is_image for item in all_items or [])

    # If in image gallery, consider randomizing
    if has_images and template_name == "index.html":
        # Non-images keep their order; images follow in random order
        image_items = [item for item in all_items if item.is_image]
        non_image_items = [item for item in all_items if not item.is_image]
        random.shuffle(image_items)
        all_items = non_image_items + image_items

    # Special handling for index page
    is_root = path == ""

    # Determine if we should use photo_browser for image-heavy directories
    if has_images and sum(1 for item in all_items if item.is_image) > 5:
        template_name = "photo_browser.html"

    # Check for layout override in metadata
    if metadata and metadata.layout:
        template_name = f"{metadata.layout}.html"

    # If we're at root, load index.md content and use post.html template
    if is_root:
        index_file = MARKDOWN_DIR / "index.md"
        if index_file.exists():
            content, index_metadata = process_markdown_file_with_metadata(index_file)
            metadata = index_metadata
            template_name = "post.html"
        else:
            template_name = "directory.html"

    # Determine if we're in photos path
    is_photos = path.startswith("photos") or "/photos" in path

    return templates.TemplateResponse(
        template_name,
        {
            "title": page_title,
            "request": request,
            "breadcrumbs": breadcrumbs,
            "files": all_items,
            "content": content,
            "date_created": date_created,
            "has_images": has_images,
            "is_root": is_root,
            "is_photos": is_photos,
            "metadata": metadata,
            "path": path,
            "tag_filter": tag,
            "sort": sort,
        },
    )
@app.exception_handler(500)
async def custom_404_handler(request: Request, exc: Exception):
    """Send visitors back to the home page when a request fails with a server error.

    NOTE(review): the name says 404 but the handler is registered for status
    500 -- confirm which one is intended.
    """
    # If the home page itself failed, redirecting to it would loop forever;
    # answer with a plain 500 instead.
    if request.url.path == "/":
        return Response(
            content="Internal Server Error",
            status_code=500,
            media_type="text/plain",
        )
    return RedirectResponse(url="/")
if __name__ == "__main__":
    # Direct-execution entry point: serve the app on every interface, port 8000.
    # uvicorn is imported here so importing this module does not require it.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)