Files
kennethreitz.org/engine.py
T
kennethreitz e7e0bf9a64 Implement clean URLs and sitemap functionality
Add clean URL routing that removes .md extensions from markdown files,
add comprehensive sitemap generation with both HTML and XML formats,
update navigation with new links, and style improvements including
better project card links and enhanced mobile navigation.
2025-05-23 11:51:02 -04:00

517 lines
19 KiB
Python

import os
import markdown
from flask import Flask, render_template, abort, request, url_for, jsonify
from pathlib import Path
import re
from datetime import datetime
from urllib.parse import quote
import json
from functools import lru_cache
# Flask application object; the route decorators below register against it.
# Templates are looked up in the 'templates' folder.
app = Flask(__name__, template_folder='templates')
# Root of the content tree that the routes below list, render and serve.
# This is a relative path, so filesystem checks made directly on it
# (exists(), iterdir(), ...) depend on the process's working directory.
DATA_DIR = Path('data')
def get_directory_structure(path):
    """List the visible entries of a directory as template-ready dicts.

    Hidden entries (names starting with '.') and ``index.md`` are left out.
    Every entry is described by a dict with the keys ``name``,
    ``display_name``, ``path``, ``url_path``, ``is_dir``, ``is_markdown``,
    ``is_image``, ``size``, ``modified`` and ``file_type``. Paths and URLs
    are expressed relative to ``DATA_DIR``; markdown files get a clean URL
    without the ``.md`` extension and directories get a trailing slash.

    Args:
        path: ``Path`` of the directory to list.

    Returns:
        A list of entry dicts, directories first and files after, each group
        in sorted path order. Empty when ``path`` is missing or is not a
        directory.
    """
    if not (path.exists() and path.is_dir()):
        return []

    image_suffixes = ['.jpg', '.jpeg', '.png', '.gif', '.webp']
    folders, documents = [], []

    for entry in sorted(path.iterdir()):
        hidden = entry.name.startswith('.')
        if hidden or entry.name.lower() == 'index.md':
            continue

        entry_is_dir = entry.is_dir()
        entry_is_file = entry.is_file()
        suffix = entry.suffix
        relative = str(entry.relative_to(DATA_DIR))
        stats = entry.stat()

        # Human-readable label: drop the extension of files, then turn
        # separators into spaces and title-case the result.
        label = entry.stem if (entry_is_file and suffix) else entry.name
        label = label.replace('-', ' ').replace('_', ' ').title()

        # Clean URL: directories end with '/', markdown loses its '.md'.
        if entry_is_dir:
            url = '/' + relative + '/'
        elif suffix == '.md':
            url = '/' + relative[:-3]
        else:
            url = '/' + relative

        description = {
            'name': entry.name,
            'display_name': label,
            'path': relative,
            'url_path': url,
            'is_dir': entry_is_dir,
            'is_markdown': suffix == '.md',
            'is_image': suffix.lower() in image_suffixes,
            'size': stats.st_size if entry_is_file else None,
            'modified': datetime.fromtimestamp(stats.st_mtime),
            'file_type': suffix.lower() if entry_is_file else 'directory'
        }

        target = folders if entry_is_dir else documents
        target.append(description)

    # Directories are listed ahead of files.
    return folders + documents
def build_mindmap_data():
    """Build a hierarchical data structure for the mindmap.

    Walks ``DATA_DIR`` recursively, skipping entries whose name starts with
    '.', and ignoring files that are not markdown.

    Returns:
        A nested dict. Directory nodes have ``name``, ``type`` ('directory'),
        ``path`` (relative to ``DATA_DIR``, '' for the root) and ``children``.
        Markdown nodes have ``name``, ``type`` ('markdown'), ``path``,
        ``size``, ``modified`` (mtime as a timestamp) and ``content`` (the
        raw file text, used for full-text search; '' if it cannot be read).
    """
    def process_directory(path, relative_path=""):
        node = {
            'name': path.name if path.name else 'Kenneth Reitz',
            'type': 'directory',
            'path': relative_path,
            'children': []
        }
        if not path.exists() or not path.is_dir():
            return node

        for item in sorted(path.iterdir()):
            if item.name.startswith('.'):
                continue

            item_relative_path = str(item.relative_to(DATA_DIR))

            if item.is_dir():
                node['children'].append(
                    process_directory(item, item_relative_path)
                )
            elif item.suffix == '.md':
                # Reading is best-effort: an unreadable or non-UTF-8 file is
                # still listed, just without searchable content. Only the
                # errors a file read can raise are suppressed, so unrelated
                # bugs are not hidden.
                try:
                    content = item.read_text(encoding='utf-8')
                except (OSError, UnicodeDecodeError):
                    content = ""

                stats = item.stat()
                node['children'].append({
                    'name': item.stem.replace('-', ' ').replace('_', ' ').title(),
                    'type': 'markdown',
                    'path': item_relative_path,
                    'size': stats.st_size,
                    'modified': stats.st_mtime,
                    'content': content
                })
        return node

    return process_directory(DATA_DIR)
# First level-1 heading at the very start of the text or right after a blank
# line. Group 1 is the leading separator, group 2 the heading text.
_FIRST_H1_PATTERN = re.compile(r'(^|\n\n)# (.+?)(?:\n|$)')

# Opening tag of any HTML heading, with or without attributes.
_HEADING_TAG_PATTERN = re.compile(r'<(h[1-6])(\s[^>]*)?>')


def _add_content_header_class(match):
    """Return the heading tag in ``match`` with the content-header class."""
    tag, attrs = match.group(1), match.group(2) or ''
    if 'class=' in attrs:
        # Leave tags that already carry a class (e.g. raw HTML) untouched
        # rather than emitting a duplicate attribute.
        return match.group(0)
    return f'<{tag} class="content-header"{attrs}>'


@lru_cache(maxsize=128)
def _render_markdown_cached(file_path, revision):
    """Render ``file_path``; ``revision`` only takes part in the cache key."""
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Pull the first H1 out of the body so it can be used as the page title
    # without being rendered a second time inside the content.
    first_h1 = None
    h1_match = _FIRST_H1_PATTERN.search(content)
    if h1_match:
        first_h1 = h1_match.group(2).strip()
        # Keep the separator that preceded the heading: dropping it would
        # glue the text before the heading (e.g. a metadata block) onto the
        # text that follows it.
        content = (
            content[:h1_match.start()]
            + h1_match.group(1)
            + content[h1_match.end():]
        )

    md = markdown.Markdown(extensions=[
        'meta',
        'toc',
        'codehilite',
        'fenced_code',
        'tables',
        'footnotes',
        'smarty',
        'nl2br',
        'sane_lists'
    ])
    html_content = md.convert(content.strip())

    # Tag headings so page-level header styles do not clash with them. The
    # `toc` extension emits headings with an `id` attribute, so the tag is
    # matched with or without attributes instead of only as a bare `<hN>`.
    html_content = _HEADING_TAG_PATTERN.sub(
        _add_content_header_class, html_content
    )

    metadata = getattr(md, 'Meta', {})

    # Title precedence: first H1, then `title` metadata, then the file name.
    if first_h1:
        title = first_h1
    else:
        fallback = Path(file_path).stem.replace('-', ' ').replace('_', ' ').title()
        title = metadata.get('title', [fallback])[0]

    return {
        'content': html_content,
        'title': title,
        'metadata': metadata
    }


def render_markdown_file(file_path):
    """Render a markdown file to HTML with caching for performance.

    Results are cached per (path, modification time), so an edited file is
    re-rendered on the next call. Failures are never cached.

    Args:
        file_path: Hashable path (normally a ``Path``) of the markdown file.

    Returns:
        A dict with ``content`` (rendered HTML), ``title`` and ``metadata``
        (as exposed by the markdown ``meta`` extension). If the file cannot
        be read or rendered, ``title`` is 'Error', ``metadata`` is empty and
        ``content`` is an HTML paragraph describing the error. The dict may
        be shared between calls; treat it as read-only.
    """
    from html import escape

    try:
        revision = Path(file_path).stat().st_mtime_ns
        return _render_markdown_cached(file_path, revision)
    except Exception as e:
        # The message can contain the file path, so escape it before it is
        # embedded in HTML.
        return {
            'content': f'<p>Error reading file: {escape(str(e))}</p>',
            'title': 'Error',
            'metadata': {}
        }


# Keep the cache-management helpers the lru_cache-decorated version exposed.
render_markdown_file.cache_clear = _render_markdown_cached.cache_clear
render_markdown_file.cache_info = _render_markdown_cached.cache_info
@app.route('/')
def index():
    """Render the homepage, which is the mindmap visualization."""
    context = {
        'title': 'Kenneth Reitz - Digital Mind Map',
        'breadcrumbs': [],
        'current_year': datetime.now().year,
        'current_page': 'Mind Map',
    }
    return render_template('mindmap.html', **context)
@app.route('/directory')
def directory_index():
    """Render the listing of the root data directory.

    If the data directory has an ``index.md`` it is rendered alongside the
    listing: above it by default, below it once the rendered text exceeds
    150 words.
    """
    listing = get_directory_structure(DATA_DIR)

    root_index = DATA_DIR / 'index.md'
    rendered_index = None
    placement = 'top'

    if root_index.exists():
        rendered_index = render_markdown_file(root_index)
        # Word count is taken on the text left after removing HTML tags.
        plain_text = re.sub(r'<[^>]+>', '', rendered_index['content'])
        if len(plain_text.split()) > 150:
            placement = 'bottom'

    context = {
        'items': listing,
        'current_path': '',
        'title': 'Kenneth Reitz',
        'breadcrumbs': [],
        'index_content': rendered_index,
        'content_position': placement,
        'current_year': datetime.now().year,
    }
    return render_template('directory.html', **context)
@app.route('/<path:path>')
def serve_path(path):
    """Serve files and directories from the data folder.

    ``path`` is looked up below ``DATA_DIR``. If nothing exists at that
    location, ``path + '.md'`` is tried so markdown files are reachable
    through clean URLs. What is returned depends on what was found:

    * directory: a listing page, plus its rendered ``index.md`` if present
      (shown below the listing once it exceeds 150 words);
    * markdown file: the rendered post;
    * image: a photo page with the sibling images as a gallery;
    * anything else: the raw file.

    Args:
        path: URL path captured by the route (user-controlled).

    Aborts with 404 when the path escapes the data folder or matches nothing.
    """
    from flask import send_file

    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.webp')

    # `path` comes straight from the URL. Refuse absolute paths and parent
    # references so a request can never reach files outside DATA_DIR.
    requested = Path(path)
    if requested.anchor or '..' in requested.parts:
        abort(404)

    full_path = DATA_DIR / path

    # Clean URLs: fall back to the markdown file of the same name.
    if not full_path.exists():
        md_path = DATA_DIR / (path + '.md')
        if not (md_path.exists() and md_path.suffix == '.md'):
            abort(404)
        full_path = md_path

    # Ignore empty segments so a trailing slash (as used in directory links)
    # does not produce an empty title or list the current page as a crumb.
    path_parts = [part for part in path.split('/') if part]

    # One breadcrumb per ancestor; the current page itself is excluded.
    breadcrumbs = [
        {
            'name': part.replace('-', ' ').replace('_', ' ').title(),
            'url': '/' + '/'.join(path_parts[:depth]),
        }
        for depth, part in enumerate(path_parts[:-1], start=1)
    ]
    current_year = datetime.now().year

    if full_path.is_dir():
        items = get_directory_structure(full_path)

        index_file = full_path / 'index.md'
        index_content = None
        content_position = 'top'
        if index_file.exists():
            index_content = render_markdown_file(index_file)
            # Word count is taken on the text left after removing HTML tags;
            # long introductions go below the listing.
            content_text = re.sub(r'<[^>]+>', '', index_content['content'])
            if len(content_text.split()) > 150:
                content_position = 'bottom'

        last_part = path_parts[-1] if path_parts else ''
        title = last_part.replace('-', ' ').replace('_', ' ').title()
        return render_template('directory.html',
                               items=items,
                               current_path=path,
                               title=title,
                               breadcrumbs=breadcrumbs,
                               index_content=index_content,
                               content_position=content_position,
                               current_year=current_year,
                               current_page=title)

    if full_path.suffix == '.md':
        content_data = render_markdown_file(full_path)
        return render_template('post.html',
                               content=content_data['content'],
                               title=content_data['title'],
                               metadata=content_data['metadata'],
                               breadcrumbs=breadcrumbs,
                               current_path=path,
                               current_year=current_year,
                               current_page=content_data['title'])

    if full_path.suffix.lower() in image_suffixes:
        # Every image in the same directory becomes part of the gallery.
        gallery_images = [
            {
                'name': img.name,
                'path': f"/static/data/{img.relative_to(DATA_DIR)}",
                'url': f"/{img.relative_to(DATA_DIR)}",
                'is_current': img == full_path
            }
            for img in sorted(full_path.parent.iterdir())
            if img.suffix.lower() in image_suffixes
        ]
        title = full_path.stem.replace('-', ' ').replace('_', ' ').title()
        return render_template('photo.html',
                               image_path=f"/static/data/{path}",
                               title=title,
                               breadcrumbs=breadcrumbs,
                               gallery_images=gallery_images,
                               current_path=path,
                               current_year=current_year,
                               current_page=title)

    # Other files - serve directly
    return send_file(full_path)
@app.route('/static/data/<path:path>')
def serve_data_file(path):
    """Serve static files from the data directory.

    Args:
        path: URL path captured by the route (user-controlled), looked up
            below ``DATA_DIR``.

    Aborts with 404 when the path escapes the data directory or does not
    name an existing regular file.
    """
    from flask import send_file

    # `path` comes straight from the URL. Refuse absolute paths and parent
    # references so a request can never read files outside DATA_DIR.
    requested = Path(path)
    if requested.anchor or '..' in requested.parts:
        abort(404)

    full_path = DATA_DIR / path
    # is_file() is already False for paths that do not exist.
    if not full_path.is_file():
        abort(404)
    return send_file(full_path)
@app.route('/api/mindmap')
def api_mindmap():
    """API endpoint to get mindmap data.

    Returns the tree produced by ``build_mindmap_data()`` as JSON, with the
    ``content`` field removed from every node to keep the response small.
    """
    def strip_content(node):
        node.pop('content', None)
        for child in node.get('children', ()):
            strip_content(child)

    # The tree is built fresh for this request, so it can be pruned in place;
    # no defensive copy is needed. Revisit this if the tree is ever cached
    # or shared between requests.
    mindmap_data = build_mindmap_data()
    strip_content(mindmap_data)
    return jsonify(mindmap_data)
@app.route('/api/search')
def api_search():
    """API endpoint for full-text search across the knowledge base.

    Reads the search text from the ``q`` query parameter and matches it,
    case-insensitively, as a substring of each node's name, path and content.

    Returns:
        A JSON list of matches sorted by descending relevance. Each match
        has ``name``, ``type``, ``path``, ``display_path`` (node names from
        the root joined with '/') and ``relevance``. The list is empty when
        the query is missing or blank.

    Relevance scoring: +10 for a name match (+5 more if the name starts with
    the query), +3 for a path match, +1 for a content match plus 0.1 per
    occurrence in the content.
    """
    # Surrounding whitespace is dropped so a blank query does not match
    # nearly every document.
    query = request.args.get('q', '').strip().lower()
    if not query:
        return jsonify([])

    results = []

    def search_node(node, parent_display_path=""):
        name = node.get('name', '')
        name_lower = name.lower()
        path_lower = node.get('path', '').lower()
        content_lower = node.get('content', '').lower()

        display_path = (
            parent_display_path + '/' + name if parent_display_path else name
        )

        in_name = query in name_lower
        in_path = query in path_lower
        in_content = query in content_lower

        if in_name or in_path or in_content:
            relevance = 0
            if in_name:
                relevance += 10
                if name_lower.startswith(query):
                    relevance += 5
            if in_path:
                relevance += 3
            if in_content:
                relevance += 1
                # Extra points for each occurrence in content
                relevance += content_lower.count(query) * 0.1

            results.append({
                'name': name,
                'type': node.get('type', ''),
                'path': node.get('path', ''),
                'display_path': display_path,
                'relevance': relevance
            })

        for child in node.get('children', ()):
            search_node(child, display_path)

    search_node(build_mindmap_data())

    # Most relevant first; the sort is stable, so ties keep traversal order.
    results.sort(key=lambda match: match['relevance'], reverse=True)
    return jsonify(results)
@app.route('/mindmap')
def mindmap():
    """Render the mindmap visualization page."""
    context = {
        'title': 'Mind Map',
        'breadcrumbs': [],
        'current_year': datetime.now().year,
        'current_page': 'Mind Map',
    }
    return render_template('mindmap.html', **context)
def generate_sitemap_data():
    """Generate sitemap data by recursively scanning the data directory.

    Entries whose name starts with '.' are skipped, and only directories and
    markdown files are listed. Markdown URLs have their ``.md`` extension
    removed.

    Returns:
        A list of dicts. The three static pages come first (``url``,
        ``title``, ``type``), followed by the scanned entries, which also
        carry ``modified`` (a ``datetime`` from the entry's mtime). ``type``
        is 'directory' or 'article' for scanned entries.
    """
    sitemap_items = []

    def scan_directory(path, url_path=""):
        if not path.exists() or not path.is_dir():
            return

        for item in sorted(path.iterdir()):
            if item.name.startswith('.'):
                continue

            item_url_path = f"{url_path}/{item.name}" if url_path else item.name
            modified = datetime.fromtimestamp(item.stat().st_mtime)

            if item.is_dir():
                sitemap_items.append({
                    'url': f"/{item_url_path}",
                    'title': item.name.replace('-', ' ').replace('_', ' ').title(),
                    'type': 'directory',
                    'modified': modified
                })
                scan_directory(item, item_url_path)
            elif item.suffix == '.md':
                # The name is known to end in '.md' here; drop it for the
                # clean URL.
                clean_url_path = item_url_path[:-3]

                # Prefer the title from the rendered file; keep the
                # file-name based title if rendering fails. Only ordinary
                # exceptions are suppressed, so KeyboardInterrupt and
                # SystemExit still propagate.
                title = item.stem.replace('-', ' ').replace('_', ' ').title()
                try:
                    title = render_markdown_file(item)['title']
                except Exception:
                    pass

                sitemap_items.append({
                    'url': f"/{clean_url_path}",
                    'title': title,
                    'type': 'article',
                    'modified': modified
                })

    scan_directory(DATA_DIR)

    static_pages = [
        {'url': '/', 'title': 'Kenneth Reitz - Digital Mind Map', 'type': 'homepage'},
        {'url': '/directory', 'title': 'File Explorer', 'type': 'directory'},
        {'url': '/sitemap', 'title': 'Site Map', 'type': 'sitemap'}
    ]
    return static_pages + sitemap_items
@app.route('/sitemap')
def sitemap():
    """Render the human-readable sitemap page.

    Sitemap entries are bucketed by their ``type`` (entries without one
    count as 'article'); entries of any other type are left out of the
    buckets but still included in the total count.
    """
    entries = generate_sitemap_data()

    buckets = {
        kind: [] for kind in ('homepage', 'directory', 'article', 'sitemap')
    }
    for entry in entries:
        bucket = buckets.get(entry.get('type', 'article'))
        if bucket is not None:
            bucket.append(entry)

    context = {
        'title': 'Site Map',
        'sitemap_data': buckets,
        'total_items': len(entries),
        'breadcrumbs': [],
        'current_year': datetime.now().year,
        'current_page': 'Site Map',
    }
    return render_template('sitemap.html', **context)
@app.route('/sitemap.xml')
def sitemap_xml():
    """Generate XML sitemap for search engines.

    Emits one ``<url>`` element per entry from ``generate_sitemap_data()``,
    with a ``<lastmod>`` date (YYYY-MM-DD) for entries that have a
    ``modified`` timestamp.

    Returns:
        A response with mimetype ``application/xml``.
    """
    from flask import Response

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    for item in generate_sitemap_data():
        # URLs are built from file names. Percent-encoding them keeps the
        # document well-formed (no raw '&' or '<') and the URLs valid when a
        # name contains spaces or other special characters.
        location = quote(item["url"])
        lines.append(' <url>')
        lines.append(f' <loc>https://kennethreitz.org{location}</loc>')
        if 'modified' in item:
            lines.append(f' <lastmod>{item["modified"].strftime("%Y-%m-%d")}</lastmod>')
        lines.append(' </url>')
    lines.append('</urlset>')

    return Response('\n'.join(lines), mimetype='application/xml')
if __name__ == '__main__':
    # The server listens on every interface. Flask's debug mode enables the
    # interactive debugger, which lets anyone who can reach the port run
    # code, so it is opt-in through the FLASK_DEBUG environment variable
    # instead of always on.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=8000)