Files
kennethreitz.org/engine.py
T
kennethreitz 5c13eec028 Add created timestamp to directory listing
Display both created and modified dates for files and directories in
the template, along with improved mobile responsiveness throughout
the interface.
2025-05-24 14:30:59 -04:00

460 lines
17 KiB
Python

import os
import markdown
from flask import Flask, render_template, abort, request, url_for, jsonify, redirect
from pathlib import Path
import re
from datetime import datetime
from urllib.parse import quote
import json
from functools import lru_cache
# The Flask application object; configured with template_folder='templates'.
app = Flask(__name__, template_folder='templates')
# Root of the content tree that the routes below list and serve. This is a
# relative path, so filesystem checks made through it are resolved against
# the process's current working directory.
DATA_DIR = Path('data')
def get_directory_structure(path):
    """Describe the visible entries of a directory located under ``DATA_DIR``.

    Entries whose name starts with ``.`` and files named ``index.md`` (any
    letter case) are skipped. Entries that cannot be stat'ed (for example a
    dangling symlink) are skipped as well instead of failing the listing.

    Args:
        path: ``pathlib.Path`` of a directory inside ``DATA_DIR``.

    Returns:
        A list of dicts, directories first and then files, each group in
        sorted order. Every dict has the keys ``name``, ``display_name``,
        ``path``, ``url_path``, ``is_dir``, ``is_markdown``, ``is_image``,
        ``size``, ``created``, ``modified``, ``file_type`` and
        ``static_path``. An empty list is returned when ``path`` does not
        exist or is not a directory.
    """
    # is_dir() is already False for a missing path, so one check covers both.
    if not path.is_dir():
        return []

    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
    dirs = []
    files = []

    for item in sorted(path.iterdir()):
        if item.name.startswith('.') or item.name.lower() == 'index.md':
            continue

        # A single stat() call feeds size and both timestamps.
        try:
            info = item.stat()
        except OSError:
            # e.g. a dangling symlink: leave it out rather than break the page.
            continue

        is_dir = item.is_dir()
        is_file = item.is_file()
        suffix = item.suffix
        # as_posix() keeps forward slashes in URLs on every platform.
        relative = item.relative_to(DATA_DIR).as_posix()

        # Human-readable label: drop the extension of files, then prettify.
        display_name = item.stem if is_file and suffix else item.name
        display_name = display_name.replace('-', ' ').replace('_', ' ').title()

        if is_dir:
            url_path = f'/{relative}/'
        elif suffix == '.md':
            # Clean URL: strip the trailing ".md".
            url_path = f'/{relative[:-3]}'
        else:
            url_path = f'/{relative}'

        # st_ctime is the inode-change time on Unix, not the creation time;
        # prefer the real birth time on platforms that expose it.
        if hasattr(info, 'st_birthtime'):
            created_ts = info.st_birthtime
        else:
            created_ts = info.st_ctime

        item_info = {
            'name': item.name,
            'display_name': display_name,
            'path': relative,
            'url_path': url_path,
            'is_dir': is_dir,
            'is_markdown': suffix == '.md',
            'is_image': suffix.lower() in image_suffixes,
            'size': info.st_size if is_file else None,
            'created': datetime.fromtimestamp(created_ts),
            'modified': datetime.fromtimestamp(info.st_mtime),
            'file_type': suffix.lower() if is_file else 'directory',
            'static_path': None if is_dir else f'/static/data/{relative}',
        }
        (dirs if is_dir else files).append(item_info)

    # Directories first, then files.
    return dirs + files
# First "# Heading" line at the very start of the text or right after a blank
# line. Group 1 is the heading line itself (including its newline), group 2 is
# the heading text.
_FIRST_H1_RE = re.compile(r'(?:^|\n\n)(# (.+?)(?:\n|$))')

# Opening <h1>..<h6> tags, with or without attributes, that do not already
# carry a class attribute.
_HEADER_TAG_RE = re.compile(r'<(h[1-6])(?=[\s>])(?![^>]*\sclass=)')


@lru_cache(maxsize=128)
def render_markdown_file(file_path):
    """Render a markdown file to HTML, caching the result per path.

    The first level-1 heading is lifted out of the document and used as the
    page title so it is not rendered twice. All remaining headings receive
    the ``content-header`` CSS class.

    Results are memoised by ``lru_cache`` keyed on ``file_path`` only, so a
    change to the file on disk is not picked up until the cache entry is
    evicted or ``render_markdown_file.cache_clear()`` is called. Error
    results are cached the same way.

    Args:
        file_path: ``pathlib.Path`` of the markdown file (must be hashable).

    Returns:
        A dict with ``content`` (HTML string), ``title`` (string) and
        ``metadata`` (the ``Meta`` mapping produced by the markdown ``meta``
        extension). If anything fails, a dict with an error paragraph as
        ``content``, ``'Error'`` as ``title`` and empty ``metadata`` is
        returned instead of raising.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        first_h1 = None
        h1_match = _FIRST_H1_RE.search(content)
        if h1_match:
            first_h1 = h1_match.group(2).strip()
            # Cut out only the heading line. The blank line in front of it is
            # kept so the preceding paragraph or metadata block is not glued
            # onto the text that follows the heading.
            content = content[:h1_match.start(1)] + content[h1_match.end(1):]

        md = markdown.Markdown(extensions=[
            'meta',
            'toc',
            'codehilite',
            'fenced_code',
            'tables',
            'footnotes',
            'smarty',
            'nl2br',
            'sane_lists'
        ])
        html_content = md.convert(content.strip())

        # The 'toc' extension puts an id attribute on every heading, so the
        # opening tags look like <h2 id="...">; match on the tag name rather
        # than on a bare "<h2>" so the class is really applied.
        html_content = _HEADER_TAG_RE.sub(
            r'<\1 class="content-header"', html_content
        )

        metadata = getattr(md, 'Meta', {})

        # Title priority: first H1, then "title" metadata, then the filename.
        if first_h1:
            title = first_h1
        else:
            fallback = file_path.stem.replace('-', ' ').replace('_', ' ').title()
            title = metadata.get('title', [fallback])[0]

        return {
            'content': html_content,
            'title': title,
            'metadata': metadata
        }
    except Exception as e:
        # Best effort: callers always get a renderable dict back.
        return {
            'content': f'<p>Error reading file: {str(e)}</p>',
            'title': 'Error',
            'metadata': {}
        }
@app.route('/')
def index():
    """Render the homepage template with the page title and current year."""
    context = {
        'current_year': datetime.now().year,
        'title': "Home",
    }
    return render_template('homepage.html', **context)
@app.route('/directory')
def directory_index():
    """Render the listing of the root data directory.

    If ``index.md`` exists in the root it is rendered and shown with the
    listing: above it by default, below it when the rendered text is longer
    than 150 words. The directory is flagged as an image gallery when it
    holds at least three images and images make up at least half of its
    files.
    """
    entries = get_directory_structure(DATA_DIR)

    # Optional introduction text taken from the root index.md.
    rendered_index = None
    position = 'top'
    root_index = DATA_DIR / 'index.md'
    if root_index.exists():
        rendered_index = render_markdown_file(root_index)
        # Word count is taken on the HTML with its tags stripped.
        plain_text = re.sub(r'<[^>]+>', '', rendered_index['content'])
        if len(plain_text.split()) > 150:
            position = 'bottom'

    # Gallery detection.
    images = [entry for entry in entries if entry['is_image']]
    file_count = sum(1 for entry in entries if not entry['is_dir'])
    is_gallery = (
        len(images) >= 3
        and file_count > 0
        and (len(images) / file_count) >= 0.5
    )

    return render_template(
        'directory.html',
        items=entries,
        current_path='',
        title='Kenneth Reitz',
        breadcrumbs=[],
        index_content=rendered_index,
        content_position=position,
        is_image_gallery=is_gallery,
        image_items=images,
        current_year=datetime.now().year,
    )
@app.route('/<path:path>')
def serve_path(path):
    """Serve a directory listing, markdown page, image page or raw file.

    ``path`` is taken relative to ``DATA_DIR``. When nothing exists at that
    location, ``path + '.md'`` is tried so markdown pages can be reached
    through extension-less URLs.

    Args:
        path: URL path captured by the route (may end with ``/``).

    Returns:
        The rendered ``directory.html``, ``post.html`` or ``photo.html``
        template, or the file itself for any other file type.

    Aborts with 404 when the path tries to leave ``DATA_DIR`` or nothing
    matching exists.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.webp')

    # The path comes straight from the URL: refuse absolute paths and ".."
    # segments so a request can never address files outside DATA_DIR.
    requested = Path(path)
    if requested.is_absolute() or '..' in requested.parts:
        abort(404)

    full_path = DATA_DIR / path

    # Clean URLs: fall back to the markdown file of the same name.
    if not full_path.exists():
        md_path = DATA_DIR / (path + '.md')
        if not md_path.exists():
            abort(404)
        full_path = md_path

    # Directory links end with "/", which would leave an empty last segment;
    # dropping empty segments keeps the title and the breadcrumbs correct.
    path_parts = [part for part in path.split('/') if part]

    # One breadcrumb per ancestor; the current page itself is excluded.
    breadcrumbs = []
    current = ''
    for part in path_parts[:-1]:
        current = f"{current}/{part}" if current else part
        breadcrumbs.append({
            'name': part.replace('-', ' ').replace('_', ' ').title(),
            'url': f"/{current}"
        })

    if full_path.is_dir():
        items = get_directory_structure(full_path)

        # Image gallery: at least three images making up half of the files.
        image_items = [item for item in items if item['is_image']]
        total_files = [item for item in items if not item['is_dir']]
        is_image_gallery = (
            len(image_items) >= 3
            and len(total_files) > 0
            and (len(image_items) / len(total_files)) >= 0.5
        )

        # Optional introduction text from the directory's index.md.
        index_file = full_path / 'index.md'
        index_content = None
        content_position = 'top'
        if index_file.exists():
            index_content = render_markdown_file(index_file)
            # Long introductions (more than 150 words once the HTML tags are
            # stripped) go below the listing.
            content_text = re.sub(r'<[^>]+>', '', index_content['content'])
            if len(content_text.split()) > 150:
                content_position = 'bottom'

        title = path_parts[-1].replace('-', ' ').replace('_', ' ').title()
        return render_template('directory.html',
                               items=items,
                               current_path=path,
                               title=title,
                               breadcrumbs=breadcrumbs,
                               index_content=index_content,
                               content_position=content_position,
                               is_image_gallery=is_image_gallery,
                               image_items=image_items,
                               current_year=datetime.now().year,
                               current_page=title)

    if full_path.suffix == '.md':
        content_data = render_markdown_file(full_path)
        return render_template('post.html',
                               content=content_data['content'],
                               title=content_data['title'],
                               metadata=content_data['metadata'],
                               breadcrumbs=breadcrumbs,
                               current_path=path,
                               current_year=datetime.now().year,
                               current_page=content_data['title'])

    if full_path.suffix.lower() in image_suffixes:
        # Collect the sibling images so the photo page can offer navigation.
        parent_dir = full_path.parent
        gallery_images = []
        if parent_dir.exists():
            for img in sorted(parent_dir.iterdir()):
                if img.suffix.lower() in image_suffixes:
                    relative = img.relative_to(DATA_DIR).as_posix()
                    gallery_images.append({
                        'name': img.name,
                        'path': f"/static/data/{relative}",
                        'url': f"/{relative}",
                        'is_current': img == full_path
                    })

        photo_title = full_path.stem.replace('-', ' ').replace('_', ' ').title()
        return render_template('photo.html',
                               image_path=f"/static/data/{path}",
                               title=photo_title,
                               breadcrumbs=breadcrumbs,
                               gallery_images=gallery_images,
                               current_path=path,
                               current_year=datetime.now().year,
                               current_page=photo_title)

    # Any other file type is sent as-is.
    from flask import send_file
    return send_file(full_path)
@app.route('/static/data/<path:path>')
def serve_data_file(path):
    """Send a raw file from the data directory.

    Args:
        path: URL path captured by the route, taken relative to ``DATA_DIR``.

    Returns:
        The file as a response.

    Aborts with 404 when the path tries to leave ``DATA_DIR`` or does not
    name an existing regular file.
    """
    from flask import send_file

    # The path comes straight from the URL: refuse absolute paths and ".."
    # segments so a request can never address files outside DATA_DIR.
    requested = Path(path)
    if requested.is_absolute() or '..' in requested.parts:
        abort(404)

    full_path = DATA_DIR / path
    # is_file() is already False for a missing path.
    if not full_path.is_file():
        abort(404)

    return send_file(full_path)
@app.route('/api/search')
def api_search():
    """Search names, paths and markdown text under ``DATA_DIR``.

    The query is read from the ``q`` request argument and matched
    case-insensitively as a substring. Hidden entries (names starting with
    ``.``) are not searched.

    Returns:
        A JSON list, best match first. Each element has ``name``, ``type``
        (``'directory'``, ``'article'`` for markdown files, ``'file'`` for
        anything else), ``path`` (relative to ``DATA_DIR``, forward slashes),
        ``display_path`` and ``relevance``. An empty list is returned for an
        empty query.
    """
    query = request.args.get('q', '').strip().lower()
    if not query:
        return jsonify([])

    def build_children(directory):
        """Return search nodes for the visible entries of *directory*."""
        try:
            entries = sorted(directory.iterdir())
        except OSError:
            # Missing or unreadable directory: nothing to search there.
            return []
        return [
            build_node(entry)
            for entry in entries
            if not entry.name.startswith('.')
        ]

    def build_node(fs_path):
        """Describe one file or directory as a search node."""
        node = {
            'name': fs_path.name,
            'path': fs_path.relative_to(DATA_DIR).as_posix(),
        }
        if fs_path.is_dir():
            node['type'] = 'directory'
            node['children'] = build_children(fs_path)
        elif fs_path.suffix == '.md':
            node['type'] = 'article'
            try:
                node['content'] = fs_path.read_text(encoding='utf-8')
            except (OSError, UnicodeDecodeError):
                # Unreadable text: still searchable by name and path.
                pass
        else:
            node['type'] = 'file'
        return node

    results = []

    def search_node(node, path=""):
        """Score *node* against the query, then descend into its children."""
        name = node.get('name', '')
        node_name = name.lower()
        node_path = node.get('path', '').lower()
        node_content = node.get('content', '').lower()

        display_path = path + '/' + name if path else name

        if query in node_name or query in node_path or query in node_content:
            # Name hits weigh most, then path hits, then content hits.
            relevance = 0
            if query in node_name:
                relevance += 10
                if node_name.startswith(query):
                    relevance += 5
            if query in node_path:
                relevance += 3
            if query in node_content:
                relevance += 1
                # A little extra for every occurrence in the content.
                relevance += node_content.count(query) * 0.1

            results.append({
                'name': name,
                'type': node.get('type', ''),
                'path': node.get('path', ''),
                'display_path': display_path,
                'relevance': relevance
            })

        for child in node.get('children', []):
            search_node(child, display_path)

    # Search the real content tree. Previously a hard-coded root node with no
    # children was searched, so nothing under DATA_DIR could ever match.
    for top_level in build_children(DATA_DIR):
        search_node(top_level)

    results.sort(key=lambda x: x['relevance'], reverse=True)
    return jsonify(results)
def generate_sitemap_data():
    """Collect sitemap entries by walking ``DATA_DIR`` recursively.

    Directories and markdown files are included; hidden entries (names
    starting with ``.``) and other file types are not.

    Returns:
        A list of dicts. The three fixed pages (``/``, ``/directory``,
        ``/sitemap``) come first and have ``url``, ``title`` and ``type``.
        They are followed by the scanned entries, which additionally carry
        ``modified`` (a ``datetime``) and have ``type`` ``'directory'`` or
        ``'article'``.
    """
    sitemap_items = []

    def scan_directory(path, url_path=""):
        """Append entries for *path* and everything below it."""
        # is_dir() is already False for a missing path.
        if not path.is_dir():
            return

        for item in sorted(path.iterdir()):
            if item.name.startswith('.'):
                continue

            item_url_path = f"{url_path}/{item.name}" if url_path else item.name

            if item.is_dir():
                sitemap_items.append({
                    'url': f"/{item_url_path}",
                    'title': item.name.replace('-', ' ').replace('_', ' ').title(),
                    'type': 'directory',
                    'modified': datetime.fromtimestamp(item.stat().st_mtime)
                })
                scan_directory(item, item_url_path)
            elif item.suffix == '.md':
                # Clean URL: the suffix check above guarantees the name ends
                # with ".md", so the last three characters can be dropped.
                clean_url_path = item_url_path[:-3]

                # Filename-derived title, replaced by the rendered title when
                # rendering works.
                title = item.stem.replace('-', ' ').replace('_', ' ').title()
                try:
                    title = render_markdown_file(item)['title']
                except Exception:
                    # Best effort: keep the filename-derived title. Only
                    # Exception is caught so KeyboardInterrupt and SystemExit
                    # still propagate.
                    pass

                sitemap_items.append({
                    'url': f"/{clean_url_path}",
                    'title': title,
                    'type': 'article',
                    'modified': datetime.fromtimestamp(item.stat().st_mtime)
                })

    scan_directory(DATA_DIR)

    # Pages that are not backed by a file in the data directory.
    static_pages = [
        {'url': '/', 'title': 'Kenneth Reitz - Digital Mind Map', 'type': 'homepage'},
        {'url': '/directory', 'title': 'File Explorer', 'type': 'directory'},
        {'url': '/sitemap', 'title': 'Site Map', 'type': 'sitemap'}
    ]
    return static_pages + sitemap_items
@app.route('/sitemap')
def sitemap():
    """Render the human-readable sitemap page, grouped by entry type."""
    entries = generate_sitemap_data()

    # One bucket per known type; entries of any other type are left out.
    buckets = {
        kind: []
        for kind in ('homepage', 'directory', 'article', 'sitemap')
    }
    for entry in entries:
        bucket = buckets.get(entry.get('type', 'article'))
        if bucket is not None:
            bucket.append(entry)

    return render_template(
        'sitemap.html',
        title='Site Map',
        sitemap_data=buckets,
        total_items=len(entries),
        breadcrumbs=[],
        current_year=datetime.now().year,
        current_page='Site Map',
    )
@app.route('/sitemap.xml')
def sitemap_xml():
    """Generate the XML sitemap for search engines.

    Returns:
        An ``application/xml`` response with one ``<url>`` element per
        sitemap entry; ``<lastmod>`` is included for entries that carry a
        ``modified`` timestamp.
    """
    from flask import Response

    lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    ]
    for item in generate_sitemap_data():
        # Percent-encode the path (slashes are kept). This also removes every
        # character that is special in XML, such as "&" and "<", so unusual
        # file names cannot produce an invalid document.
        location = quote(item["url"])
        lines.append(' <url>')
        lines.append(f' <loc>https://kennethreitz.org{location}</loc>')
        if 'modified' in item:
            lines.append(f' <lastmod>{item["modified"].strftime("%Y-%m-%d")}</lastmod>')
        lines.append(' </url>')
    lines.append('</urlset>')

    return Response('\n'.join(lines), mimetype='application/xml')
if __name__ == '__main__':
    # Entry point when the module is run directly: start Flask's built-in
    # server on port 8000, listening on all interfaces, in debug mode.
    # NOTE(review): debug mode together with host='0.0.0.0' makes the
    # interactive debugger reachable from the network; confirm this is only
    # ever run on a trusted machine.
    app.run(host='0.0.0.0', port=8000, debug=True)