import os import mistune from flask import Flask, render_template, abort, request, url_for, jsonify, redirect from pathlib import Path import re from datetime import datetime from urllib.parse import quote import json from functools import lru_cache app = Flask(__name__, template_folder='templates') # Add custom Jinja2 filters @app.template_filter('strftime') def strftime_filter(date, fmt='%Y-%m-%d'): """Format a datetime object using strftime.""" if date is None: return '' if isinstance(date, str) and date.lower() == 'now': date = datetime.now() return date.strftime(fmt) DATA_DIR = Path('data') def get_directory_structure(path): """Get the directory structure for a given path.""" items = [] if not path.exists() or not path.is_dir(): return items # Separate directories and files for better organization dirs = [] files = [] for item in sorted(path.iterdir(), reverse=True): if item.name.startswith('.') or item.name.lower() == 'index.md': continue # Create display name without extension for files display_name = item.stem if item.is_file() and item.suffix else item.name display_name = display_name.replace('-', ' ').replace('_', ' ').title() # Create clean URL path without .md extension if item.is_dir(): url_path = '/' + str(item.relative_to(DATA_DIR)) + '/' elif item.suffix == '.md': # Remove .md extension for clean URLs relative_path = str(item.relative_to(DATA_DIR)) url_path = '/' + relative_path[:-3] # Remove .md extension else: url_path = '/' + str(item.relative_to(DATA_DIR)) item_info = { 'name': item.name, 'display_name': display_name, 'path': str(item.relative_to(DATA_DIR)), 'url_path': url_path, 'is_dir': item.is_dir(), 'is_markdown': item.suffix == '.md', 'is_image': item.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp'], 'size': item.stat().st_size if item.is_file() else None, 'created': datetime.fromtimestamp(item.stat().st_ctime), 'modified': datetime.fromtimestamp(item.stat().st_mtime), 'file_type': item.suffix.lower() if item.is_file() else 'directory', 'static_path': f"/static/data/{item.relative_to(DATA_DIR)}" if not item.is_dir() else None } if item.is_dir(): dirs.append(item_info) else: files.append(item_info) # Return directories first, then files return dirs + files @lru_cache(maxsize=16) def render_markdown_file(file_path): """Render a markdown file to HTML with caching for performance.""" try: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() # Extract YAML front matter if it exists metadata = {} yaml_pattern = r'^---\s*\n(.*?)\n---\s*\n' yaml_match = re.match(yaml_pattern, content, re.DOTALL) if yaml_match: try: import yaml metadata = yaml.safe_load(yaml_match.group(1)) or {} content = content[yaml_match.end():] except: pass # Extract first h1 header if it exists first_h1 = None # Look for the first H1 at the start of the file (must be on first line or after blank line) h1_match = re.search(r'^# (.+?)$', content, re.MULTILINE) if h1_match: first_h1 = h1_match.group(1).strip() # Remove only the first h1 line from content to avoid duplication content = re.sub(r'^# .+?$', '', content, count=1, flags=re.MULTILINE) # Configure mistune renderer markdown = mistune.create_markdown( escape=False, plugins=['strikethrough', 'footnotes', 'table', 'task_lists', 'def_list'] ) # Process content to HTML html_content = markdown(content.strip()) # Add classes to headers to prevent conflicts with page headers html_content = html_content.replace('

', '

') html_content = html_content.replace('

', '

') html_content = html_content.replace('

', '

') html_content = html_content.replace('

', '

') html_content = html_content.replace('

', '
') html_content = html_content.replace('
', '
') # Use the first h1 as title if available, otherwise fallback to metadata or filename if first_h1: title = first_h1 elif 'title' in metadata: title = metadata['title'] else: title = file_path.stem.replace('-', ' ').replace('_', ' ').title() return { 'content': html_content, 'title': title, 'metadata': metadata } except Exception as e: return { 'content': f'

Error reading file: {str(e)}

', 'title': 'Error', 'metadata': {} } @app.route('/') def index(): """Homepage showcasing download statistics.""" return render_template('homepage.html', current_year=datetime.now().year, title="Home") @app.route('/directory') def directory_index(): """Directory listing that was previously the homepage.""" items = get_directory_structure(DATA_DIR) # Check for index.md in the root data directory index_file = DATA_DIR / 'index.md' index_content = None content_position = 'top' # Default position if index_file.exists(): index_content = render_markdown_file(index_file) # Determine content position based on length # Count words in the HTML content (after stripping HTML tags) content_text = re.sub(r'<[^>]+>', '', index_content['content']) word_count = len(content_text.split()) # If content is longer than 150 words, put it at the bottom if word_count > 150: content_position = 'bottom' # Check if root directory is an image gallery image_items = [item for item in items if item['is_image']] total_files = [item for item in items if not item['is_dir']] is_image_gallery = len(image_items) >= 3 and len(total_files) > 0 and (len(image_items) / len(total_files)) >= 0.5 return render_template('directory.html', items=items, current_path='', title='Kenneth Reitz', breadcrumbs=[], index_content=index_content, content_position=content_position, is_image_gallery=is_image_gallery, image_items=image_items, current_year=datetime.now().year) @app.route('/') def serve_path(path): """Serve files and directories from the data folder.""" full_path = DATA_DIR / path # If the path doesn't exist, try adding .md extension for markdown files if not full_path.exists(): md_path = DATA_DIR / (path + '.md') if md_path.exists() and md_path.suffix == '.md': full_path = md_path else: abort(404) # Generate breadcrumbs # For clean URLs, we need to handle the case where path might not include .md original_path = path if full_path.suffix == '.md' and not path.endswith('.md'): # This is a clean URL for a markdown file path_parts = path.split('/') else: path_parts = path.split('/') breadcrumbs = [] current = '' for part in path_parts[:-1]: # Exclude the current page current = f"{current}/{part}" if current else part breadcrumbs.append({ 'name': part.replace('-', ' ').replace('_', ' ').title(), 'url': f"/{current}" }) if full_path.is_dir(): # Directory listing items = get_directory_structure(full_path) # Check if this is an image gallery (50% or more images) image_items = [item for item in items if item['is_image']] total_files = [item for item in items if not item['is_dir']] is_image_gallery = len(image_items) >= 3 and len(total_files) > 0 and (len(image_items) / len(total_files)) >= 0.5 # Check for index.md in the directory index_file = full_path / 'index.md' index_content = None content_position = 'top' # Default position if index_file.exists(): index_content = render_markdown_file(index_file) # Determine content position based on length # Count words in the HTML content (after stripping HTML tags) content_text = re.sub(r'<[^>]+>', '', index_content['content']) word_count = len(content_text.split()) # If content is longer than 150 words, put it at the bottom if word_count > 150: content_position = 'bottom' title = path_parts[-1].replace('-', ' ').replace('_', ' ').title() return render_template('directory.html', items=items, current_path=original_path, title=title, breadcrumbs=breadcrumbs, index_content=index_content, content_position=content_position, is_image_gallery=is_image_gallery, image_items=image_items, current_year=datetime.now().year, current_page=title) elif full_path.suffix == '.md': # Markdown file content_data = render_markdown_file(full_path) return render_template('post.html', content=content_data['content'], title=content_data['title'], metadata=content_data['metadata'], breadcrumbs=breadcrumbs, current_path=path, current_year=datetime.now().year, current_page=content_data['title']) elif full_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp']: # Image file - check if it's in a gallery directory parent_dir = full_path.parent gallery_images = [] if parent_dir.exists(): for img in sorted(parent_dir.iterdir()): if img.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp']: gallery_images.append({ 'name': img.name, 'path': f"/static/data/{img.relative_to(DATA_DIR)}", 'url': f"/{img.relative_to(DATA_DIR)}", 'is_current': img == full_path }) return render_template('photo.html', image_path=f"/static/data/{path}", title=full_path.stem.replace('-', ' ').replace('_', ' ').title(), breadcrumbs=breadcrumbs, gallery_images=gallery_images, current_path=path, current_year=datetime.now().year, current_page=full_path.stem.replace('-', ' ').replace('_', ' ').title()) else: # Other files - serve directly from flask import send_file return send_file(full_path) @app.route('/static/data/') def serve_data_file(path): """Serve static files from the data directory.""" full_path = DATA_DIR / path if not full_path.exists() or not full_path.is_file(): abort(404) from flask import send_file return send_file(full_path) @app.route('/api/search') def api_search(): """API endpoint for full-text search across the knowledge base.""" query = request.args.get('q', '').lower() if not query: return jsonify([]) results = [] def search_path(current_path: Path, display_path: str = ""): """Recursively search files and directories under ``current_path``. This replaces the previous implementation that searched an in-memory tree representation but never actually scanned the filesystem, resulting in an empty search index. We now walk the ``data`` directory directly so queries return real results. """ for item in current_path.iterdir(): if item.name.startswith('.'): continue relative_path = str(item.relative_to(DATA_DIR)) node_name = item.name.lower() node_path = relative_path.lower() node_content = "" if item.is_file() and item.suffix == '.md': try: node_content = item.read_text(encoding='utf-8').lower() except Exception: node_content = "" item_display_path = f"{display_path}/{item.name}" if display_path else item.name if query in node_name or query in node_path or query in node_content: result = { 'name': item.name, 'type': 'directory' if item.is_dir() else ('article' if item.suffix == '.md' else 'file'), 'path': relative_path, 'display_path': item_display_path, 'relevance': 0, } relevance = 0 if query in node_name: relevance += 10 if node_name.startswith(query): relevance += 5 if query in node_path: relevance += 3 if query in node_content: relevance += 1 relevance += node_content.count(query) * 0.1 result['relevance'] = relevance results.append(result) if item.is_dir(): search_path(item, item_display_path) # Start searching from the data directory search_path(DATA_DIR) results.sort(key=lambda x: x['relevance'], reverse=True) return jsonify(results) def generate_sitemap_data(): """Generate sitemap data by recursively scanning the data directory.""" sitemap_items = [] def scan_directory(path, url_path=""): if not path.exists() or not path.is_dir(): return for item in sorted(path.iterdir()): if item.name.startswith('.'): continue item_url_path = f"{url_path}/{item.name}" if url_path else item.name if item.is_dir(): # Add directory to sitemap sitemap_items.append({ 'url': f"/{item_url_path}", 'title': item.name.replace('-', ' ').replace('_', ' ').title(), 'type': 'directory', 'modified': datetime.fromtimestamp(item.stat().st_mtime) }) # Recursively scan subdirectories scan_directory(item, item_url_path) elif item.suffix == '.md': # Remove .md extension for clean URLs clean_url_path = item_url_path[:-3] if item_url_path.endswith('.md') else item_url_path # Get title from file content title = item.stem.replace('-', ' ').replace('_', ' ').title() try: content_data = render_markdown_file(item) title = content_data['title'] except: pass sitemap_items.append({ 'url': f"/{clean_url_path}", 'title': title, 'type': 'article', 'modified': datetime.fromtimestamp(item.stat().st_mtime) }) # Start scanning from data directory scan_directory(DATA_DIR) # Add static pages static_pages = [ {'url': '/', 'title': 'Kenneth Reitz - Digital Mind Map', 'type': 'homepage'}, {'url': '/directory', 'title': 'File Explorer', 'type': 'directory'}, {'url': '/sitemap', 'title': 'Site Map', 'type': 'sitemap'} ] return static_pages + sitemap_items @app.route('/sitemap') def sitemap(): """Show the site sitemap.""" sitemap_data = generate_sitemap_data() # Group by type grouped_sitemap = { 'homepage': [], 'directory': [], 'article': [], 'sitemap': [] } for item in sitemap_data: item_type = item.get('type', 'article') if item_type in grouped_sitemap: grouped_sitemap[item_type].append(item) return render_template('sitemap.html', title='Site Map', sitemap_data=grouped_sitemap, total_items=len(sitemap_data), breadcrumbs=[], current_year=datetime.now().year, current_page='Site Map') @app.route('/sitemap.xml') def sitemap_xml(): """Generate XML sitemap for search engines.""" sitemap_data = generate_sitemap_data() xml_content = '\n' xml_content += '\n' for item in sitemap_data: xml_content += ' \n' xml_content += f' https://kennethreitz.org{item["url"]}\n' if 'modified' in item: xml_content += f' {item["modified"].strftime("%Y-%m-%d")}\n' xml_content += ' \n' xml_content += '' from flask import Response return Response(xml_content, mimetype='application/xml') if __name__ == '__main__': app.run(debug=True, host='0.0.0.0', port=8000)