kennethreitz.org/engine.py

# Enable gevent async I/O optimizations
import gevent
from gevent import monkey
monkey.patch_all()  # Patch standard library for async I/O
from gevent.pool import Pool

import os
import mistune
from flask import Flask, render_template, abort, request, url_for, jsonify, redirect, Response
from pathlib import Path
import re
from datetime import datetime
from urllib.parse import quote
import json
import time
from xml.sax.saxutils import escape
import html
from collections import defaultdict
import hashlib
import base64
import math
from functools import lru_cache

app = Flask(__name__, template_folder='templates')

# Configuration
app.config['DISABLE_ANALYTICS'] = os.environ.get('DISABLE_ANALYTICS', 'false').lower() == 'true'

# Add custom Jinja2 filters
@app.template_filter('strftime')
def strftime_filter(date, fmt='%Y-%m-%d'):
    """Format a datetime object using strftime."""
    if date is None:
        return ''
    if isinstance(date, str) and date.lower() == 'now':
        date = datetime.now()
    return date.strftime(fmt)

@app.template_filter('unescape')
def unescape_filter(text):
    """Unescape HTML entities in text."""
    if text is None:
        return ''
    return html.unescape(text)

def _process_single_file(file_path):
    """Process a single file for cache generation. Returns data structure for the file."""
    try:
        full_path = Path(file_path)

        # Read raw content directly for processing
        with open(full_path, 'r', encoding='utf-8') as f:
            raw_content = f.read()

        # Get processed content data
        content_data = render_markdown_file(full_path)
        html_content = content_data['content']

        result = {
            'file_path': file_path,
            'full_path': full_path,
            'raw_content': raw_content,
            'content_data': content_data,
            'html_content': html_content,
            'success': True
        }

        return result
    except Exception as e:
        return {
            'file_path': file_path,
            'error': str(e),
            'success': False
        }


def _generate_all_caches_unified():
    """Generate all caches in a single sweep through the data."""
    import glob
    from collections import defaultdict
    import re

    def simple_extract_excerpt(content, max_words=50):
        """Simple excerpt extraction for unified cache generation."""
        # Remove front matter
        content = re.sub(r'^---\s*\n.*?\n---\s*\n', '', content, flags=re.DOTALL)
        # Remove title (first # line)
        content = re.sub(r'^# .+?$', '', content, flags=re.MULTILINE)
        # Remove date lines
        content = re.sub(r'^\*[A-Za-z]+ \d{4}\*\s*$', '', content, flags=re.MULTILINE)
        # Remove linked images ([![](url)](url))
        content = re.sub(r'\[!\[[^\]]*\]\([^)]*\)\]\([^)]*\)', '', content)
        # Remove standalone images
        content = re.sub(r'!\[[^\]]*\]\([^)]*\)', '', content)
        # Remove image references
        content = re.sub(r'\[Image #\d+\]', '', content)
        # Remove sidenotes (label + input + span structure)
        content = re.sub(r'<label[^>]*class="margin-toggle sidenote-number"[^>]*></label><input[^>]*class="margin-toggle"[^>]*/>(<span class="sidenote">.*?</span>)', '', content, flags=re.DOTALL)
        # Remove any remaining HTML tags
        content = re.sub(r'<[^>]+>', '', content)
        # Remove markdown links but keep the text
        content = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', content)
        # Remove markdown emphasis
        content = re.sub(r'[*_`]', '', content)
        # Get first meaningful line (skip empty and markdown-only lines)
        lines = [line.strip() for line in content.split('\n') if line.strip()]
        first_para = None
        for line in lines:
            # Skip header lines
            if re.match(r'^#{1,6}\s', line):
                continue
            # Skip lines that are just punctuation or very short
            if len(line) > 10:
                first_para = line
                break

        if first_para:
            words = first_para.split()[:max_words]
            excerpt = ' '.join(words)
            if len(words) == max_words:
                excerpt += '...'
            return excerpt
        return ''

    # Initialize all data structures
    sidenotes_data = defaultdict(list)
    outlines_data = defaultdict(list)
    quotes_data = defaultdict(list)
    connections_outgoing = defaultdict(list)
    connections_incoming = defaultdict(list)
    terms_data = defaultdict(list)
    blog_posts = []

    # Get all markdown files from /data/ directory
    all_files = glob.glob('data/**/*.md', recursive=True)
    all_files = [f for f in all_files if not f.endswith('index.md')]

    print(f"Unified cache generation: Processing {len(all_files)} files...")

    # Use gevent pool for parallel file processing
    pool = Pool(20)  # Process up to 20 files concurrently
    file_results = pool.map(_process_single_file, all_files)

    # Process results from parallel file processing
    for result in file_results:
        if not result['success']:
            print(f"Error processing {result['file_path']}: {result['error']}")
            continue

        file_path = result['file_path']
        full_path = result['full_path']
        raw_content = result['raw_content']
        content_data = result['content_data']
        html_content = result['html_content']

        # Generate blog post entry if this is an essay
        if full_path.parent.name == 'essays':
            # Use the robust extract_intelligent_date function
            date_obj = extract_intelligent_date(full_path, content_data)

            if date_obj is not None:
                blog_posts.append({
                    'title': content_data['title'],
                    'path': f"/{full_path.relative_to(Path('data')).with_suffix('')}",
                    'url': f"/{full_path.relative_to(Path('data')).with_suffix('')}",
                    'file_path': str(full_path),  # Add actual file path for mapping
                    'pub_date': date_obj,
                    'date_str': date_obj.strftime('%Y-%m-%d'),
                    'excerpt': simple_extract_excerpt(raw_content),
                    'description': simple_extract_excerpt(raw_content),
                    'word_count': len(raw_content.split()),
                    'category': full_path.parent.name,
                    'unique_icon': generate_unique_svg_icon(content_data['title'], size=24)
                })
            else:
                print(f"DEBUG: Could not extract date from {full_path.name} in unified cache")

            # Extract sidenotes with their IDs
            # Pattern matches the full sidenote structure: input + span
            sidenote_pattern = r'<input[^>]*id="([^"]*)"[^>]*class="margin-toggle"[^>]*/>.*?<span class="sidenote">(.*?)</span>'
            sidenotes = re.findall(sidenote_pattern, html_content, re.DOTALL)
            if sidenotes:
                for sidenote_id, sidenote_content in sidenotes:
                    clean_sidenote = re.sub(r'<[^>]+>', '', sidenote_content).strip()
                    if clean_sidenote:
                        sidenotes_data[file_path].append({
                            'text': clean_sidenote,
                            'html': sidenote_content.strip(),
                            'id': sidenote_id
                        })

            # Extract outlines (headings)
            heading_pattern = r'(<h([1-6])[^>]*>.*?</h[1-6]>)'
            headings = re.findall(heading_pattern, html_content)
            if headings:
                for full_tag, level in headings:
                    # Extract just the inner content for text
                    inner_pattern = r'<h[1-6][^>]*>(.*?)</h[1-6]>'
                    inner_match = re.search(inner_pattern, full_tag)
                    if inner_match:
                        clean_heading = re.sub(r'<[^>]+>', '', inner_match.group(1)).strip()
                        if clean_heading and not clean_heading.startswith('fn:'):
                            outlines_data[file_path].append({
                                'level': int(level),
                                'text': clean_heading,
                                'html': full_tag.strip()
                            })

            # Extract quotes (blockquotes)
            quote_pattern = r'<blockquote[^>]*>(.*?)</blockquote>'
            quotes = re.findall(quote_pattern, html_content, re.DOTALL)
            if quotes:
                for quote in quotes:
                    clean_quote = re.sub(r'<[^>]+>', '', quote).strip()
                    if clean_quote:
                        quotes_data[file_path].append({
                            'text': clean_quote,
                            'html': quote.strip()
                        })

            # Extract connections (cross-references)
            connection_pattern = r'\[([^\]]+)\]\((/[^)]+)\)'
            connections = re.findall(connection_pattern, raw_content)
            if connections:
                for link_text, link_url in connections:
                    # Include all internal links (starting with /) except external ones
                    if link_url.startswith('/') and not link_url.startswith('//'):
                        connections_outgoing[file_path].append({
                            'text': link_text,
                            'url': link_url,
                            'target_file': link_url
                        })
                        # Track incoming references
                        connections_incoming[link_url].append({
                            'text': link_text,
                            'source_file': file_path,
                            'context': link_text
                        })

            # Extract terms for index
            # Simple approach: extract words that appear in multiple files
            words = re.findall(r'\b[A-Z][a-zA-Z]{3,}\b', raw_content)
            for word in set(words):
                if len(word) > 3 and word not in ['This', 'That', 'They', 'When', 'Where', 'What', 'Which']:
                    terms_data[word].append({
                        'file': file_path,
                        'context': word
                    })

    # Sort blog posts by date (newest first)
    blog_posts.sort(key=lambda x: x['pub_date'], reverse=True)

    # Create URL and metadata mappings for terms processing
    url_metadata = {}
    file_to_url = {}
    for post in blog_posts:
        url_metadata[post['url']] = post
        file_path = post.get('file_path') or post.get('path')
        if file_path:
            file_to_url[file_path] = post['url']

    # Process terms to only include ones that appear in multiple files
    filtered_terms = {term: refs for term, refs in terms_data.items() if len(refs) >= 2}
    final_terms = {}
    total_term_occurrences = 0
    for term, refs in sorted(filtered_terms.items()):
        # Convert refs to articles format expected by template
        # Group by file to get counts per article
        file_counts = {}
        for ref in refs:
            file_path = ref['file']
            if file_path not in file_counts:
                file_counts[file_path] = 0
            file_counts[file_path] += 1

        articles = []
        for file_path, count in file_counts.items():
            # Map file path to article URL and title
            url = file_to_url.get(file_path, '')
            if url:
                metadata = url_metadata.get(url, {})
                title = metadata.get('title', '')
                if title:  # Only include articles with valid titles
                    articles.append({
                        'url': url,
                        'title': title,
                        'count': count
                    })

        if articles:  # Only include terms that have valid articles
            final_terms[term] = {
                'articles': articles,
                'total_count': sum(file_counts.values()),
                'article_count': len(articles)
            }
            total_term_occurrences += sum(file_counts.values())

    # Build final cache structures
    total_sidenotes = sum(len(notes) for notes in sidenotes_data.values())

    unified_cache = {
        'blog_posts': blog_posts,
        'sidenotes': {
            'articles': dict(sidenotes_data),
            'total_count': total_sidenotes
        },
        'outlines': {
            'articles': dict(outlines_data),
            'total_count': sum(len(headings) for headings in outlines_data.values())
        },
        'quotes': {
            'articles': dict(quotes_data),
            'total_count': sum(len(quotes) for quotes in quotes_data.values())
        },
        'connections': {
            'outgoing_refs': dict(connections_outgoing),
            'incoming_refs': dict(connections_incoming),
            'total_count': sum(len(refs) for refs in connections_outgoing.values())
        },
        'terms': {
            'terms': final_terms,
            'total_occurrences': total_term_occurrences
        }
    }

    return unified_cache

@app.context_processor
def inject_index_counts():
    """Make index counts available to all templates."""
    try:
        # Use optimized MetadataCache instead of old cached functions
        sidenotes_data = metadata_cache.get_sidenotes()
        outlines_data = metadata_cache.get_outlines()
        quotes_data = metadata_cache.get_quotes()
        connections_data = metadata_cache.get_connections()
        terms_data = metadata_cache.get_terms()

        return {
            'index_counts': {
                'sidenotes': sidenotes_data.get('total_count', 0),
                'outlines': outlines_data.get('total_count', 0),
                'quotes': quotes_data.get('total_count', 0),
                'connections_outgoing': connections_data.get('total_outgoing', 0),
                'connections_incoming': connections_data.get('total_incoming', 0),
                'terms': terms_data.get('total_terms', 0),
                'terms_total_refs': terms_data.get('total_occurrences', 0)
            }
        }
    except Exception:
        # Fallback to prevent template errors
        return {
            'index_counts': {
                'sidenotes': 0,
                'outlines': 0,
                'quotes': 0,
                'connections_outgoing': 0,
                'connections_incoming': 0,
                'terms': 0,
                'terms_total_refs': 0
            }
        }

DATA_DIR = Path('data')

# Import the clean SVG icon generator
from svg_icon_generator import generate_unique_svg_icon

def generate_unique_svg_icon_OLD(title, size=24):
    """Generate a sophisticated unique SVG icon based on the title string."""
    # Create multiple hashes for more entropy
    hash_obj = hashlib.md5(title.encode())
    hash_bytes = hash_obj.digest()

    # Use SHA256 for additional entropy
    sha_hash = hashlib.sha256(title.encode()).digest()

    # Extract values from hash for various parameters
    hue1 = (hash_bytes[0] * 360) // 256
    hue2 = (hash_bytes[1] * 360) // 256
    saturation = 50 + (hash_bytes[2] * 30) // 256  # 50-80% saturation
    lightness = 40 + (hash_bytes[3] * 35) // 256   # 40-75% lightness

    # Choose pattern type - expanded to 20 different patterns for much more diversity
    pattern_type = hash_bytes[4] % 20

    # Create gradient colors
    color1 = f"hsl({hue1}, {saturation}%, {lightness}%)"
    color2 = f"hsl({hue2}, {saturation + 10}%, {lightness + 15}%)"

    # Generate gradient definition
    gradient_angle = (sha_hash[0] * 360) // 256
    gradient_id = f"grad_{abs(hash(title)) % 10000}"

    shapes = []
    defs = []

    if pattern_type == 0:  # Layered circles with gradients
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="0%" x2="100%" y2="100%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="100%" stop-color="{color2}"/>
        </linearGradient>''')

        # Multiple concentric circles
        for i in range(3):
            radius = size // 3 - i * (size // 12)
            opacity = 0.7 + i * 0.1
            shapes.append(f'<circle cx="{size//2}" cy="{size//2}" r="{radius}" fill="url(#{gradient_id})" opacity="{opacity}"/>')

    elif pattern_type == 1:  # Flower of Life
        defs.append(f'''<radialGradient id="{gradient_id}" cx="50%" cy="50%" r="50%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="100%" stop-color="{color2}"/>
        </radialGradient>''')

        # Sacred Flower of Life pattern - 6 surrounding circles around center
        center_x, center_y = size // 2, size // 2
        radius = size // 5

        # Center circle
        shapes.append(f'<circle cx="{center_x}" cy="{center_y}" r="{radius}" fill="none" stroke="url(#{gradient_id})" stroke-width="2" opacity="0.8"/>')

        # Six surrounding circles
        for i in range(6):
            angle = (i * 60) * math.pi / 180
            x = center_x + radius * math.cos(angle)
            y = center_y + radius * math.sin(angle)
            shapes.append(f'<circle cx="{x:.1f}" cy="{y:.1f}" r="{radius}" fill="none" stroke="url(#{gradient_id})" stroke-width="2" opacity="0.7"/>')

        # Outer petals for extended flower
        for i in range(12):
            angle = (i * 30) * math.pi / 180
            x = center_x + radius * 1.732 * math.cos(angle)  # sqrt(3) spacing
            y = center_y + radius * 1.732 * math.sin(angle)
            shapes.append(f'<circle cx="{x:.1f}" cy="{y:.1f}" r="{radius//2}" fill="none" stroke="{color2}" stroke-width="1" opacity="0.5"/>')

    elif pattern_type == 2:  # Crystalline line art
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="0%" x2="100%" y2="100%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="50%" stop-color="{color2}"/>
            <stop offset="100%" stop-color="{color2}"/>
        </linearGradient>''')

        # Create elegant crystal structure in line art
        center_x, center_y = size // 2, size // 2
        points = []
        for i in range(6):
            angle = (i * 60) * math.pi / 180
            x = center_x + (size // 3) * math.cos(angle)
            y = center_y + (size // 3) * math.sin(angle)
            points.append(f"{x:.1f},{y:.1f}")

        # Main hexagonal outline with elegant stroke
        shapes.append(f'<polygon points="{" ".join(points)}" fill="none" stroke="url(#{gradient_id})" stroke-width="2.5" opacity="0.8"/>')

        # Inner crystalline structure with delicate lines
        for i in range(6):
            angle = (i * 60) * math.pi / 180
            x = center_x + (size // 6) * math.cos(angle)
            y = center_y + (size // 6) * math.sin(angle)
            shapes.append(f'<line x1="{center_x}" y1="{center_y}" x2="{x:.1f}" y2="{y:.1f}" stroke="{color2}" stroke-width="1.5" opacity="0.6"/>')

        # Central sacred point
        shapes.append(f'<circle cx="{center_x}" cy="{center_y}" r="2" fill="none" stroke="{color1}" stroke-width="1.5" opacity="0.9"/>')

    elif pattern_type == 3:  # Flowing wave interference - line art
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="50%" x2="100%" y2="50%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="30%" stop-color="{color2}"/>
            <stop offset="70%" stop-color="{color2}"/>
            <stop offset="100%" stop-color="{color1}"/>
        </linearGradient>''')

        # Create flowing wave-like paths with graceful curves
        for wave in range(3):
            path_data = f"M 0,{size//2}"
            for x in range(0, size, 1):
                frequency = 0.15 + wave * 0.08
                amplitude = size // 8
                phase_shift = wave * 1.5
                y = size // 2 + amplitude * math.sin(x * frequency + phase_shift)
                path_data += f" L {x},{y:.1f}"

            stroke_width = 2.5 - wave * 0.5
            opacity = 0.85 - wave * 0.15
            shapes.append(f'<path d="{path_data}" stroke="url(#{gradient_id})" stroke-width="{stroke_width:.1f}" fill="none" opacity="{opacity:.2f}" stroke-linecap="round"/>')

    elif pattern_type == 4:  # Sacred Golden Ratio Spiral - refined line art
        defs.append(f'''<linearGradient id="{gradient_id}" x1="20%" y1="20%" x2="80%" y2="80%">
            <stop offset="0%" stop-color="{color2}"/>
            <stop offset="40%" stop-color="{color2}"/>
            <stop offset="100%" stop-color="{color1}"/>
        </linearGradient>''')

        # Sacred golden ratio spiral with elegant curves
        center_x, center_y = size // 2, size // 2
        golden_ratio = 1.618033988749

        # Create smooth logarithmic spiral based on golden ratio
        path_data = f"M {center_x},{center_y}"
        for t in range(0, 400, 2):  # Smoother curve with more points
            angle = t * math.pi / 180
            # Golden ratio growth with refined scaling
            radius = (size // 10) * math.pow(golden_ratio, angle / (math.pi / 1.8))
            if radius > size // 2 - 2:
                break
            x = center_x + radius * math.cos(angle)
            y = center_y + radius * math.sin(angle)
            path_data += f" L {x:.1f},{y:.1f}"

        shapes.append(f'<path d="{path_data}" stroke="url(#{gradient_id})" stroke-width="3" fill="none" opacity="0.85" stroke-linecap="round"/>')

        # Subtle Fibonacci rectangle outlines
        fib_sizes = [2, 3, 5, 8]
        for i, fib in enumerate(fib_sizes):
            if fib * 2 > size // 4:
                break
            square_size = fib * 2
            x = center_x - square_size // 2 + i * 1.5
            y = center_y - square_size // 2 + i * 1.5
            opacity = 0.4 - i * 0.08
            shapes.append(f'<rect x="{x}" y="{y}" width="{square_size}" height="{square_size}" fill="none" stroke="{color2}" stroke-width="1" opacity="{opacity:.2f}"/>')

        # Sacred center - golden ratio point
        shapes.append(f'<circle cx="{center_x}" cy="{center_y}" r="1.5" fill="none" stroke="{color1}" stroke-width="2" opacity="0.9"/>')

    elif pattern_type == 5:  # Tessellation pattern
        defs.append(f'''<pattern id="{gradient_id}" x="0" y="0" width="8" height="8" patternUnits="userSpaceOnUse">
            <rect width="8" height="8" fill="{color1}"/>
            <circle cx="4" cy="4" r="2" fill="{color2}" opacity="0.7"/>
        </pattern>''')

        # Create tessellated hexagon
        points = []
        for i in range(6):
            angle = (i * 60) * 3.14159 / 180
            x = size // 2 + (size // 2.5) * math.cos(angle)
            y = size // 2 + (size // 2.5) * math.sin(angle)
            points.append(f"{x:.1f},{y:.1f}")

        shapes.append(f'<polygon points="{" ".join(points)}" fill="url(#{gradient_id})" stroke="{color2}" stroke-width="1"/>')

    elif pattern_type == 6:  # Fractal tree
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="100%" x2="0%" y2="0%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="100%" stop-color="{color2}"/>
        </linearGradient>''')

        def draw_branch(x, y, angle, length, depth):
            if depth == 0 or length < 2:
                return []
            end_x = x + length * math.cos(angle)
            end_y = y + length * math.sin(angle)
            branches = [f'<line x1="{x:.1f}" y1="{y:.1f}" x2="{end_x:.1f}" y2="{end_y:.1f}" stroke="url(#{gradient_id})" stroke-width="{depth}" opacity="{0.8 if depth > 1 else 0.6}"/>']
            branches.extend(draw_branch(end_x, end_y, angle - 0.5, length * 0.7, depth - 1))
            branches.extend(draw_branch(end_x, end_y, angle + 0.5, length * 0.7, depth - 1))
            return branches

        shapes.extend(draw_branch(size//2, size*0.9, -math.pi/2, size//3, 4))

    elif pattern_type == 7:  # Dot matrix
        dot_size = size // 12
        spacing = size // 6
        for x in range(spacing, size - spacing + 1, spacing):
            for y in range(spacing, size - spacing + 1, spacing):
                opacity = 0.4 + (hash(f"{x},{y}") % 6) * 0.1
                color = color1 if (x + y) % 2 == 0 else color2
                shapes.append(f'<circle cx="{x}" cy="{y}" r="{dot_size}" fill="{color}" opacity="{opacity}"/>')

    elif pattern_type == 8:  # Triangular mosaic
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="0%" x2="100%" y2="100%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="50%" stop-color="{color2}"/>
            <stop offset="100%" stop-color="{color1}"/>
        </linearGradient>''')

        # Create triangular pattern
        tri_size = size // 3
        for i in range(3):
            for j in range(3):
                x = j * tri_size
                y = i * tri_size
                if (i + j) % 2 == 0:
                    shapes.append(f'<polygon points="{x},{y} {x+tri_size},{y} {x+tri_size//2},{y+tri_size}" fill="url(#{gradient_id})" opacity="0.8"/>')
                else:
                    shapes.append(f'<polygon points="{x},{y+tri_size} {x+tri_size},{y+tri_size} {x+tri_size//2},{y}" fill="{color2}" opacity="0.6"/>')

    elif pattern_type == 9:  # Organic bubbles
        defs.append(f'''<radialGradient id="{gradient_id}" cx="30%" cy="30%" r="70%">
            <stop offset="0%" stop-color="{color2}"/>
            <stop offset="100%" stop-color="{color1}"/>
        </radialGradient>''')

        # Create organic bubble pattern
        bubble_positions = [
            (size * 0.3, size * 0.25, size // 6),
            (size * 0.7, size * 0.4, size // 8),
            (size * 0.5, size * 0.7, size // 5),
            (size * 0.2, size * 0.6, size // 10),
            (size * 0.8, size * 0.8, size // 7),
            (size * 0.6, size * 0.2, size // 9)
        ]

        for i, (x, y, radius) in enumerate(bubble_positions):
            opacity = 0.7 - (i % 3) * 0.15
            bubble_color = f"url(#{gradient_id})" if i % 2 == 0 else color2
            shapes.append(f'<circle cx="{x:.1f}" cy="{y:.1f}" r="{radius}" fill="{bubble_color}" opacity="{opacity}"/>')

    elif pattern_type == 10:  # Metatron's Cube
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="0%" x2="100%" y2="100%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="100%" stop-color="{color2}"/>
        </radialGradient>''')

        # Sacred Metatron's Cube - 13 circles of creation
        center_x, center_y = size // 2, size // 2
        radius = size // 8

        # Center circle
        shapes.append(f'<circle cx="{center_x}" cy="{center_y}" r="{radius//2}" fill="url(#{gradient_id})" opacity="0.9"/>')

        # Inner 6 circles (hexagonal pattern)
        for i in range(6):
            angle = (i * 60) * math.pi / 180
            x = center_x + radius * math.cos(angle)
            y = center_y + radius * math.sin(angle)
            shapes.append(f'<circle cx="{x:.1f}" cy="{y:.1f}" r="{radius//3}" fill="none" stroke="url(#{gradient_id})" stroke-width="2" opacity="0.8"/>')

        # Outer 6 circles
        for i in range(6):
            angle = (i * 60) * math.pi / 180
            x = center_x + radius * 2 * math.cos(angle)
            y = center_y + radius * 2 * math.sin(angle)
            shapes.append(f'<circle cx="{x:.1f}" cy="{y:.1f}" r="{radius//4}" fill="none" stroke="{color2}" stroke-width="1" opacity="0.6"/>')

        # Connect with sacred lines (Fruit of Life pattern)
        for i in range(6):
            angle1 = (i * 60) * math.pi / 180
            angle2 = ((i + 1) * 60) * math.pi / 180
            x1 = center_x + radius * math.cos(angle1)
            y1 = center_y + radius * math.sin(angle1)
            x2 = center_x + radius * math.cos(angle2)
            y2 = center_y + radius * math.sin(angle2)
            shapes.append(f'<line x1="{x1:.1f}" y1="{y1:.1f}" x2="{x2:.1f}" y2="{y2:.1f}" stroke="{color2}" stroke-width="1" opacity="0.4"/>')

    elif pattern_type == 11:  # Flower petals
        defs.append(f'''<radialGradient id="{gradient_id}" cx="50%" cy="50%" r="50%">
            <stop offset="0%" stop-color="{color2}"/>
            <stop offset="100%" stop-color="{color1}"/>
        </radialGradient>''')

        num_petals = 6 + (hash_bytes[6] % 6)  # 6-12 petals
        for i in range(num_petals):
            angle = (i * 360 / num_petals) * math.pi / 180
            x = size // 2 + (size // 3) * math.cos(angle)
            y = size // 2 + (size // 3) * math.sin(angle)
            shapes.append(f'<ellipse cx="{x:.1f}" cy="{y:.1f}" rx="{size//8}" ry="{size//6}" fill="url(#{gradient_id})" opacity="0.8" transform="rotate({i * 360 / num_petals} {x:.1f} {y:.1f})"/>')

        # Center
        shapes.append(f'<circle cx="{size//2}" cy="{size//2}" r="{size//10}" fill="{color1}"/>')

    elif pattern_type == 12:  # Diamond lattice
        diamond_size = size // 6
        for x in range(diamond_size, size, diamond_size * 2):
            for y in range(diamond_size, size, diamond_size * 2):
                points = [
                    f"{x},{y - diamond_size//2}",
                    f"{x + diamond_size//2},{y}",
                    f"{x},{y + diamond_size//2}",
                    f"{x - diamond_size//2},{y}"
                ]
                color = color1 if (x + y) % 4 == 0 else color2
                shapes.append(f'<polygon points="{" ".join(points)}" fill="{color}" opacity="0.7"/>')

    elif pattern_type == 13:  # Sine wave pattern
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="0%" x2="0%" y2="100%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="100%" stop-color="{color2}"/>
        </linearGradient>''')

        for wave in range(5):
            path_data = f"M 0,{size//2}"
            for x in range(0, size, 2):
                frequency = 0.3 + wave * 0.1
                amplitude = size // 8
                phase = wave * math.pi / 3
                y = size // 2 + amplitude * math.sin(x * frequency + phase)
                path_data += f" L {x},{y:.1f}"
            shapes.append(f'<path d="{path_data}" stroke="url(#{gradient_id})" stroke-width="2" fill="none" opacity="{0.9 - wave * 0.15}"/>')

    elif pattern_type == 14:  # Hexagonal grid
        hex_size = size // 8
        for row in range(4):
            for col in range(4):
                x = col * hex_size * 1.5 + (row % 2) * hex_size * 0.75
                y = row * hex_size * 0.866
                if x < size and y < size:
                    points = []
                    for i in range(6):
                        angle = (i * 60) * math.pi / 180
                        px = x + hex_size * math.cos(angle)
                        py = y + hex_size * math.sin(angle)
                        points.append(f"{px:.1f},{py:.1f}")
                    color = color1 if (row + col) % 2 == 0 else color2
                    shapes.append(f'<polygon points="{" ".join(points)}" fill="{color}" opacity="0.6" stroke="{color2}" stroke-width="1"/>')

    elif pattern_type == 15:  # Sri Yantra
        defs.append(f'''<radialGradient id="{gradient_id}" cx="50%" cy="50%" r="50%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="100%" stop-color="{color2}"/>
        </radialGradient>''')

        # Sacred Sri Yantra - 9 interlocking triangles
        center_x, center_y = size // 2, size // 2
        outer_radius = size // 2.5

        # 4 upward pointing triangles (Shiva)
        for i in range(4):
            scale = 1 - i * 0.2
            triangle_size = outer_radius * scale

            # Calculate triangle points
            x1 = center_x
            y1 = center_y - triangle_size
            x2 = center_x - triangle_size * 0.866  # sin(60°)
            y2 = center_y + triangle_size * 0.5
            x3 = center_x + triangle_size * 0.866
            y3 = center_y + triangle_size * 0.5

            opacity = 0.7 - i * 0.1
            shapes.append(f'<polygon points="{x1:.1f},{y1:.1f} {x2:.1f},{y2:.1f} {x3:.1f},{y3:.1f}" fill="none" stroke="url(#{gradient_id})" stroke-width="2" opacity="{opacity}"/>')

        # 5 downward pointing triangles (Shakti)
        for i in range(5):
            scale = 0.9 - i * 0.15
            triangle_size = outer_radius * scale
            rotation = i * 8  # Slight rotation for interlocking effect

            # Calculate inverted triangle points
            x1 = center_x
            y1 = center_y + triangle_size
            x2 = center_x - triangle_size * 0.866
            y2 = center_y - triangle_size * 0.5
            x3 = center_x + triangle_size * 0.866
            y3 = center_y - triangle_size * 0.5

            opacity = 0.6 - i * 0.08
            shapes.append(f'<polygon points="{x1:.1f},{y1:.1f} {x2:.1f},{y2:.1f} {x3:.1f},{y3:.1f}" fill="none" stroke="{color2}" stroke-width="1" opacity="{opacity}"/>')

        # Central bindu (divine point)
        shapes.append(f'<circle cx="{center_x}" cy="{center_y}" r="{size//20}" fill="{color1}" opacity="0.9"/>')

        # Outer protective circles
        shapes.append(f'<circle cx="{center_x}" cy="{center_y}" r="{outer_radius * 1.1}" fill="none" stroke="{color2}" stroke-width="1" opacity="0.5"/>')
        shapes.append(f'<circle cx="{center_x}" cy="{center_y}" r="{outer_radius * 1.25}" fill="none" stroke="{color1}" stroke-width="1" opacity="0.3"/>')

    elif pattern_type == 16:  # Mosaic tiles
        tile_size = size // 5
        for x in range(0, size, tile_size):
            for y in range(0, size, tile_size):
                # Random tile pattern based on position
                tile_hash = hash(f"{x}-{y}-{title}") % 4
                if tile_hash == 0:
                    shapes.append(f'<rect x="{x}" y="{y}" width="{tile_size}" height="{tile_size}" fill="{color1}" opacity="0.8"/>')
                elif tile_hash == 1:
                    shapes.append(f'<circle cx="{x + tile_size//2}" cy="{y + tile_size//2}" r="{tile_size//3}" fill="{color2}" opacity="0.7"/>')
                elif tile_hash == 2:
                    points = f"{x},{y+tile_size} {x+tile_size//2},{y} {x+tile_size},{y+tile_size}"
                    shapes.append(f'<polygon points="{points}" fill="{color1}" opacity="0.6"/>')
                else:
                    shapes.append(f'<rect x="{x}" y="{y}" width="{tile_size}" height="{tile_size}" fill="{color2}" opacity="0.5" rx="{tile_size//4}"/>')

    elif pattern_type == 17:  # Orbital rings
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="0%" x2="100%" y2="100%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="100%" stop-color="{color2}"/>
        </linearGradient>''')

        for i in range(4):
            radius = size // 6 + i * size // 12
            rotation = i * 45
            shapes.append(f'<circle cx="{size//2}" cy="{size//2}" r="{radius}" fill="none" stroke="url(#{gradient_id})" stroke-width="2" opacity="{0.8 - i*0.15}" transform="rotate({rotation} {size//2} {size//2})"/>')
            # Small planet
            planet_x = size // 2 + radius
            planet_y = size // 2
            shapes.append(f'<circle cx="{planet_x}" cy="{planet_y}" r="3" fill="{color2}" transform="rotate({rotation} {size//2} {size//2})"/>')

    elif pattern_type == 18:  # Woven pattern
        defs.append(f'''<pattern id="{gradient_id}" x="0" y="0" width="6" height="6" patternUnits="userSpaceOnUse">
            <rect width="6" height="6" fill="{color1}"/>
            <rect x="0" y="0" width="3" height="3" fill="{color2}"/>
            <rect x="3" y="3" width="3" height="3" fill="{color2}"/>
        </pattern>''')

        # Create woven effect with overlapping rectangles
        for i in range(6):
            x = i * size // 6
            shapes.append(f'<rect x="{x}" y="0" width="{size//12}" height="{size}" fill="url(#{gradient_id})" opacity="0.7"/>')
            shapes.append(f'<rect x="0" y="{x}" width="{size}" height="{size//12}" fill="{color2}" opacity="0.5"/>')

    else:  # pattern_type == 19: Platonic Tetrahedron
        defs.append(f'''<linearGradient id="{gradient_id}" x1="0%" y1="0%" x2="100%" y2="100%">
            <stop offset="0%" stop-color="{color1}"/>
            <stop offset="50%" stop-color="{color2}"/>
            <stop offset="100%" stop-color="{color1}"/>
        </radialGradient>''')

        # Sacred Tetrahedron - representing Fire element and divine trinity
        center_x, center_y = size // 2, size // 2
        tet_size = size // 2.8

        # Main large triangle (upward - divine masculine)
        x1 = center_x
        y1 = center_y - tet_size * 0.7
        x2 = center_x - tet_size * 0.866
        y2 = center_y + tet_size * 0.5
        x3 = center_x + tet_size * 0.866
        y3 = center_y + tet_size * 0.5

        shapes.append(f'<polygon points="{x1:.1f},{y1:.1f} {x2:.1f},{y2:.1f} {x3:.1f},{y3:.1f}" fill="none" stroke="url(#{gradient_id})" stroke-width="3" opacity="0.9"/>')

        # Inverted triangle (downward - divine feminine)
        y1_inv = center_y + tet_size * 0.4
        y2_inv = center_y - tet_size * 0.3
        y3_inv = center_y - tet_size * 0.3
        x2_inv = center_x - tet_size * 0.5
        x3_inv = center_x + tet_size * 0.5

        shapes.append(f'<polygon points="{center_x:.1f},{y1_inv:.1f} {x2_inv:.1f},{y2_inv:.1f} {x3_inv:.1f},{y3_inv:.1f}" fill="none" stroke="{color2}" stroke-width="2" opacity="0.8"/>')

        # Inner sacred triangles (tetraktys pattern)
        for i in range(3):
            scale = 0.6 - i * 0.15
            inner_size = tet_size * scale
            x1_i = center_x
            y1_i = center_y - inner_size * 0.4
            x2_i = center_x - inner_size * 0.5
            y2_i = center_y + inner_size * 0.2
            x3_i = center_x + inner_size * 0.5
            y3_i = center_y + inner_size * 0.2

            opacity = 0.7 - i * 0.15
            shapes.append(f'<polygon points="{x1_i:.1f},{y1_i:.1f} {x2_i:.1f},{y2_i:.1f} {x3_i:.1f},{y3_i:.1f}" fill="none" stroke="{color1}" stroke-width="1" opacity="{opacity}"/>')

        # Central point of unity
        shapes.append(f'<circle cx="{center_x}" cy="{center_y}" r="{size//25}" fill="{color1}" opacity="1.0"/>')

        # Corner vertices (tetraktys dots)
        vertex_radius = size // 30
        shapes.append(f'<circle cx="{x1:.1f}" cy="{y1:.1f}" r="{vertex_radius}" fill="{color2}" opacity="0.8"/>')
        shapes.append(f'<circle cx="{x2:.1f}" cy="{y2:.1f}" r="{vertex_radius}" fill="{color2}" opacity="0.8"/>')
        shapes.append(f'<circle cx="{x3:.1f}" cy="{y3:.1f}" r="{vertex_radius}" fill="{color2}" opacity="0.8"/>')

    # Compose SVG
    defs_content = "\n        ".join(defs) if defs else ""
    shapes_content = "\n        ".join(shapes)

    svg = f'''<svg width="{size}" height="{size}" viewBox="0 0 {size} {size}" xmlns="http://www.w3.org/2000/svg">
        <defs>
            {defs_content}
        </defs>
        {shapes_content}
    </svg>'''

    # Convert to data URL
    svg_b64 = base64.b64encode(svg.encode()).decode()
    return f"data:image/svg+xml;base64,{svg_b64}"

@lru_cache(maxsize=1000)
def get_cached_markdown_title(file_path):
    """Extract and cache the H1 title from a markdown file for performance."""
    try:
        # Convert Path object to string for caching compatibility
        file_path_str = str(file_path)

        # Quick check - if file was modified recently, we might want to skip cache
        # For now, let's just extract title efficiently
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read(1000)  # Only read first 1000 chars to find title

        # Look for first H1 markdown title
        title_match = re.search(r'^#\s+(.+?)$', content, re.MULTILINE)
        if title_match:
            return title_match.group(1).strip()

        # Fallback: look for HTML H1 if markdown was already rendered
        title_match = re.search(r'<h1[^>]*>(.*?)</h1>', content, re.IGNORECASE)
        if title_match:
            # Remove HTML tags from title
            title = re.sub(r'<[^>]+>', '', title_match.group(1))
            return html.unescape(title).strip()

        return None
    except:
        return None

@lru_cache(maxsize=500)
def generate_folder_icon(title, size=24):
    """Generate a folder icon with unique accent color based on title."""
    hash_obj = hashlib.md5(title.encode())
    hash_bytes = hash_obj.digest()

    # Generate accent color
    hue = (hash_bytes[0] * 360) // 256
    saturation = 60 + (hash_bytes[1] * 20) // 256  # 60-80%
    lightness = 45 + (hash_bytes[2] * 20) // 256   # 45-65%

    accent_color = f"hsl({hue}, {saturation}%, {lightness}%)"
    folder_base = "#e8e8e8"

    svg = f'''<svg width="{size}" height="{size}" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
        <defs>
            <linearGradient id="folder_grad_{abs(hash(title)) % 1000}" x1="0%" y1="0%" x2="100%" y2="100%">
                <stop offset="0%" stop-color="{folder_base}"/>
                <stop offset="100%" stop-color="{accent_color}"/>
            </linearGradient>
        </defs>
        <path d="M10 4H4c-1.11 0-2 .89-2 2v12c0 1.11.89 2 2 2h16c1.11 0 2-.89 2-2V8c0-1.11-.89-2-2-2h-8l-2-2z"
              fill="url(#folder_grad_{abs(hash(title)) % 1000})"
              stroke="{accent_color}"
              stroke-width="0.5"/>
        <circle cx="18" cy="7" r="2" fill="{accent_color}" opacity="0.8"/>
    </svg>'''

    svg_b64 = base64.b64encode(svg.encode()).decode()
    return f"data:image/svg+xml;base64,{svg_b64}"

def get_directory_structure(path):
    """Get the directory structure for a given path."""
    items = []
    if not path.exists() or not path.is_dir():
        return items

    # Separate directories and files for better organization
    dirs = []
    files = []

    for item in sorted(path.iterdir(), reverse=True):
        if item.name.startswith('.') or item.name.lower() == 'index.md' or item.name.endswith('.bak'):
            continue

        # Create display name without extension for files
        display_name = item.stem if item.is_file() and item.suffix else item.name
        display_name = display_name.replace('-', ' ').replace('_', ' ').title()

        # Create clean URL path without .md extension
        if item.is_dir():
            url_path = '/' + str(item.relative_to(DATA_DIR)) + '/'
        elif item.suffix == '.md':
            # Remove .md extension for clean URLs
            relative_path = str(item.relative_to(DATA_DIR))
            url_path = '/' + relative_path[:-3]  # Remove .md extension
        else:
            url_path = '/' + str(item.relative_to(DATA_DIR))

        # Extract date from markdown files
        file_date = None
        if item.is_file() and item.suffix == '.md':
            try:
                with open(item, 'r', encoding='utf-8') as f:
                    # Read first few lines to find date
                    for i, line in enumerate(f):
                        if i > 10:  # Only check first 10 lines
                            break
                        # Look for date patterns like *January 2009* or *2014*
                        date_match = re.match(r'^\*([A-Za-z]+ \d{4}|\d{4})\*\s*$', line.strip())
                        if date_match:
                            file_date = date_match.group(1)
                            break
            except:
                pass

        # Generate unique SVG icon based on actual content title for consistency
        icon_title = display_name  # Default to filename-based display name

        # For markdown files, try to extract the actual H1 title from content
        if item.is_file() and item.suffix == '.md':
            try:
                # Use cached title extraction for performance
                cached_title = get_cached_markdown_title(item)
                if cached_title:
                    icon_title = cached_title
                    # Also update display_name to use the actual title
                    display_name = cached_title
            except:
                # Fallback to filename-based display name if parsing fails
                pass

        if item.is_dir():
            unique_icon = generate_folder_icon(icon_title, size=32)
        else:
            unique_icon = generate_unique_svg_icon(icon_title, size=32)

        item_info = {
            'name': item.name,
            'display_name': display_name,
            'path': str(item.relative_to(DATA_DIR)),
            'url_path': url_path,
            'is_dir': item.is_dir(),
            'is_markdown': item.suffix == '.md',
            'is_image': item.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp'],
            'size': item.stat().st_size if item.is_file() else None,
            'created': datetime.fromtimestamp(item.stat().st_ctime),
            'modified': datetime.fromtimestamp(item.stat().st_mtime),
            'file_date': file_date,  # Date extracted from file content
            'file_type': item.suffix.lower() if item.is_file() else 'directory',
            'static_path': f"/static/data/{item.relative_to(DATA_DIR)}" if not item.is_dir() else None,
            'unique_icon': unique_icon  # Generated SVG icon
        }

        if item.is_dir():
            dirs.append(item_info)
        else:
            files.append(item_info)

    # Return directories first, then files
    return dirs + files


def calculate_reading_time(text):
    """Calculate estimated reading time based on word count."""
    # Remove HTML tags for more accurate word count
    clean_text = re.sub(r'<[^>]+>', '', text)
    # Average reading speed is 200-250 words per minute, using 225 as middle ground
    word_count = len(clean_text.split())
    reading_time = max(1, round(word_count / 225))  # Minimum 1 minute
    return reading_time, word_count


def find_series_posts(metadata, current_path):
    """Find all posts in the same series as the current post."""
    series_posts = []
    if not metadata.get('series'):
        return series_posts

    series_name = metadata['series']

    # Search through all markdown files to find posts in the same series
    for root, dirs, files in os.walk(DATA_DIR):
        for file in files:
            if file.endswith('.md') and file != 'index.md':
                file_path = Path(root) / file

                # Skip the current file
                if str(file_path) == str(current_path):
                    continue

                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()

                    # Extract metadata
                    yaml_pattern = r'^---\s*\n(.*?)\n---\s*\n'
                    yaml_match = re.match(yaml_pattern, content, re.DOTALL)
                    if yaml_match:
                        import yaml
                        post_metadata = yaml.safe_load(yaml_match.group(1)) or {}

                        if post_metadata.get('series') == series_name:
                            # Create URL path for this post
                            relative_path = str(file_path.relative_to(DATA_DIR))
                            url_path = '/' + relative_path[:-3]  # Remove .md

                            # Get title from metadata or filename
                            title = post_metadata.get('title') or file_path.stem.replace('-', ' ').title()

                            series_posts.append({
                                'title': title,
                                'url': url_path,
                                'order': post_metadata.get('series_order', 999),
                                'description': post_metadata.get('description', '')
                            })
                except:
                    continue

    # Sort by series_order
    series_posts.sort(key=lambda x: x['order'])
    return series_posts

def extract_tags_from_content(content, metadata, file_path):
    """Extract tags from content and metadata for categorization."""
    tags = set()

    # Only use explicitly defined tags from YAML front matter
    if metadata.get('tags'):
        if isinstance(metadata['tags'], list):
            tags.update(tag.lower().strip() for tag in metadata['tags'])
        else:
            tags.update(tag.lower().strip() for tag in str(metadata['tags']).split(','))

    return list(tags)


def render_markdown_file(file_path):
    """Render a markdown file to HTML."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # Extract YAML front matter if it exists
        metadata = {}
        yaml_pattern = r'^---\s*\n(.*?)\n---\s*\n'
        yaml_match = re.match(yaml_pattern, content, re.DOTALL)
        if yaml_match:
            try:
                import yaml
                metadata = yaml.safe_load(yaml_match.group(1)) or {}
                content = content[yaml_match.end():]
            except:
                pass

        # Extract first h1 header if it exists
        first_h1 = None
        # Look for the first H1 at the start of the file (must be on first line or after blank line)
        h1_match = re.search(r'^# (.+?)$', content, re.MULTILINE)
        if h1_match:
            first_h1 = h1_match.group(1).strip()
            # Remove only the first h1 line from content to avoid duplication
            content = re.sub(r'^# .+?$', '', content, count=1, flags=re.MULTILINE)

        # Extract date from italic date pattern (e.g., "*August 2025*")
        # Only match dates that look like month/year patterns, not quotes or long text
        date_match = re.search(r'^\*([A-Za-z]+ \d{4}|\d{4})\*\s*$', content, re.MULTILINE)
        if date_match and not metadata.get('date'):
            date_text = date_match.group(1).strip()
            # Skip only if it's "January 2025" (current year placeholder)
            if not (date_text.lower().startswith('january') and '2025' in date_text):
                # Format "January YYYY" (not 2025) as just "YYYY" for cleaner display
                if re.match(r'^january\s+(\d{4})$', date_text.lower()) and '2025' not in date_text:
                    year_match = re.search(r'(\d{4})', date_text)
                    if year_match:
                        date_text = year_match.group(1)
                # Keep other months like "August 2025" as full format
                metadata['date'] = date_text
            # Remove the date line from content
            content = re.sub(r'^\*([A-Za-z]+ \d{4}|\d{4})\*\s*$', '', content, count=1, flags=re.MULTILINE)

        # Configure mistune renderer with URL plugin for bare links
        markdown = mistune.create_markdown(
            escape=False,
            plugins=['strikethrough', 'footnotes', 'table', 'task_lists', 'def_list', 'url']
        )

        # Process content to HTML
        html_content = markdown(content.strip())

        # Add anchor IDs to headings using post-processing on HTML
        def add_heading_anchor_ids(html_content):
            def replace_heading(match):
                tag = match.group(1)  # h1, h2, etc.
                level = int(tag[1])  # 1, 2, etc.
                classes = match.group(2) or ''  # existing classes if any
                text = match.group(3)

                # Generate anchor ID from heading text (remove HTML tags first)
                clean_text = re.sub(r'<[^>]+>', '', text)
                anchor_id = re.sub(r'[^\w\s-]', '', clean_text.lower()).replace(' ', '-')
                anchor_id = re.sub(r'-+', '-', anchor_id).strip('-')  # Clean up multiple dashes

                # Add id attribute, preserving any existing classes
                if classes:
                    return f'<{tag} id="{anchor_id}"{classes}>{text}</{tag}>'
                else:
                    return f'<{tag} id="{anchor_id}">{text}</{tag}>'

            # Match h1-h6 tags with optional class attributes
            return re.sub(r'<(h[1-6])(\s+[^>]*)?>([^<]+)</h[1-6]>', replace_heading, html_content)

        html_content = add_heading_anchor_ids(html_content)

        # Post-processing for poetry line breaks
        # Check if this is likely a poetry file based on file path
        if file_path and 'poetry' in str(file_path):
            # For poetry, convert single line breaks within paragraphs to <br> tags
            html_content = re.sub(r'<p>(.*?)</p>',
                                lambda m: '<p>' + m.group(1).replace('\n', '<br>\n') + '</p>',
                                html_content, flags=re.DOTALL)

        # Add classes to headers to prevent conflicts with page headers
        html_content = html_content.replace('<h1>', '<h1 class="content-header">')
        html_content = html_content.replace('<h2>', '<h2 class="content-header">')
        html_content = html_content.replace('<h3>', '<h3 class="content-header">')
        html_content = html_content.replace('<h4>', '<h4 class="content-header">')
        html_content = html_content.replace('<h5>', '<h5 class="content-header">')
        html_content = html_content.replace('<h6>', '<h6 class="content-header">')

        # Use the first h1 as title if available, otherwise fallback to metadata or filename
        if first_h1:
            title = first_h1
        elif 'title' in metadata:
            title = metadata['title']
        else:
            title = file_path.stem.replace('-', ' ').replace('_', ' ').title()

        # Calculate reading time
        reading_time, word_count = calculate_reading_time(html_content)

        # Extract tags
        tags = extract_tags_from_content(html_content, metadata, file_path)

        # Find series posts if this post is part of a series
        series_posts = find_series_posts(metadata, file_path)

        # Generate unique icon for this content
        unique_icon = generate_unique_svg_icon(title, size=32)

        return {
            'content': html_content,
            'title': title,
            'metadata': metadata,
            'reading_time': reading_time,
            'word_count': word_count,
            'tags': tags,
            'series_posts': series_posts,
            'series_name': metadata.get('series'),
            'unique_icon': unique_icon
        }
    except Exception as e:
        return {
            'content': f'<p>Error reading file: {str(e)}</p>',
            'title': 'Error',
            'metadata': {}
        }

@app.route('/')
def index():
    """Homepage showcasing download statistics."""
    return render_template('homepage.html',
                         current_year=datetime.now().year,
                         title="Home")


@app.route('/health')
def health_check():
    """Simple health check endpoint for monitoring."""
    return {'status': 'healthy', 'timestamp': datetime.now().isoformat()}


@app.route('/search')
def search_page():
    """Search page with interactive search functionality."""
    return render_template('search.html',
                         title='Search',
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Search')


def _convert_unified_outlines_cache(unified_cache):
    """Convert unified cache outlines format to template-expected format."""
    articles_list = []

    for file_path, outlines in unified_cache.get('articles', {}).items():
        if not outlines:
            continue

        try:
            full_path = Path(file_path)
            content_data = render_markdown_file(full_path)
            pub_date = extract_intelligent_date(full_path, content_data)
            relative_path = str(full_path.relative_to(DATA_DIR))
            url_path = '/' + relative_path[:-3]

            processed_outlines = []
            for outline in outlines:
                if isinstance(outline, dict) and 'text' in outline:
                    processed_outlines.append(outline)

            if processed_outlines:
                articles_list.append({
                    'title': content_data['title'],
                    'url': url_path,
                    'date': pub_date,
                    'category': full_path.parent.name.replace('-', ' ').title(),
                    'outlines': processed_outlines
                })
        except Exception:
            continue

    articles_list.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)
    return {
        'articles': articles_list,
        'total_count': unified_cache.get('total_count', sum(len(article['outlines']) for article in articles_list))
    }

def _convert_unified_quotes_cache(unified_cache):
    """Convert unified cache quotes format to template-expected format."""
    articles_list = []

    for file_path, quotes in unified_cache.get('articles', {}).items():
        if not quotes:
            continue

        try:
            full_path = Path(file_path)
            content_data = render_markdown_file(full_path)
            pub_date = extract_intelligent_date(full_path, content_data)
            relative_path = str(full_path.relative_to(DATA_DIR))
            url_path = '/' + relative_path[:-3]

            processed_quotes = []
            for quote in quotes:
                if isinstance(quote, dict) and 'text' in quote:
                    processed_quotes.append(quote)

            if processed_quotes:
                articles_list.append({
                    'title': content_data['title'],
                    'url': url_path,
                    'date': pub_date,
                    'category': full_path.parent.name.replace('-', ' ').title(),
                    'quotes': processed_quotes
                })
        except Exception:
            continue

    articles_list.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)
    return {
        'articles': articles_list,
        'total_count': unified_cache.get('total_count', sum(len(article['quotes']) for article in articles_list))
    }

def _convert_unified_connections_cache(unified_cache):
    """Convert unified cache connections format to template-expected format."""
    return {
        'outgoing_refs': unified_cache.get('outgoing_refs', {}),
        'incoming_refs': unified_cache.get('incoming_refs', {}),
        'total_count': unified_cache.get('total_count', 0)
    }

def _convert_unified_terms_cache(unified_cache):
    """Convert unified cache terms format to template-expected format."""
    return {
        'terms': unified_cache.get('terms', []),
        'total_occurrences': unified_cache.get('total_occurrences', 0)
    }

def _convert_unified_sidenotes_cache(unified_cache):
    """Convert unified cache sidenotes format to template-expected format."""
    articles_list = []

    # The unified cache has structure: {'articles': {file_path: [sidenotes]}, 'total_count': int}
    articles_data = unified_cache.get('articles', {})

    for file_path, sidenotes in articles_data.items():
        if not sidenotes:
            continue

        try:
            # Get file info
            full_path = Path(file_path)
            content_data = render_markdown_file(full_path)

            # Extract date for sorting
            pub_date = extract_intelligent_date(full_path, content_data)

            # Create URL for this file
            relative_path = str(full_path.relative_to(DATA_DIR))
            url_path = '/' + relative_path[:-3]  # Remove .md extension

            # Convert sidenotes to expected format
            processed_sidenotes = []
            for sidenote in sidenotes:
                if isinstance(sidenote, dict) and 'text' in sidenote:
                    processed_sidenotes.append({
                        'text': sidenote['text'],
                        'id': sidenote.get('id')  # May be None
                    })

            if processed_sidenotes:
                articles_list.append({
                    'title': content_data['title'],
                    'url': url_path,
                    'date': pub_date,
                    'category': full_path.parent.name.replace('-', ' ').title(),
                    'sidenotes': processed_sidenotes
                })

        except Exception as e:
            # Skip files that can't be processed
            continue

    # Sort by date (most recent first)
    articles_list.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

    print(f"Sidenotes conversion: {len(articles_list)} articles processed from {len(unified_cache.get('articles', {}))} files")
    return {
        'articles': articles_list,
        'total_count': unified_cache.get('total_count', sum(len(article['sidenotes']) for article in articles_list))
    }

def _extract_all_sidenotes_cached():
    """Return pre-loaded sidenotes cache data (pure RAM, no TTL)."""
    # Return pre-loaded cache data if available
    if _sidenotes_cache['data'] is not None:
        return _sidenotes_cache['data']

    # Fallback to rebuild if cache somehow wasn't initialized
    import glob
    from collections import defaultdict

    articles_with_sidenotes = defaultdict(list)

    # Get all markdown files from /data/ directory
    all_files = glob.glob('data/**/*.md', recursive=True)

    # Filter out index files
    all_files = [f for f in all_files if not f.endswith('index.md')]

    for file_path in all_files:
        try:
            # Read the file and render it
            full_path = Path(file_path)
            content_data = render_markdown_file(full_path)
            html_content = content_data['content']

            # Extract sidenotes from the HTML using regex
            # Pattern matches <span class="sidenote">content</span>
            sidenote_pattern = r'<span class="sidenote">(.*?)</span>'
            file_sidenotes = re.findall(sidenote_pattern, html_content, re.DOTALL)

            if file_sidenotes:
                # Create URL for this file
                relative_path = str(full_path.relative_to(DATA_DIR))
                url_path = '/' + relative_path[:-3]  # Remove .md extension

                # Extract date for sorting
                pub_date = extract_intelligent_date(full_path, content_data)

                # Clean up sidenotes and add to article group with IDs
                cleaned_sidenotes = []

                # Also extract sidenote IDs from the HTML
                # Pattern to match the full sidenote structure with ID
                full_pattern = r'<input type="checkbox" id="(sn-[^"]+)"[^>]*\/><span class="sidenote">(.*?)</span>'
                full_matches = re.findall(full_pattern, html_content, re.DOTALL)

                if full_matches:
                    # We have IDs for the sidenotes
                    for sidenote_id, sidenote_text in full_matches:
                        # Remove HTML links but keep the link text
                        sidenote_text = re.sub(r'<a[^>]*?>(.*?)</a>', r'\1', sidenote_text)
                        # Clean up the sidenote text (remove extra whitespace)
                        sidenote_text = re.sub(r'\s+', ' ', sidenote_text).strip()
                        cleaned_sidenotes.append({
                            'text': sidenote_text,
                            'id': sidenote_id
                        })
                else:
                    # Fallback for sidenotes without IDs
                    for i, sidenote in enumerate(file_sidenotes):
                        # Remove HTML links but keep the link text
                        sidenote_text = re.sub(r'<a[^>]*?>(.*?)</a>', r'\1', sidenote)
                        # Clean up the sidenote text (remove extra whitespace)
                        sidenote_text = re.sub(r'\s+', ' ', sidenote_text).strip()
                        cleaned_sidenotes.append({
                            'text': sidenote_text,
                            'id': None
                        })

                articles_with_sidenotes[content_data['title']].append({
                    'sidenotes': cleaned_sidenotes,
                    'url': url_path,
                    'date': pub_date,
                    'category': full_path.parent.name.replace('-', ' ').title()
                })
        except Exception as e:
            # Skip files that can't be processed
            continue

    # Convert to list and sort by date (most recent first)
    articles_list = []
    for title, article_data in articles_with_sidenotes.items():
        # Should only be one entry per article
        data = article_data[0]
        articles_list.append({
            'title': title,
            'url': data['url'],
            'date': data['date'],
            'category': data['category'],
            'sidenotes': data['sidenotes']
        })

    articles_list.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

    # Count total sidenotes
    total_count = sum(len(article['sidenotes']) for article in articles_list)

    # Update cache
    result = {
        'articles': articles_list,
        'total_count': total_count
    }
    _sidenotes_cache['data'] = result

    return result


@app.route('/sidenotes')
def sidenotes_index():
    """Extract and display all sidenotes from across the site as an index."""
    # Use clean MetadataCache interface
    sidenotes_data = metadata_cache.get_sidenotes()

    return render_template('sidenotes.html',
                         articles=sidenotes_data['articles'],
                         total_count=sidenotes_data['total_count'],
                         title='Sidenotes Index',
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Sidenotes')


def _extract_all_outlines_cached():
    """Return pre-loaded outlines cache data (pure RAM, no TTL)."""
    # Return pre-loaded cache data if available
    if _outlines_cache['data'] is not None:
        return _outlines_cache['data']

    # Fallback to rebuild if cache somehow wasn't initialized
    import glob
    from collections import defaultdict

    articles_with_outlines = defaultdict(list)

    # Get all markdown files from /data/ directory
    all_files = glob.glob('data/**/*.md', recursive=True)

    # Filter out index files
    all_files = [f for f in all_files if not f.endswith('index.md')]

    for file_path in all_files:
        try:
            # Read the file and render it
            full_path = Path(file_path)
            content_data = render_markdown_file(full_path)
            html_content = content_data['content']

            # Extract headings from the HTML using regex
            # Pattern matches <h1>, <h2>, <h3>, etc. with optional IDs and content
            heading_pattern = r'<h([1-6])(?:[^>]*id="([^"]*)")?[^>]*>([^<]+)</h[1-6]>'
            headings = re.findall(heading_pattern, html_content)

            if headings:
                # Create URL for this file
                relative_path = str(full_path.relative_to(DATA_DIR))
                url_path = '/' + relative_path[:-3]  # Remove .md extension

                # Extract date for sorting
                pub_date = extract_intelligent_date(full_path, content_data)

                # Clean up headings and create outline structure
                cleaned_headings = []
                for level, heading_id, heading_text in headings:
                    # Skip h1 if it matches the title (avoid duplication)
                    if level == '1' and heading_text.strip() == content_data['title'].strip():
                        continue

                    cleaned_headings.append({
                        'level': int(level),
                        'text': heading_text.strip(),
                        'id': heading_id if heading_id else None,
                        'anchor_url': f"{url_path}#{heading_id}" if heading_id else url_path
                    })

                if cleaned_headings:  # Only add if there are headings after filtering
                    articles_with_outlines[content_data['title']].append({
                        'headings': cleaned_headings,
                        'url': url_path,
                        'date': pub_date,
                        'category': full_path.parent.name.replace('-', ' ').title()
                    })
        except Exception as e:
            # Skip files that can't be processed
            continue

    # Convert to list and sort by date (most recent first)
    articles_list = []
    for title, article_data in articles_with_outlines.items():
        # Should only be one entry per article
        data = article_data[0]
        articles_list.append({
            'title': title,
            'url': data['url'],
            'date': data['date'],
            'category': data['category'],
            'headings': data['headings']
        })

    articles_list.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

    # Count total headings
    total_count = sum(len(article['headings']) for article in articles_list)

    # Update cache
    result = {
        'articles': articles_list,
        'total_count': total_count
    }
    _outlines_cache['data'] = result

    return result


@app.route('/outlines')
def outlines_index():
    """Extract and display all essay outlines from across the site as an index."""
    # Use clean MetadataCache interface
    outlines_data = metadata_cache.get_outlines()

    return render_template('outlines.html',
                         articles=outlines_data['articles'],
                         total_count=outlines_data['total_count'],
                         title='Outlines Index',
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Outlines')


def _extract_all_quotes_cached():
    """Return pre-loaded quotes cache data (pure RAM, no TTL)."""
    # Return pre-loaded cache data if available
    if _quotes_cache['data'] is not None:
        return _quotes_cache['data']

    # Fallback to rebuild if cache somehow wasn't initialized
    import glob
    from collections import defaultdict

    articles_with_quotes = defaultdict(list)

    # Get all markdown files from /data/ directory
    all_files = glob.glob('data/**/*.md', recursive=True)

    # Filter out index files
    all_files = [f for f in all_files if not f.endswith('index.md')]

    for file_path in all_files:
        try:
            # Read the file and render it
            full_path = Path(file_path)
            content_data = render_markdown_file(full_path)
            html_content = content_data['content']

            # Extract blockquotes from the HTML using regex
            # Pattern matches <blockquote>content</blockquote>
            quote_pattern = r'<blockquote[^>]*>(.*?)</blockquote>'
            quotes = re.findall(quote_pattern, html_content, re.DOTALL)

            if quotes:
                # Create URL for this file
                relative_path = str(full_path.relative_to(DATA_DIR))
                url_path = '/' + relative_path[:-3]  # Remove .md extension

                # Extract date for sorting
                pub_date = extract_intelligent_date(full_path, content_data)

                # Clean up quotes
                cleaned_quotes = []
                for quote in quotes:
                    # Skip quotes that start with bold labels (like "Note:", "Analysis:", "The Prompt:", etc.)
                    # Pattern matches: <p><strong>Label</strong>: content or similar
                    if re.match(r'^\s*<p[^>]*><(?:strong|b)[^>]*>[^<]*</(?:strong|b)>:', quote):
                        continue

                    # Remove inner HTML tags but preserve basic formatting
                    quote_text = re.sub(r'<(?!/?(?:em|strong|i|b)\b)[^>]*>', '', quote)
                    quote_text = re.sub(r'\s+', ' ', quote_text).strip()

                    # Skip very short quotes (likely not substantive)
                    if len(quote_text) > 20:
                        cleaned_quotes.append(quote_text)

                if cleaned_quotes:
                    articles_with_quotes[content_data['title']].append({
                        'quotes': cleaned_quotes,
                        'url': url_path,
                        'date': pub_date,
                        'category': full_path.parent.name.replace('-', ' ').title()
                    })
        except Exception as e:
            # Skip files that can't be processed
            continue

    # Convert to list and sort by date (most recent first)
    articles_list = []
    for title, article_data in articles_with_quotes.items():
        # Should only be one entry per article
        data = article_data[0]
        articles_list.append({
            'title': title,
            'url': data['url'],
            'date': data['date'],
            'category': data['category'],
            'quotes': data['quotes']
        })

    articles_list.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

    # Count total quotes
    total_count = sum(len(article['quotes']) for article in articles_list)

    # Update cache
    result = {
        'articles': articles_list,
        'total_count': total_count
    }
    _quotes_cache['data'] = result

    return result


@app.route('/quotes')
def quotes_index():
    """Extract and display all blockquotes from across the site as an index."""
    # Use clean MetadataCache interface
    quotes_data = metadata_cache.get_quotes()

    return render_template('quotes.html',
                         articles=quotes_data['articles'],
                         total_count=quotes_data['total_count'],
                         title='Quotes Index',
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Quotes')


def _extract_all_connections_cached():
    """Return pre-loaded connections cache data (pure RAM, no TTL)."""
    # Return pre-loaded cache data if available
    if _connections_cache['data'] is not None:
        return _connections_cache['data']

    # Fallback to rebuild if cache somehow wasn't initialized
    import glob
    from collections import defaultdict

    # Track both outgoing and incoming connections
    articles_data = {}  # url -> {title, date, category, outgoing_connections}
    incoming_connections = defaultdict(list)  # target_url -> [source connections]

    # Get all markdown files from /data/ directory
    all_files = glob.glob('data/**/*.md', recursive=True)

    # Filter out index files
    all_files = [f for f in all_files if not f.endswith('index.md')]

    # First pass: collect all articles and their outgoing connections
    for file_path in all_files:
        try:
            # Read the file and render it
            full_path = Path(file_path)
            content_data = render_markdown_file(full_path)
            html_content = content_data['content']

            # Create URL for this file
            relative_path = str(full_path.relative_to(DATA_DIR))
            source_url = '/' + relative_path[:-3]  # Remove .md extension

            # Extract date for sorting
            pub_date = extract_intelligent_date(full_path, content_data)

            # Initialize article data
            articles_data[source_url] = {
                'title': content_data['title'],
                'url': source_url,
                'date': pub_date,
                'category': full_path.parent.name.replace('-', ' ').title(),
                'outgoing_connections': []
            }

            # Extract internal links from the HTML
            # Pattern matches <a href="/internal/path">link text</a>
            link_pattern = r'<a[^>]*href="(/[^"]*)"[^>]*>(.*?)</a>'
            links = re.findall(link_pattern, html_content, re.DOTALL)

            # Collect outgoing connections for this article
            for link_url, link_text in links:
                if (link_url.startswith('/') and
                    not link_url.startswith('//') and
                    not link_url.startswith('/static') and
                    link_url != source_url):  # Don't include self-references

                    # Clean up link text
                    link_text = re.sub(r'<[^>]*>', '', link_text)
                    link_text = re.sub(r'\s+', ' ', link_text).strip()

                    connection = {
                        'target_url': link_url,
                        'link_text': link_text,
                        'source_url': source_url,
                        'source_title': content_data['title']
                    }

                    # Add to outgoing connections
                    articles_data[source_url]['outgoing_connections'].append({
                        'target_url': link_url,
                        'link_text': link_text
                    })

                    # Add to incoming connections map
                    incoming_connections[link_url].append({
                        'source_url': source_url,
                        'source_title': content_data['title'],
                        'link_text': link_text
                    })

        except Exception as e:
            # Skip files that can't be processed
            continue

    # Second pass: add incoming connections to each article
    for url, article in articles_data.items():
        article['incoming_connections'] = incoming_connections.get(url, [])

    # Convert to list format and filter articles with connections
    articles_list = []
    for url, article in articles_data.items():
        # Only include articles that have outgoing OR incoming connections
        if article['outgoing_connections'] or article['incoming_connections']:
            articles_list.append({
                'title': article['title'],
                'url': article['url'],
                'date': article['date'],
                'category': article['category'],
                'connections': article['outgoing_connections'],  # Keep for backward compatibility
                'outgoing_connections': article['outgoing_connections'],
                'incoming_connections': article['incoming_connections']
            })

    articles_list.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

    # Count total connections (both directions)
    total_outgoing = sum(len(article['outgoing_connections']) for article in articles_list)
    total_incoming = sum(len(article['incoming_connections']) for article in articles_list)

    # Update cache
    result = {
        'articles': articles_list,
        'total_count': total_outgoing,  # Keep backward compatibility
        'total_outgoing': total_outgoing,
        'total_incoming': total_incoming
    }
    _connections_cache['data'] = result

    return result


@app.route('/connections')
def connections_index():
    """Extract and display all cross-references between essays."""
    # Use clean MetadataCache interface
    connections_data = metadata_cache.get_connections()

    return render_template('connections.html',
                         articles=connections_data['articles'],
                         total_count=connections_data['total_count'],
                         total_outgoing=connections_data.get('total_outgoing'),
                         total_incoming=connections_data.get('total_incoming'),
                         title='Connections Index',
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Connections')


@app.route('/graph/data')
def graph_data():
    """API endpoint that returns graph data for network visualization."""
    # Use clean MetadataCache interface
    connections_data = metadata_cache.get_connections()

    nodes = []
    edges = []
    node_ids = set()

    # Create nodes and edges from connections
    for article in connections_data['articles']:
        source_id = article['url']
        node_ids.add(source_id)

        # Use outgoing_connections for graph edges (backward compatibility: also check connections)
        connections_list = article.get('outgoing_connections', article.get('connections', []))
        for connection in connections_list:
            target_id = connection['target_url']
            node_ids.add(target_id)

            edges.append({
                'source': source_id,
                'target': target_id,
                'link_text': connection['link_text']
            })

    # Create node objects with titles using MetadataCache
    posts = metadata_cache.get_blog_posts()
    post_lookup = {post['url']: post for post in posts}

    for node_id in node_ids:
        post = post_lookup.get(node_id)
        nodes.append({
            'id': node_id,
            'title': post['title'] if post else node_id.split('/')[-1],
            'category': post['category'] if post else 'Unknown',
            'url': node_id
        })

    return jsonify({
        'nodes': nodes,
        'edges': edges,
        'stats': {
            'total_nodes': len(nodes),
            'total_edges': len(edges)
        }
    })


@app.route('/graph')
def graph_visualization():
    """Interactive network graph of cross-references."""
    return render_template('graph.html',
                         title='Cross-Reference Graph',
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Cross-Reference Graph')


@app.route('/terms')
def terms_index():
    """Extract and display all significant terms like a book index."""
    # Use clean MetadataCache interface
    terms_data = metadata_cache.get_terms()

    return render_template('terms.html',
                         terms=terms_data['terms'],
                         total_terms=terms_data['total_terms'],
                         total_occurrences=terms_data['total_occurrences'],
                         title='Term Index',
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Term Index')


@app.route('/random')
def random_post():
    """Redirect to a random document from anywhere in /data/."""
    import random
    import glob

    # Get all markdown files from /data/ directory
    all_files = glob.glob('data/**/*.md', recursive=True)

    # Filter out index files
    all_files = [f for f in all_files if not f.endswith('index.md')]

    if not all_files:
        return redirect('/directory')

    # Choose random file and convert to URL
    random_file = random.choice(all_files)
    # Convert data/essays/2010-01-example.md -> /essays/2010-01-example
    url_path = '/' + random_file.replace('data/', '').replace('.md', '')
    return redirect(url_path)


def get_random_personality_from_collection(collection_path):
    """Helper function to get a random personality from a collection."""
    import random
    import glob

    if collection_path:
        # Get files from specific collection
        pattern = f'data/artificial-intelligence/personalities/{collection_path}/*.md'
        fallback_url = f'/artificial-intelligence/personalities/{collection_path}'
    else:
        # Get all personality files
        pattern = 'data/artificial-intelligence/personalities/**/*.md'
        fallback_url = '/artificial-intelligence/personalities'

    personality_files = glob.glob(pattern, recursive=True)
    # Filter out index files
    personality_files = [f for f in personality_files if not f.endswith('index.md')]

    if not personality_files:
        return redirect(fallback_url)

    # Choose random personality and convert to URL
    random_file = random.choice(personality_files)
    # Convert data/artificial-intelligence/personalities/major-arcana/the-fool.md -> /artificial-intelligence/personalities/major-arcana/the-fool
    url_path = '/' + random_file.replace('data/', '').replace('.md', '')
    return redirect(url_path)


@app.route('/random/personality')
@app.route('/random/personality/')
def random_personality():
    """Redirect to a random AI personality from any collection."""
    return get_random_personality_from_collection(None)


@app.route('/random/<collection>')
def random_from_collection(collection):
    """Redirect to a random personality from a specific collection."""
    # Validate collection exists
    valid_collections = [
        'major-arcana', 'seven-virtues', 'programming-languages',
        'greek-pantheon', 'roman-pantheon', 'hindu-pantheon',
        'operating-systems', 'supporting-cast', 'goddess-archetypes',
        'biblical-characters', 'biblical-anthology'
    ]

    if collection not in valid_collections:
        return redirect('/artificial-intelligence/personalities')

    return get_random_personality_from_collection(collection)


@app.route('/archive')
def archive_index():
    """Archive index showing all posts by year."""
    posts = metadata_cache.get_blog_posts()

    # Group posts by year
    grouped_posts = {}
    for post in posts:
        year = post['pub_date'].year
        if year not in grouped_posts:
            grouped_posts[year] = []
        grouped_posts[year].append(post)

    # Sort each year's posts by date (most recent first) and years in descending order
    for year in grouped_posts:
        grouped_posts[year].sort(key=lambda x: x['pub_date'], reverse=True)

    grouped_posts = dict(sorted(grouped_posts.items(), reverse=True))

    return render_template('archive.html',
                         archive_title='Complete',
                         archive_description=None,
                         grouped_posts=grouped_posts,
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Archive')


@app.route('/archive/<int:year>')
def archive_year(year):
    """Archive for a specific year."""
    posts = metadata_cache.get_blog_posts()

    # Filter posts for the specific year
    year_posts = [post for post in posts if post['pub_date'].year == year]

    if not year_posts:
        abort(404)

    # Group posts by month
    grouped_posts = {}
    month_names = ['', 'January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December']

    for post in year_posts:
        month_name = month_names[post['pub_date'].month]
        if month_name not in grouped_posts:
            grouped_posts[month_name] = []
        grouped_posts[month_name].append(post)

    # Sort posts within each month by date (most recent first)
    for month in grouped_posts:
        grouped_posts[month].sort(key=lambda x: x['pub_date'], reverse=True)

    # Sort months in chronological order (most recent first)
    month_order = {name: idx for idx, name in enumerate(month_names[1:], 1)}
    grouped_posts = dict(sorted(grouped_posts.items(),
                               key=lambda x: month_order[x[0]], reverse=True))

    breadcrumbs = [{'name': 'Archive', 'url': '/archive'}]

    return render_template('archive.html',
                         archive_title=str(year),
                         archive_description=f'Essays and AI writings from {year}.',
                         grouped_posts=grouped_posts,
                         breadcrumbs=breadcrumbs,
                         current_year=datetime.now().year,
                         current_page=f'{year} Archive')


@app.route('/archive/<int:year>/<int:month>')
def archive_month(year, month):
    """Archive for a specific month and year."""
    posts = metadata_cache.get_blog_posts()

    # Filter posts for the specific month and year
    month_posts = [post for post in posts
                   if post['pub_date'].year == year and post['pub_date'].month == month]

    if not month_posts:
        abort(404)

    # Group by category (single level for monthly view)
    grouped_posts = {}
    for post in month_posts:
        category = post['category']
        if category not in grouped_posts:
            grouped_posts[category] = []
        grouped_posts[category].append(post)

    month_names = ['', 'January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December']
    month_name = month_names[month]

    breadcrumbs = [
        {'name': 'Archive', 'url': '/archive'},
        {'name': str(year), 'url': f'/archive/{year}'}
    ]

    return render_template('archive.html',
                         archive_title=f'{month_name} {year}',
                         archive_description=f'Essays and AI writings from {month_name} {year}.',
                         grouped_posts=grouped_posts,
                         breadcrumbs=breadcrumbs,
                         current_year=datetime.now().year,
                         current_page=f'{month_name} {year} Archive')


@app.route('/themes')
def themes_index():
    """Themes page - just displays the index.md content."""
    themes_path = DATA_DIR / 'themes'

    # Check for index.md in the themes directory
    index_file = themes_path / 'index.md'
    if index_file.exists():
        content_data = render_markdown_file(index_file)
        # Generate folder icon for themes directory
        folder_icon = generate_folder_icon('Themes', size=32)
        return render_template('post.html',
                             content=content_data['content'],
                             title='Themes',
                             metadata=content_data.get('metadata', {}),
                             breadcrumbs=[],
                             current_year=datetime.now().year,
                             current_page='Themes',
                             unique_icon=folder_icon,
                             parent_directory=None)
    else:
        # Fallback to directory listing if no index.md
        return serve_path('themes')


@app.route('/directory')
def directory_index():
    """Directory listing that was previously the homepage."""
    items = get_directory_structure(DATA_DIR)

    # Check for index.md in the root data directory
    index_file = DATA_DIR / 'index.md'
    index_content = None
    content_position = 'top'  # Default position
    if index_file.exists():
        index_content = render_markdown_file(index_file)

        # Determine content position based on length
        # Count words in the HTML content (after stripping HTML tags)
        content_text = re.sub(r'<[^>]+>', '', index_content['content'])
        word_count = len(content_text.split())

        # If content is longer than 150 words, put it at the bottom
        if word_count > 150:
            content_position = 'bottom'

    # Check if root directory is an image gallery
    image_items = [item for item in items if item['is_image']]
    total_files = [item for item in items if not item['is_dir']]
    is_image_gallery = len(image_items) >= 3 and len(total_files) > 0 and (len(image_items) / len(total_files)) >= 0.5

    return render_template('directory.html',
                         items=items,
                         current_path='',
                         title='Kenneth Reitz',
                         breadcrumbs=[],
                         index_content=index_content,
                         content_position=content_position,
                         is_image_gallery=is_image_gallery,
                         image_items=image_items,
                         current_year=datetime.now().year)

@app.route('/<path:path>')
def serve_path(path):
    """Serve files and directories from the data folder."""
    full_path = DATA_DIR / path

    # If the path doesn't exist, try adding .md extension for markdown files
    if not full_path.exists():
        md_path = DATA_DIR / (path + '.md')
        if md_path.exists() and md_path.suffix == '.md':
            full_path = md_path
        else:
            abort(404)

    # Generate breadcrumbs
    # For clean URLs, we need to handle the case where path might not include .md
    original_path = path
    if full_path.suffix == '.md' and not path.endswith('.md'):
        # This is a clean URL for a markdown file
        path_parts = path.split('/')
    else:
        path_parts = path.split('/')

    breadcrumbs = []
    current = ''
    for part in path_parts[:-1]:  # Exclude the current page
        current = f"{current}/{part}" if current else part
        breadcrumbs.append({
            'name': part.replace('-', ' ').replace('_', ' ').title(),
            'url': f"/{current}"
        })

    if full_path.is_dir():
        # Directory listing
        items = get_directory_structure(full_path)

        # Check if this is an image gallery (50% or more images)
        image_items = [item for item in items if item['is_image']]
        total_files = [item for item in items if not item['is_dir']]
        is_image_gallery = len(image_items) >= 3 and len(total_files) > 0 and (len(image_items) / len(total_files)) >= 0.5

        # Check for index.md in the directory
        index_file = full_path / 'index.md'
        index_content = None
        content_position = 'top'  # Default position
        if index_file.exists():
            index_content = render_markdown_file(index_file)

            # Determine content position based on length
            # Count words in the HTML content (after stripping HTML tags)
            content_text = re.sub(r'<[^>]+>', '', index_content['content'])
            word_count = len(content_text.split())

            # If content is longer than 150 words, put it at the bottom
            if word_count > 150:
                content_position = 'bottom'

        # Use title from index.md if available, otherwise fall back to directory name
        if index_content and index_content.get('title'):
            title = index_content['title']
        else:
            title = path_parts[-1].replace('-', ' ').replace('_', ' ').title()

        # Generate parent directory information for back link
        parent_directory = None
        if full_path.parent != DATA_DIR:  # Don't show parent for root-level content
            parent_path = full_path.parent
            parent_display_name = parent_path.name.replace('-', ' ').replace('_', ' ').title()
            parent_url = '/' + str(parent_path.relative_to(DATA_DIR))
            if parent_url == '/':
                parent_url = '/directory'
            parent_icon = generate_folder_icon(parent_display_name, size=20)

            parent_directory = {
                'display_name': parent_display_name,
                'url': parent_url,
                'icon': parent_icon
            }

        return render_template('directory.html',
                             items=items,
                             current_path=original_path,
                             title=title,
                             breadcrumbs=breadcrumbs,
                             index_content=index_content,
                             content_position=content_position,
                             is_image_gallery=is_image_gallery,
                             image_items=image_items,
                             parent_directory=parent_directory,
                             current_year=datetime.now().year,
                             current_page=title)

    elif full_path.suffix == '.md':
        # Markdown file
        content_data = render_markdown_file(full_path)

        # Find related posts for essays and AI writings
        related_posts = []
        prev_post = None
        next_post = None
        if 'essays' in path or ('artificial-intelligence' in path and 'writings' in path):
            related_posts = find_related_posts(str(full_path.relative_to(DATA_DIR)))
            prev_post, next_post = find_adjacent_posts(str(full_path.relative_to(DATA_DIR)))

        # Generate description from content for social sharing
        content_text = re.sub(r'<[^>]+>', '', content_data['content'])
        content_text = content_text.strip()
        description = ""
        if content_text:
            # Get first paragraph or first 200 chars
            first_para = content_text.split('\n\n')[0]
            description = first_para[:200] + '...' if len(first_para) > 200 else first_para

        # Generate parent directory information
        parent_directory = None
        if full_path.parent != DATA_DIR:  # Don't show parent for root-level content
            parent_path = full_path.parent
            parent_display_name = parent_path.name.replace('-', ' ').replace('_', ' ').title()
            parent_url = '/' + str(parent_path.relative_to(DATA_DIR))
            parent_icon = generate_folder_icon(parent_display_name, size=20)

            parent_directory = {
                'display_name': parent_display_name,
                'url': parent_url,
                'icon': parent_icon
            }

        return render_template('post.html',
                             content=content_data['content'],
                             title=content_data['title'],
                             metadata=content_data['metadata'],
                             description=description,
                             breadcrumbs=breadcrumbs,
                             current_path=path,
                             current_year=datetime.now().year,
                             current_page=content_data['title'],
                             related_posts=related_posts,
                             reading_time=content_data.get('reading_time'),
                             word_count=content_data.get('word_count'),
                             prev_post=prev_post,
                             next_post=next_post,
                             tags=content_data.get('tags', []),
                             series_posts=content_data.get('series_posts', []),
                             series_name=content_data.get('series_name'),
                             unique_icon=content_data.get('unique_icon'),
                             parent_directory=parent_directory)

    elif full_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp']:
        # Image file - check if it's in a gallery directory
        parent_dir = full_path.parent
        gallery_images = []

        if parent_dir.exists():
            for img in sorted(parent_dir.iterdir()):
                if img.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp']:
                    gallery_images.append({
                        'name': img.name,
                        'path': f"/static/data/{img.relative_to(DATA_DIR)}",
                        'url': f"/{img.relative_to(DATA_DIR)}",
                        'is_current': img == full_path
                    })

        return render_template('photo.html',
                             image_path=f"/static/data/{path}",
                             title=full_path.stem.replace('-', ' ').replace('_', ' ').title(),
                             breadcrumbs=breadcrumbs,
                             gallery_images=gallery_images,
                             current_path=path,
                             current_year=datetime.now().year,
                             current_page=full_path.stem.replace('-', ' ').replace('_', ' ').title())

    else:
        # Other files - serve directly
        from flask import send_file
        return send_file(full_path)

@app.route('/static/data/<path:path>')
def serve_data_file(path):
    """Serve static files from the data directory."""
    full_path = DATA_DIR / path
    if not full_path.exists() or not full_path.is_file():
        abort(404)
    from flask import send_file, make_response
    response = make_response(send_file(full_path))

    # Add caching headers for static assets
    if full_path.suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg']:
        # Cache images for 7 days
        response.headers['Cache-Control'] = 'public, max-age=604800'
    else:
        # Cache other static files for 1 hour
        response.headers['Cache-Control'] = 'public, max-age=3600'

    return response


@lru_cache(maxsize=500)
def _get_article_icon_cached(article_path):
    """Cached function to get article icon data."""
    # Normalize the path - ensure it starts with /
    if not article_path.startswith('/'):
        article_path = '/' + article_path

    # Look up the article in blog_posts cache
    posts = _collect_all_blog_posts_cached()
    for post in posts:
        if post['url'] == article_path:
            if 'unique_icon' in post and post['unique_icon']:
                return {
                    'success': True,
                    'icon': post['unique_icon'],
                    'title': post['title']
                }
            else:
                # Generate icon if not cached
                icon_svg = generate_unique_svg_icon(post['title'], size=20)
                return {
                    'success': True,
                    'icon': icon_svg,
                    'title': post['title']
                }

    # If not found in blog_posts, try to read the file directly
    try:
        # Convert URL path to file path
        file_path = DATA_DIR / article_path.lstrip('/')

        # Check if it's a directory path (ends with / or is a directory)
        if article_path.endswith('/') or file_path.is_dir():
            # For directories, try to read index.md or use directory name
            index_path = file_path / 'index.md' if file_path.is_dir() else file_path.with_name('index.md')

            if index_path.exists():
                # Read index.md to get the title
                content_data = render_markdown_file(index_path)
                if content_data and 'title' in content_data:
                    icon_svg = generate_folder_icon(content_data['title'], size=20)
                    return {
                        'success': True,
                        'icon': icon_svg,
                        'title': content_data['title']
                    }

            # If no index.md, use the directory name
            dir_name = file_path.name if file_path.is_dir() else article_path.strip('/').split('/')[-1]
            title_guess = dir_name.replace('-', ' ').replace('_', ' ').title()
            icon_svg = generate_folder_icon(title_guess, size=20)
            return {
                'success': True,
                'icon': icon_svg,
                'title': title_guess
            }
        else:
            # Regular file handling
            md_file_path = file_path.with_suffix('.md')

            if md_file_path.exists():
                # Read the file and extract the title
                content_data = render_markdown_file(md_file_path)
                if content_data and 'title' in content_data:
                    icon_svg = generate_unique_svg_icon(content_data['title'], size=20)
                    return {
                        'success': True,
                        'icon': icon_svg,
                        'title': content_data['title']
                    }
    except Exception:
        pass  # Fall through to fallback

    # Final fallback: extract title from URL and generate icon
    title_guess = article_path.split('/')[-1].replace('-', ' ').replace('_', ' ').title()
    if title_guess:
        icon_svg = generate_unique_svg_icon(title_guess, size=20)
        return {
            'success': True,
            'icon': icon_svg,
            'title': title_guess
        }

    return {'success': False, 'error': 'Article not found'}

@app.route('/api/icon/<path:article_path>')
def api_get_article_icon(article_path):
    """API endpoint to get icon SVG for a specific article."""
    try:
        result = _get_article_icon_cached(article_path)
        return jsonify(result)

    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})

@app.route('/api/debug-cache')
def debug_cache():
    """Debug endpoint to see what's in the blog posts cache."""
    try:
        posts = _collect_all_blog_posts_cached()
        # Filter for software posts
        software_posts = [p for p in posts if 'software' in p.get('url', '')]
        return jsonify({
            'total_posts': len(posts),
            'software_posts': software_posts[:5],  # First 5 for debugging
            'sample_urls': [p.get('url') for p in posts[:10]]
        })
    except Exception as e:
        return jsonify({'error': str(e)})

@app.route('/api/search')
def api_search():
    """API endpoint for full-text search across the knowledge base."""
    query = request.args.get('q', '').lower()
    if not query:
        return jsonify([])

    results = []

    def search_path(current_path: Path, display_path: str = ""):
        """Recursively search files and directories under ``current_path``.

        This replaces the previous implementation that searched an in-memory
        tree representation but never actually scanned the filesystem,
        resulting in an empty search index. We now walk the ``data`` directory
        directly so queries return real results.
        """
        for item in current_path.iterdir():
            if item.name.startswith('.'):
                continue

            relative_path = str(item.relative_to(DATA_DIR))
            node_name = item.name.lower()
            node_path = relative_path.lower()
            node_content = ""

            if item.is_file() and item.suffix == '.md':
                try:
                    node_content = item.read_text(encoding='utf-8').lower()
                except Exception:
                    node_content = ""

            item_display_path = f"{display_path}/{item.name}" if display_path else item.name

            if query in node_name or query in node_path or query in node_content:
                # Generate snippet with highlighted search terms for markdown files
                snippet = ""
                if item.suffix == '.md' and node_content and query in node_content:
                    # Find the first occurrence of the query in content
                    query_pos = node_content.find(query)
                    if query_pos != -1:
                        # Extract context around the query (200 chars before and after)
                        start = max(0, query_pos - 100)
                        end = min(len(node_content), query_pos + len(query) + 100)
                        snippet_text = node_content[start:end]

                        # Clean up the snippet (remove markdown syntax)
                        import re
                        snippet_text = re.sub(r'[#*`_\[\]()]', '', snippet_text)
                        snippet_text = re.sub(r'\s+', ' ', snippet_text).strip()

                        # Highlight the search term (case-insensitive)
                        snippet = re.sub(f'({re.escape(query)})', r'<mark>\1</mark>', snippet_text, flags=re.IGNORECASE)

                        # Add ellipsis if snippet is truncated
                        if start > 0:
                            snippet = "..." + snippet
                        if end < len(node_content):
                            snippet = snippet + "..."

                result = {
                    'name': item.name,
                    'type': 'directory' if item.is_dir() else ('article' if item.suffix == '.md' else 'file'),
                    'path': relative_path,
                    'display_path': item_display_path,
                    'snippet': snippet,
                    'relevance': 0,
                }

                # Add unique_icon for articles
                if item.suffix == '.md':
                    try:
                        # Convert path to URL for lookup in blog_posts
                        clean_url = '/' + relative_path[:-3]  # Remove .md extension
                        blog_posts = metadata_cache.get_blog_posts()
                        for post in blog_posts:
                            if post['url'] == clean_url:
                                if 'unique_icon' in post:
                                    result['unique_icon'] = post['unique_icon']
                                break
                    except Exception:
                        pass

                relevance = 0
                if query in node_name:
                    relevance += 10
                    if node_name.startswith(query):
                        relevance += 5
                if query in node_path:
                    relevance += 3
                if query in node_content:
                    relevance += 1
                    relevance += node_content.count(query) * 0.1

                result['relevance'] = relevance
                results.append(result)

            if item.is_dir():
                search_path(item, item_display_path)

    # Start searching from the data directory
    search_path(DATA_DIR)

    results.sort(key=lambda x: x['relevance'], reverse=True)
    return jsonify(results)


def collect_blog_posts():
    """Collect blog posts from essays and AI writings for RSS feed."""
    posts = []

    # Define blog post directories
    blog_dirs = [
        DATA_DIR / 'essays',
        DATA_DIR / 'artificial-intelligence'  # This will pick up root AI posts and scan subdirs
    ]

    def scan_for_posts(path, category=""):
        if not path.exists() or not path.is_dir():
            return

        for item in sorted(path.iterdir(), reverse=True):  # Most recent first
            if item.name.startswith('.') or item.name.lower() == 'index.md':
                continue

            if item.is_file() and item.suffix == '.md':
                # Get post data
                try:
                    content_data = render_markdown_file(item)

                    # Extract publication date using intelligent extraction
                    pub_date = extract_intelligent_date(item, content_data)

                    # Skip posts without determinable dates (no filename date, no YAML date, no content date)
                    if pub_date is None:
                        continue

                    # Create clean URL
                    relative_path = str(item.relative_to(DATA_DIR))
                    clean_url = '/' + relative_path[:-3]  # Remove .md extension

                    # Extract description from raw markdown (before HTML conversion)
                    description = ""
                    try:
                        with open(item, 'r', encoding='utf-8') as f:
                            raw_markdown = f.read()

                        # Skip front matter if present
                        if raw_markdown.startswith('---'):
                            parts = raw_markdown.split('---', 2)
                            if len(parts) >= 3:
                                raw_markdown = parts[2].strip()

                        # Split into lines and clean up, then find first meaningful content
                        lines = [line.strip() for line in raw_markdown.split('\n') if line.strip()]

                        # Find first line that contains substantial text content
                        for line in lines:
                            # Skip headers
                            if re.match(r'^\s*#{1,6}\s', line):
                                continue
                            # Skip images
                            if re.match(r'^\s*!\[[^\]]*\]\([^)]*\)\s*$', line):
                                continue
                            # Skip image references
                            if re.match(r'^\s*\[Image #\d+\]\s*$', line):
                                continue
                            # Skip date/metadata lines
                            if re.match(r'^\s*\*[^*]*\*\s*$', line):
                                continue
                            # Skip horizontal rules
                            if re.match(r'^\s*[-*_]{3,}\s*$', line):
                                continue

                            # Clean up markdown formatting in the line
                            clean_line = re.sub(r'!\[[^\]]*\]\([^)]*\)', '', line)  # Remove images
                            clean_line = re.sub(r'\[([^\]]*)\]\([^)]*\)', r'\1', clean_line)  # Convert links to text
                            clean_line = re.sub(r'[*_]{1,3}([^*_]+)[*_]{1,3}', r'\1', clean_line)  # Remove bold/italic
                            clean_line = re.sub(r'`([^`]+)`', r'\1', clean_line)  # Remove code formatting
                            clean_line = re.sub(r'#{1,6}\s*', '', clean_line)  # Remove header markers
                            clean_line = clean_line.strip()

                            if clean_line and len(clean_line) > 20:  # Must have substantial content
                                description = clean_line[:150] + '...' if len(clean_line) > 150 else clean_line
                                break
                    except Exception:
                        # Fallback to HTML method if raw reading fails
                        content_text = re.sub(r'<[^>]+>', '', content_data['content'])
                        if content_text.strip():
                            description = content_text.strip()[:150] + '...'

                    posts.append({
                        'title': content_data['title'],
                        'url': clean_url,
                        'description': description,
                        'pub_date': pub_date,
                        'category': category or item.parent.name.replace('-', ' ').title(),
                        'content': content_data['content'][:1000] + '...' if len(content_data['content']) > 1000 else content_data['content']
                    })
                except Exception:
                    continue
            elif item.is_dir():
                # Recursively scan subdirectories
                scan_for_posts(item, category or item.name.replace('-', ' ').title())

    # Scan each blog directory
    for blog_dir in blog_dirs:
        if blog_dir.exists():
            category = blog_dir.name.replace('-', ' ').title()
            if 'artificial-intelligence' in str(blog_dir):
                category = 'AI & Consciousness'
            scan_for_posts(blog_dir, category)

    # Sort by publication date (most recent first)
    posts.sort(key=lambda x: x['pub_date'], reverse=True)

    return posts[:20]  # Return most recent 20 posts


# Cache with TTL - cleared when date extraction logic changes
_blog_posts_cache = {'data': None, 'timestamp': 0}
_sidenotes_cache = {'data': None, 'timestamp': 0}
_outlines_cache = {'data': None, 'timestamp': 0}
_quotes_cache = {'data': None, 'timestamp': 0}
_connections_cache = {'data': None, 'timestamp': 0}
_external_links_cache = {'data': None, 'timestamp': 0}
_terms_cache = {'data': None, 'timestamp': 0}
CACHE_TTL = 36000  # 10 hours cache

# Force cache invalidation for filename change
import time
_force_cache_clear = time.time()  # Line-by-line filtering instead of paragraph-based


# Initialize unified cache on module load
class MetadataCache:
    """Clean interface to site metadata cache."""

    def __init__(self):
        self._data = None

    def initialize(self):
        """Load all site metadata in a single scan."""
        print("Starting unified cache generation...")
        self._data = _generate_all_caches_unified()
        print("Unified cache generation completed!")

    def get_sidenotes(self):
        """Get all sidenotes with metadata."""
        if not self._data:
            return {'articles': [], 'total_count': 0}

        sidenotes_data = self._data['sidenotes']['articles']  # {file_path: [sidenotes]}

        # Create file metadata lookup from blog_posts (fast dictionary lookup)
        file_metadata = {}
        for post in self._data.get('blog_posts', []):
            # Convert URL back to file path for lookup
            file_path = 'data' + post['url'] + '.md'
            file_metadata[file_path] = post

        articles = []
        for file_path, sidenotes in sidenotes_data.items():
            if not sidenotes:
                continue

            # Use pre-computed metadata instead of re-processing files
            metadata = file_metadata.get(file_path)
            if metadata:
                articles.append({
                    'title': metadata['title'],
                    'url': metadata['url'],
                    'date': metadata.get('pub_date'),
                    'category': metadata['category'].replace('-', ' ').title(),
                    'sidenotes': [{'text': s['text'], 'id': s.get('id')} for s in sidenotes],
                    'unique_icon': metadata.get('unique_icon')
                })

        # Sort by date (most recent first)
        articles.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

        return {
            'articles': articles,
            'total_count': self._data['sidenotes']['total_count']
        }

    def get_outlines(self):
        """Get all outlines with metadata."""
        if not self._data:
            return {'articles': [], 'total_count': 0}

        outlines_data = self._data['outlines']['articles']  # {file_path: [outlines]}

        # Create file metadata lookup from blog_posts (fast dictionary lookup)
        file_metadata = {}
        for post in self._data.get('blog_posts', []):
            # Convert URL back to file path for lookup
            file_path = 'data' + post['url'] + '.md'
            file_metadata[file_path] = post

        articles = []
        for file_path, outlines in outlines_data.items():
            if not outlines:
                continue

            # Use pre-computed metadata instead of re-processing files
            metadata = file_metadata.get(file_path)
            if metadata:
                # Process headings to extract IDs and create anchor URLs
                processed_headings = []
                for o in outlines:
                    # Always generate an ID from the text to ensure links work
                    import re
                    heading_id = re.sub(r'[^\w\s-]', '', o['text'].lower())
                    heading_id = re.sub(r'[-\s]+', '-', heading_id).strip('-')

                    # Try to extract ID from HTML if present (preferred)
                    if 'html' in o and o['html']:
                        id_match = re.search(r'id="([^"]*)"', o['html'])
                        if id_match and id_match.group(1):
                            heading_id = id_match.group(1)

                    processed_headings.append({
                        'level': int(o['level']),
                        'text': o['text'],
                        'id': heading_id,
                        'anchor_url': f"{metadata['url']}#{heading_id}"
                    })

                articles.append({
                    'title': metadata['title'],
                    'url': metadata['url'],
                    'date': metadata.get('pub_date'),
                    'category': metadata['category'].replace('-', ' ').title(),
                    'headings': processed_headings,
                    'unique_icon': metadata.get('unique_icon')
                })

        # Sort by date (most recent first)
        articles.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

        return {
            'articles': articles,
            'total_count': self._data['outlines']['total_count']
        }

    def get_quotes(self):
        """Get all quotes with metadata."""
        if not self._data:
            return {'articles': [], 'total_count': 0}

        quotes_data = self._data['quotes']['articles']

        # Create file metadata lookup from blog_posts (fast dictionary lookup)
        file_metadata = {}
        for post in self._data.get('blog_posts', []):
            # Convert URL back to file path for lookup
            file_path = 'data' + post['url'] + '.md'
            file_metadata[file_path] = post

        articles = []
        for file_path, quotes in quotes_data.items():
            if not quotes:
                continue

            # Use pre-computed metadata instead of re-processing files
            metadata = file_metadata.get(file_path)
            if metadata:
                articles.append({
                    'title': metadata['title'],
                    'url': metadata['url'],
                    'date': metadata.get('pub_date'),
                    'category': metadata['category'].replace('-', ' ').title(),
                    'quotes': [q['text'] for q in quotes],
                    'unique_icon': metadata.get('unique_icon')
                })

        articles.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

        return {
            'articles': articles,
            'total_count': self._data['quotes']['total_count']
        }

    def get_connections(self):
        """Get all connections with metadata in template-expected format."""
        if not self._data:
            return {'articles': [], 'total_count': 0, 'total_outgoing': 0, 'total_incoming': 0}

        connections_cache = self._data['connections']
        outgoing_refs = connections_cache.get('outgoing_refs', {})
        incoming_refs = connections_cache.get('incoming_refs', {})

        print(f"DEBUG: get_connections - outgoing_refs has {len(outgoing_refs)} files")
        print(f"DEBUG: get_connections - incoming_refs has {len(incoming_refs)} refs")

        # Create URL to metadata lookup from blog_posts (fast dictionary lookup)
        url_metadata = {}
        file_to_url = {}
        for post in self._data.get('blog_posts', []):
            url_metadata[post['url']] = post
            # Check for both possible file path keys
            file_path = post.get('file_path') or post.get('path')
            if file_path:
                file_to_url[file_path] = post['url']

        # Build articles with their connections
        articles = []

        print(f"DEBUG: file_to_url mapping has {len(file_to_url)} entries")

        # Process outgoing connections by file path
        for file_path, outgoing_list in outgoing_refs.items():
            article_url = file_to_url.get(file_path)
            if not article_url:
                print(f"DEBUG: No URL found for file_path: {file_path}")
                continue

            metadata = url_metadata.get(article_url)
            if not metadata:
                print(f"DEBUG: No metadata found for article_url: {article_url}")
                continue

            # Build outgoing connections with proper target_url and link_text
            processed_outgoing = []
            for conn in outgoing_list:
                processed_outgoing.append({
                    'target_url': conn['url'],
                    'link_text': conn['text']
                })

            # Get incoming connections for this article
            incoming_list = incoming_refs.get(article_url, [])
            processed_incoming = []
            for conn in incoming_list:
                # Find source metadata
                source_url = file_to_url.get(conn['source_file'])
                source_metadata = url_metadata.get(source_url) if source_url else None

                # If no URL mapping found, try to extract title from file
                source_title = 'Unknown'
                if source_metadata:
                    source_title = source_metadata['title']
                else:
                    # Try to extract title from the file itself
                    try:
                        file_path = conn['source_file']
                        if os.path.exists(file_path):
                            with open(file_path, 'r', encoding='utf-8') as f:
                                content = f.read()
                                # Look for markdown title (# Title)
                                for line in content.split('\n')[:10]:  # Check first 10 lines
                                    line = line.strip()
                                    if line.startswith('# '):
                                        source_title = line[2:].strip()
                                        break
                                    elif line.startswith('title:'):  # YAML frontmatter
                                        source_title = line[6:].strip().strip('"\'')
                                        break
                    except Exception:
                        pass  # Keep 'Unknown' if file reading fails

                processed_incoming.append({
                    'source_url': source_url or conn['source_file'],
                    'source_title': source_title,
                    'link_text': conn['text']
                })

            # Only include articles that have connections
            if processed_outgoing or processed_incoming:
                articles.append({
                    'title': metadata['title'],
                    'url': article_url,
                    'date': metadata.get('pub_date'),
                    'category': metadata['category'].replace('-', ' ').title(),
                    'connections': processed_outgoing,  # For backward compatibility
                    'outgoing_connections': processed_outgoing,
                    'incoming_connections': processed_incoming,
                    'unique_icon': metadata.get('unique_icon')
                })

        # Sort by date (most recent first)
        articles.sort(key=lambda x: x['date'] if x['date'] else datetime(1900, 1, 1), reverse=True)

        # Calculate totals
        total_outgoing = sum(len(article['outgoing_connections']) for article in articles)
        total_incoming = sum(len(article['incoming_connections']) for article in articles)

        return {
            'articles': articles,
            'total_count': total_outgoing + total_incoming,
            'total_outgoing': total_outgoing,
            'total_incoming': total_incoming
        }

    def get_terms(self):
        """Get all terms with metadata."""
        if not self._data:
            return {'terms': [], 'total_terms': 0, 'total_occurrences': 0}

        terms_data = self._data['terms']
        return {
            'terms': terms_data['terms'],
            'total_terms': len(terms_data['terms']),
            'total_occurrences': terms_data['total_occurrences']
        }

    def get_blog_posts(self):
        """Get all blog posts from unified cache."""
        if not self._data:
            return []

        return self._data.get('blog_posts', [])

# Global metadata cache instance
metadata_cache = MetadataCache()

def initialize_unified_cache():
    """Initialize unified cache at startup."""
    global _blog_posts_cache, _sidenotes_cache, _outlines_cache
    global _quotes_cache, _connections_cache, _terms_cache

    # Initialize the clean metadata cache
    metadata_cache.initialize()

def extract_intelligent_date(item_path, content_data=None):
    """Extract date intelligently, prioritizing filename patterns as requested."""
    pub_date = None

    # 1. PRIORITY: Try full YYYY-MM-DD format anywhere in filename first
    date_match = re.search(r'(\d{4}-\d{2}-\d{2})', item_path.name)
    if date_match:
        try:
            pub_date = datetime.strptime(date_match.group(1), '%Y-%m-%d')
            return pub_date
        except:
            pass

    # 2. Try YYYY-MM format at start of filename
    date_match = re.match(r'(\d{4}-\d{2})', item_path.stem)
    if date_match:
        try:
            # Extract day from content if present, otherwise use first of month
            day = 1
            try:
                with open(item_path, 'r', encoding='utf-8') as f:
                    content_preview = f.read(1000)
                day_match = re.search(r'(\d{4}-\d{2}-(\d{2}))', content_preview)
                if day_match:
                    day = int(day_match.group(2))
            except:
                pass

            pub_date = datetime.strptime(date_match.group(1) + f'-{day:02d}', '%Y-%m-%d')
            return pub_date
        except:
            pass

    # 3. Try just year at start of filename (YYYY)
    year_match = re.match(r'(\d{4})', item_path.stem)
    if year_match:
        try:
            # Try to get month from content, otherwise use January
            year = int(year_match.group(1))
            month = 1
            day = 1

            try:
                with open(item_path, 'r', encoding='utf-8') as f:
                    first_few_lines = ''.join(f.readlines()[:10])

                # Look for "*Month YYYY*" pattern in content
                month_match = re.search(r'\*([A-Za-z]+)\s+' + str(year) + r'\*', first_few_lines)
                if month_match:
                    month_name = month_match.group(1)
                    month = datetime.strptime(month_name, '%B').month
            except:
                pass

            pub_date = datetime(year, month, day)
            return pub_date
        except:
            pass

    # 4. Check YAML front matter for date (lower priority now)
    if content_data and content_data['metadata'].get('date'):
        try:
            if isinstance(content_data['metadata']['date'], list):
                pub_date = datetime.strptime(content_data['metadata']['date'][0], '%Y-%m-%d')
            else:
                pub_date = datetime.strptime(str(content_data['metadata']['date']), '%Y-%m-%d')
            return pub_date
        except:
            pass

    # 5. Check for date in content (look for *Month YYYY* pattern)
    try:
        with open(item_path, 'r', encoding='utf-8') as f:
            first_few_lines = ''.join(f.readlines()[:10])

        # Look for patterns like "*January 2025*" or "*Month YYYY*"
        month_year_match = re.search(r'\*([A-Za-z]+\s+\d{4})\*', first_few_lines)
        if month_year_match:
            try:
                pub_date = datetime.strptime(month_year_match.group(1), '%B %Y')
                # Set to first day of month for month-only dates
                pub_date = pub_date.replace(day=1)
                return pub_date
            except:
                pass
    except:
        pass

    # 6. Final fallback: if no date found anywhere, return None
    # (Removed file creation time fallback due to deployment issues)
    return None


def _collect_all_blog_posts_cached():
    """Internal cached function to collect all blog posts with TTL."""
    current_time = time.time()

    # Check if cache is valid
    if (_blog_posts_cache['data'] is not None and
        current_time - _blog_posts_cache['timestamp'] < CACHE_TTL and
        _blog_posts_cache['timestamp'] > _force_cache_clear):
        return _blog_posts_cache['data']

    # Cache miss or expired - rebuild
    posts = []

    # Define blog post directories
    blog_dirs = [
        DATA_DIR / 'essays',
        DATA_DIR / 'artificial-intelligence',  # This will pick up root AI posts and scan subdirs
        DATA_DIR / 'software',
        DATA_DIR / 'poetry',
        DATA_DIR / 'talks',
        DATA_DIR / 'themes'
    ]

    def scan_for_posts(path, category=""):
        if not path.exists() or not path.is_dir():
            return

        for item in sorted(path.iterdir(), reverse=True):  # Most recent first
            if item.name.startswith('.') or item.name.lower() == 'index.md':
                continue

            if item.is_file() and item.suffix == '.md':
                # Get post data
                try:
                    content_data = render_markdown_file(item)

                    # Extract publication date using intelligent extraction
                    pub_date = extract_intelligent_date(item, content_data)

                    # Skip posts without determinable dates (no filename date, no YAML date, no content date)
                    if pub_date is None:
                        continue

                    # Create clean URL
                    relative_path = str(item.relative_to(DATA_DIR))
                    clean_url = '/' + relative_path[:-3]  # Remove .md extension

                    # Extract description from raw markdown (before HTML conversion)
                    description = ""
                    try:
                        with open(item, 'r', encoding='utf-8') as f:
                            raw_markdown = f.read()

                        # Skip front matter if present
                        if raw_markdown.startswith('---'):
                            parts = raw_markdown.split('---', 2)
                            if len(parts) >= 3:
                                raw_markdown = parts[2].strip()

                        # Split into lines and clean up, then find first meaningful content
                        lines = [line.strip() for line in raw_markdown.split('\n') if line.strip()]

                        # Find first line that contains substantial text content
                        for line in lines:
                            # Skip headers
                            if re.match(r'^\s*#{1,6}\s', line):
                                continue
                            # Skip images
                            if re.match(r'^\s*!\[[^\]]*\]\([^)]*\)\s*$', line):
                                continue
                            # Skip image references
                            if re.match(r'^\s*\[Image #\d+\]\s*$', line):
                                continue
                            # Skip date/metadata lines
                            if re.match(r'^\s*\*[^*]*\*\s*$', line):
                                continue
                            # Skip horizontal rules
                            if re.match(r'^\s*[-*_]{3,}\s*$', line):
                                continue

                            # Clean up markdown formatting in the line
                            clean_line = re.sub(r'!\[[^\]]*\]\([^)]*\)', '', line)  # Remove images
                            clean_line = re.sub(r'\[([^\]]*)\]\([^)]*\)', r'\1', clean_line)  # Convert links to text
                            clean_line = re.sub(r'[*_]{1,3}([^*_]+)[*_]{1,3}', r'\1', clean_line)  # Remove bold/italic
                            clean_line = re.sub(r'`([^`]+)`', r'\1', clean_line)  # Remove code formatting
                            clean_line = re.sub(r'#{1,6}\s*', '', clean_line)  # Remove header markers
                            clean_line = clean_line.strip()

                            if clean_line and len(clean_line) > 20:  # Must have substantial content
                                description = clean_line[:150] + '...' if len(clean_line) > 150 else clean_line
                                break
                    except Exception:
                        # Fallback to HTML method if raw reading fails
                        content_text = re.sub(r'<[^>]+>', '', content_data['content'])
                        if content_text.strip():
                            description = content_text.strip()[:150] + '...'

                    posts.append({
                        'title': content_data['title'],
                        'url': clean_url,
                        'description': description,
                        'pub_date': pub_date,
                        'category': category or item.parent.name.replace('-', ' ').title(),
                        'content': content_data['content'][:1000] + '...' if len(content_data['content']) > 1000 else content_data['content']
                    })
                except Exception:
                    continue
            elif item.is_dir():
                # Recursively scan subdirectories
                scan_for_posts(item, category or item.name.replace('-', ' ').title())

    # Scan each blog directory
    for blog_dir in blog_dirs:
        if blog_dir.exists():
            category = blog_dir.name.replace('-', ' ').title()
            if 'artificial-intelligence' in str(blog_dir):
                category = 'AI & Consciousness'
            scan_for_posts(blog_dir, category)

    # Sort by publication date (most recent first)
    posts.sort(key=lambda x: x['pub_date'], reverse=True)

    # Update cache
    result = tuple(posts)
    _blog_posts_cache['data'] = result
    _blog_posts_cache['timestamp'] = time.time()

    return result


def collect_all_blog_posts():
    """Public function to collect all blog posts - converts cached tuple back to list."""
    return list(_collect_all_blog_posts_cached())


def preload_blog_posts():
    """Preload blog posts cache at startup for faster initial page loads."""
    print("Preloading blog posts cache...")
    start_time = time.time()
    posts = _collect_all_blog_posts_cached()
    load_time = time.time() - start_time
    print(f"Loaded {len(posts)} posts in {load_time:.2f}s")


def preload_sidenotes():
    """Preload sidenotes cache at startup for faster initial page loads."""
    print("Preloading sidenotes cache...")
    start_time = time.time()
    sidenotes_data = _extract_all_sidenotes_cached()
    load_time = time.time() - start_time
    print(f"Extracted {sidenotes_data['total_count']} sidenotes from {len(sidenotes_data['articles'])} articles in {load_time:.2f}s")


def preload_outlines():
    """Preload outlines cache at startup for faster initial page loads."""
    print("Preloading outlines cache...")
    start_time = time.time()
    outlines_data = _extract_all_outlines_cached()
    load_time = time.time() - start_time
    print(f"Extracted {outlines_data['total_count']} headings from {len(outlines_data['articles'])} articles in {load_time:.2f}s")


def preload_quotes():
    """Preload quotes cache at startup for faster initial page loads."""
    print("Preloading quotes cache...")
    start_time = time.time()
    quotes_data = _extract_all_quotes_cached()
    load_time = time.time() - start_time
    print(f"Extracted {quotes_data['total_count']} quotes from {len(quotes_data['articles'])} articles in {load_time:.2f}s")


def preload_connections():
    """Preload connections cache at startup for faster initial page loads."""
    print("Preloading connections cache...")
    start_time = time.time()
    connections_data = _extract_all_connections_cached()
    load_time = time.time() - start_time
    print(f"Extracted {connections_data['total_count']} cross-references in {load_time:.2f}s")


def _extract_all_external_links_cached():
    """Extract all external links from articles with 10-hour TTL cache."""
    current_time = time.time()

    # Check if cache is still valid (10 hour TTL)
    if (_external_links_cache['data'] is not None and
        current_time - _external_links_cache['timestamp'] < CACHE_TTL and
        _external_links_cache['timestamp'] > _force_cache_clear):
        return _external_links_cache['data']

    posts = _collect_all_blog_posts_cached()
    articles_with_links = []
    total_count = 0
    domain_counts = defaultdict(int)

    # Pattern to match external links (http/https URLs that don't start with current domain)
    external_link_pattern = r'<a[^>]*href="(https?://[^"]*)"[^>]*>(.*?)</a>'

    for post in posts:
        external_links = []

        # Find all external links in content
        matches = re.findall(external_link_pattern, post['content'], re.IGNORECASE | re.DOTALL)

        for url, link_text in matches:
            # Skip internal links (adjust domain as needed)
            if 'kennethreitz.org' not in url:
                # Clean link text
                clean_text = re.sub(r'<[^>]+>', '', link_text).strip()
                if not clean_text:
                    clean_text = url

                # Extract domain for stats
                domain = re.match(r'https?://(?:www\.)?([^/]+)', url)
                if domain:
                    domain_counts[domain.group(1)] += 1

                external_links.append({
                    'url': url,
                    'link_text': clean_text[:100],  # Truncate very long link text
                    'domain': domain.group(1) if domain else 'unknown'
                })

        if external_links:
            articles_with_links.append({
                'title': post['title'],
                'url': post['url'],
                'date': post.get('date'),
                'category': post.get('category', 'Unknown'),
                'external_links': external_links
            })
            total_count += len(external_links)

    # Sort articles by publication date (most recent first)
    articles_with_links.sort(key=lambda x: x['date'] or datetime.min, reverse=True)

    # Sort domains by frequency
    top_domains = sorted(domain_counts.items(), key=lambda x: x[1], reverse=True)

    result = {
        'articles': articles_with_links,
        'total_count': total_count,
        'domain_stats': top_domains
    }

    # Cache the result
    _external_links_cache['data'] = result
    _external_links_cache['timestamp'] = current_time

    return result


def preload_external_links():
    """Preload external links cache at startup for faster initial page loads."""
    print("Preloading external links cache...")
    start_time = time.time()
    links_data = _extract_all_external_links_cached()
    load_time = time.time() - start_time
    print(f"Extracted {links_data['total_count']} external links from {len(links_data['articles'])} articles in {load_time:.2f}s")


def _extract_all_terms_cached():
    """Return pre-loaded terms cache data (pure RAM, no TTL)."""
    # Return pre-loaded cache data if available
    if _terms_cache['data'] is not None:
        return _terms_cache['data']

    posts = _collect_all_blog_posts_cached()
    term_occurrences = defaultdict(list)  # term -> [(article_title, article_url, count)]

    # Common stop words to filter out
    stop_words = {
        'the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with',
        'by', 'from', 'as', 'an', 'a', 'is', 'was', 'are', 'were', 'be', 'been',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
        'may', 'might', 'can', 'this', 'that', 'these', 'those', 'i', 'you', 'he',
        'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them', 'my', 'your',
        'his', 'its', 'our', 'their', 'not', 'all', 'some', 'any', 'each', 'every',
        'one', 'two', 'if', 'then', 'so', 'when', 'where', 'how', 'why', 'what',
        'about', 'into', 'through', 'during', 'before', 'after', 'above', 'below',
        'up', 'down', 'out', 'off', 'over', 'under', 'again', 'further', 'then',
        'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both',
        'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'only',
        'own', 'same', 'than', 'too', 'very', 'just', 'now', 'also', 'often', 'really',
        'much', 'many', 'way', 'well', 'even', 'still', 'get', 'go', 'come', 'make',
        'take', 'know', 'see', 'think', 'say', 'work', 'feel', 'look', 'seem', 'want',
        'use', 'find', 'give', 'tell', 'ask', 'try', 'help', 'need', 'become', 'turn',
        'start', 'show', 'hear', 'play', 'run', 'move', 'live', 'believe', 'hold',
        'bring', 'happen', 'write', 'provide', 'sit', 'stand', 'lose', 'pay', 'meet'
    }

    # Technical terms that should always be included
    important_terms = {
        'API', 'HTTP', 'Python', 'JavaScript', 'AI', 'ML', 'consciousness', 'algorithm',
        'Requests', 'Flask', 'Django', 'GitHub', 'software', 'programming', 'technology',
        'artificial intelligence', 'machine learning', 'open source', 'philosophy'
    }

    for post in posts:
        # Clean content - remove HTML tags and get plain text
        import re
        clean_content = re.sub(r'<[^>]+>', ' ', post['content'])
        clean_content = re.sub(r'\s+', ' ', clean_content)

        # Extract potential terms using multiple strategies
        terms_in_post = defaultdict(int)

        # Strategy 1: Capitalized words/phrases (likely proper nouns, concepts)
        capitalized_terms = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', clean_content)
        for term in capitalized_terms:
            if len(term) > 2 and term.lower() not in stop_words:
                terms_in_post[term] += 1

        # Strategy 2: Technical terms in quotes or emphasized
        quoted_terms = re.findall(r'["\']([^"\']{3,30})["\']', clean_content)
        for term in quoted_terms:
            if not term.lower() in stop_words and len(term.split()) <= 3:
                terms_in_post[term] += 1

        # Strategy 3: Acronyms and technical terms
        acronyms = re.findall(r'\b[A-Z]{2,8}\b', clean_content)
        for term in acronyms:
            if term not in ['THE', 'AND', 'FOR', 'BUT', 'NOT']:
                terms_in_post[term] += 2  # Weight acronyms higher

        # Strategy 4: Important technical words
        words = re.findall(r'\b\w{4,}\b', clean_content.lower())
        for word in words:
            if word in important_terms or word.lower() in important_terms:
                terms_in_post[word] += 1

        # Strategy 5: Multi-word technical phrases
        tech_phrases = [
            'artificial intelligence', 'machine learning', 'open source', 'user experience',
            'mental health', 'spiritual practice', 'human consciousness', 'digital mind',
            'for humans', 'API design', 'software development', 'programming language'
        ]
        for phrase in tech_phrases:
            if phrase.lower() in clean_content.lower():
                terms_in_post[phrase] += 2

        # Add significant terms to the global index
        for term, count in terms_in_post.items():
            if count >= 1:  # Must appear at least once
                term_occurrences[term].append({
                    'title': post['title'],
                    'url': post['url'],
                    'count': count
                })

    # Filter and organize terms
    final_terms = {}
    for term, occurrences in term_occurrences.items():
        # Only include terms that appear in multiple articles OR appear frequently in one
        total_occurrences = sum(occ['count'] for occ in occurrences)
        if len(occurrences) >= 2 or total_occurrences >= 3:
            # Sort articles by term frequency within each article
            occurrences.sort(key=lambda x: x['count'], reverse=True)
            final_terms[term] = {
                'articles': occurrences,
                'total_count': total_occurrences,
                'article_count': len(occurrences)
            }

    # Sort terms alphabetically
    sorted_terms = dict(sorted(final_terms.items(), key=lambda x: x[0].lower()))

    result = {
        'terms': sorted_terms,
        'total_terms': len(sorted_terms),
        'total_occurrences': sum(term_data['total_count'] for term_data in sorted_terms.values())
    }

    # Cache the result
    _terms_cache['data'] = result

    return result


def preload_terms():
    """Preload terms cache at startup for faster initial page loads."""
    print("Preloading terms cache...")
    start_time = time.time()
    terms_data = _extract_all_terms_cached()
    load_time = time.time() - start_time
    print(f"Extracted {terms_data['total_terms']} terms with {terms_data['total_occurrences']} total occurrences in {load_time:.2f}s")


def find_related_posts(current_post_path, limit=3):
    """Find related posts based on category and content similarity."""
    posts = collect_all_blog_posts()
    current_post_url = '/' + current_post_path[:-3] if current_post_path.endswith('.md') else '/' + current_post_path

    # Find current post
    current_post = None
    for post in posts:
        if post['url'] == current_post_url:
            current_post = post
            break

    if not current_post:
        return []

    # Score related posts
    related_posts = []
    for post in posts:
        if post['url'] == current_post_url:
            continue  # Skip current post

        score = 0

        # Category match gets high score
        if post['category'] == current_post['category']:
            score += 10

        # Check for common words in titles (simple text similarity)
        current_title_words = set(current_post['title'].lower().split())
        post_title_words = set(post['title'].lower().split())
        common_title_words = current_title_words.intersection(post_title_words)
        score += len(common_title_words) * 2

        # Check for common words in descriptions
        current_desc_words = set(current_post['description'].lower().split()) if current_post['description'] else set()
        post_desc_words = set(post['description'].lower().split()) if post['description'] else set()
        common_desc_words = current_desc_words.intersection(post_desc_words)
        score += len(common_desc_words) * 0.5

        # Prefer more recent posts (slight boost)
        days_diff = abs((current_post['pub_date'] - post['pub_date']).days)
        if days_diff < 365:  # Posts within a year get a small boost
            score += max(0, (365 - days_diff) / 365)

        if score > 0:
            related_posts.append((post, score))

    # Sort by score and return top N
    related_posts.sort(key=lambda x: x[1], reverse=True)
    return [post for post, score in related_posts[:limit]]


def find_adjacent_posts(current_post_path):
    """Find next and previous posts chronologically."""
    posts = collect_all_blog_posts()
    current_post_url = '/' + current_post_path[:-3] if current_post_path.endswith('.md') else '/' + current_post_path

    # Find current post index
    current_index = None
    for i, post in enumerate(posts):
        if post['url'] == current_post_url:
            current_index = i
            break

    if current_index is None:
        return None, None

    # Get previous (newer) and next (older) posts
    prev_post = posts[current_index - 1] if current_index > 0 else None
    next_post = posts[current_index + 1] if current_index < len(posts) - 1 else None

    return prev_post, next_post


def generate_sitemap_data():
    """Generate sitemap data by recursively scanning the data directory."""
    sitemap_items = []

    def scan_directory(path, url_path=""):
        if not path.exists() or not path.is_dir():
            return

        for item in sorted(path.iterdir()):
            if item.name.startswith('.'):
                continue

            item_url_path = f"{url_path}/{item.name}" if url_path else item.name

            if item.is_dir():
                # Add directory to sitemap
                sitemap_items.append({
                    'url': f"/{item_url_path}",
                    'title': item.name.replace('-', ' ').replace('_', ' ').title(),
                    'type': 'directory',
                    'modified': datetime.fromtimestamp(item.stat().st_mtime)
                })
                # Recursively scan subdirectories
                scan_directory(item, item_url_path)
            elif item.suffix == '.md':
                # Remove .md extension for clean URLs
                clean_url_path = item_url_path[:-3] if item_url_path.endswith('.md') else item_url_path

                # Get title from file content
                title = item.stem.replace('-', ' ').replace('_', ' ').title()
                try:
                    content_data = render_markdown_file(item)
                    title = content_data['title']
                except:
                    pass

                sitemap_items.append({
                    'url': f"/{clean_url_path}",
                    'title': title,
                    'type': 'article',
                    'modified': datetime.fromtimestamp(item.stat().st_mtime)
                })

    # Start scanning from data directory
    scan_directory(DATA_DIR)

    # Add static pages
    static_pages = [
        {'url': '/', 'title': 'Kenneth Reitz - Digital Mind Map', 'type': 'homepage'},
        {'url': '/directory', 'title': 'File Explorer', 'type': 'directory'},
        {'url': '/sitemap', 'title': 'Site Map', 'type': 'sitemap'}
    ]

    return static_pages + sitemap_items

@app.route('/sitemap')
def sitemap():
    """Show the site sitemap."""
    sitemap_data = generate_sitemap_data()

    # Group by type
    grouped_sitemap = {
        'homepage': [],
        'directory': [],
        'article': [],
        'sitemap': []
    }

    for item in sitemap_data:
        item_type = item.get('type', 'article')
        if item_type in grouped_sitemap:
            grouped_sitemap[item_type].append(item)

    return render_template('sitemap.html',
                         title='Site Map',
                         sitemap_data=grouped_sitemap,
                         total_items=len(sitemap_data),
                         breadcrumbs=[],
                         current_year=datetime.now().year,
                         current_page='Site Map')

@app.route('/sitemap.xml')
def sitemap_xml():
    """Generate XML sitemap for search engines."""
    sitemap_data = generate_sitemap_data()

    xml_content = '<?xml version="1.0" encoding="UTF-8"?>\n'
    xml_content += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'

    for item in sitemap_data:
        xml_content += '  <url>\n'
        xml_content += f'    <loc>https://kennethreitz.org{escape(item["url"])}</loc>\n'
        if 'modified' in item:
            xml_content += f'    <lastmod>{item["modified"].strftime("%Y-%m-%d")}</lastmod>\n'
        xml_content += '  </url>\n'

    xml_content += '</urlset>'

    return Response(xml_content, mimetype='application/xml')


@app.route('/feed.xml')
@app.route('/rss.xml')
def rss_feed():
    """Generate RSS feed with full article content."""
    posts = collect_all_blog_posts()  # Use all posts like the archive page

    # Generate RSS XML with full content
    rss_content = '<?xml version="1.0" encoding="UTF-8"?>\n'
    rss_content += '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">\n'
    rss_content += '  <channel>\n'
    rss_content += '    <title>Kenneth Reitz - Essays &amp; AI Writings</title>\n'
    rss_content += '    <description>Complete archive with full articles - Essays, AI consciousness research, and philosophical explorations</description>\n'
    rss_content += '    <link>https://kennethreitz.org</link>\n'
    rss_content += '    <atom:link href="https://kennethreitz.org/feed.xml" rel="self" type="application/rss+xml" />\n'
    rss_content += f'    <lastBuildDate>{datetime.now().strftime("%a, %d %b %Y %H:%M:%S GMT")}</lastBuildDate>\n'
    rss_content += '    <language>en-us</language>\n'
    rss_content += '    <managingEditor>me@kennethreitz.org (Kenneth Reitz)</managingEditor>\n'
    rss_content += '    <webMaster>me@kennethreitz.org (Kenneth Reitz)</webMaster>\n'

    for post in posts:
        # Get full content for this post by re-reading the file
        try:
            # Build the file path - post['url'] is like '/essays/2025-09-01-something'
            relative_path = post['url'][1:]  # Remove leading /
            file_path = DATA_DIR / (relative_path + '.md')

            if file_path.exists():
                full_content_data = render_markdown_file(file_path)
                full_content = full_content_data['content']
            else:
                # Fallback to stored content (truncated)
                full_content = post.get('content', post['description'])
        except Exception as e:
            # Debug: use description with error info
            full_content = f"{post['description']} <!-- Error loading full content: {str(e)} -->"

        rss_content += '    <item>\n'
        rss_content += f'      <title>{escape(post["title"])}</title>\n'
        rss_content += f'      <link>https://kennethreitz.org{post["url"]}</link>\n'
        rss_content += f'      <description>{escape(post["description"])}</description>\n'
        rss_content += f'      <content:encoded><![CDATA[{full_content}]]></content:encoded>\n'
        rss_content += f'      <category>{escape(post["category"])}</category>\n'
        rss_content += f'      <pubDate>{post["pub_date"].strftime("%a, %d %b %Y %H:%M:%S GMT")}</pubDate>\n'
        rss_content += f'      <guid>https://kennethreitz.org{post["url"]}</guid>\n'
        rss_content += '    </item>\n'

    rss_content += '  </channel>\n'
    rss_content += '</rss>'

    return Response(rss_content, mimetype='application/rss+xml')


# Preload caches concurrently for faster startup (works with both direct run and Gunicorn)
import concurrent.futures
import threading

def preload_all_caches():
    """Run all cache preloading functions sequentially to reduce memory usage."""
    print("Starting background cache preloading...")

    preload_functions = [
        ("blog posts", preload_blog_posts),
        ("sidenotes", preload_sidenotes),
        ("outlines", preload_outlines),
        ("quotes", preload_quotes),
        ("connections", preload_connections),
        ("terms", preload_terms)
    ]

    for name, func in preload_functions:
        try:
            func()
        except Exception as e:
            print(f"Error preloading {name}: {e}")
            import traceback
            traceback.print_exc()

    print("Background cache preloading completed!")

def start_background_preload():
    """Start cache preloading in a background daemon thread."""
    cache_thread = threading.Thread(target=preload_all_caches, daemon=True)
    cache_thread.start()
    print("Cache preloading started in background. App ready to serve requests!")

# Only start background preloading once, not in every Gunicorn worker
# Use a lock file to ensure only one process does the preloading
import os
import fcntl
import atexit

cache_lock_file = None

def should_preload_caches():
    """Check if this process should handle cache preloading."""
    global cache_lock_file

    # Skip preloading since we already initialized unified cache
    print("Skipping runtime preload - unified cache already loaded!")
    return False

    # Default to preloading (better for reliability and single-container deployments)
    # Only skip if we explicitly can't get the lock
    try:
        # Create a lock file in app directory (more reliable than /tmp in Docker)
        lock_path = '.cache_preload.lock'
        cache_lock_file = open(lock_path, 'w')
        # Try to acquire exclusive lock (non-blocking)
        fcntl.lockf(cache_lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
        # If we got here, we got the lock - we should preload

        # Clean up lock on exit
        def cleanup_lock():
            if cache_lock_file:
                cache_lock_file.close()
                try:
                    os.unlink(lock_path)
                except:
                    pass
        atexit.register(cleanup_lock)
        return True
    except (IOError, OSError):
        # Lock is already held by another process - skip preloading
        if cache_lock_file:
            cache_lock_file.close()
        return False

# Initialize unified cache at startup (after all functions are defined)
initialize_unified_cache()

# Start background preloading only in one process (and only if needed)
if should_preload_caches():
    start_background_preload()

if __name__ == '__main__':
    app.run(debug=True, host='0.0.0.0', port=8000)