From ba3d23c331baafebbe9abf5eb754d7f04285a730 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 19 May 2025 18:32:23 -0400 Subject: [PATCH] test --- tuftedoc.py | 220 +++++++++++++++++++++++++++++----------------------- 1 file changed, 125 insertions(+), 95 deletions(-) diff --git a/tuftedoc.py b/tuftedoc.py index f5705a2..30a0b00 100644 --- a/tuftedoc.py +++ b/tuftedoc.py @@ -217,7 +217,7 @@ def title_case(s: str) -> str: def extract_frontmatter(markdown_content: str) -> Tuple[Optional[Dict[str, Any]], str]: """Extract frontmatter from markdown content.""" frontmatter_match = re.match(r"^---\n(.*?)\n---\n(.*)", markdown_content, re.DOTALL) - + if frontmatter_match: try: # Parse the YAML frontmatter @@ -239,7 +239,7 @@ def parse_metadata(frontmatter: Optional[Dict[str, Any]]) -> Optional[Metadata]: """Parse frontmatter into a Metadata object.""" if not frontmatter: return None - + # Extract known fields metadata_dict = { "title": frontmatter.get("title"), @@ -251,24 +251,24 @@ def parse_metadata(frontmatter: Optional[Dict[str, Any]]) -> Optional[Metadata]: "draft": frontmatter.get("draft", False), "layout": frontmatter.get("layout"), } - + # Copy any extra fields to the extra dict - extra_fields = {k: v for k, v in frontmatter.items() + extra_fields = {k: v for k, v in frontmatter.items() if k not in metadata_dict} - + if extra_fields: metadata_dict["extra"] = extra_fields - + return Metadata(**metadata_dict) def get_h1_from_markdown(file_path: pathlib.Path) -> Optional[str]: with file_path.open("r", encoding="utf-8") as f: content = f.read() - + # Extract frontmatter and content _, content_without_frontmatter = extract_frontmatter(content) - + # Look for the first heading match = re.search(r"^#\s+(.+)$", content_without_frontmatter, re.MULTILINE) return match.group(1) if match else None @@ -279,7 +279,7 @@ def get_markdown_metadata(file_path: pathlib.Path) -> Optional[Metadata]: try: with file_path.open("r", encoding="utf-8") as f: content = f.read() - + frontmatter, _ = extract_frontmatter(content) return parse_metadata(frontmatter) except Exception as e: @@ -291,7 +291,7 @@ def generate_summary(markdown_content: str, max_length: int = 150) -> str: """Generate a summary from markdown content.""" # Remove frontmatter if present _, content = extract_frontmatter(markdown_content) - + # Remove markdown formatting # Strip headers content = re.sub(r"^#{1,6}\s+.*$", "", content, flags=re.MULTILINE) @@ -305,14 +305,14 @@ def generate_summary(markdown_content: str, max_length: int = 150) -> str: content = re.sub(r"`[^`]+`", "", content) # Strip bold/italic content = re.sub(r"\*\*|\*|__|\b_\b", "", content) - + # Clean up whitespace content = re.sub(r"\s+", " ", content).strip() - + # Truncate to max_length if len(content) > max_length: content = content[:max_length-3] + "..." - + return content @@ -326,8 +326,19 @@ def get_directory_title(dir_path: pathlib.Path) -> str: def get_clean_url(path: str) -> str: - return "/" + path.rsplit(".", 1)[0] if path.endswith(".md") else "/" + path - + """ + Get a clean URL for a path. + - For markdown files: remove the .md extension + - For directories: ensure they end with a trailing slash + """ + if path.endswith(".md"): + return "/" + path.rsplit(".", 1)[0] + else: + # Check if it's a directory path by checking the filesystem + full_path = MARKDOWN_DIR / path + if full_path.is_dir() and not path.endswith('/'): + return "/" + path + "/" + return "/" + path def get_file_creation_date(file_path: pathlib.Path) -> datetime: return datetime.fromtimestamp(file_path.stat().st_ctime) @@ -355,28 +366,27 @@ def get_exif_data(image_path: pathlib.Path) -> dict: logger.error(f"Error reading EXIF data: {e}") return {} - def process_directory(full_path: pathlib.Path) -> Tuple[List[FileInfo], Optional[str], Optional[Metadata]]: logger.info(f"Processing directory: {full_path}") all_items = [] - + # For sorting, we'll track directories and files separately directories = [] files = [] - + for item in full_path.iterdir(): if item.name in [".DS_Store", ".git", "node_modules"] or item.name.startswith("."): continue - + if item.name == "index.md": continue - + is_image_file = is_image(item) exif_data = get_exif_data(item) if is_image_file else None - + # Get metadata for markdown files metadata = get_markdown_metadata(item) if item.suffix == ".md" else None - + # Get summary for markdown files summary = None if item.suffix == ".md": @@ -386,7 +396,7 @@ def process_directory(full_path: pathlib.Path) -> Tuple[List[FileInfo], Optional summary = generate_summary(content) except Exception as e: logger.error(f"Error generating summary for {item}: {e}") - + # Use metadata title if available, otherwise use H1 or directory title title = None if metadata and metadata.title: @@ -397,10 +407,15 @@ def process_directory(full_path: pathlib.Path) -> Tuple[List[FileInfo], Optional if item.is_dir() else (get_h1_from_markdown(item) if item.suffix == ".md" else None) ) - + + # Ensure directory URLs end with a trailing slash + item_url = get_clean_url(str(item.relative_to(MARKDOWN_DIR))) + if item.is_dir() and not item_url.endswith('/'): + item_url += '/' + item_data = FileInfo( name=title_case(item.name), - url=get_clean_url(str(item.relative_to(MARKDOWN_DIR))), + url=item_url, # Use the corrected URL with trailing slash for directories slug=item.name, ctime=os.path.getctime(item), mtime=os.path.getmtime(item), @@ -411,7 +426,7 @@ def process_directory(full_path: pathlib.Path) -> Tuple[List[FileInfo], Optional metadata=metadata, summary=summary, ) - + if item.is_dir(): directories.append(item_data) else: @@ -422,11 +437,11 @@ def process_directory(full_path: pathlib.Path) -> Tuple[List[FileInfo], Optional # Sort directories and files separately directories.sort(key=lambda x: x.name.lower()) - + # Sort files by modification time (newest first) by default, # but can be sorted by other fields based on query parameters files.sort(key=lambda x: x.mtime, reverse=True) - + # Combine directories and files all_items = directories + files @@ -434,25 +449,24 @@ def process_directory(full_path: pathlib.Path) -> Tuple[List[FileInfo], Optional index_file = full_path / "index.md" index_content = None index_metadata = None - + if index_file.exists(): index_content, index_metadata = process_markdown_file_with_metadata(index_file) logger.info(f"Processed {len(all_items)} items in directory: {full_path}") return all_items, index_content, index_metadata - def process_markdown_file(file_path: pathlib.Path) -> str: """Process markdown file and return the rendered HTML content.""" logger.info(f"Processing markdown file: {file_path}") - + try: with file_path.open("r", encoding="utf-8") as f: markdown_content = f.read() # Extract frontmatter and content _, content_without_frontmatter = extract_frontmatter(markdown_content) - + # Convert markdown to HTML html_content = markdown(content_without_frontmatter) return html_content @@ -464,20 +478,20 @@ def process_markdown_file(file_path: pathlib.Path) -> str: def process_markdown_file_with_metadata(file_path: pathlib.Path) -> Tuple[str, Optional[Metadata]]: """Process markdown file and return both the rendered content and metadata.""" logger.info(f"Processing markdown file with metadata: {file_path}") - + try: with file_path.open("r", encoding="utf-8") as f: markdown_content = f.read() # Extract frontmatter and content frontmatter, content_without_frontmatter = extract_frontmatter(markdown_content) - + # Parse metadata metadata = parse_metadata(frontmatter) - + # Convert markdown to HTML html_content = markdown(content_without_frontmatter) - + return html_content, metadata except Exception as e: logger.error(f"Error processing markdown file with metadata {file_path}: {e}") @@ -513,43 +527,43 @@ def generate_title_from_breadcrumbs(breadcrumbs: List[Breadcrumb]) -> str: def generate_directory_tree(directory_path: pathlib.Path, max_depth: int = 3, current_depth: int = 0) -> List[Dict[str, Any]]: """ Generate a recursive directory tree structure. - + Args: directory_path: The path to the directory max_depth: Maximum depth to traverse (to avoid infinite recursion) current_depth: Current recursion depth - + Returns: A list of dictionaries representing the directory tree structure """ if current_depth > max_depth: return [] - + result = [] try: for item in sorted(directory_path.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower())): # Skip hidden files and directories if item.name.startswith(".") or item.name in ["__pycache__", "node_modules"]: continue - + # Get item metadata is_dir = item.is_dir() metadata = get_markdown_metadata(item) if item.suffix == ".md" else None - + # Skip draft content unless in debug mode if metadata and metadata.draft and not os.environ.get("DEBUG"): continue - + # Get title (prioritize metadata, then H1, then formatted name) title = None if metadata and metadata.title: title = metadata.title elif item.suffix == ".md": title = get_h1_from_markdown(item) - + if not title: title = title_case(item.name) - + # Create node node = { "name": item.name, @@ -559,17 +573,17 @@ def generate_directory_tree(directory_path: pathlib.Path, max_depth: int = 3, cu "is_dir": is_dir, "has_index": (item / "index.md").exists() if is_dir else False } - + # Add children recursively if it's a directory if is_dir: children = generate_directory_tree(item, max_depth, current_depth + 1) if children: node["children"] = children - + result.append(node) except Exception as e: logger.error(f"Error generating directory tree for {directory_path}: {e}") - + return result @@ -642,29 +656,29 @@ async def sitemap(request: Request): # API endpoints @app.get("/api/content/{path:path}", response_model=Dict[str, Any], tags=["API"]) async def get_content_api( - request: Request, - path: str = "", + request: Request, + path: str = "", format: str = Query("json", description="Response format (json or html)"), tree: bool = Query(False, description="Return directory tree structure") ): """ Get the content and metadata for a specific path. - + - For directories: returns list of items and index content if available - For files: returns the file content and metadata - With tree=true: returns a recursive directory tree structure """ logger.info(f"API request for path: {path}, format={format}, tree={tree}") - + try: full_path = MARKDOWN_DIR / path - + # Handle file extensions if not full_path.exists(): full_path_with_md = full_path.with_suffix(".md") if full_path_with_md.exists(): full_path = full_path_with_md - + # Process directory if full_path.is_dir(): all_items, content, metadata = process_directory(full_path) @@ -675,11 +689,11 @@ async def get_content_api( "metadata": metadata.dict() if metadata else None, "path": path, } - + # Generate tree structure if requested if tree: result["tree"] = generate_directory_tree(full_path) - + # Process file elif full_path.suffix == ".md": content, metadata = process_markdown_file_with_metadata(full_path) @@ -696,14 +710,14 @@ async def get_content_api( status_code=400, content={"error": f"Unsupported file type: {full_path.suffix}"} ) - + if format.lower() == "html": # Return HTML response return HTMLResponse(content=content if content else "") else: # Return JSON response return result - + except Exception as e: logger.error(f"API error: {str(e)}") return JSONResponse( @@ -720,42 +734,42 @@ async def search_content( ): """Search content in the repository.""" logger.info(f"Search request: q={q}, path={path}, tags={tags}") - + results = [] search_path = MARKDOWN_DIR / path if path else MARKDOWN_DIR - + # Simple search implementation - could be improved with a real search engine for item in search_path.glob("**/*.md"): try: relative_path = str(item.relative_to(MARKDOWN_DIR)) - + # Skip hidden files and directories if any(part.startswith(".") for part in item.parts): continue - + with item.open("r", encoding="utf-8") as f: content = f.read() - + # Get metadata frontmatter, content_text = extract_frontmatter(content) metadata = parse_metadata(frontmatter) - + # Skip drafts if metadata and metadata.draft and not os.environ.get("DEBUG"): continue - + # Filter by tags if specified if tags and metadata and metadata.tags: if not any(tag in metadata.tags for tag in tags): continue - + # Check for matches in content or title - if (q.lower() in content_text.lower() or + if (q.lower() in content_text.lower() or (metadata and metadata.title and q.lower() in metadata.title.lower())): - + title = metadata.title if metadata and metadata.title else get_h1_from_markdown(item) summary = generate_summary(content) - + results.append({ "title": title or item.stem, "path": get_clean_url(relative_path), @@ -765,10 +779,10 @@ async def search_content( }) except Exception as e: logger.error(f"Error processing file {item} during search: {e}") - + # Sort results by relevance (could be improved) results.sort(key=lambda x: 0 if x.get("title", "").lower().startswith(q.lower()) else 1) - + return { "query": q, "path": path, @@ -782,14 +796,14 @@ async def search_content( async def get_all_tags(): """Get all tags used in the content with counts.""" tags_count = {} - + # Collect all tags for item in MARKDOWN_DIR.glob("**/*.md"): try: # Skip hidden files if any(part.startswith(".") for part in item.parts): continue - + metadata = get_markdown_metadata(item) if metadata and metadata.tags: for tag in metadata.tags: @@ -799,11 +813,11 @@ async def get_all_tags(): tags_count[tag] = 1 except Exception as e: logger.error(f"Error processing tags for file {item}: {e}") - + # Sort tags by count sorted_tags = [{"name": tag, "count": count} for tag, count in tags_count.items()] sorted_tags.sort(key=lambda x: x["count"], reverse=True) - + return { "count": len(sorted_tags), "tags": sorted_tags @@ -817,26 +831,26 @@ async def get_directory_tree_api( ): """ Get a recursive directory tree structure starting from the specified path. - + - Returns a hierarchical tree of directories and files - Includes metadata like titles and URLs - Helps construct file browsers and navigation menus """ logger.info(f"Tree request for path: {path}, max_depth: {max_depth}") - + try: # Get the full path full_path = MARKDOWN_DIR / path - + if not full_path.exists() or not full_path.is_dir(): return JSONResponse( status_code=404, content={"error": f"Directory not found: {path}"} ) - + # Generate the tree tree = generate_directory_tree(full_path, max_depth=max_depth) - + return { "path": path, "tree": tree @@ -858,17 +872,33 @@ async def mindmap(request: Request): {"request": request, "title": "Mind Map - Kenneth Reitz", "content": None, "files": None} ) + @app.get("/", response_class=HTMLResponse, include_in_schema=False) @app.get("/{path:path}", response_class=HTMLResponse, include_in_schema=False) async def browse( - request: Request, - path: str = "", + request: Request, + path: str = "", sort: str = Query(None, description="Sort order for files"), tag: str = Query(None, description="Filter by tag") ): logger.info(f"Browsing path: {path}") full_path = MARKDOWN_DIR / path - + + # Check if it's a directory and the URL doesn't end with '/' + if full_path.is_dir() and path and not path.endswith('/'): + # Preserve query parameters in the redirect + query_params = [] + if sort: + query_params.append(f"sort={sort}") + if tag: + query_params.append(f"tag={tag}") + + query_string = f"?{'&'.join(query_params)}" if query_params else "" + redirect_url = f"/{path}/{query_string}" + + logger.info(f"Redirecting directory without trailing slash: {path} to {redirect_url}") + return RedirectResponse(url=redirect_url, status_code=301) + if not full_path.exists(): full_path_with_md = full_path.with_suffix(".md") if full_path_with_md.exists(): @@ -892,18 +922,18 @@ async def browse( metadata = None template_name = "index.html" - + if full_path.is_dir(): all_items, content, metadata = process_directory(full_path) date_created = None - + # Apply tag filter if specified if tag: all_items = [ item for item in all_items if not item.is_dir and item.metadata and item.metadata.tags and tag in item.metadata.tags ] - + # Apply sort if specified if sort and all_items: if sort == "name": @@ -918,11 +948,11 @@ async def browse( try: if is_image(full_path): return FileResponse(full_path, media_type="image/jpeg") - + content, metadata = process_markdown_file_with_metadata(full_path) all_items = None date_created = get_file_creation_date(full_path) - + # Use post template for markdown files template_name = "post.html" except UnicodeDecodeError: @@ -930,13 +960,13 @@ async def browse( return FileResponse(full_path, filename=full_path.name) breadcrumbs = generate_breadcrumbs(path) - + # Use metadata title if available, otherwise use breadcrumbs if metadata and metadata.title: page_title = metadata.title else: page_title = generate_title_from_breadcrumbs(breadcrumbs) - + has_images = any(item.is_image for item in all_items or []) # If in image gallery, consider randomizing @@ -944,20 +974,20 @@ async def browse( # Separate images and non-images image_items = [item for item in all_items if item.is_image] non_image_items = [item for item in all_items if not item.is_image] - + # Randomize images random.shuffle(image_items) - + # Recombine all_items = non_image_items + image_items # Special handling for index page is_root = path == "" - + # Determine if we should use photo_browser for image-heavy directories if has_images and len([item for item in all_items if item.is_image]) > 5: template_name = "photo_browser.html" - + # Check for layout override in metadata if metadata and metadata.layout: template_name = f"{metadata.layout}.html" @@ -971,10 +1001,10 @@ async def browse( template_name = "post.html" else: template_name = "directory.html" - + # Determine if we're in photos path is_photos = path.startswith("photos") or path.find("/photos") >= 0 - + return templates.TemplateResponse( template_name, {