mirror of
https://github.com/kennethreitz/kjvstudy.org.git
synced 2026-06-05 23:00:16 +00:00
Add smart bot detection logging
Only logs requests from known bots/crawlers (Googlebot, PerplexityBot, AmazonBot, etc.) while staying silent for regular user traffic. Helps track SEO crawler activity without cluttering logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -171,12 +171,47 @@ class CacheControlMiddleware(BaseHTTPMiddleware):
|
||||
return response
|
||||
|
||||
|
||||
# Bot detection and logging middleware
|
||||
class BotLoggerMiddleware(BaseHTTPMiddleware):
    """Log requests from known bots/crawlers and stay silent for everyone else.

    A request counts as a bot request when its lower-cased ``User-Agent``
    header contains any entry of ``BOT_IDENTIFIERS`` as a substring. Every
    request, bot or not, is forwarded unchanged to the next handler.
    """

    # Lower-case substrings looked for in the User-Agent header. Order matters:
    # the first entry that matches is the name that gets logged.
    BOT_IDENTIFIERS = [
        'googlebot', 'bingbot', 'slurp', 'duckduckbot', 'baiduspider',
        'yandexbot', 'facebookexternalhit', 'twitterbot', 'rogerbot',
        'linkedinbot', 'embedly', 'quora link preview', 'showyoubot',
        'outbrain', 'pinterest', 'slackbot', 'vkshare', 'w3c_validator',
        'redditbot', 'applebot', 'whatsapp', 'flipboard', 'tumblr',
        'bitlybot', 'skypeuripreview', 'nuzzel', 'discordbot',
        'telegrambot', 'perplexitybot', 'amazonbot', 'claudebot',
        'anthropic-ai', 'gptbot', 'chatgpt-user', 'ccbot',
        'diffbot', 'bytespider', 'petalbot',
    ]

    async def dispatch(self, request: Request, call_next):
        """Print one ``[BOT]`` line for bot requests, then pass the request on.

        Args:
            request: Incoming request; only its ``user-agent`` header, HTTP
                method and URL path are read.
            call_next: Callable that forwards the request to the next handler
                and returns its response.

        Returns:
            The response produced by ``call_next(request)``, unmodified.
        """
        # A missing header becomes "", which matches no identifier.
        user_agent = request.headers.get("user-agent", "").lower()

        # Single scan: detects the bot and yields its name at the same time.
        bot_name = next(
            (bot for bot in self.BOT_IDENTIFIERS if bot in user_agent),
            None,
        )
        if bot_name is not None:
            print(f"[BOT] {bot_name} - {request.method} {request.url.path}")

        return await call_next(request)
|
||||
|
||||
|
||||
# Add GZip compression middleware (compress responses > 500 bytes)
|
||||
# minimum_size=500 is the byte threshold handed to GZipMiddleware.
# NOTE(review): presumably Starlette's GZipMiddleware, which leaves responses
# smaller than minimum_size uncompressed — confirm against the file's imports.
app.add_middleware(GZipMiddleware, minimum_size=500)
|
||||
|
||||
# Add caching middleware
|
||||
# CacheControlMiddleware is the BaseHTTPMiddleware subclass defined earlier in
# this module; it is registered with no extra options.
app.add_middleware(CacheControlMiddleware)
|
||||
|
||||
# Add bot logging middleware
|
||||
# Registered after the GZip and cache-control middleware.
# NOTE(review): Starlette places later-added middleware outermost, so this one
# should see each request before the other two — confirm if ordering matters.
app.add_middleware(BotLoggerMiddleware)
|
||||
|
||||
|
||||
# Set up Jinja2 templates and static files
|
||||
# Directory containing this module file.
# NOTE(review): PathLib is presumably an alias for pathlib.Path imported
# elsewhere in the file — confirm.
current_dir = PathLib(__file__).parent
|
||||
|
||||
Reference in New Issue
Block a user