cycle through image servers

This commit is contained in:
Mark Pilgrim
2009-06-30 09:03:15 -04:00
parent c9705a6ed0
commit c9f2f048b5
4 changed files with 41 additions and 9 deletions
+13 -1
View File
@@ -1,12 +1,24 @@
"""Quick-and-dirty HTML minimizer"""
import sys, re, html.entities
import sys
import re
import html.entities
import itertools
# Hostnames the script rotates through when it rewrites "<img src=i/" URLs.
_SERVERS = [
    "a.wearehugh.com",
    "b.wearehugh.com",
    "c.wearehugh.com",
    "d.wearehugh.com",
]

# Endless iterator over _SERVERS: each next() call yields the following
# hostname and wraps back to the first one after the last.
available_server = itertools.cycle(_SERVERS)
# Positional command-line arguments: path to read, then path to write.
input_file, output_file = sys.argv[1], sys.argv[2]

# Starts out False; NOTE(review): presumably tracks whether the current line
# is inside a <pre> block -- its use lies beyond this part of the script.
in_pre = False

# The encoding is passed explicitly so the output is always written as UTF-8
# rather than in whatever the platform default happens to be.
out = open(output_file, mode='w', encoding="utf-8")
for line in open(input_file, encoding="utf-8").readlines():
# round-robin image servers
if "<img src=i/" in line:
line = line.replace("<img src=i/", "<img src=http://" + next(available_server) + "/dip3/")
# replace entities with Unicode characters
for e in re.findall('&(.+?);', line):
if e in ('lt', 'amp', 'quot', 'apos', 'nbsp'):