mirror of
https://github.com/kennethreitz/dive-into-python3.git
synced 2026-06-05 23:10:17 +00:00
cycle through image servers
This commit is contained in:
+13
-1
@@ -1,12 +1,24 @@
|
||||
"""Quick-and-dirty HTML minimizer"""
|
||||
|
||||
import sys, re, html.entities
|
||||
import sys
|
||||
import re
|
||||
import html.entities
|
||||
import itertools
|
||||
|
||||
_SERVERS = ['a.wearehugh.com',
|
||||
'b.wearehugh.com',
|
||||
'c.wearehugh.com',
|
||||
'd.wearehugh.com']
|
||||
available_server = itertools.cycle(_SERVERS)
|
||||
input_file = sys.argv[1]
|
||||
output_file = sys.argv[2]
|
||||
in_pre = False
|
||||
out = open(output_file, 'w', encoding="utf-8") # encoding argument! important!
|
||||
for line in open(input_file, encoding="utf-8").readlines():
|
||||
# round-robin image servers
|
||||
if "<img src=i/" in line:
|
||||
line = line.replace("<img src=i/", "<img src=http://" + next(available_server) + "/dip3/")
|
||||
|
||||
# replace entities with Unicode characters
|
||||
for e in re.findall('&(.+?);', line):
|
||||
if e in ('lt', 'amp', 'quot', 'apos', 'nbsp'):
|
||||
|
||||
Reference in New Issue
Block a user