mirror of
https://github.com/kennethreitz/dive-into-python3.git
synced 2026-06-05 15:00:18 +00:00
7053662ed4
--HG-- rename : htmlminimizer.py => util/htmlminimizer.py rename : yuicompressor-2.4.2.jar => util/yuicompressor-2.4.2.jar
38 lines
1.2 KiB
Python
38 lines
1.2 KiB
Python
"""Quick-and-dirty HTML minimizer"""
|
|
|
|
import sys, re, html.entities
|
|
|
|
# Entities that must stay escaped in the output.
_KEEP_ENTITIES = ('lt', 'amp', 'quot', 'apos', 'nbsp')

# A well-formed character reference: decimal (&#169;), hexadecimal (&#xA9;)
# or named (&copy;).  Deliberately strict so that a bare "&" followed later
# by an unrelated ";" is never mistaken for an entity.
_ENTITY_RE = re.compile(r'&(#[xX][0-9A-Fa-f]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);')


def _entity_to_char(match):
    """Return the character for one matched entity, or the entity unchanged.

    The entity is left untouched when it is in the keep list, when its name
    is unknown, or when its numeric value is not an encodable code point.
    """
    name = match.group(1)
    if name in _KEEP_ENTITIES:
        return match.group(0)
    if name.startswith('#'):
        try:
            # Parse the number explicitly; never evaluate file content as code.
            code = int(name[2:], 16) if name[1] in 'xX' else int(name[1:])
        except ValueError:
            # Only reachable for absurdly long digit strings.
            return match.group(0)
    else:
        code = html.entities.name2codepoint.get(name)
        if code is None:
            return match.group(0)
    # chr() rejects values above U+10FFFF, and lone surrogates cannot be
    # written to a UTF-8 file, so leave such references as they are.
    if code > 0x10FFFF or 0xD800 <= code <= 0xDFFF:
        return match.group(0)
    return chr(code)


def minimize_lines(lines):
    """Yield the minimized form of each line of an HTML document.

    For every line, character entities (except those in the keep list) are
    replaced with the Unicode characters they denote.  Outside <pre> blocks
    the line is then stripped of leading and trailing whitespace, including
    its newline, so consecutive lines are joined.  Inside <pre> blocks lines
    are passed through verbatim; the line that closes a <pre> block keeps its
    leading whitespace and loses only trailing whitespace.

    lines -- an iterable of strings, each normally ending in a newline
    """
    in_pre = False
    for line in lines:
        line = _ENTITY_RE.sub(_entity_to_char, line)

        stripped = line.strip()
        if '<pre' in stripped:
            in_pre = True
        if '</pre' in stripped:
            # XXX this will break if you have </pre><pre> in one line
            in_pre = False
            stripped = line.rstrip()

        yield line if in_pre else stripped


def main(argv=None):
    """Minimize the HTML file argv[0] and write the result to argv[1].

    argv defaults to the command-line arguments (sys.argv[1:]).  Both files
    are read and written as UTF-8.  Exits with a usage message when fewer
    than two arguments are given; extra arguments are ignored.
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 2:
        sys.exit('usage: {} INPUT_FILE OUTPUT_FILE'.format(sys.argv[0]))
    input_file, output_file = args[0], args[1]

    # Open the input first so that a missing input file does not truncate
    # an existing output file.
    with open(input_file, encoding="utf-8") as source:
        # encoding argument! important!
        with open(output_file, 'w', encoding="utf-8") as out:
            out.writelines(minimize_lines(source))


if __name__ == "__main__":
    main()
|