mirror of
https://github.com/kennethreitz/dive-into-python3.git
synced 2026-06-05 23:10:17 +00:00
385 lines
15 KiB
Python
385 lines
15 KiB
Python
"""Quick-and-dirty HTML minimizer"""
|
|
|
|
import sys
from html.entities import name2codepoint


def _entity_rewrites():
    """Build the (verbose_form, shortest_form) pairs used by the entity pass.

    Each HTML 4 entity in ``name2codepoint`` can be written as ``&name;``,
    ``&#DECIMAL;`` or ``&#xHEX;``.  Any spelling that is strictly longer than
    the shortest one is mapped to that shortest spelling; spellings that tie
    for shortest are left alone.

    NOTE(review): this table stands in for a generated list of ~350
    ``html.replace(...)`` calls whose entity text was not legible in the
    reviewed copy.  The shortest-form rule was reconstructed from which
    characters had one, two or no entries.  The tie-break (named before
    decimal) and the handling of both hex letter cases are choices made
    here -- confirm against a pristine copy of the file.
    """
    rewrites = []
    for name, codepoint in sorted(name2codepoint.items()):
        named = "&{};".format(name)
        decimal = "&#{};".format(codepoint)
        hex_upper = "&#x{:X};".format(codepoint)
        hex_lower = "&#x{:x};".format(codepoint)
        # min() keeps the first of equally short candidates, so a named
        # reference wins a tie with the decimal one, and decimal beats hex.
        shortest = min((named, decimal, hex_upper), key=len)
        candidates = [named, decimal, hex_upper]
        if hex_lower != hex_upper:
            candidates.append(hex_lower)
        for form in candidates:
            if len(form) > len(shortest):
                rewrites.append((form, shortest))
    return tuple(rewrites)


# Computed once at import; every pair maps a complete "&...;" token to another
# complete token, so the order in which the pairs are applied does not matter.
_ENTITY_REWRITES = _entity_rewrites()


def squeeze_whitespace(lines):
    """Yield each of *lines* with whitespace removed outside ``<pre>`` blocks.

    Outside ``<pre>`` a line loses its leading and trailing whitespace,
    including the newline, so consecutive lines run together.  Lines inside
    ``<pre>`` are passed through untouched.  The line that closes a block
    keeps its leading whitespace but loses its trailing whitespace.

    Tag detection is a plain substring search for ``<pre`` and ``</pre``.
    """
    in_pre = False
    for line in lines:
        last_open = line.rfind('<pre')
        last_close = line.rfind('</pre')
        if last_open != -1:
            in_pre = True
        if last_close > last_open:
            # The last <pre>-related tag on this line is a closing one.
            in_pre = False
            yield line.rstrip()
        elif in_pre:
            # Also reached for "</pre> ... <pre" on one line: the block that
            # was reopened is still open at the end of the line, so the line
            # (and its newline) must be kept verbatim.
            yield line
        else:
            yield line.strip()


def shorten_entities(text):
    """Return *text* with every entity reference in its shortest spelling.

    For example ``&#62;`` becomes ``&gt;`` and ``&aring;`` becomes ``&#229;``.
    The entity names and code points are the HTML 4 ones from
    ``html.entities.name2codepoint``.
    """
    for verbose, shortest in _ENTITY_REWRITES:
        text = text.replace(verbose, shortest)
    return text


def main(argv):
    """Minimize the HTML file ``argv[1]`` and write the result to ``argv[2]``.

    The input is read completely before the output is opened, so an
    unreadable input no longer leaves behind an empty output file, and
    passing the same path for both arguments minimizes the file in place.
    """
    if len(argv) < 3:
        sys.exit("usage: {} INPUT_FILE OUTPUT_FILE".format(argv[0]))
    input_file = argv[1]
    output_file = argv[2]

    with open(input_file) as source:
        minimized = shorten_entities("".join(squeeze_whitespace(source)))

    with open(output_file, 'w') as out:
        out.write(minimized)


if __name__ == "__main__":
    main(sys.argv)