Files
dive-into-python3/htmlminimizer.py
T
2009-03-17 03:11:52 -04:00

385 lines
15 KiB
Python

"""Quick-and-dirty HTML minimizer"""
import sys
# Ordered (old, new) pairs of character-reference spellings.  Every occurrence
# of ``old`` in the document is rewritten to ``new``; the pairs are applied in
# the order listed.  Matching is exact and case-sensitive.
ENTITY_REPLACEMENTS = (
    ("&aring;", "&#229;"),
    ("&#62;", "&gt;"),
    ("&#x3e;", "&gt;"),
    ("&#8835;", "&sup;"),
    ("&#x2283;", "&sup;"),
    ("&Ntilde;", "&#209;"),
    ("&#x3d2;", "&#978;"),
    ("&upsih;", "&#978;"),
    ("&Yacute;", "&#221;"),
    ("&Atilde;", "&#195;"),
    ("&#x221a;", "&#8730;"),
    ("&#x2297;", "&#8855;"),
    ("&otimes;", "&#8855;"),
    ("&aelig;", "&#230;"),
    ("&#936;", "&Psi;"),
    ("&#x3a8;", "&Psi;"),
    ("&#x395;", "&#917;"),
    ("&Epsilon;", "&#917;"),
    ("&Icirc;", "&#206;"),
    ("&Eacute;", "&#201;"),
    ("&#x39b;", "&#923;"),
    ("&Lambda;", "&#923;"),
    ("&#x2033;", "&#8243;"),
    ("&#x39a;", "&#922;"),
    ("&Kappa;", "&#922;"),
    ("&#x3c2;", "&#962;"),
    ("&sigmaf;", "&#962;"),
    ("&#8206;", "&lrm;"),
    ("&#x200e;", "&lrm;"),
    ("&cedil;", "&#184;"),
    ("&#8194;", "&ensp;"),
    ("&#x2002;", "&ensp;"),
    ("&AElig;", "&#198;"),
    ("&#x2032;", "&#8242;"),
    ("&#932;", "&Tau;"),
    ("&#x3a4;", "&Tau;"),
    ("&#x2308;", "&#8968;"),
    ("&#8659;", "&dArr;"),
    ("&#x21d3;", "&dArr;"),
    ("&#8805;", "&ge;"),
    ("&#x2265;", "&ge;"),
    ("&#8901;", "&sdot;"),
    ("&#x22c5;", "&sdot;"),
    ("&#x230a;", "&#8970;"),
    ("&lfloor;", "&#8970;"),
    ("&#8656;", "&lArr;"),
    ("&#x21d0;", "&lArr;"),
    ("&brvbar;", "&#166;"),
    ("&Otilde;", "&#213;"),
    ("&#x398;", "&#920;"),
    ("&Theta;", "&#920;"),
    ("&#928;", "&Pi;"),
    ("&#x3a0;", "&Pi;"),
    ("&#x152;", "&#338;"),
    ("&OElig;", "&#338;"),
    ("&#x160;", "&#352;"),
    ("&Scaron;", "&#352;"),
    ("&egrave;", "&#232;"),
    ("&#8834;", "&sub;"),
    ("&#x2282;", "&sub;"),
    ("&iexcl;", "&#161;"),
    ("&#8721;", "&sum;"),
    ("&#x2211;", "&sum;"),
    ("&ntilde;", "&#241;"),
    ("&atilde;", "&#227;"),
    ("&#x3b8;", "&#952;"),
    ("&theta;", "&#952;"),
    ("&#8836;", "&nsub;"),
    ("&#x2284;", "&nsub;"),
    ("&#8660;", "&hArr;"),
    ("&#x21d4;", "&hArr;"),
    ("&Oslash;", "&#216;"),
    ("&THORN;", "&#222;"),
    ("&#924;", "&Mu;"),
    ("&#x39c;", "&Mu;"),
    ("&#x2009;", "&#8201;"),
    ("&thinsp;", "&#8201;"),
    ("&ecirc;", "&#234;"),
    ("&#x201e;", "&#8222;"),
    ("&Aring;", "&#197;"),
    ("&#x2207;", "&#8711;"),
    ("&#x2030;", "&#8240;"),
    ("&permil;", "&#8240;"),
    ("&Ugrave;", "&#217;"),
    ("&#951;", "&eta;"),
    ("&#x3b7;", "&eta;"),
    ("&Agrave;", "&#192;"),
    ("&#x2200;", "&#8704;"),
    ("&forall;", "&#8704;"),
    ("&#240;", "&eth;"),
    ("&#xf0;", "&eth;"),
    ("&#x2309;", "&#8969;"),
    ("&Egrave;", "&#200;"),
    ("&divide;", "&#247;"),
    ("&igrave;", "&#236;"),
    ("&otilde;", "&#245;"),
    ("&pound;", "&#163;"),
    ("&#x2044;", "&#8260;"),
    ("&#208;", "&ETH;"),
    ("&#xd0;", "&ETH;"),
    ("&#x2217;", "&#8727;"),
    ("&lowast;", "&#8727;"),
    ("&#967;", "&chi;"),
    ("&#x3c7;", "&chi;"),
    ("&Aacute;", "&#193;"),
    ("&#x392;", "&#914;"),
    ("&#8869;", "&perp;"),
    ("&#x22a5;", "&perp;"),
    ("&#x2234;", "&#8756;"),
    ("&there4;", "&#8756;"),
    ("&#960;", "&pi;"),
    ("&#x3c0;", "&pi;"),
    ("&#x2205;", "&#8709;"),
    ("&#x2209;", "&#8713;"),
    ("&icirc;", "&#238;"),
    ("&#8226;", "&bull;"),
    ("&#x2022;", "&bull;"),
    ("&#x3c5;", "&#965;"),
    ("&upsilon;", "&#965;"),
    ("&Oacute;", "&#211;"),
    ("&#x3ba;", "&#954;"),
    ("&kappa;", "&#954;"),
    ("&ccedil;", "&#231;"),
    ("&#8745;", "&cap;"),
    ("&#x2229;", "&cap;"),
    ("&#956;", "&mu;"),
    ("&#x3bc;", "&mu;"),
    ("&#176;", "&deg;"),
    ("&#xb0;", "&deg;"),
    ("&#964;", "&tau;"),
    ("&#x3c4;", "&tau;"),
    ("&#8195;", "&emsp;"),
    ("&#x2003;", "&emsp;"),
    ("&#x2026;", "&#8230;"),
    ("&hellip;", "&#8230;"),
    ("&ucirc;", "&#251;"),
    ("&ugrave;", "&#249;"),
    ("&#8773;", "&cong;"),
    ("&#x2245;", "&cong;"),
    ("&#x399;", "&#921;"),
    ("&#x22;", "&#34;"),
    ("&quot;", "&#34;"),
    ("&#8594;", "&rarr;"),
    ("&#x2192;", "&rarr;"),
    ("&#929;", "&Rho;"),
    ("&#x3a1;", "&Rho;"),
    ("&uacute;", "&#250;"),
    ("&acirc;", "&#226;"),
    ("&#8764;", "&sim;"),
    ("&#x223c;", "&sim;"),
    ("&#966;", "&phi;"),
    ("&#x3c6;", "&phi;"),
    ("&#x2666;", "&#9830;"),
    ("&Ccedil;", "&#199;"),
    ("&#919;", "&Eta;"),
    ("&#x397;", "&Eta;"),
    ("&#x393;", "&#915;"),
    ("&Gamma;", "&#915;"),
    ("&#8364;", "&euro;"),
    ("&#x20ac;", "&euro;"),
    ("&#x3d1;", "&#977;"),
    ("&thetasym;", "&#977;"),
    ("&#x201c;", "&#8220;"),
    ("&#x2665;", "&#9829;"),
    ("&hearts;", "&#9829;"),
    ("&oacute;", "&#243;"),
    ("&#8204;", "&zwnj;"),
    ("&#x200c;", "&zwnj;"),
    ("&#165;", "&yen;"),
    ("&#xa5;", "&yen;"),
    ("&ograve;", "&#242;"),
    ("&#935;", "&Chi;"),
    ("&#x3a7;", "&Chi;"),
    ("&#x2122;", "&#8482;"),
    ("&#958;", "&xi;"),
    ("&#x3be;", "&xi;"),
    ("&#x2dc;", "&#732;"),
    ("&tilde;", "&#732;"),
    ("&#x2039;", "&#8249;"),
    ("&lsaquo;", "&#8249;"),
    ("&#x153;", "&#339;"),
    ("&oelig;", "&#339;"),
    ("&#x2261;", "&#8801;"),
    ("&#8804;", "&le;"),
    ("&#x2264;", "&le;"),
    ("&#8746;", "&cup;"),
    ("&#x222a;", "&cup;"),
    ("&#x178;", "&#376;"),
    ("&#60;", "&lt;"),
    ("&#x3c;", "&lt;"),
    ("&#x3a5;", "&#933;"),
    ("&Upsilon;", "&#933;"),
    ("&#x2013;", "&#8211;"),
    ("&yacute;", "&#253;"),
    ("&#8476;", "&real;"),
    ("&#x211c;", "&real;"),
    ("&#968;", "&psi;"),
    ("&#x3c8;", "&psi;"),
    ("&#x203a;", "&#8250;"),
    ("&rsaquo;", "&#8250;"),
    ("&#8595;", "&darr;"),
    ("&#x2193;", "&darr;"),
    ("&#x391;", "&#913;"),
    ("&Alpha;", "&#913;"),
    ("&#172;", "&not;"),
    ("&#xac;", "&not;"),
    ("&#x26;", "&#38;"),
    ("&oslash;", "&#248;"),
    ("&acute;", "&#180;"),
    ("&#8205;", "&zwj;"),
    ("&#x200d;", "&zwj;"),
    ("&laquo;", "&#171;"),
    ("&#x201d;", "&#8221;"),
    ("&Igrave;", "&#204;"),
    ("&micro;", "&#181;"),
    ("&#173;", "&shy;"),
    ("&#xad;", "&shy;"),
    ("&#8839;", "&supe;"),
    ("&#x2287;", "&supe;"),
    ("&szlig;", "&#223;"),
    ("&#x2663;", "&#9827;"),
    ("&agrave;", "&#224;"),
    ("&Ocirc;", "&#212;"),
    ("&#8596;", "&harr;"),
    ("&#x2194;", "&harr;"),
    ("&#8592;", "&larr;"),
    ("&#x2190;", "&larr;"),
    ("&frac12;", "&#189;"),
    ("&#8733;", "&prop;"),
    ("&#x221d;", "&prop;"),
    ("&#x2c6;", "&#710;"),
    ("&ocirc;", "&#244;"),
    ("&#x2248;", "&#8776;"),
    ("&#168;", "&uml;"),
    ("&#xa8;", "&uml;"),
    ("&#8719;", "&prod;"),
    ("&#x220f;", "&prod;"),
    ("&#174;", "&reg;"),
    ("&#xae;", "&reg;"),
    ("&#8207;", "&rlm;"),
    ("&#x200f;", "&rlm;"),
    ("&#x221e;", "&#8734;"),
    ("&#x3a3;", "&#931;"),
    ("&Sigma;", "&#931;"),
    ("&#x2014;", "&#8212;"),
    ("&#8593;", "&uarr;"),
    ("&#x2191;", "&uarr;"),
    ("&times;", "&#215;"),
    ("&#8658;", "&rArr;"),
    ("&#x21d2;", "&rArr;"),
    ("&#8744;", "&or;"),
    ("&#x2228;", "&or;"),
    ("&#x3b3;", "&#947;"),
    ("&gamma;", "&#947;"),
    ("&#x3bb;", "&#955;"),
    ("&lambda;", "&#955;"),
    ("&#9002;", "&rang;"),
    ("&#x232a;", "&rang;"),
    ("&#x2020;", "&#8224;"),
    ("&dagger;", "&#8224;"),
    ("&#x2111;", "&#8465;"),
    ("&#x2135;", "&#8501;"),
    ("&alefsym;", "&#8501;"),
    ("&#8838;", "&sube;"),
    ("&#x2286;", "&sube;"),
    ("&#x3b1;", "&#945;"),
    ("&alpha;", "&#945;"),
    ("&#925;", "&Nu;"),
    ("&#x39d;", "&Nu;"),
    ("&plusmn;", "&#177;"),
    ("&frac34;", "&#190;"),
    ("&#x203e;", "&#8254;"),
    ("&#x394;", "&#916;"),
    ("&Delta;", "&#916;"),
    ("&#9674;", "&loz;"),
    ("&#x25ca;", "&loz;"),
    ("&#x3b9;", "&#953;"),
    ("&iacute;", "&#237;"),
    ("&#x3b5;", "&#949;"),
    ("&epsilon;", "&#949;"),
    ("&#x2118;", "&#8472;"),
    ("&weierp;", "&#8472;"),
    ("&#8706;", "&part;"),
    ("&#x2202;", "&part;"),
    ("&#x3b4;", "&#948;"),
    ("&delta;", "&#948;"),
    ("&#x3bf;", "&#959;"),
    ("&omicron;", "&#959;"),
    ("&#926;", "&Xi;"),
    ("&#x39e;", "&Xi;"),
    ("&#x2021;", "&#8225;"),
    ("&Dagger;", "&#8225;"),
    ("&Ograve;", "&#210;"),
    ("&Ucirc;", "&#219;"),
    ("&#x161;", "&#353;"),
    ("&scaron;", "&#353;"),
    ("&#x2018;", "&#8216;"),
    ("&#8712;", "&isin;"),
    ("&#x2208;", "&isin;"),
    ("&#x396;", "&#918;"),
    ("&#x2212;", "&#8722;"),
    ("&#8743;", "&and;"),
    ("&#x2227;", "&and;"),
    ("&#8736;", "&ang;"),
    ("&#x2220;", "&ang;"),
    ("&curren;", "&#164;"),
    ("&#8747;", "&int;"),
    ("&#x222b;", "&int;"),
    ("&#x230b;", "&#8971;"),
    ("&rfloor;", "&#8971;"),
    ("&#x21b5;", "&#8629;"),
    ("&#x2203;", "&#8707;"),
    ("&#x2295;", "&#8853;"),
    ("&Acirc;", "&#194;"),
    ("&#982;", "&piv;"),
    ("&#x3d6;", "&piv;"),
    ("&#8715;", "&ni;"),
    ("&#x220b;", "&ni;"),
    ("&#934;", "&Phi;"),
    ("&#x3a6;", "&Phi;"),
    ("&Iacute;", "&#205;"),
    ("&Uacute;", "&#218;"),
    ("&#x39f;", "&#927;"),
    ("&Omicron;", "&#927;"),
    ("&#8800;", "&ne;"),
    ("&#x2260;", "&ne;"),
    ("&iquest;", "&#191;"),
    ("&#x201a;", "&#8218;"),
    ("&Ecirc;", "&#202;"),
    ("&#x3b6;", "&#950;"),
    ("&#x3a9;", "&#937;"),
    ("&Omega;", "&#937;"),
    ("&#957;", "&nu;"),
    ("&#x3bd;", "&nu;"),
    ("&frac14;", "&#188;"),
    ("&aacute;", "&#225;"),
    ("&#8657;", "&uArr;"),
    ("&#x21d1;", "&uArr;"),
    ("&#x3b2;", "&#946;"),
    ("&#x192;", "&#402;"),
    ("&#961;", "&rho;"),
    ("&#x3c1;", "&rho;"),
    ("&eacute;", "&#233;"),
    ("&#x3c9;", "&#969;"),
    ("&omega;", "&#969;"),
    ("&middot;", "&#183;"),
    ("&#9001;", "&lang;"),
    ("&#x2329;", "&lang;"),
    ("&#x2660;", "&#9824;"),
    ("&spades;", "&#9824;"),
    ("&#x2019;", "&#8217;"),
    ("&thorn;", "&#254;"),
    ("&raquo;", "&#187;"),
    ("&#x3c3;", "&#963;"),
    ("&sigma;", "&#963;"),
)


def strip_whitespace(lines):
    """Yield each line of *lines* with whitespace outside <pre> removed.

    Outside a ``<pre>`` block a line is yielded with leading and trailing
    whitespace (including its line break) stripped, so consecutive lines are
    glued together when the pieces are joined.  From a line containing
    ``<pre`` up to (but not including) the line containing ``</pre``, lines
    are yielded verbatim.  The line containing ``</pre`` is yielded with
    only its trailing whitespace removed.

    *lines* is any iterable of strings, e.g. an open text file.
    """
    in_pre = False
    for line in lines:
        piece = line.strip()
        if '<pre' in piece:
            in_pre = True
        if '</pre' in piece:
            # this will break if you have a </pre> then <pre>
            # on the same line, so don't do that
            in_pre = False
            # Leading whitespace on the closing line is still inside the
            # <pre>, so only the right-hand side is trimmed.
            # NOTE(review): nesting reconstructed from a copy whose
            # indentation was flattened; confirm the rstrip belongs here.
            piece = line.rstrip()
        yield line if in_pre else piece


def normalize_entities(html):
    """Return *html* with every ENTITY_REPLACEMENTS pair applied in order."""
    for old, new in ENTITY_REPLACEMENTS:
        html = html.replace(old, new)
    return html


def minimize(input_file, output_file):
    """Minimize the HTML in *input_file* and write it to *output_file*.

    The input is read completely and closed before the output is opened, so
    the same path may be given for both and a failure to read the input
    leaves any existing output untouched.  Both files are opened in text
    mode with the default encoding.
    """
    with open(input_file) as source:
        html = ''.join(strip_whitespace(source))
    html = normalize_entities(html)
    with open(output_file, 'w') as out:
        out.write(html)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: %s INPUT_FILE OUTPUT_FILE" % sys.argv[0])
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    minimize(input_file, output_file)