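Fix a very subtle bug in unicodification: open the input file with an explicit encoding="utf-8" instead of relying on the platform default encoding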

This commit is contained in:
Mark Pilgrim
2009-05-20 23:03:33 -04:00
parent 575436e66b
commit 324f58eb70
+1 -1
View File
@@ -6,7 +6,7 @@ input_file = sys.argv[1]
output_file = sys.argv[2]
in_pre = False
out = open(output_file, 'w', encoding="utf-8") # encoding argument! important!
for line in open(input_file).readlines():
for line in open(input_file, encoding="utf-8").readlines():
# replace entities with Unicode characters
for e in re.findall('&(.+?);', line):
if e in ('lt', 'gt', 'amp', 'quot', 'apos', 'nbsp'):