typography fiddling

This commit is contained in:
Mark Pilgrim
2009-05-20 17:44:43 -04:00
parent 52b451d753
commit c49279087d
3 changed files with 3 additions and 3 deletions
+1 -1
View File
@@ -9,7 +9,7 @@ out = open(output_file, 'w', encoding="utf-8") # encoding argument! important!
for line in open(input_file).readlines():
# replace entities with Unicode characters
for e in re.findall('&(.+?);', line):
-if e in ('lt', 'gt', 'amp', 'nbsp'):
+if e in ('lt', 'gt', 'amp', 'quot', 'apos', 'nbsp'):
continue
n = html.entities.name2codepoint.get(e)
if not n: