diff --git a/files.html b/files.html
index fbb8922..44f6a39 100644
--- a/files.html
+++ b/files.html
@@ -46,15 +46,14 @@ body{counter-reset:h1 12}
# This example was created on Windows. Other platforms may
# behave differently, for reasons outlined below.
->>> file = open('examples/chinese.txt')
->>> a_string = file.read()
-Traceback (most recent call last):
- File "", line 1, in
+>>> file = open('examples/chinese.txt')
+>>> a_string = file.read()
+Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
File "C:\Python31\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
-UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 28: character maps to
->>>
-
+UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 28: character maps to <undefined>
+>>>
What just happened? You didn’t specify a character encoding, so Python is forced to use the default encoding. What’s the default encoding? If you look closely at the traceback, you can see that it’s dying in cp1252.py, meaning that Python is using CP-1252 as the default encoding here. (CP-1252 is a common encoding on computers running Microsoft Windows.) The CP-1252 character set doesn’t support the characters that are in this file, so the read fails with an ugly UnicodeDecodeError.
@@ -134,7 +133,7 @@ UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 28: chara
>>> a_file.seek(18)
>>> a_file.read(1)
-Traceback (most recent call last):
+Traceback (most recent call last):
File "<pyshell#12>", line 1, in <module>
a_file.read(1)
File "C:\Python31\lib\codecs.py", line 300, in decode