mirror of
https://github.com/kennethreitz/dive-into-python3.git
synced 2026-06-05 23:10:17 +00:00
add encoding parameter to all file open() calls in code samples, example files, and text
This commit is contained in:
@@ -309,19 +309,19 @@ StopIteration</samp>
|
||||
|
||||
<p class=d>[<a href=examples/favorite-people.txt>download <code>favorite-people.txt</code></a>]
|
||||
<pre class=screen>
|
||||
<a><samp class=p>>>> </samp><kbd class=pp>names = list(open('examples/favorite-people.txt'))</kbd> <span class=u>①</span></a>
|
||||
<a><samp class=p>>>> </samp><kbd class=pp>names = list(open('examples/favorite-people.txt', encoding='utf-8'))</kbd> <span class=u>①</span></a>
|
||||
<samp class=p>>>> </samp><kbd class=pp>names</kbd>
|
||||
<samp class=pp>['Dora\n', 'Ethan\n', 'Wesley\n', 'John\n', 'Anne\n',
|
||||
'Mike\n', 'Chris\n', 'Sarah\n', 'Alex\n', 'Lizzie\n']</samp>
|
||||
<a><samp class=p>>>> </samp><kbd class=pp>names = [name.rstrip() for name in names]</kbd> <span class=u>②</span></a>
|
||||
<a><samp class=p>>>> </samp><kbd class=pp>names = [name.rstrip() for name in names]</kbd> <span class=u>②</span></a>
|
||||
<samp class=p>>>> </samp><kbd class=pp>names</kbd>
|
||||
<samp class=pp>['Dora', 'Ethan', 'Wesley', 'John', 'Anne',
|
||||
'Mike', 'Chris', 'Sarah', 'Alex', 'Lizzie']</samp>
|
||||
<a><samp class=p>>>> </samp><kbd class=pp>names = sorted(names)</kbd> <span class=u>③</span></a>
|
||||
<a><samp class=p>>>> </samp><kbd class=pp>names = sorted(names)</kbd> <span class=u>③</span></a>
|
||||
<samp class=p>>>> </samp><kbd class=pp>names</kbd>
|
||||
<samp class=pp>['Alex', 'Anne', 'Chris', 'Dora', 'Ethan',
|
||||
'John', 'Lizzie', 'Mike', 'Sarah', 'Wesley']</samp>
|
||||
<a><samp class=p>>>> </samp><kbd class=pp>names = sorted(names, key=len)</kbd> <span class=u>④</span></a>
|
||||
<a><samp class=p>>>> </samp><kbd class=pp>names = sorted(names, key=len)</kbd> <span class=u>④</span></a>
|
||||
<samp class=p>>>> </samp><kbd class=pp>names</kbd>
|
||||
<samp class=pp>['Alex', 'Anne', 'Dora', 'John', 'Mike',
|
||||
'Chris', 'Ethan', 'Sarah', 'Lizzie', 'Wesley']</samp></pre>
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
* TODO 2nd draft Refactoring
|
||||
* TODO 1st draft Advanced Classes
|
||||
* TODO 1st draft Files
|
||||
SCHEDULED: <2009-07-16 Thu>
|
||||
** Reading from text files
|
||||
*** Opening a file (to read)
|
||||
*** Character encoding
|
||||
|
||||
+1
-1
@@ -15,7 +15,7 @@ def build_match_and_apply_functions(pattern, search, replace):
|
||||
return [matches_rule, apply_rule]
|
||||
|
||||
rules = []
|
||||
with open('plural4-rules.txt') as pattern_file:
|
||||
with open('plural4-rules.txt', encoding='utf-8') as pattern_file:
|
||||
for line in pattern_file:
|
||||
pattern, search, replace = line.split(None, 3)
|
||||
rules.append(build_match_and_apply_functions(
|
||||
|
||||
+1
-1
@@ -15,7 +15,7 @@ def build_match_and_apply_functions(pattern, search, replace):
|
||||
return [matches_rule, apply_rule]
|
||||
|
||||
def rules(rules_filename):
|
||||
with open(rules_filename) as pattern_file:
|
||||
with open(rules_filename, encoding='utf-8') as pattern_file:
|
||||
for line in pattern_file:
|
||||
pattern, search, replace = line.split(None, 3)
|
||||
yield build_match_and_apply_functions(pattern, search, replace)
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ class LazyRules:
|
||||
rules_filename = 'plural6-rules.txt'
|
||||
|
||||
def __iter__(self):
|
||||
self.pattern_file = open(self.rules_filename)
|
||||
self.pattern_file = open(self.rules_filename, encoding='utf-8')
|
||||
self.cache = []
|
||||
self.cache_index = 0
|
||||
return self
|
||||
|
||||
+10
-10
@@ -22,14 +22,6 @@ body{counter-reset:h1 12}
|
||||
<h2 id=divingin>Diving In</h2>
|
||||
<p class=f>FIXME
|
||||
|
||||
<!--
|
||||
FIXME move this to character encoding section
|
||||
|
||||
OK, so a string is a sequence of Unicode characters. But a file on disk is not a sequence of Unicode characters; a file on disk is a sequence of bytes. So if you read a “text file” from disk, how does Python convert that sequence of bytes into a sequence of characters? The answer is that it decodes the bytes according to a specific character encoding algorithm, and returns a sequence of Unicode characters, otherwise known as a string.
|
||||
|
||||
"The default encoding is platform dependent (whatever locale.getpreferredencoding() returns)." -- http://docs.python.org/3.1/library/io.html
|
||||
-->
|
||||
|
||||
<h2 id=reading-from-text-files>Reading From Text Files</h2>
|
||||
|
||||
<p>FIXME
|
||||
@@ -41,7 +33,11 @@ open(..., 'r', encoding='...')
|
||||
|
||||
<h3 id=encoding>Character Encoding Rears Its Ugly Head</h3>
|
||||
|
||||
<p>FIXME
|
||||
<!--
|
||||
OK, so a string is a sequence of Unicode characters. But a file on disk is not a sequence of Unicode characters; a file on disk is a sequence of bytes. So if you read a “text file” from disk, how does Python convert that sequence of bytes into a sequence of characters? The answer is that it decodes the bytes according to a specific character encoding algorithm, and returns a sequence of Unicode characters, otherwise known as a string.
|
||||
|
||||
"The default encoding is platform dependent (whatever locale.getpreferredencoding() returns)." -- http://docs.python.org/3.1/library/io.html
|
||||
-->
|
||||
|
||||
<h3 id=file-objects>File Objects</h3>
|
||||
|
||||
@@ -134,6 +130,10 @@ ValueError: I/O operation on closed file</samp>
|
||||
|
||||
<p>FIXME what's a "line"? (line endings discussion, universal line endings, etc.)
|
||||
|
||||
<!--
|
||||
A “line” of a text file is just what you think it is — a sequence of characters delimited by a carriage return. Of course, it can’t really be that simple, can it? Text files can use several different characters to mark the end of a line. Some use a carriage return character, others use a line feed character, and some use both characters at the end of every line. Python handles all of these cases automatically, so you can say, “Hey, I want to read this text file one line at a time” and it will Just Work.
|
||||
-->
|
||||
|
||||
<h2 id=write>Writing to Text Files</h2>
|
||||
|
||||
<p>FIXME
|
||||
@@ -195,7 +195,7 @@ test succeededline 2
|
||||
<li>At last, you handle your <code>IOError</code> exception. This could be the <code>IOError</code> exception raised by the call to <code>open</code>, <code>seek</code>, or <code>read</code>. Here, you really don’t care, because all you’re going to do is ignore it silently and continue. (Remember, <code>pass</code> is a Python statement that <a href="#fileinfo.class.simplest" title="Example 5.3. The Simplest Python Class">does nothing</a>.) That’s perfectly legal; “handling” an exception can mean explicitly doing nothing. It still counts as handled, and processing will continue normally on the next line of code after the <code>try...except</code> block.
|
||||
-->
|
||||
|
||||
<h2 id=binary-files>Binary Files</h2>
|
||||
<h2 id=binary>Binary Files</h2>
|
||||
|
||||
<p>FIXME
|
||||
|
||||
|
||||
+2
-2
@@ -296,7 +296,7 @@ rules = []
|
||||
|
||||
<p class=d>[<a href=examples/plural5.py>download <code>plural5.py</code></a>]
|
||||
<pre class=nd><code class=pp>def rules():
|
||||
with open('plural5-rules.txt') as pattern_file:
|
||||
with open('plural5-rules.txt', encoding='utf-8') as pattern_file:
|
||||
for line in pattern_file:
|
||||
pattern, search, replace = line.split(None, 3)
|
||||
yield build_match_and_apply_functions(pattern, search, replace)
|
||||
@@ -376,7 +376,7 @@ def plural(noun):
|
||||
<p>Let’s go back to <code>plural5.py</code> and see how this version of the <code>plural()</code> function works.
|
||||
|
||||
<pre><code class=pp>def rules(rules_filename):
|
||||
with open(rules_filename) as pattern_file:
|
||||
with open(rules_filename, encoding='utf-8') as pattern_file:
|
||||
for line in pattern_file:
|
||||
<a> pattern, search, replace = line.split(None, 3) <span class=u>①</span></a>
|
||||
<a> yield build_match_and_apply_functions(pattern, search, replace) <span class=u>②</span></a>
|
||||
|
||||
+4
-4
@@ -218,7 +218,7 @@ All three of these class methods, <code>__init__</code>, <code>__iter__</code>,
|
||||
rules_filename = 'plural6-rules.txt'
|
||||
|
||||
def __iter__(self):
|
||||
self.pattern_file = open(self.rules_filename)
|
||||
self.pattern_file = open(self.rules_filename, encoding='utf-8')
|
||||
self.cache = []
|
||||
self.cache_index = 0
|
||||
return self
|
||||
@@ -251,9 +251,9 @@ rules = LazyRules()</code></pre>
|
||||
<pre><code class=pp>class LazyRules:
|
||||
rules_filename = 'plural6-rules.txt'
|
||||
|
||||
<a> def __iter__(self): <span class=u>①</span></a>
|
||||
<a> self.pattern_file = open(self.rules_filename) <span class=u>②</span></a>
|
||||
<a> self.cache = [] <span class=u>③</span></a>
|
||||
<a> def __iter__(self): <span class=u>①</span></a>
|
||||
<a> self.pattern_file = open(self.rules_filename, encoding='utf-8') <span class=u>②</span></a>
|
||||
<a> self.cache = [] <span class=u>③</span></a>
|
||||
self.cache_index = 0</code></pre>
|
||||
<ol>
|
||||
<li>The <code>__iter__()</code> method is only going to be called once, after you instantiate the class, assign it to <var>rules</var>, and call <code>iter(rules)</code> to create the iterator.
|
||||
|
||||
Reference in New Issue
Block a user