diff --git a/case-study-porting-chardet-to-python-3.html b/case-study-porting-chardet-to-python-3.html index 8d651b4..7141079 100644 --- a/case-study-porting-chardet-to-python-3.html +++ b/case-study-porting-chardet-to-python-3.html @@ -659,8 +659,8 @@ for line in open(f, 'rb'):
Once you realize that, the solution is not difficult. Regular expressions defined with strings can search strings. Regular expressions defined with byte arrays can search byte arrays. To define a byte array pattern, we simply change the type of the argument we use to define the regular expression to a byte array. (There is one other case of this same problem, on the very next line.)
class UniversalDetector:
def __init__(self):
-- self._highBitDetector = re.compile(b'[\x80-\xFF]')
-- self._escDetector = re.compile(b'(\033|~{)')
+- self._highBitDetector = re.compile(r'[\x80-\xFF]')
+- self._escDetector = re.compile(r'(\033|~{)')
+ self._highBitDetector = re.compile(b'[\x80-\xFF]')
+ self._escDetector = re.compile(b'(\033|~{)')
self._mEscCharSetProber = None
@@ -1108,22 +1108,24 @@ tests\Big5\0804.blogspot.com.xml
File "C:\home\chardet\chardet\latin1prober.py", line 126, in get_confidence
total = reduce(operator.add, self._mFreqCounter)
NameError: global name 'reduce' is not defined
-According to the official What's New In Python 3.0 guide, the reduce() function has been moved out of the global namespace and into the functools module. Quoting the guide: "Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable."
-
OK then, let's refactor it to use a for loop.
+
According to the official What's New In Python 3.0 guide, the reduce() function has been moved out of the global namespace and into the functools module. Quoting the guide: "Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable." You can read more about the decision from Guido van Rossum's weblog: The fate of reduce() in Python 3000.
def get_confidence(self):
if self.get_state() == constants.eNotMe:
return 0.01
total = reduce(operator.add, self._mFreqCounter)
-The reduce() function takes two arguments — a function and a list (strictly speaking, any iterable object will do) — and applies the function cumulatively to each item of the list. In other words, this is a fancy and roundabout way of adding up all the items in a list and returning the result. It looks much more readable as a for loop.
+
The reduce() function takes two arguments — a function and a list (strictly speaking, any iterable object will do) — and applies the function cumulatively to each item of the list. In other words, this is a fancy and roundabout way of adding up all the items in a list and returning the result.
+
This monstrosity was so common that Python added a global sum() function.
def get_confidence(self):
if self.get_state() == constants.eNotMe:
return 0.01
- total = reduce(operator.add, self._mFreqCounter)
-+ total = 0
-+ for frequency in self._mFreqCounter:
-+ total += frequency
++ total = sum(self._mFreqCounter)
+Since you're no longer using the operator module, you can remove that import from the top of the file as well.
+
from .charsetprober import CharSetProber
+ from . import constants
+- import operator
I CAN HAZ TESTZ?
C:\home\chardet> python test.py tests\*\*
tests\ascii\howto.diveintomark.org.xml ascii with confidence 1.0
diff --git a/chardet/chardet/codingstatemachine.pyc b/chardet/chardet/codingstatemachine.pyc
index e02d8bb..36921e9 100644
Binary files a/chardet/chardet/codingstatemachine.pyc and b/chardet/chardet/codingstatemachine.pyc differ
diff --git a/chardet/chardet/latin1prober.py b/chardet/chardet/latin1prober.py
index 7296fb9..6ecc022 100644
--- a/chardet/chardet/latin1prober.py
+++ b/chardet/chardet/latin1prober.py
@@ -28,7 +28,6 @@
from .charsetprober import CharSetProber
from . import constants
-import operator
FREQ_CAT_NUM = 4
@@ -123,9 +122,7 @@ class Latin1Prober(CharSetProber):
if self.get_state() == constants.eNotMe:
return 0.01
- total = 0
- for frequency in self._mFreqCounter:
- total += frequency
+ total = sum(self._mFreqCounter)
if total < 0.01:
confidence = 0.0
else:
diff --git a/chardet/chardet/latin1prober.pyc b/chardet/chardet/latin1prober.pyc
index 4b38eff..76b7e4f 100644
Binary files a/chardet/chardet/latin1prober.pyc and b/chardet/chardet/latin1prober.pyc differ
diff --git a/porting-code-to-python-3-with-2to3.html b/porting-code-to-python-3-with-2to3.html
index ec561b2..faafaab 100644
--- a/porting-code-to-python-3-with-2to3.html
+++ b/porting-code-to-python-3-with-2to3.html
@@ -495,7 +495,6 @@ for an_iterator in a_sequence_of_iterators:
reduce(a, b, c)
-☞The version of
2to3 that shipped with Python 3.0 would not fix the reduce() function automatically. The fix first appeared in the 2to3 script that shipped with Python 3.1.
apply() global function