diff --git a/case-study-porting-chardet-to-python-3.html b/case-study-porting-chardet-to-python-3.html index 8d651b4..7141079 100644 --- a/case-study-porting-chardet-to-python-3.html +++ b/case-study-porting-chardet-to-python-3.html @@ -659,8 +659,8 @@ for line in open(f, 'rb'):

Once you realize that, the solution is not difficult. Regular expressions defined with strings can search strings. Regular expressions defined with byte arrays can search byte arrays. To define a byte array pattern, we simply change the argument that defines the regular expression from a string to a byte array. (There is one other case of this same problem, on the very next line.)

  class UniversalDetector:
       def __init__(self):
--         self._highBitDetector = re.compile(b'[\x80-\xFF]')
--         self._escDetector = re.compile(b'(\033|~{)')
+-         self._highBitDetector = re.compile(r'[\x80-\xFF]')
+-         self._escDetector = re.compile(r'(\033|~{)')
 +         self._highBitDetector = re.compile(b'[\x80-\xFF]')
 +         self._escDetector = re.compile(b'(\033|~{)')
           self._mEscCharSetProber = None
@@ -1108,22 +1108,24 @@ tests\Big5\0804.blogspot.com.xml
   File "C:\home\chardet\chardet\latin1prober.py", line 126, in get_confidence
     total = reduce(operator.add, self._mFreqCounter)
 NameError: global name 'reduce' is not defined
-

According to the official What's New In Python 3.0 guide, the reduce() function has been moved out of the global namespace and into the functools module. Quoting the guide: "Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable." -

OK then, let's refactor it to use a for loop. +

According to the official What's New In Python 3.0 guide, the reduce() function has been moved out of the global namespace and into the functools module. Quoting the guide: "Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable." You can read more about the decision on Guido van Rossum's weblog: The fate of reduce() in Python 3000.

def get_confidence(self):
     if self.get_state() == constants.eNotMe:
         return 0.01
   
     total = reduce(operator.add, self._mFreqCounter)
-

The reduce() function takes two arguments — a function and a list (strictly speaking, any iterable object will do) — and applies the function cumulatively to each item of the list. In other words, this is a fancy and roundabout way of adding up all the items in a list and returning the result. It looks much more readable as a for loop. +

The reduce() function takes two arguments — a function and a list (strictly speaking, any iterable object will do) — and applies the function cumulatively to each item of the list. In other words, this is a fancy and roundabout way of adding up all the items in a list and returning the result. +

This monstrosity was so common that Python added a global sum() function.

  def get_confidence(self):
       if self.get_state() == constants.eNotMe:
           return 0.01
   
 -     total = reduce(operator.add, self._mFreqCounter)
-+     total = 0
-+     for frequency in self._mFreqCounter:
-+         total += frequency
++ total = sum(self._mFreqCounter)
+

Since you're no longer using the operator module, you can remove that import from the top of the file as well. +

  from .charsetprober import CharSetProber
+  from . import constants
+- import operator

I CAN HAZ TESTZ?

C:\home\chardet> python test.py tests\*\*
 tests\ascii\howto.diveintomark.org.xml                       ascii with confidence 1.0
diff --git a/chardet/chardet/codingstatemachine.pyc b/chardet/chardet/codingstatemachine.pyc
index e02d8bb..36921e9 100644
Binary files a/chardet/chardet/codingstatemachine.pyc and b/chardet/chardet/codingstatemachine.pyc differ
diff --git a/chardet/chardet/latin1prober.py b/chardet/chardet/latin1prober.py
index 7296fb9..6ecc022 100644
--- a/chardet/chardet/latin1prober.py
+++ b/chardet/chardet/latin1prober.py
@@ -28,7 +28,6 @@
 
 from .charsetprober import CharSetProber
 from . import constants
-import operator
 
 FREQ_CAT_NUM = 4
 
@@ -123,9 +122,7 @@ class Latin1Prober(CharSetProber):
         if self.get_state() == constants.eNotMe:
             return 0.01
   
-        total = 0
-        for frequency in self._mFreqCounter:
-            total += frequency
+        total = sum(self._mFreqCounter)
         if total < 0.01:
             confidence = 0.0
         else:
diff --git a/chardet/chardet/latin1prober.pyc b/chardet/chardet/latin1prober.pyc
index 4b38eff..76b7e4f 100644
Binary files a/chardet/chardet/latin1prober.pyc and b/chardet/chardet/latin1prober.pyc differ
diff --git a/porting-code-to-python-3-with-2to3.html b/porting-code-to-python-3-with-2to3.html
index ec561b2..faafaab 100644
--- a/porting-code-to-python-3-with-2to3.html
+++ b/porting-code-to-python-3-with-2to3.html
@@ -495,7 +495,6 @@ for an_iterator in a_sequence_of_iterators:
 reduce(a, b, c)
-

The version of 2to3 that shipped with Python 3.0 would not fix the reduce() function automatically. The fix first appeared in the 2to3 script that shipped with Python 3.1.

apply() global function