From fe57cb02150e9fce96e2c34b054c17c39ff29687 Mon Sep 17 00:00:00 2001 From: Mark Pilgrim Date: Fri, 27 Mar 2009 09:38:50 -0500 Subject: [PATCH] mention sum() as a replacement for some instances of reduce() [h/t CJ] --- case-study-porting-chardet-to-python-3.html | 18 ++++++++++-------- chardet/chardet/codingstatemachine.pyc | Bin 2007 -> 2007 bytes chardet/chardet/latin1prober.py | 5 +---- chardet/chardet/latin1prober.pyc | Bin 3798 -> 3738 bytes porting-code-to-python-3-with-2to3.html | 1 - 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/case-study-porting-chardet-to-python-3.html b/case-study-porting-chardet-to-python-3.html index 8d651b4..7141079 100644 --- a/case-study-porting-chardet-to-python-3.html +++ b/case-study-porting-chardet-to-python-3.html @@ -659,8 +659,8 @@ for line in open(f, 'rb'):

Once you realize that, the solution is not difficult. Regular expressions defined with strings can search strings. Regular expressions defined with byte arrays can search byte arrays. To define a byte array pattern, we simply change the type of the argument we use to define the regular expression to a byte array. (There is one other case of this same problem, on the very next line.)

  class UniversalDetector:
       def __init__(self):
--         self._highBitDetector = re.compile(b'[\x80-\xFF]')
--         self._escDetector = re.compile(b'(\033|~{)')
+-         self._highBitDetector = re.compile(r'[\x80-\xFF]')
+-         self._escDetector = re.compile(r'(\033|~{)')
 +         self._highBitDetector = re.compile(b'[\x80-\xFF]')
 +         self._escDetector = re.compile(b'(\033|~{)')
           self._mEscCharSetProber = None
@@ -1108,22 +1108,24 @@ tests\Big5\0804.blogspot.com.xml
   File "C:\home\chardet\chardet\latin1prober.py", line 126, in get_confidence
     total = reduce(operator.add, self._mFreqCounter)
 NameError: global name 'reduce' is not defined
-

According to the official What's New In Python 3.0 guide, the reduce() function has been moved out of the global namespace and into the functools module. Quoting the guide: "Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable." -

OK then, let's refactor it to use a for loop. +

According to the official What's New In Python 3.0 guide, the reduce() function has been moved out of the global namespace and into the functools module. Quoting the guide: "Use functools.reduce() if you really need it; however, 99 percent of the time an explicit for loop is more readable." You can read more about the decision from Guido van Rossum's weblog: The fate of reduce() in Python 3000.

def get_confidence(self):
     if self.get_state() == constants.eNotMe:
         return 0.01
   
     total = reduce(operator.add, self._mFreqCounter)
-

The reduce() function takes two arguments — a function and a list (strictly speaking, any iterable object will do) — and applies the function cumulatively to each item of the list. In other words, this is a fancy and roundabout way of adding up all the items in a list and returning the result. It looks much more readable as a for loop. +

The reduce() function takes two arguments — a function and a list (strictly speaking, any iterable object will do) — and applies the function cumulatively to each item of the list. In other words, this is a fancy and roundabout way of adding up all the items in a list and returning the result. +

This monstrosity was so common that Python added a global sum() function (it has been a built-in since Python 2.3).

  def get_confidence(self):
       if self.get_state() == constants.eNotMe:
           return 0.01
   
 -     total = reduce(operator.add, self._mFreqCounter)
-+     total = 0
-+     for frequency in self._mFreqCounter:
-+         total += frequency
++ total = sum(self._mFreqCounter)
+

Since you're no longer using the operator module, you can remove that import from the top of the file as well. +

  from .charsetprober import CharSetProber
+  from . import constants
+- import operator

I CAN HAZ TESTZ?

C:\home\chardet> python test.py tests\*\*
 tests\ascii\howto.diveintomark.org.xml                       ascii with confidence 1.0
diff --git a/chardet/chardet/codingstatemachine.pyc b/chardet/chardet/codingstatemachine.pyc
index e02d8bb943cea67b396c3db757c53119868ed05b..36921e9d06c8e6f40b37f9bafbda8c937890e66c 100644
GIT binary patch
delta 16
Xcmcc4f1RJ*nunK5=)PpEXM_9

delta 16
Xcmcc4f1RJ*nunK*#jk23`vrCYC(;Cs

diff --git a/chardet/chardet/latin1prober.py b/chardet/chardet/latin1prober.py
index 7296fb9..6ecc022 100644
--- a/chardet/chardet/latin1prober.py
+++ b/chardet/chardet/latin1prober.py
@@ -28,7 +28,6 @@
 
 from .charsetprober import CharSetProber
 from . import constants
-import operator
 
 FREQ_CAT_NUM = 4
 
@@ -123,9 +122,7 @@ class Latin1Prober(CharSetProber):
         if self.get_state() == constants.eNotMe:
             return 0.01
   
-        total = 0
-        for frequency in self._mFreqCounter:
-            total += frequency
+        total = sum(self._mFreqCounter)
         if total < 0.01:
             confidence = 0.0
         else:
diff --git a/chardet/chardet/latin1prober.pyc b/chardet/chardet/latin1prober.pyc
index 4b38eff6b327ee8103224506434150ddb964071a..76b7e4ff17ddb900a2fd4abcf0e602a0b089c98b 100644
GIT binary patch
delta 1017
zcmc(eJxC)#6vzK>zhXAoxCc2gIm9A(Cs>G;MH(Tf*vNt1f<$pP8$9ns{l)hM&ww!CnGk?yL74Co5Ei@{2pwJ$BGGTnMZSgCw93JKjbf
zQc)hz+h{xi-y9w|J?+@`P-M?c>|=PcNvy<8*fYjHE+2t+r!6x={1YIrC~NRCHV64g
z1@M!j#?8l+7-OHBFy1uVq#i3%v)uoQUwe*XE{zJv%Rpq{&0<@m@EH)CBlP&fxe$*0
zrmtHDU(p%{_xdK$!C=+Q5ed0tT~clEX#F8-%HJ~|BAqGqm(+#+!mbO{fgTVL>KYmv
z+9pU4_9)PYSpz-7IiX8nxdF6o1~i;&Xf1(M1AY@aatyqCni_Bs5YP0gj{zYZ^JyoFH!EI`b=#5-bn7y6-$eKG&&u|_pnm`b3jw@mjNK^!|u@D9%Ic_%S#oU=o0vhf}
zp`DE~olWo$u#o!)0```IV55zlg`L>=_Ezz9EnVI`-ur#byk+J^{awAW6h?CG*8cSl
zraqg1W$t&LK5v{~)ei1EP!6;Urw16r>dr?>XZS_7$S`_>5a4O3h{Qn!4jJ;w#aX`B<8d9hSMgR(S0knyP>5
zKwB6Q9?oG1bZxBONiec7Bn&OInAu*$ZSFK-6X;7&X0Xva2g)7%j%qBJ``J0D@%yS|
z$O+aBZLr}_E3uVffy&6v6y~MhBFDV<7t0Y3wzhS3kZ!N^_j&G&FAEU}^g
XARV-K`vcRAzWH4S5=tmE%{cu5n*^3s

diff --git a/porting-code-to-python-3-with-2to3.html b/porting-code-to-python-3-with-2to3.html
index ec561b2..faafaab 100644
--- a/porting-code-to-python-3-with-2to3.html
+++ b/porting-code-to-python-3-with-2to3.html
@@ -495,7 +495,6 @@ for an_iterator in a_sequence_of_iterators:
 reduce(a, b, c)
-

The version of 2to3 that shipped with Python 3.0 would not fix the reduce() function automatically. The fix first appeared in the 2to3 script that shipped with Python 3.1.

apply() global function