From 1d0c3e4ebff9b5af03e72e6a394332453e382312 Mon Sep 17 00:00:00 2001 From: Mark Pilgrim Date: Tue, 11 Aug 2009 16:34:00 -0400 Subject: [PATCH] shortened #unorderabletypes --- case-study-porting-chardet-to-python-3.html | 129 +------------------- 1 file changed, 1 insertion(+), 128 deletions(-) diff --git a/case-study-porting-chardet-to-python-3.html b/case-study-porting-chardet-to-python-3.html index 9bc8ed7..d6f121c 100755 --- a/case-study-porting-chardet-to-python-3.html +++ b/case-study-porting-chardet-to-python-3.html @@ -669,135 +669,8 @@ TypeError: unorderable types: int() >= str() + return aBuf[1] - 0xA1, charLen return -1, charLen -

Searching the entire codebase for occurrences of the ord() function uncovers the same problem in chardistribution.py: -

C:\home\chardet> python test.py tests\*\*
-tests\ascii\howto.diveintomark.org.xml                       ascii with confidence 1.0
-tests\Big5\0804.blogspot.com.xml
-Traceback (most recent call last):
-  File "test.py", line 10, in <module>
-    u.feed(line)
-  File "C:\home\chardet\chardet\universaldetector.py", line 117, in feed
-    if prober.feed(aBuf) == constants.eFoundIt:
-  File "C:\home\chardet\chardet\charsetgroupprober.py", line 60, in feed
-    st = prober.feed(aBuf)
-  File "C:\home\chardet\chardet\sjisprober.py", line 72, in feed
-    self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen)
-  File "C:\home\chardet\chardet\chardistribution.py", line 56, in feed
-    order = self.get_order(aStr)
-  File "C:\home\chardet\chardet\chardistribution.py", line 174, in get_order
-    if (aStr[0] >= '\x81') and (aStr[0] <= '\x9F'):
-TypeError: unorderable types: int() >= str()
-

The fix is the same: -

  class EUCTWDistributionAnalysis(CharDistributionAnalysis):
-      def __init__(self):
-          CharDistributionAnalysis.__init__(self)
-          self._mCharToFreqOrder = EUCTWCharToFreqOrder
-          self._mTableSize = EUCTW_TABLE_SIZE
-          self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
+

Searching the entire codebase for occurrences of the ord() function uncovers the same problem in chardistribution.py (specifically, in the EUCTWDistributionAnalysis, EUCKRDistributionAnalysis, GB2312DistributionAnalysis, Big5DistributionAnalysis, SJISDistributionAnalysis, and EUCJPDistributionAnalysis classes). In each case, the fix is similar to the change we made to the EUCJPContextAnalysis and SJISContextAnalysis classes in jpcntx.py. -- def get_order(self, aStr): -- if aStr[0] >= '\xC4': -- return 94 * (ord(aStr[0]) - 0xC4) + ord(aStr[1]) - 0xA1 -+ def get_order(self, aBuf): -+ if aBuf[0] >= 0xC4: -+ return 94 * (aBuf[0] - 0xC4) + aBuf[1] - 0xA1 - else: - return -1 - - class EUCKRDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = EUCKRCharToFreqOrder - self._mTableSize = EUCKR_TABLE_SIZE - self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO - -- def get_order(self, aStr): -- if aStr[0] >= '\xB0': -- return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1 -+ def get_order(self, aBuf): -+ if aBuf[0] >= '\xB0': -+ return 94 * (aBuf[0] - 0xB0) + aBuf[1] - 0xA1 - else: - return -1; - - class GB2312DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = GB2312CharToFreqOrder - self._mTableSize = GB2312_TABLE_SIZE - self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO - -- def get_order(self, aStr): -- if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'): -- return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1 -+ def get_order(self, aBuf): -+ if (aBuf[0] >= 0xB0) and (aBuf[1] >= 0xA1): -+ return 94 * (aBuf[0] - 0xB0) + aBuf[1] - 0xA1 - else: - return -1; - - class Big5DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = Big5CharToFreqOrder - self._mTableSize = BIG5_TABLE_SIZE - 
self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO - -- def get_order(self, aStr): -- if aStr[0] >= '\xA4': -- if aStr[1] >= '\xA1': -- return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0xA1 + 63 -+ def get_order(self, aBuf): -+ if aBuf[0] >= 0xA4: -+ if aBuf[1] >= 0xA1: -+ return 157 * (aBuf[0] - 0xA4) + aBuf[1] - 0xA1 + 63 - else: -- return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0x40 -+ return 157 * (aBuf[0] - 0xA4) + aBuf[1] - 0x40 - else: - return -1 - - class SJISDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = JISCharToFreqOrder - self._mTableSize = JIS_TABLE_SIZE - self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO - -- def get_order(self, aStr): -- if (aStr[0] >= '\x81') and (aStr[0] <= '\x9F'): -- order = 188 * (ord(aStr[0]) - 0x81) -- elif (aStr[0] >= '\xE0') and (aStr[0] <= '\xEF'): -- order = 188 * (ord(aStr[0]) - 0xE0 + 31) -+ def get_order(self, aBuf): -+ if (aBuf[0] >= 0x81) and (aBuf[0] <= 0x9F): -+ order = 188 * (aBuf[0] - 0x81) -+ elif (aBuf[0] >= 0xE0) and (aBuf[0] <= 0xEF): -+ order = 188 * (aBuf[0] - 0xE0 + 31) - else: - return -1; -- order = order + ord(aStr[1]) - 0x40 -- if aStr[1] > '\x7F': -+ order = order + aBuf[1] - 0x40 -+ if aBuf[1] > 0x7F: - order =- 1 - return order - - class EUCJPDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - CharDistributionAnalysis.__init__(self) - self._mCharToFreqOrder = JISCharToFreqOrder - self._mTableSize = JIS_TABLE_SIZE - self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO - -- def get_order(self, aStr): -- if aStr[0] >= '\xA0': -- return 94 * (ord(aStr[0]) - 0xA1) + ord(aStr[1]) - 0xA1 -+ def get_order(self, aBuf): -+ if aBuf[0] >= 0xA0: -+ return 94 * (aBuf[0] - 0xA1) + aBuf[1] - 0xA1 - else: - return -1

Global name 'reduce' is not defined

Once more into the breach…

C:\home\chardet> python test.py tests\*\*