shortened #unorderabletypes

This commit is contained in:
Mark Pilgrim
2009-08-11 16:34:00 -04:00
parent 8d86aaa2dd
commit 1d0c3e4ebf
+1 -128
View File
@@ -669,135 +669,8 @@ TypeError: unorderable types: int() >= str()</samp></pre>
<ins>+ return aBuf[1] - 0xA1, charLen</ins>
return -1, charLen</code></pre>
<p>Searching the entire codebase for occurrences of the <code>ord()</code> function uncovers the same problem in <code>chardistribution.py</code>:
<pre class='nd screen'><samp class=p>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>
<samp>tests\ascii\howto.diveintomark.org.xml ascii with confidence 1.0
tests\Big5\0804.blogspot.com.xml</samp>
<samp class=traceback>Traceback (most recent call last):
File "test.py", line 10, in &lt;module>
u.feed(line)
File "C:\home\chardet\chardet\universaldetector.py", line 117, in feed
if prober.feed(aBuf) == constants.eFoundIt:
File "C:\home\chardet\chardet\charsetgroupprober.py", line 60, in feed
st = prober.feed(aBuf)
File "C:\home\chardet\chardet\sjisprober.py", line 72, in feed
self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen)
File "C:\home\chardet\chardet\chardistribution.py", line 56, in feed
order = self.get_order(aStr)
File "C:\home\chardet\chardet\chardistribution.py", line 174, in get_order
if (aStr[0] >= '\x81') and (aStr[0] &lt;= '\x9F'):
TypeError: unorderable types: int() >= str()</samp></pre>
<p>The fix is the same:
<pre class='nd pp'><code> class EUCTWDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = EUCTWCharToFreqOrder
self._mTableSize = EUCTW_TABLE_SIZE
self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
<p>Searching the entire codebase for occurrences of the <code>ord()</code> function uncovers the same problem in <code>chardistribution.py</code> (specifically, in the <code>EUCTWDistributionAnalysis</code>, <code>EUCKRDistributionAnalysis</code>, <code>GB2312DistributionAnalysis</code>, <code>Big5DistributionAnalysis</code>, <code>SJISDistributionAnalysis</code>, and <code>EUCJPDistributionAnalysis</code> classes). In each case, the fix is similar to the change we made to the <code>EUCJPContextAnalysis</code> and <code>SJISContextAnalysis</code> classes in <code>jpcntx.py</code>.
<del>- def get_order(self, aStr):</del>
<del>- if aStr[0] >= '\xC4':</del>
<del>- return 94 * (ord(aStr[0]) - 0xC4) + ord(aStr[1]) - 0xA1</del>
<ins>+ def get_order(self, aBuf):</ins>
<ins>+ if aBuf[0] >= 0xC4:</ins>
<ins>+ return 94 * (aBuf[0] - 0xC4) + aBuf[1] - 0xA1</ins>
else:
return -1
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = EUCKRCharToFreqOrder
self._mTableSize = EUCKR_TABLE_SIZE
self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
<del>- def get_order(self, aStr):</del>
<del>- if aStr[0] >= '\xB0':</del>
<del>- return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1</del>
<ins>+ def get_order(self, aBuf):</ins>
<ins>+ if aBuf[0] >= 0xB0:</ins>
<ins>+ return 94 * (aBuf[0] - 0xB0) + aBuf[1] - 0xA1</ins>
else:
return -1;
class GB2312DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = GB2312CharToFreqOrder
self._mTableSize = GB2312_TABLE_SIZE
self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
<del>- def get_order(self, aStr):</del>
<del>- if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'):</del>
<del>- return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1</del>
<ins>+ def get_order(self, aBuf):</ins>
<ins>+ if (aBuf[0] >= 0xB0) and (aBuf[1] >= 0xA1):</ins>
<ins>+ return 94 * (aBuf[0] - 0xB0) + aBuf[1] - 0xA1</ins>
else:
return -1;
class Big5DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = Big5CharToFreqOrder
self._mTableSize = BIG5_TABLE_SIZE
self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
<del>- def get_order(self, aStr):</del>
<del>- if aStr[0] >= '\xA4':</del>
<del>- if aStr[1] >= '\xA1':</del>
<del>- return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0xA1 + 63</del>
<ins>+ def get_order(self, aBuf):</ins>
<ins>+ if aBuf[0] >= 0xA4:</ins>
<ins>+ if aBuf[1] >= 0xA1:</ins>
<ins>+ return 157 * (aBuf[0] - 0xA4) + aBuf[1] - 0xA1 + 63</ins>
else:
<del>- return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0x40</del>
<ins>+ return 157 * (aBuf[0] - 0xA4) + aBuf[1] - 0x40</ins>
else:
return -1
class SJISDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = JISCharToFreqOrder
self._mTableSize = JIS_TABLE_SIZE
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
<del>- def get_order(self, aStr):</del>
<del>- if (aStr[0] >= '\x81') and (aStr[0] &lt;= '\x9F'):</del>
<del>- order = 188 * (ord(aStr[0]) - 0x81)</del>
<del>- elif (aStr[0] >= '\xE0') and (aStr[0] &lt;= '\xEF'):</del>
<del>- order = 188 * (ord(aStr[0]) - 0xE0 + 31)</del>
<ins>+ def get_order(self, aBuf):</ins>
<ins>+ if (aBuf[0] >= 0x81) and (aBuf[0] &lt;= 0x9F):</ins>
<ins>+ order = 188 * (aBuf[0] - 0x81)</ins>
<ins>+ elif (aBuf[0] >= 0xE0) and (aBuf[0] &lt;= 0xEF):</ins>
<ins>+ order = 188 * (aBuf[0] - 0xE0 + 31)</ins>
else:
return -1;
<del>- order = order + ord(aStr[1]) - 0x40</del>
<del>- if aStr[1] > '\x7F':</del>
<ins>+ order = order + aBuf[1] - 0x40</ins>
<ins>+ if aBuf[1] > 0x7F:</ins>
order =- 1
return order
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
CharDistributionAnalysis.__init__(self)
self._mCharToFreqOrder = JISCharToFreqOrder
self._mTableSize = JIS_TABLE_SIZE
self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
<del>- def get_order(self, aStr):</del>
<del>- if aStr[0] >= '\xA0':</del>
<del>- return 94 * (ord(aStr[0]) - 0xA1) + ord(aStr[1]) - 0xA1</del>
<ins>+ def get_order(self, aBuf):</ins>
<ins>+ if aBuf[0] >= 0xA0:</ins>
<ins>+ return 94 * (aBuf[0] - 0xA1) + aBuf[1] - 0xA1</ins>
else:
return -1</code></pre>
<h3 id=reduceisnotdefined>Global name <code>'reduce'</code> is not defined</h3>
<p>Once more into the breach&hellip;
<pre class='nd screen'><samp class=p>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>