From 1d0c3e4ebff9b5af03e72e6a394332453e382312 Mon Sep 17 00:00:00 2001
From: Mark Pilgrim <mark@diveintomark.org>
Date: Tue, 11 Aug 2009 16:34:00 -0400
Subject: [PATCH] shortened #unorderabletypes

---
 case-study-porting-chardet-to-python-3.html | 129 +-------------------
 1 file changed, 1 insertion(+), 128 deletions(-)
diff --git a/case-study-porting-chardet-to-python-3.html b/case-study-porting-chardet-to-python-3.html
index 9bc8ed7..d6f121c 100755
--- a/case-study-porting-chardet-to-python-3.html
+++ b/case-study-porting-chardet-to-python-3.html
@@ -669,135 +669,8 @@ TypeError: unorderable types: int() >= str()</samp></pre>
 <ins>+               return aBuf[1] - 0xA1, charLen</ins>
 
         return -1, charLen</code></pre>
-<p>Searching the entire codebase for occurrences of the <code>ord()</code> function uncovers the same problem in <code>chardistribution.py</code>:
-<pre class='nd screen'><samp class=p>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>
-<samp>tests\ascii\howto.diveintomark.org.xml                       ascii with confidence 1.0
-tests\Big5\0804.blogspot.com.xml</samp>
-<samp class=traceback>Traceback (most recent call last):
-  File "test.py", line 10, in &lt;module>
-    u.feed(line)
-  File "C:\home\chardet\chardet\universaldetector.py", line 117, in feed
-    if prober.feed(aBuf) == constants.eFoundIt:
-  File "C:\home\chardet\chardet\charsetgroupprober.py", line 60, in feed
-    st = prober.feed(aBuf)
-  File "C:\home\chardet\chardet\sjisprober.py", line 72, in feed
-    self._mDistributionAnalyzer.feed(aBuf[i - 1 : i + 1], charLen)
-  File "C:\home\chardet\chardet\chardistribution.py", line 56, in feed
-    order = self.get_order(aStr)
-  File "C:\home\chardet\chardet\chardistribution.py", line 174, in get_order
-    if (aStr[0] >= '\x81') and (aStr[0] &lt;= '\x9F'):
-TypeError: unorderable types: int() >= str()</samp></pre>
-<p>The fix is the same:
-<pre class='nd pp'><code>  class EUCTWDistributionAnalysis(CharDistributionAnalysis):
-      def __init__(self):
-          CharDistributionAnalysis.__init__(self)
-          self._mCharToFreqOrder = EUCTWCharToFreqOrder
-          self._mTableSize = EUCTW_TABLE_SIZE
-          self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
+<p>Searching the entire codebase for occurrences of the <code>ord()</code> function uncovers the same problem in <code>chardistribution.py</code> (specifically, in the <code>EUCTWDistributionAnalysis</code>, <code>EUCKRDistributionAnalysis</code>, <code>GB2312DistributionAnalysis</code>, <code>Big5DistributionAnalysis</code>, <code>SJISDistributionAnalysis</code>, and <code>EUCJPDistributionAnalysis</code> classes). In each case, the fix is similar to the change we made to the <code>EUCJPContextAnalysis</code> and <code>SJISContextAnalysis</code> classes in <code>jpcntx.py</code>.
 
-<del>-     def get_order(self, aStr):</del>
-<del>-         if aStr[0] >= '\xC4':</del>
-<del>-             return 94 * (ord(aStr[0]) - 0xC4) + ord(aStr[1]) - 0xA1</del>
-<ins>+     def get_order(self, aBuf):</ins>
-<ins>+         if aBuf[0] >= 0xC4:</ins>
-<ins>+             return 94 * (aBuf[0] - 0xC4) + aBuf[1] - 0xA1</ins>
-          else:
-              return -1
-
-  class EUCKRDistributionAnalysis(CharDistributionAnalysis):
-      def __init__(self):
-          CharDistributionAnalysis.__init__(self)
-          self._mCharToFreqOrder = EUCKRCharToFreqOrder
-          self._mTableSize = EUCKR_TABLE_SIZE
-          self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
-
-<del>-     def get_order(self, aStr):</del>
-<del>-         if aStr[0] >= '\xB0':</del>
-<del>-             return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1</del>
-<ins>+     def get_order(self, aBuf):</ins>
-<ins>+         if aBuf[0] >= '\xB0':</ins>
-<ins>+             return 94 * (aBuf[0] - 0xB0) + aBuf[1] - 0xA1</ins>
-          else:
-              return -1;
-
-  class GB2312DistributionAnalysis(CharDistributionAnalysis):
-      def __init__(self):
-          CharDistributionAnalysis.__init__(self)
-          self._mCharToFreqOrder = GB2312CharToFreqOrder
-          self._mTableSize = GB2312_TABLE_SIZE
-          self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
-
-<del>-     def get_order(self, aStr):</del>
-<del>-         if (aStr[0] >= '\xB0') and (aStr[1] >= '\xA1'):</del>
-<del>-             return 94 * (ord(aStr[0]) - 0xB0) + ord(aStr[1]) - 0xA1</del>
-<ins>+     def get_order(self, aBuf):</ins>
-<ins>+         if (aBuf[0] >= 0xB0) and (aBuf[1] >= 0xA1):</ins>
-<ins>+             return 94 * (aBuf[0] - 0xB0) + aBuf[1] - 0xA1</ins>
-          else:
-              return -1;
-
-  class Big5DistributionAnalysis(CharDistributionAnalysis):
-      def __init__(self):
-          CharDistributionAnalysis.__init__(self)
-          self._mCharToFreqOrder = Big5CharToFreqOrder
-          self._mTableSize = BIG5_TABLE_SIZE
-          self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO
-
-<del>-     def get_order(self, aStr):</del>
-<del>-         if aStr[0] >= '\xA4':</del>
-<del>-             if aStr[1] >= '\xA1':</del>
-<del>-                 return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0xA1 + 63</del>
-<ins>+     def get_order(self, aBuf):</ins>
-<ins>+         if aBuf[0] >= 0xA4:</ins>
-<ins>+             if aBuf[1] >= 0xA1:</ins>
-<ins>+                 return 157 * (aBuf[0] - 0xA4) + aBuf[1] - 0xA1 + 63</ins>
-              else:
-<del>-                 return 157 * (ord(aStr[0]) - 0xA4) + ord(aStr[1]) - 0x40</del>
-<ins>+                 return 157 * (aBuf[0] - 0xA4) + aBuf[1] - 0x40</ins>
-          else:
-              return -1
-
-  class SJISDistributionAnalysis(CharDistributionAnalysis):
-      def __init__(self):
-          CharDistributionAnalysis.__init__(self)
-          self._mCharToFreqOrder = JISCharToFreqOrder
-          self._mTableSize = JIS_TABLE_SIZE
-          self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
-
-<del>-     def get_order(self, aStr):</del>
-<del>-         if (aStr[0] >= '\x81') and (aStr[0] &lt;= '\x9F'):</del>
-<del>-             order = 188 * (ord(aStr[0]) - 0x81)</del>
-<del>-         elif (aStr[0] >= '\xE0') and (aStr[0] &lt;= '\xEF'):</del>
-<del>-             order = 188 * (ord(aStr[0]) - 0xE0 + 31)</del>
-<ins>+     def get_order(self, aBuf):</ins>
-<ins>+         if (aBuf[0] >= 0x81) and (aBuf[0] &lt;= 0x9F):</ins>
-<ins>+             order = 188 * (aBuf[0] - 0x81)</ins>
-<ins>+         elif (aBuf[0] >= 0xE0) and (aBuf[0] &lt;= 0xEF):</ins>
-<ins>+             order = 188 * (aBuf[0] - 0xE0 + 31)</ins>
-          else:
-              return -1;
-<del>-         order = order + ord(aStr[1]) - 0x40</del>
-<del>-         if aStr[1] > '\x7F':</del>
-<ins>+         order = order + aBuf[1] - 0x40</ins>
-<ins>+         if aBuf[1] > 0x7F:</ins>
-              order =- 1
-          return order
-
-  class EUCJPDistributionAnalysis(CharDistributionAnalysis):
-      def __init__(self):
-          CharDistributionAnalysis.__init__(self)
-          self._mCharToFreqOrder = JISCharToFreqOrder
-          self._mTableSize = JIS_TABLE_SIZE
-          self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO
-
-<del>-     def get_order(self, aStr):</del>
-<del>-         if aStr[0] >= '\xA0':</del>
-<del>-             return 94 * (ord(aStr[0]) - 0xA1) + ord(aStr[1]) - 0xA1</del>
-<ins>+     def get_order(self, aBuf):</ins>
-<ins>+         if aBuf[0] >= 0xA0:</ins>
-<ins>+             return 94 * (aBuf[0] - 0xA1) + aBuf[1] - 0xA1</ins>
-          else:
-              return -1</code></pre>
 <h3 id=reduceisnotdefined>Global name <code>'reduce'</code> is not defined</h3>
 <p>Once more into the breach&hellip;
 <pre class='nd screen'><samp class=p>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>