diff --git a/case-study-porting-chardet-to-python-3.html b/case-study-porting-chardet-to-python-3.html index e433412..9bc8ed7 100755 --- a/case-study-porting-chardet-to-python-3.html +++ b/case-study-porting-chardet-to-python-3.html @@ -613,9 +613,9 @@ TypeError: unorderable types: int() >= str()

In this case, there’s no need to make the code more complicated by adding an explicit coercion. aStr[0] yields an integer; the things you’re comparing to are all constants. Let’s change them from 1-character strings to integers. And while we’re at it, let’s change aStr to aBuf, since it’s not actually a string.

  class SJISContextAnalysis(JapaneseContextAnalysis):
 -     def get_order(self, aStr):
--      if not aStr: return -1, 1
+-      if not aStr: return -1, 1
 +     def get_order(self, aBuf):
-+      if not aBuf: return -1, 1
++      if not aBuf: return -1, 1
           # find out current char's byte length
 -         if ((aStr[0] >= '\x81') and (aStr[0] <= '\x9F')) or \
 -            ((aStr[0] >= '\xE0') and (aStr[0] <= '\xFC')):
@@ -626,12 +626,12 @@ TypeError: unorderable types: int() >= str()
charLen = 1 # return its order if it is hiragana -- if len(aStr) > 1: +- if len(aStr) > 1: - if (aStr[0] == '\202') and \ - (aStr[1] >= '\x9F') and \ - (aStr[1] <= '\xF1'): - return ord(aStr[1]) - 0x9F, charLen -+ if len(aBuf) > 1: ++ if len(aBuf) > 1: + if (aBuf[0] == 0x82) and \ + (aBuf[1] >= 0x9F) and \ + (aBuf[1] <= 0xF1): @@ -641,9 +641,9 @@ TypeError: unorderable types: int() >= str() class EUCJPContextAnalysis(JapaneseContextAnalysis): - def get_order(self, aStr): -- if not aStr: return -1, 1 +- if not aStr: return -1, 1 + def get_order(self, aBuf): -+ if not aBuf: return -1, 1 ++ if not aBuf: return -1, 1 # find out current char's byte length - if (aStr[0] == '\x8E') or \ - ((aStr[0] >= '\xA1') and (aStr[0] <= '\xFE')): @@ -657,12 +657,12 @@ TypeError: unorderable types: int() >= str() charLen = 1 # return its order if it is hiragana -- if len(aStr) > 1: +- if len(aStr) > 1: - if (aStr[0] == '\xA4') and \ - (aStr[1] >= '\xA1') and \ - (aStr[1] <= '\xF3'): - return ord(aStr[1]) - 0xA1, charLen -+ if len(aBuf) > 1: ++ if len(aBuf) > 1: + if (aBuf[0] == 0xA4) and \ + (aBuf[1] >= 0xA1) and \ + (aBuf[1] <= 0xF3):