fixes for python2

This commit is contained in:
Kenneth Reitz
2012-11-27 13:08:16 -08:00
parent 0af19a05ce
commit ccd14ddc81
3 changed files with 19 additions and 12 deletions
@@ -13,12 +13,12 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -41,6 +41,9 @@ class CodingStateMachine:
# for each byte we get its class
# if it is the first byte, we also get the byte length
# PY3K: aBuf is a byte stream, so c is an int, not a byte
if hasattr(c, 'encode'):
c = int(c.encode('hex'), 16)
byteCls = self._mModel['classTable'][c]
if self._mCurrentState == eStart:
self._mCurrentBytePos = 0
+9 -7
View File
@@ -14,12 +14,12 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -76,9 +76,9 @@ Latin1_CharToClass = ( \
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
)
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = ( \
# UDF OTH ASC ASS ACV ACO ASV ASO
@@ -108,6 +108,8 @@ class Latin1Prober(CharSetProber):
def feed(self, aBuf):
aBuf = self.filter_with_english_letters(aBuf)
for c in aBuf:
if hasattr(c, 'encode'):
c = int(c.encode('hex'), 16)
charClass = Latin1_CharToClass[c]
freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) + charClass]
if freq == 0:
@@ -121,7 +123,7 @@ class Latin1Prober(CharSetProber):
def get_confidence(self):
if self.get_state() == constants.eNotMe:
return 0.01
total = sum(self._mFreqCounter)
if total < 0.01:
confidence = 0.0
@@ -129,7 +131,7 @@ class Latin1Prober(CharSetProber):
confidence = (self._mFreqCounter[3] / total) - (self._mFreqCounter[1] * 20.0 / total)
if confidence < 0.0:
confidence = 0.0
# lower the confidence of latin1 so that other, more accurate detectors
# lower the confidence of latin1 so that other, more accurate detectors
# can take priority.
confidence = confidence * 0.5
return confidence
+5 -3
View File
@@ -14,12 +14,12 @@
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
@@ -38,7 +38,7 @@ SYMBOL_CAT_ORDER = 250
NUMBER_OF_SEQ_CAT = 4
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
#NEGATIVE_CAT = 0
class SingleByteCharSetProber(CharSetProber):
def __init__(self, model, reversed=False, nameProber=None):
CharSetProber.__init__(self)
@@ -68,6 +68,8 @@ class SingleByteCharSetProber(CharSetProber):
if not aLen:
return self.get_state()
for c in aBuf:
if hasattr(c, 'encode'):
c = int(c.encode('hex'), 16)
order = self._mModel['charToOrderMap'][c]
if order < SYMBOL_CAT_ORDER:
self._mTotalChar += 1