diff --git a/case-study-porting-chardet-to-python-3.html b/case-study-porting-chardet-to-python-3.html index 20afe38..77016ea 100755 --- a/case-study-porting-chardet-to-python-3.html +++ b/case-study-porting-chardet-to-python-3.html @@ -132,388 +132,11 @@ RefactoringTool: Skipping implicit fixer: ws_comma ENOUGH_DATA_THRESHOLD = 1024 SURE_YES = 0.99 ---- chardet\charsetgroupprober.py (original) -+++ chardet\charsetgroupprober.py (refactored) -@@ -26,7 +26,7 @@ - ######################### END LICENSE BLOCK ######################### - - import constants, sys --from charsetprober import CharSetProber -+from .charsetprober import CharSetProber - - class CharSetGroupProber(CharSetProber): - def __init__(self): ---- chardet\codingstatemachine.py (original) -+++ chardet\codingstatemachine.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from constants import eStart, eError, eItsMe -+from .constants import eStart, eError, eItsMe - - class CodingStateMachine: - def __init__(self, sm): ---- chardet\constants.py (original) -+++ chardet\constants.py (refactored) -@@ -38,10 +38,10 @@ - - SHORTCUT_THRESHOLD = 0.95 - --import __builtin__ -+import builtins - if not hasattr(__builtin__, 'False'): - False = 0 - True = 1 - else: -- False = __builtin__.False -- True = __builtin__.True -+ False = builtins.False -+ True = builtins.True ---- chardet\escprober.py (original) -+++ chardet\escprober.py (refactored) -@@ -26,9 +26,9 @@ - ######################### END LICENSE BLOCK ######################### - - import constants, sys --from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel --from charsetprober import CharSetProber --from codingstatemachine import CodingStateMachine -+from .escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel -+from .charsetprober import CharSetProber -+from .codingstatemachine import CodingStateMachine - - class EscCharSetProber(CharSetProber): - def __init__(self): ---- chardet\escsm.py (original) -+++ chardet\escsm.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from constants import eStart, eError, eItsMe -+from .constants import eStart, eError, eItsMe - - HZ_cls = ( \ - 1,0,0,0,0,0,0,0, # 00 - 07 ---- chardet\eucjpprober.py (original) -+++ chardet\eucjpprober.py (refactored) -@@ -26,12 +26,12 @@ - ######################### END LICENSE BLOCK ######################### - - import constants, sys --from constants import eStart, eError, eItsMe --from mbcharsetprober import MultiByteCharSetProber --from codingstatemachine import CodingStateMachine --from chardistribution import EUCJPDistributionAnalysis --from jpcntx import EUCJPContextAnalysis --from mbcssm import EUCJPSMModel -+from .constants import eStart, eError, eItsMe -+from .mbcharsetprober import MultiByteCharSetProber -+from .codingstatemachine import CodingStateMachine -+from .chardistribution import EUCJPDistributionAnalysis -+from .jpcntx import EUCJPContextAnalysis -+from .mbcssm import EUCJPSMModel - - class EUCJPProber(MultiByteCharSetProber): - def __init__(self): ---- chardet\euckrprober.py (original) -+++ chardet\euckrprober.py (refactored) -@@ -25,10 +25,10 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from mbcharsetprober import MultiByteCharSetProber --from codingstatemachine import CodingStateMachine --from chardistribution import EUCKRDistributionAnalysis --from mbcssm import EUCKRSMModel -+from .mbcharsetprober import MultiByteCharSetProber -+from .codingstatemachine import CodingStateMachine -+from .chardistribution import EUCKRDistributionAnalysis -+from .mbcssm import EUCKRSMModel - - class EUCKRProber(MultiByteCharSetProber): - def __init__(self): ---- chardet\euctwprober.py (original) -+++ chardet\euctwprober.py (refactored) -@@ -25,10 +25,10 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from mbcharsetprober import MultiByteCharSetProber --from codingstatemachine import CodingStateMachine --from chardistribution import EUCTWDistributionAnalysis --from mbcssm import EUCTWSMModel -+from .mbcharsetprober import MultiByteCharSetProber -+from .codingstatemachine import CodingStateMachine -+from .chardistribution import EUCTWDistributionAnalysis -+from .mbcssm import EUCTWSMModel - - class EUCTWProber(MultiByteCharSetProber): - def __init__(self): ---- chardet\gb2312prober.py (original) -+++ chardet\gb2312prober.py (refactored) -@@ -25,10 +25,10 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from mbcharsetprober import MultiByteCharSetProber --from codingstatemachine import CodingStateMachine --from chardistribution import GB2312DistributionAnalysis --from mbcssm import GB2312SMModel -+from .mbcharsetprober import MultiByteCharSetProber -+from .codingstatemachine import CodingStateMachine -+from .chardistribution import GB2312DistributionAnalysis -+from .mbcssm import GB2312SMModel - - class GB2312Prober(MultiByteCharSetProber): - def __init__(self): ---- chardet\hebrewprober.py (original) -+++ chardet\hebrewprober.py (refactored) -@@ -25,8 +25,8 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from charsetprober import CharSetProber --import constants -+from .charsetprober import CharSetProber -+from . import constants - - # This prober doesn't actually recognize a language or a charset. - # It is a helper prober for the use of the Hebrew model probers ---- chardet\jpcntx.py (original) -+++ chardet\jpcntx.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --import constants -+from . import constants - - NUM_OF_CATEGORY = 6 - DONT_KNOW = -1 ---- chardet\langbulgarianmodel.py (original) -+++ chardet\langbulgarianmodel.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --import constants -+from . import constants - - # 255: Control characters that usually does not exist in any text - # 254: Carriage/Return ---- chardet\langcyrillicmodel.py (original) -+++ chardet\langcyrillicmodel.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --import constants -+from . import constants - - # KOI8-R language model - # Character Mapping Table: ---- chardet\langgreekmodel.py (original) -+++ chardet\langgreekmodel.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --import constants -+from . import constants - - # 255: Control characters that usually does not exist in any text - # 254: Carriage/Return ---- chardet\langhebrewmodel.py (original) -+++ chardet\langhebrewmodel.py (refactored) -@@ -27,7 +27,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --import constants -+from . import constants - - # 255: Control characters that usually does not exist in any text - # 254: Carriage/Return ---- chardet\langhungarianmodel.py (original) -+++ chardet\langhungarianmodel.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --import constants -+from . import constants - - # 255: Control characters that usually does not exist in any text - # 254: Carriage/Return ---- chardet\langthaimodel.py (original) -+++ chardet\langthaimodel.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --import constants -+from . import constants - - # 255: Control characters that usually does not exist in any text - # 254: Carriage/Return ---- chardet\latin1prober.py (original) -+++ chardet\latin1prober.py (refactored) -@@ -26,8 +26,8 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from charsetprober import CharSetProber --import constants -+from .charsetprober import CharSetProber -+from . import constants - import operator - - FREQ_CAT_NUM = 4 ---- chardet\mbcharsetprober.py (original) -+++ chardet\mbcharsetprober.py (refactored) -@@ -28,8 +28,8 @@ - ######################### END LICENSE BLOCK ######################### - - import constants, sys --from constants import eStart, eError, eItsMe --from charsetprober import CharSetProber -+from .constants import eStart, eError, eItsMe -+from .charsetprober import CharSetProber - - class MultiByteCharSetProber(CharSetProber): - def __init__(self): ---- chardet\mbcsgroupprober.py (original) -+++ chardet\mbcsgroupprober.py (refactored) -@@ -27,14 +27,14 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from charsetgroupprober import CharSetGroupProber --from utf8prober import UTF8Prober --from sjisprober import SJISProber --from eucjpprober import EUCJPProber --from gb2312prober import GB2312Prober --from euckrprober import EUCKRProber --from big5prober import Big5Prober --from euctwprober import EUCTWProber -+from .charsetgroupprober import CharSetGroupProber -+from .utf8prober import UTF8Prober -+from .sjisprober import SJISProber -+from .eucjpprober import EUCJPProber -+from .gb2312prober import GB2312Prober -+from .euckrprober import EUCKRProber -+from .big5prober import Big5Prober -+from .euctwprober import EUCTWProber - - class MBCSGroupProber(CharSetGroupProber): - def __init__(self): ---- chardet\mbcssm.py (original) -+++ chardet\mbcssm.py (refactored) -@@ -25,7 +25,7 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from constants import eStart, eError, eItsMe -+from .constants import eStart, eError, eItsMe - - # BIG5 - ---- chardet\sbcharsetprober.py (original) -+++ chardet\sbcharsetprober.py (refactored) -@@ -27,7 +27,7 @@ - ######################### END LICENSE BLOCK ######################### - - import constants, sys --from charsetprober import CharSetProber -+from .charsetprober import CharSetProber - - SAMPLE_SIZE = 64 - SB_ENOUGH_REL_THRESHOLD = 1024 ---- chardet\sbcsgroupprober.py (original) -+++ chardet\sbcsgroupprober.py (refactored) -@@ -27,15 +27,15 @@ - ######################### END LICENSE BLOCK ######################### - - import constants, sys --from charsetgroupprober import CharSetGroupProber --from sbcharsetprober import SingleByteCharSetProber --from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model --from langgreekmodel import Latin7GreekModel, Win1253GreekModel --from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel --from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel --from langthaimodel import TIS620ThaiModel --from langhebrewmodel import Win1255HebrewModel --from hebrewprober import HebrewProber -+from .charsetgroupprober import CharSetGroupProber -+from .sbcharsetprober import SingleByteCharSetProber -+from .langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model -+from .langgreekmodel import Latin7GreekModel, Win1253GreekModel -+from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel -+from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel -+from .langthaimodel import TIS620ThaiModel -+from .langhebrewmodel import Win1255HebrewModel -+from .hebrewprober import HebrewProber - - class SBCSGroupProber(CharSetGroupProber): - def __init__(self): ---- chardet\sjisprober.py (original) -+++ chardet\sjisprober.py (refactored) -@@ -25,13 +25,13 @@ - # 02110-1301 USA - ######################### END LICENSE BLOCK ######################### - --from mbcharsetprober import MultiByteCharSetProber --from codingstatemachine import CodingStateMachine --from chardistribution import SJISDistributionAnalysis --from jpcntx import SJISContextAnalysis --from mbcssm import SJISSMModel -+from .mbcharsetprober import MultiByteCharSetProber -+from .codingstatemachine import CodingStateMachine -+from .chardistribution import SJISDistributionAnalysis -+from .jpcntx import SJISContextAnalysis -+from .mbcssm import SJISSMModel - import constants, sys --from constants import eStart, eError, eItsMe -+from .constants import eStart, eError, eItsMe - - class SJISProber(MultiByteCharSetProber): - def __init__(self): ---- chardet\universaldetector.py (original) -+++ chardet\universaldetector.py (refactored) -@@ -27,10 +27,10 @@ - ######################### END LICENSE BLOCK ######################### - - import constants, sys --from latin1prober import Latin1Prober # windows-1252 --from mbcsgroupprober import MBCSGroupProber # multi-byte character sets --from sbcsgroupprober import SBCSGroupProber # single-byte character sets --from escprober import EscCharSetProber # ISO-2122, etc. -+from .latin1prober import Latin1Prober # windows-1252 -+from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets -+from .sbcsgroupprober import SBCSGroupProber # single-byte character sets -+from .escprober import EscCharSetProber # ISO-2122, etc. - import re - - MINIMUM_THRESHOLD = 0.20 ---- chardet\utf8prober.py (original) -+++ chardet\utf8prober.py (refactored) -@@ -26,10 +26,10 @@ - ######################### END LICENSE BLOCK ######################### - - import constants, sys --from constants import eStart, eError, eItsMe --from charsetprober import CharSetProber --from codingstatemachine import CodingStateMachine --from mbcssm import UTF8SMModel -+from .constants import eStart, eError, eItsMe -+from .charsetprober import CharSetProber -+from .codingstatemachine import CodingStateMachine -+from .mbcssm import UTF8SMModel - - ONE_CHAR_PROB = 0.5 - +. +. +. (it goes on like this for a while) +. +. RefactoringTool: Files that were modified: RefactoringTool: chardet\__init__.py RefactoringTool: chardet\big5prober.py