2 more complete sections and 2 more partial sections in case-study

This commit is contained in:
Mark Pilgrim
2009-03-11 22:36:23 -04:00
parent c8080fdbd1
commit 5ead1cfa01
47 changed files with 668 additions and 446 deletions
+381 -183
View File
@@ -3,6 +3,7 @@
<head> <head>
<meta charset=utf-8> <meta charset=utf-8>
<title>Case study: porting chardet to Python 3 - Dive into Python 3</title> <title>Case study: porting chardet to Python 3 - Dive into Python 3</title>
<!--[if IE]><script src="html5.js"></script><![endif]-->
<link rel=stylesheet type=text/css href=dip3.css> <link rel=stylesheet type=text/css href=dip3.css>
<link rel="shortcut icon" href=data:image/ico,> <link rel="shortcut icon" href=data:image/ico,>
<link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log> <link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log>
@@ -42,6 +43,8 @@ body{counter-reset:h1 20}
<li><a href=#namefileisnotdefined>Name '<var>file</var>' is not defined</a> <li><a href=#namefileisnotdefined>Name '<var>file</var>' is not defined</a>
<li><a href=#cantuseastringpattern>Can&#8217;t use a string pattern on a bytes-like object</a> <li><a href=#cantuseastringpattern>Can&#8217;t use a string pattern on a bytes-like object</a>
<li><a href=#cantconvertbytesobject>Can&#8217;t convert '<code>bytes</code>' object to <code>str</code> implicitly</a> <li><a href=#cantconvertbytesobject>Can&#8217;t convert '<code>bytes</code>' object to <code>str</code> implicitly</a>
<li><a href=#unsupportedoperandtypeforplus>TypeError: unsupported operand type(s) for +: 'int' and 'bytes'</a>
<li><a href=#ordexpectedstring>TypeError: ord() expected string of length 1, but int found</a>
</ol> </ol>
</ol> </ol>
<h2 id=divingin>Introducing <code class=filename>chardet</code>: a mini-<abbr>FAQ</abbr></h2> <h2 id=divingin>Introducing <code class=filename>chardet</code>: a mini-<abbr>FAQ</abbr></h2>
@@ -111,8 +114,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
__version__ = "1.0.1" __version__ = "1.0.1"
def detect(aBuf): def detect(aBuf):
- import universaldetector <del>- import universaldetector</del>
+ from . import universaldetector <ins>+ from . import universaldetector</ins>
u = universaldetector.UniversalDetector() u = universaldetector.UniversalDetector()
u.reset() u.reset()
u.feed(aBuf) u.feed(aBuf)
@@ -122,14 +125,14 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from mbcharsetprober import MultiByteCharSetProber <del>-from mbcharsetprober import MultiByteCharSetProber</del>
-from codingstatemachine import CodingStateMachine <del>-from codingstatemachine import CodingStateMachine</del>
-from chardistribution import Big5DistributionAnalysis <del>-from chardistribution import Big5DistributionAnalysis</del>
-from mbcssm import Big5SMModel <del>-from mbcssm import Big5SMModel</del>
+from .mbcharsetprober import MultiByteCharSetProber <ins>+from .mbcharsetprober import MultiByteCharSetProber</ins>
+from .codingstatemachine import CodingStateMachine <ins>+from .codingstatemachine import CodingStateMachine</ins>
+from .chardistribution import Big5DistributionAnalysis <ins>+from .chardistribution import Big5DistributionAnalysis</ins>
+from .mbcssm import Big5SMModel <ins>+from .mbcssm import Big5SMModel</ins>
class Big5Prober(MultiByteCharSetProber): class Big5Prober(MultiByteCharSetProber):
def __init__(self): def __init__(self):
@@ -139,18 +142,18 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-import constants <del>-import constants</del>
-from euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO <del>-from euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO</del>
-from euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO <del>-from euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO</del>
-from gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO <del>-from gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO</del>
-from big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO <del>-from big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO</del>
-from jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO <del>-from jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO</del>
+from . import constants <ins>+from . import constants</ins>
+from .euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO <ins>+from .euctwfreq import EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, EUCTW_TYPICAL_DISTRIBUTION_RATIO</ins>
+from .euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO <ins>+from .euckrfreq import EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, EUCKR_TYPICAL_DISTRIBUTION_RATIO</ins>
+from .gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO <ins>+from .gb2312freq import GB2312CharToFreqOrder, GB2312_TABLE_SIZE, GB2312_TYPICAL_DISTRIBUTION_RATIO</ins>
+from .big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO <ins>+from .big5freq import Big5CharToFreqOrder, BIG5_TABLE_SIZE, BIG5_TYPICAL_DISTRIBUTION_RATIO</ins>
+from .jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO <ins>+from .jisfreq import JISCharToFreqOrder, JIS_TABLE_SIZE, JIS_TYPICAL_DISTRIBUTION_RATIO</ins>
ENOUGH_DATA_THRESHOLD = 1024 ENOUGH_DATA_THRESHOLD = 1024
SURE_YES = 0.99 SURE_YES = 0.99
@@ -160,8 +163,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
import constants, sys import constants, sys
-from charsetprober import CharSetProber <del>-from charsetprober import CharSetProber</del>
+from .charsetprober import CharSetProber <ins>+from .charsetprober import CharSetProber</ins>
class CharSetGroupProber(CharSetProber): class CharSetGroupProber(CharSetProber):
def __init__(self): def __init__(self):
@@ -171,8 +174,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from constants import eStart, eError, eItsMe <del>-from constants import eStart, eError, eItsMe</del>
+from .constants import eStart, eError, eItsMe <ins>+from .constants import eStart, eError, eItsMe</ins>
class CodingStateMachine: class CodingStateMachine:
def __init__(self, sm): def __init__(self, sm):
@@ -182,28 +185,28 @@ RefactoringTool: Skipping implicit fixer: ws_comma
SHORTCUT_THRESHOLD = 0.95 SHORTCUT_THRESHOLD = 0.95
-import __builtin__ <del>-import __builtin__</del>
+import builtins <ins>+import builtins</ins>
if not hasattr(__builtin__, 'False'): if not hasattr(__builtin__, 'False'):
False = 0 False = 0
True = 1 True = 1
else: else:
- False = __builtin__.False <del>- False = __builtin__.False</del>
- True = __builtin__.True <del>- True = __builtin__.True</del>
+ False = builtins.False <ins>+ False = builtins.False</ins>
+ True = builtins.True <ins>+ True = builtins.True</ins>
--- chardet\escprober.py (original) --- chardet\escprober.py (original)
+++ chardet\escprober.py (refactored) +++ chardet\escprober.py (refactored)
@@ -26,9 +26,9 @@ @@ -26,9 +26,9 @@
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
import constants, sys import constants, sys
-from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel <del>-from escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel</del>
-from charsetprober import CharSetProber <del>-from charsetprober import CharSetProber</del>
-from codingstatemachine import CodingStateMachine <del>-from codingstatemachine import CodingStateMachine</del>
+from .escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel <ins>+from .escsm import HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, ISO2022KRSMModel</ins>
+from .charsetprober import CharSetProber <ins>+from .charsetprober import CharSetProber</ins>
+from .codingstatemachine import CodingStateMachine <ins>+from .codingstatemachine import CodingStateMachine</ins>
class EscCharSetProber(CharSetProber): class EscCharSetProber(CharSetProber):
def __init__(self): def __init__(self):
@@ -213,8 +216,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from constants import eStart, eError, eItsMe <del>-from constants import eStart, eError, eItsMe</del>
+from .constants import eStart, eError, eItsMe <ins>+from .constants import eStart, eError, eItsMe</ins>
HZ_cls = ( \ HZ_cls = ( \
1,0,0,0,0,0,0,0, # 00 - 07 1,0,0,0,0,0,0,0, # 00 - 07
@@ -224,18 +227,18 @@ RefactoringTool: Skipping implicit fixer: ws_comma
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
import constants, sys import constants, sys
-from constants import eStart, eError, eItsMe <del>-from constants import eStart, eError, eItsMe</del>
-from mbcharsetprober import MultiByteCharSetProber <del>-from mbcharsetprober import MultiByteCharSetProber</del>
-from codingstatemachine import CodingStateMachine <del>-from codingstatemachine import CodingStateMachine</del>
-from chardistribution import EUCJPDistributionAnalysis <del>-from chardistribution import EUCJPDistributionAnalysis</del>
-from jpcntx import EUCJPContextAnalysis <del>-from jpcntx import EUCJPContextAnalysis</del>
-from mbcssm import EUCJPSMModel <del>-from mbcssm import EUCJPSMModel</del>
+from .constants import eStart, eError, eItsMe <ins>+from .constants import eStart, eError, eItsMe</ins>
+from .mbcharsetprober import MultiByteCharSetProber <ins>+from .mbcharsetprober import MultiByteCharSetProber</ins>
+from .codingstatemachine import CodingStateMachine <ins>+from .codingstatemachine import CodingStateMachine</ins>
+from .chardistribution import EUCJPDistributionAnalysis <ins>+from .chardistribution import EUCJPDistributionAnalysis</ins>
+from .jpcntx import EUCJPContextAnalysis <ins>+from .jpcntx import EUCJPContextAnalysis</ins>
+from .mbcssm import EUCJPSMModel <ins>+from .mbcssm import EUCJPSMModel</ins>
class EUCJPProber(MultiByteCharSetProber): class EUCJPProber(MultiByteCharSetProber):
def __init__(self): def __init__(self):
@@ -245,14 +248,14 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from mbcharsetprober import MultiByteCharSetProber <del>-from mbcharsetprober import MultiByteCharSetProber</del>
-from codingstatemachine import CodingStateMachine <del>-from codingstatemachine import CodingStateMachine</del>
-from chardistribution import EUCKRDistributionAnalysis <del>-from chardistribution import EUCKRDistributionAnalysis</del>
-from mbcssm import EUCKRSMModel <del>-from mbcssm import EUCKRSMModel</del>
+from .mbcharsetprober import MultiByteCharSetProber <ins>+from .mbcharsetprober import MultiByteCharSetProber</ins>
+from .codingstatemachine import CodingStateMachine <ins>+from .codingstatemachine import CodingStateMachine</ins>
+from .chardistribution import EUCKRDistributionAnalysis <ins>+from .chardistribution import EUCKRDistributionAnalysis</ins>
+from .mbcssm import EUCKRSMModel <ins>+from .mbcssm import EUCKRSMModel</ins>
class EUCKRProber(MultiByteCharSetProber): class EUCKRProber(MultiByteCharSetProber):
def __init__(self): def __init__(self):
@@ -262,14 +265,14 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from mbcharsetprober import MultiByteCharSetProber <del>-from mbcharsetprober import MultiByteCharSetProber</del>
-from codingstatemachine import CodingStateMachine <del>-from codingstatemachine import CodingStateMachine</del>
-from chardistribution import EUCTWDistributionAnalysis <del>-from chardistribution import EUCTWDistributionAnalysis</del>
-from mbcssm import EUCTWSMModel <del>-from mbcssm import EUCTWSMModel</del>
+from .mbcharsetprober import MultiByteCharSetProber <ins>+from .mbcharsetprober import MultiByteCharSetProber</ins>
+from .codingstatemachine import CodingStateMachine <ins>+from .codingstatemachine import CodingStateMachine</ins>
+from .chardistribution import EUCTWDistributionAnalysis <ins>+from .chardistribution import EUCTWDistributionAnalysis</ins>
+from .mbcssm import EUCTWSMModel <ins>+from .mbcssm import EUCTWSMModel</ins>
class EUCTWProber(MultiByteCharSetProber): class EUCTWProber(MultiByteCharSetProber):
def __init__(self): def __init__(self):
@@ -279,14 +282,14 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from mbcharsetprober import MultiByteCharSetProber <del>-from mbcharsetprober import MultiByteCharSetProber</del>
-from codingstatemachine import CodingStateMachine <del>-from codingstatemachine import CodingStateMachine</del>
-from chardistribution import GB2312DistributionAnalysis <del>-from chardistribution import GB2312DistributionAnalysis</del>
-from mbcssm import GB2312SMModel <del>-from mbcssm import GB2312SMModel</del>
+from .mbcharsetprober import MultiByteCharSetProber <ins>+from .mbcharsetprober import MultiByteCharSetProber</ins>
+from .codingstatemachine import CodingStateMachine <ins>+from .codingstatemachine import CodingStateMachine</ins>
+from .chardistribution import GB2312DistributionAnalysis <ins>+from .chardistribution import GB2312DistributionAnalysis</ins>
+from .mbcssm import GB2312SMModel <ins>+from .mbcssm import GB2312SMModel</ins>
class GB2312Prober(MultiByteCharSetProber): class GB2312Prober(MultiByteCharSetProber):
def __init__(self): def __init__(self):
@@ -296,10 +299,10 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from charsetprober import CharSetProber <del>-from charsetprober import CharSetProber</del>
-import constants <del>-import constants</del>
+from .charsetprober import CharSetProber <ins>+from .charsetprober import CharSetProber</ins>
+from . import constants <ins>+from . import constants</ins>
# This prober doesn't actually recognize a language or a charset. # This prober doesn't actually recognize a language or a charset.
# It is a helper prober for the use of the Hebrew model probers # It is a helper prober for the use of the Hebrew model probers
@@ -309,8 +312,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-import constants <del>-import constants</del>
+from . import constants <ins>+from . import constants</ins>
NUM_OF_CATEGORY = 6 NUM_OF_CATEGORY = 6
DONT_KNOW = -1 DONT_KNOW = -1
@@ -320,8 +323,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-import constants <del>-import constants</del>
+from . import constants <ins>+from . import constants</ins>
# 255: Control characters that usually does not exist in any text # 255: Control characters that usually does not exist in any text
# 254: Carriage/Return # 254: Carriage/Return
@@ -331,8 +334,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-import constants <del>-import constants</del>
+from . import constants <ins>+from . import constants</ins>
# KOI8-R language model # KOI8-R language model
# Character Mapping Table: # Character Mapping Table:
@@ -342,8 +345,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-import constants <del>-import constants</del>
+from . import constants <ins>+from . import constants</ins>
# 255: Control characters that usually does not exist in any text # 255: Control characters that usually does not exist in any text
# 254: Carriage/Return # 254: Carriage/Return
@@ -353,8 +356,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-import constants <del>-import constants</del>
+from . import constants <ins>+from . import constants</ins>
# 255: Control characters that usually does not exist in any text # 255: Control characters that usually does not exist in any text
# 254: Carriage/Return # 254: Carriage/Return
@@ -364,8 +367,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-import constants <del>-import constants</del>
+from . import constants <ins>+from . import constants</ins>
# 255: Control characters that usually does not exist in any text # 255: Control characters that usually does not exist in any text
# 254: Carriage/Return # 254: Carriage/Return
@@ -375,8 +378,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-import constants <del>-import constants</del>
+from . import constants <ins>+from . import constants</ins>
# 255: Control characters that usually does not exist in any text # 255: Control characters that usually does not exist in any text
# 254: Carriage/Return # 254: Carriage/Return
@@ -386,10 +389,10 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from charsetprober import CharSetProber <del>-from charsetprober import CharSetProber</del>
-import constants <del>-import constants</del>
+from .charsetprober import CharSetProber <ins>+from .charsetprober import CharSetProber</ins>
+from . import constants <ins>+from . import constants</ins>
import operator import operator
FREQ_CAT_NUM = 4 FREQ_CAT_NUM = 4
@@ -399,10 +402,10 @@ RefactoringTool: Skipping implicit fixer: ws_comma
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
import constants, sys import constants, sys
-from constants import eStart, eError, eItsMe <del>-from constants import eStart, eError, eItsMe</del>
-from charsetprober import CharSetProber <del>-from charsetprober import CharSetProber</del>
+from .constants import eStart, eError, eItsMe <ins>+from .constants import eStart, eError, eItsMe</ins>
+from .charsetprober import CharSetProber <ins>+from .charsetprober import CharSetProber</ins>
class MultiByteCharSetProber(CharSetProber): class MultiByteCharSetProber(CharSetProber):
def __init__(self): def __init__(self):
@@ -412,22 +415,22 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from charsetgroupprober import CharSetGroupProber <del>-from charsetgroupprober import CharSetGroupProber</del>
-from utf8prober import UTF8Prober <del>-from utf8prober import UTF8Prober</del>
-from sjisprober import SJISProber <del>-from sjisprober import SJISProber</del>
-from eucjpprober import EUCJPProber <del>-from eucjpprober import EUCJPProber</del>
-from gb2312prober import GB2312Prober <del>-from gb2312prober import GB2312Prober</del>
-from euckrprober import EUCKRProber <del>-from euckrprober import EUCKRProber</del>
-from big5prober import Big5Prober <del>-from big5prober import Big5Prober</del>
-from euctwprober import EUCTWProber <del>-from euctwprober import EUCTWProber</del>
+from .charsetgroupprober import CharSetGroupProber <ins>+from .charsetgroupprober import CharSetGroupProber</ins>
+from .utf8prober import UTF8Prober <ins>+from .utf8prober import UTF8Prober</ins>
+from .sjisprober import SJISProber <ins>+from .sjisprober import SJISProber</ins>
+from .eucjpprober import EUCJPProber <ins>+from .eucjpprober import EUCJPProber</ins>
+from .gb2312prober import GB2312Prober <ins>+from .gb2312prober import GB2312Prober</ins>
+from .euckrprober import EUCKRProber <ins>+from .euckrprober import EUCKRProber</ins>
+from .big5prober import Big5Prober <ins>+from .big5prober import Big5Prober</ins>
+from .euctwprober import EUCTWProber <ins>+from .euctwprober import EUCTWProber</ins>
class MBCSGroupProber(CharSetGroupProber): class MBCSGroupProber(CharSetGroupProber):
def __init__(self): def __init__(self):
@@ -437,8 +440,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from constants import eStart, eError, eItsMe <del>-from constants import eStart, eError, eItsMe</del>
+from .constants import eStart, eError, eItsMe <ins>+from .constants import eStart, eError, eItsMe</ins>
# BIG5 # BIG5
@@ -448,8 +451,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
import constants, sys import constants, sys
-from charsetprober import CharSetProber <del>-from charsetprober import CharSetProber</del>
+from .charsetprober import CharSetProber <ins>+from .charsetprober import CharSetProber</ins>
SAMPLE_SIZE = 64 SAMPLE_SIZE = 64
SB_ENOUGH_REL_THRESHOLD = 1024 SB_ENOUGH_REL_THRESHOLD = 1024
@@ -459,24 +462,24 @@ RefactoringTool: Skipping implicit fixer: ws_comma
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
import constants, sys import constants, sys
-from charsetgroupprober import CharSetGroupProber <del>-from charsetgroupprober import CharSetGroupProber</del>
-from sbcharsetprober import SingleByteCharSetProber <del>-from sbcharsetprober import SingleByteCharSetProber</del>
-from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model <del>-from langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model</del>
-from langgreekmodel import Latin7GreekModel, Win1253GreekModel <del>-from langgreekmodel import Latin7GreekModel, Win1253GreekModel</del>
-from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel <del>-from langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel</del>
-from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel <del>-from langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel</del>
-from langthaimodel import TIS620ThaiModel <del>-from langthaimodel import TIS620ThaiModel</del>
-from langhebrewmodel import Win1255HebrewModel <del>-from langhebrewmodel import Win1255HebrewModel</del>
-from hebrewprober import HebrewProber <del>-from hebrewprober import HebrewProber</del>
+from .charsetgroupprober import CharSetGroupProber <ins>+from .charsetgroupprober import CharSetGroupProber</ins>
+from .sbcharsetprober import SingleByteCharSetProber <ins>+from .sbcharsetprober import SingleByteCharSetProber</ins>
+from .langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model <ins>+from .langcyrillicmodel import Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel, MacCyrillicModel, Ibm866Model, Ibm855Model</ins>
+from .langgreekmodel import Latin7GreekModel, Win1253GreekModel <ins>+from .langgreekmodel import Latin7GreekModel, Win1253GreekModel</ins>
+from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel <ins>+from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel</ins>
+from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel <ins>+from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel</ins>
+from .langthaimodel import TIS620ThaiModel <ins>+from .langthaimodel import TIS620ThaiModel</ins>
+from .langhebrewmodel import Win1255HebrewModel <ins>+from .langhebrewmodel import Win1255HebrewModel</ins>
+from .hebrewprober import HebrewProber <ins>+from .hebrewprober import HebrewProber</ins>
class SBCSGroupProber(CharSetGroupProber): class SBCSGroupProber(CharSetGroupProber):
def __init__(self): def __init__(self):
@@ -486,19 +489,19 @@ RefactoringTool: Skipping implicit fixer: ws_comma
# 02110-1301 USA # 02110-1301 USA
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
-from mbcharsetprober import MultiByteCharSetProber <del>-from mbcharsetprober import MultiByteCharSetProber</del>
-from codingstatemachine import CodingStateMachine <del>-from codingstatemachine import CodingStateMachine</del>
-from chardistribution import SJISDistributionAnalysis <del>-from chardistribution import SJISDistributionAnalysis</del>
-from jpcntx import SJISContextAnalysis <del>-from jpcntx import SJISContextAnalysis</del>
-from mbcssm import SJISSMModel <del>-from mbcssm import SJISSMModel</del>
+from .mbcharsetprober import MultiByteCharSetProber <ins>+from .mbcharsetprober import MultiByteCharSetProber</ins>
+from .codingstatemachine import CodingStateMachine <ins>+from .codingstatemachine import CodingStateMachine</ins>
+from .chardistribution import SJISDistributionAnalysis <ins>+from .chardistribution import SJISDistributionAnalysis</ins>
+from .jpcntx import SJISContextAnalysis <ins>+from .jpcntx import SJISContextAnalysis</ins>
+from .mbcssm import SJISSMModel <ins>+from .mbcssm import SJISSMModel</ins>
import constants, sys import constants, sys
-from constants import eStart, eError, eItsMe <del>-from constants import eStart, eError, eItsMe</del>
+from .constants import eStart, eError, eItsMe <ins>+from .constants import eStart, eError, eItsMe</ins>
class SJISProber(MultiByteCharSetProber): class SJISProber(MultiByteCharSetProber):
def __init__(self): def __init__(self):
@@ -508,14 +511,14 @@ RefactoringTool: Skipping implicit fixer: ws_comma
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
import constants, sys import constants, sys
-from latin1prober import Latin1Prober # windows-1252 <del>-from latin1prober import Latin1Prober # windows-1252</del>
-from mbcsgroupprober import MBCSGroupProber # multi-byte character sets <del>-from mbcsgroupprober import MBCSGroupProber # multi-byte character sets</del>
-from sbcsgroupprober import SBCSGroupProber # single-byte character sets <del>-from sbcsgroupprober import SBCSGroupProber # single-byte character sets</del>
-from escprober import EscCharSetProber # ISO-2122, etc. <del>-from escprober import EscCharSetProber # ISO-2122, etc.</del>
+from .latin1prober import Latin1Prober # windows-1252 <ins>+from .latin1prober import Latin1Prober # windows-1252</ins>
+from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets <ins>+from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets</ins>
+from .sbcsgroupprober import SBCSGroupProber # single-byte character sets <ins>+from .sbcsgroupprober import SBCSGroupProber # single-byte character sets</ins>
+from .escprober import EscCharSetProber # ISO-2122, etc. <ins>+from .escprober import EscCharSetProber # ISO-2122, etc.</ins>
import re import re
MINIMUM_THRESHOLD = 0.20 MINIMUM_THRESHOLD = 0.20
@@ -525,14 +528,14 @@ RefactoringTool: Skipping implicit fixer: ws_comma
######################### END LICENSE BLOCK ######################### ######################### END LICENSE BLOCK #########################
import constants, sys import constants, sys
-from constants import eStart, eError, eItsMe <del>-from constants import eStart, eError, eItsMe</del>
-from charsetprober import CharSetProber <del>-from charsetprober import CharSetProber</del>
-from codingstatemachine import CodingStateMachine <del>-from codingstatemachine import CodingStateMachine</del>
-from mbcssm import UTF8SMModel <del>-from mbcssm import UTF8SMModel</del>
+from .constants import eStart, eError, eItsMe <ins>+from .constants import eStart, eError, eItsMe</ins>
+from .charsetprober import CharSetProber <ins>+from .charsetprober import CharSetProber</ins>
+from .codingstatemachine import CodingStateMachine <ins>+from .codingstatemachine import CodingStateMachine</ins>
+from .mbcssm import UTF8SMModel <ins>+from .mbcssm import UTF8SMModel</ins>
ONE_CHAR_PROB = 0.5 ONE_CHAR_PROB = 0.5
@@ -579,8 +582,8 @@ RefactoringTool: Skipping implicit fixer: ws_comma
count = 0 count = 0
u = UniversalDetector() u = UniversalDetector()
for f in glob.glob(sys.argv[1]): for f in glob.glob(sys.argv[1]):
- print f.ljust(60), <del>- print f.ljust(60),</del>
+ print(f.ljust(60), end=' ') <ins>+ print(f.ljust(60), end=' ')</ins>
u.reset() u.reset()
for line in file(f, 'rb'): for line in file(f, 'rb'):
u.feed(line) u.feed(line)
@@ -588,14 +591,14 @@ RefactoringTool: Skipping implicit fixer: ws_comma
u.close() u.close()
result = u.result result = u.result
if result['encoding']: if result['encoding']:
- print result['encoding'], 'with confidence', result['confidence'] <del>- print result['encoding'], 'with confidence', result['confidence']</del>
+ print(result['encoding'], 'with confidence', result['confidence']) <ins>+ print(result['encoding'], 'with confidence', result['confidence'])</ins>
else: else:
- print '******** no result' <del>- print '******** no result'</del>
+ print('******** no result') <ins>+ print('******** no result')</ins>
count += 1 count += 1
-print count, 'tests' <del>-print count, 'tests'</del>
+print(count, 'tests') <ins>+print(count, 'tests')</ins>
RefactoringTool: Files that were modified: RefactoringTool: Files that were modified:
RefactoringTool: test.py</samp></pre> RefactoringTool: test.py</samp></pre>
<p id=skip2to3outputtest>Well, that wasn&#8217;t so hard. Just a few imports and print statements to convert. Time to run the new version. Do you think it&#8217;ll work? <p id=skip2to3outputtest>Well, that wasn&#8217;t so hard. Just a few imports and print statements to convert. Time to run the new version. Do you think it&#8217;ll work?
@@ -648,7 +651,7 @@ import sys</code></pre>
<p>There are variations of this problem scattered throughout the <code class=filename>chardet</code> library. In some places it&#8217;s "<code>import constants, sys</code>"; in other places, it&#8217;s "<code>import constants, re</code>". The fix is the same: manually split the import statement into two lines, one for the relative import, the other for the absolute import. <p>There are variations of this problem scattered throughout the <code class=filename>chardet</code> library. In some places it&#8217;s "<code>import constants, sys</code>"; in other places, it&#8217;s "<code>import constants, re</code>". The fix is the same: manually split the import statement into two lines, one for the relative import, the other for the absolute import.
<p>Onward! <p>Onward!
<h3 id=namefileisnotdefined>Name '<var>file</var>' is not defined</h3> <h3 id=namefileisnotdefined>Name '<var>file</var>' is not defined</h3>
<p>FIXME intro <p>And here we go again, running <code>test.py</code> to try to execute our test cases&hellip;</p>
<p class=skip><a href=#skipnamefileisnotdefined>skip over this</a> <p class=skip><a href=#skipnamefileisnotdefined>skip over this</a>
<pre class=screen><samp class=prompt>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd> <pre class=screen><samp class=prompt>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>
<samp>tests\ascii\howto.diveintomark.org.xml</samp> <samp>tests\ascii\howto.diveintomark.org.xml</samp>
@@ -661,7 +664,7 @@ NameError: name 'file' is not defined</samp></pre>
<pre><code>for line in open(f, 'rb'):</code></pre> <pre><code>for line in open(f, 'rb'):</code></pre>
<p>And that&#8217;s all I have to say about that. <p>And that&#8217;s all I have to say about that.
<h3 id=cantuseastringpattern>Can&#8217;t use a string pattern on a bytes-like object</h3> <h3 id=cantuseastringpattern>Can&#8217;t use a string pattern on a bytes-like object</h3>
<p>FIXME intro <p>Now things are starting to get interesting. And by &#8220;interesting,&#8221; I mean &#8220;confusing as all hell.&#8221;
<p class=skip><a href=#skipcantuseastringpattern>skip over this</a> <p class=skip><a href=#skipcantuseastringpattern>skip over this</a>
<pre class=screen><samp class=prompt>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd> <pre class=screen><samp class=prompt>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>
<samp>tests\ascii\howto.diveintomark.org.xml</samp> <samp>tests\ascii\howto.diveintomark.org.xml</samp>
@@ -671,8 +674,8 @@ NameError: name 'file' is not defined</samp></pre>
File "C:\home\chardet\chardet\universaldetector.py", line 98, in feed File "C:\home\chardet\chardet\universaldetector.py", line 98, in feed
if self._highBitDetector.search(aBuf): if self._highBitDetector.search(aBuf):
TypeError: can't use a string pattern on a bytes-like object</samp></pre> TypeError: can't use a string pattern on a bytes-like object</samp></pre>
<p id=skipcantuseastringpattern>Now things are starting to get interesting. And by &#8220;interesting,&#8221; I mean &#8220;confusing as all hell.&#8221; <p id=skipcantuseastringpattern>
<p>First, let&#8217;s see what <var>self._highBitDetector</var> is. It&#8217;s defined in the <var>__init__</var> method of the <var>UniversalDetector</var> class: <p>To debug this, let&#8217;s see what <var>self._highBitDetector</var> is. It&#8217;s defined in the <var>__init__</var> method of the <var>UniversalDetector</var> class:
<p class=skip><a href=#skiphighbitdetectorcode>skip over this</a> <p class=skip><a href=#skiphighbitdetectorcode>skip over this</a>
<pre><code>class UniversalDetector: <pre><code>class UniversalDetector:
def __init__(self): def __init__(self):
@@ -687,7 +690,7 @@ TypeError: can't use a string pattern on a bytes-like object</samp></pre>
. .
if self._mInputState == ePureAscii: if self._mInputState == ePureAscii:
if self._highBitDetector.search(aBuf):</code></pre> if self._highBitDetector.search(aBuf):</code></pre>
<p id=skipfeedhighbitdetectorcode>And what is <var>aBuf</var>? Let&#8217;s backtrack further to a place that calls <var>UniversalDetector.feed()</var>. One place that calls it is the test harness, <code class=filename>test.py</code>. <p id=skipfeedhighbitdetectorcode>And what is <var>aBuf</var>? Let&#8217;s backtrack further to a place that calls <code>UniversalDetector.feed()</code>. One place that calls it is the test harness, <code class=filename>test.py</code>.
<p class=skip><a href=#skiptestharnessfeedcode>skip over this</a> <p class=skip><a href=#skiptestharnessfeedcode>skip over this</a>
<pre><code>u = UniversalDetector() <pre><code>u = UniversalDetector()
. .
@@ -695,7 +698,7 @@ TypeError: can't use a string pattern on a bytes-like object</samp></pre>
. .
for line in open(f, 'rb'): for line in open(f, 'rb'):
u.feed(line)</code></pre> u.feed(line)</code></pre>
<p id=skiptestharnessfeedcode>And here we find our answer: in the <var>UniversalDetector.feed()</var> method, <var>aBuf</var> is a line read from a file on disk. Look carefully at the parameters used to open the file: <code>'rb'</code>. <code>'r'</code> is for &#8220;read&#8221;; OK, big deal, we&#8217;re reading the file. Ah, but <code>'b'</code> is for &#8220;binary.&#8221; Without the <code>'b'</code> flag, this <code>for</code> loop would read the file, line by line, and convert each line into a string -- an array of Unicode characters -- according to the system default character encoding. (You could override the system encoding with another parameter to <var>open()</var>, but never mind that for now.) But with the <code>'b'</code> flag, this <code>for</code> loop reads the file, line by line, and stores each line exactly as it appears in the file, as an array of bytes. That byte array gets passed to <var>UniversalDetector.feed()</var>, and eventually gets passed to the pre-compiled regular expression, <var>self._highBitDetector</var>, to search for high-bit... characters. But we don&#8217;t have characters; we have bytes. Oops. <p id=skiptestharnessfeedcode>And here we find our answer: in the <code>UniversalDetector.feed()</code> method, <var>aBuf</var> is a line read from a file on disk. Look carefully at the parameters used to open the file: <code>'rb'</code>. <code>'r'</code> is for &#8220;read&#8221;; OK, big deal, we&#8217;re reading the file. Ah, but <code>'b'</code> is for &#8220;binary.&#8221; Without the <code>'b'</code> flag, this <code>for</code> loop would read the file, line by line, and convert each line into a string -- an array of Unicode characters -- according to the system default character encoding. (You could override the system encoding with another parameter to <var>open()</var>, but never mind that for now.) 
But with the <code>'b'</code> flag, this <code>for</code> loop reads the file, line by line, and stores each line exactly as it appears in the file, as an array of bytes. That byte array gets passed to <code>UniversalDetector.feed()</code>, and eventually gets passed to the pre-compiled regular expression, <var>self._highBitDetector</var>, to search for high-bit... characters. But we don&#8217;t have characters; we have bytes. Oops.
<p>What we need this regular expression to search is not an array of characters, but an array of bytes. <p>What we need this regular expression to search is not an array of characters, but an array of bytes.
<p>Once you realize that, the solution is not difficult. Regular expressions defined with strings can search strings. Regular expressions defined with byte arrays can search byte arrays. To define a byte array pattern, we simply change the type of the argument we use to define the regular expression to a byte array. So instead of this: <p>Once you realize that, the solution is not difficult. Regular expressions defined with strings can search strings. Regular expressions defined with byte arrays can search byte arrays. To define a byte array pattern, we simply change the type of the argument we use to define the regular expression to a byte array. So instead of this:
<pre><code>self._highBitDetector = re.compile(r'[\x80-\xFF]')</code></pre> <pre><code>self._highBitDetector = re.compile(r'[\x80-\xFF]')</code></pre>
@@ -716,7 +719,202 @@ for line in open(f, 'rb'):
File "C:\home\chardet\chardet\universaldetector.py", line 100, in feed File "C:\home\chardet\chardet\universaldetector.py", line 100, in feed
elif (self._mInputState == ePureAscii) and self._escDetector.search(self._mLastChar + aBuf): elif (self._mInputState == ePureAscii) and self._escDetector.search(self._mLastChar + aBuf):
TypeError: Can't convert 'bytes' object to str implicitly</samp></pre> TypeError: Can't convert 'bytes' object to str implicitly</samp></pre>
<p id=skipcantconvertbytesobject>...
<p id=skipcantconvertbytesobject>There's an unfortunate clash of coding style and Python interpreter here. The <code>TypeError</code> could be anywhere on that line, but the traceback doesn't tell you exactly where it is. It could be in the first conditional or the second, and the traceback would look the same. To narrow it down, you should split the line in half, like this:
<p class=skip><a href=#skip-split-conditional>skip over this code listing</a>
<pre><code>elif (self._mInputState == ePureAscii) and \
self._escDetector.search(self._mLastChar + aBuf):</code></pre>
<p id=skip-split-conditional>And re-run the test:</p>
<p class=skip><a href=#skip-over-cant-convert-bytes-object-2>skip over this command output listing</a>
<pre class=screen><samp class=prompt>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>
<samp>tests\ascii\howto.diveintomark.org.xml</samp>
<samp class=traceback>Traceback (most recent call last):
File "test.py", line 10, in &lt;module>
u.feed(line)
File "C:\home\chardet\chardet\universaldetector.py", line 101, in feed
self._escDetector.search(self._mLastChar + aBuf):
TypeError: Can't convert 'bytes' object to str implicitly</samp></pre>
<p id=skip-over-cant-convert-bytes-object-2>Aha! The problem was not in the first conditional (<code>self._mInputState == ePureAscii</code>) but in the second one. So what could cause a <code>TypeError</code> there? Perhaps you're thinking that the <code>search()</code> method is expecting a value of a different type, but that wouldn't generate this traceback. Python functions can take any value; if you pass the right number of arguments, the function will execute. It may <em>crash</em> if you pass it a value of a different type than it's expecting, but if that happened, the traceback would point to somewhere inside the function. But this traceback says it never got as far as calling the <code>search()</code> method. So the problem must be in that <code>+</code> operation, as it's trying to construct the value that it will eventually pass to the <code>search()</code> method.
<p>We know from <a href="#cantuseastringpattern">previous debugging</a> that <var>aBuf</var> is a byte array. So what is <code>self._mLastChar</code>? It's an instance variable, defined in the <code>reset()</code> method, which is actually called from the <code>__init__()</code> method.
<p class=skip><a href=#skip-mlastchar-declaration>skip over this code listing</a>
<pre><code>class UniversalDetector:
def __init__(self):
self._highBitDetector = re.compile(b'[\x80-\xFF]')
self._escDetector = re.compile(b'(\033|~{)')
self._mEscCharSetProber = None
self._mCharSetProbers = []
<mark> self.reset()</mark>
def reset(self):
self.result = {'encoding': None, 'confidence': 0.0}
self.done = False
self._mStart = True
self._mGotData = False
self._mInputState = ePureAscii
<mark> self._mLastChar = ''</mark></code></pre>
<p id=skip-mlastchar-declaration>And now we have our answer. Do you see it? <var>self._mLastChar</var> is a string, but <var>aBuf</var> is a byte array. And you can't concatenate a string to a byte array &mdash; not even a zero-length string.
<p>So what is <var>self._mLastChar</var> anyway? The answer is in the <code>feed()</code> method, just a few lines down from where the traceback occurred.
<p class=skip><a href=#skip-mlastchar-set>skip over this code listing</a>
<pre><code>if self._mInputState == ePureAscii:
if self._highBitDetector.search(aBuf):
self._mInputState = eHighbyte
elif (self._mInputState == ePureAscii) and \
self._escDetector.search(self._mLastChar + aBuf):
self._mInputState = eEscAscii
<mark>self._mLastChar = aBuf[-1]</mark></code></pre>
<p id=skip-mlastchar-set>The calling function calls this <code>feed()</code> method over and over again with a few bytes at a time. The method processes the bytes it was given (passed in as <var>aBuf</var>), then stores the last byte in <var>self._mLastChar</var> in case it's needed during the next call. (In a multi-byte encoding, the <code>feed()</code> method might get called with half of a character, then called again with the other half.) But because <var>aBuf</var> is now a byte array instead of a string, <var>self._mLastChar</var> needs to be a byte array as well. Thus:
<pre><code> def reset(self):
.
.
.
<del>- self._mLastChar = ''</del>
<ins>+ self._mLastChar = b''</ins></code></pre>
<h3 id=unsupportedoperandtypeforplus>TypeError: unsupported operand type(s) for +: 'int' and 'bytes'</h3>
<p>I have good news, and I have bad news. The good news is we're making progress&hellip;
<p class=skip><a href=#skip-unsupported-operand-types>skip over this command listing</a>
<pre class=screen><samp class=prompt>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>
<samp>tests\ascii\howto.diveintomark.org.xml</samp>
<samp class=traceback>Traceback (most recent call last):
File "test.py", line 10, in &lt;module>
u.feed(line)
File "C:\home\chardet\chardet\universaldetector.py", line 101, in feed
self._escDetector.search(self._mLastChar + aBuf):
TypeError: unsupported operand type(s) for +: 'int' and 'bytes'</samp></pre>
<p id=skip-unsupported-operand-types>&hellip;The bad news is it doesn't always feel like progress.
<p>But this is progress! Really! Even though the traceback calls out the same line of code, it's a different error than it used to be. Progress! So what's the problem now? The last time I checked, this line of code didn't try to concatenate an <code>int</code> with a byte array (<code>bytes</code>). In fact, you just spent a lot of time <a href="#cantconvertbytesobject">ensuring that <var>self._mLastChar</var> was a byte array</a>. How did it turn into an <code>int</code>?
<p>The answer lies not in the previous lines of code, but in the following lines.
<p class=skip><a href=#skip-mlastchar-highlight>skip over this code listing</a>
<pre><code>if self._mInputState == ePureAscii:
if self._highBitDetector.search(aBuf):
self._mInputState = eHighbyte
elif (self._mInputState == ePureAscii) and \
self._escDetector.search(self._mLastChar + aBuf):
self._mInputState = eEscAscii
<mark>self._mLastChar = aBuf[-1]</mark></code></pre>
<p id=skip-mlastchar-highlight>This error doesn't occur the first time the <code>feed()</code> method gets called; it occurs the <em>second time</em>, after <var>self._mLastChar</var> has been set to the last byte of <var>aBuf</var>. Well, what's the problem with that? Getting a single element from a byte array yields an integer, not a byte array. To see the difference, follow me to the interactive shell:
<p class=skip><a href=#skip-mlastchar-interactive>skip over this interpreter listing</a>
<pre class=screen>
<a><samp class=prompt>>>> </samp><kbd>aBuf = b'\xEF\xBB\xBF'</kbd> <span>&#x2460;</span></a>
<samp class=prompt>>>> </samp><kbd>len(aBuf)</kbd>
<samp>3</samp>
<samp class=prompt>>>> </samp><kbd>mLastChar = aBuf[-1]</kbd>
<a><samp class=prompt>>>> </samp><kbd>mLastChar</kbd> <span>&#x2461;</span></a>
<samp>191</samp>
<a><samp class=prompt>>>> </samp><kbd>type(mLastChar)</kbd> <span>&#x2462;</span></a>
<samp>&lt;class 'int'></samp>
<a><samp class=prompt>>>> </samp><kbd>mLastChar + aBuf</kbd> <span>&#x2463;</span></a>
<samp class=traceback>Traceback (most recent call last):
File "&lt;stdin>", line 1, in &lt;module>
TypeError: unsupported operand type(s) for +: 'int' and 'bytes'</samp>
<a><samp class=prompt>>>> </samp><kbd>mLastChar = aBuf[-1:]</kbd> <span>&#x2464;</span></a>
<samp class=prompt>>>> </samp><kbd>mLastChar</kbd>
<samp>b'\xbf'</samp>
<a><samp class=prompt>>>> </samp><kbd>mLastChar + aBuf</kbd> <span>&#x2465;</span></a>
<samp>b'\xbf\xef\xbb\xbf'</samp></pre>
<ol id=skip-mlastchar-interactive>
<li>Define a byte array of 3 bytes.
<li>The last element of the byte array is 191.
<li>That's an integer.
<li>Concatenating an integer with a byte array doesn't work. You've now replicated the error you just found in <code>universaldetector.py</code>.
<li>Ah, here's the fix. Instead of taking the last element of the byte array, use <a href=native-datatypes.html#slicinglists>list slicing</a> to create a new byte array containing just the last element. That is, start with the last element and continue the slice until the end of the byte array. Now <var>mLastChar</var> is a byte array of length 1.
<li>Concatenating a byte array of length 1 with a byte array of length 3 returns a new byte array of length 4.
</ol>
<p>So, to ensure that the <code>feed()</code> method in <code>universaldetector.py</code> continues to work no matter how often it's called, you need to <a href=#cantconvertbytesobject>initialize <var>self._mLastChar</var> as a 0-length byte array</a>, then <em>make sure it stays a byte array</em>.
<pre><code> self._escDetector.search(self._mLastChar + aBuf):
self._mInputState = eEscAscii
<del>- self._mLastChar = aBuf[-1]</del>
<ins>+ self._mLastChar = aBuf[-1:]</ins></code></pre>
<h3 id=ordexpectedstring>TypeError: ord() expected string of length 1, but int found</h3>
<p>Tired yet? You're almost there&hellip;
<p class=skip><a href=#skip-ord-expected-string>skip over this command output listing</a>
<pre class=screen><samp class=prompt>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>
<samp>tests\ascii\howto.diveintomark.org.xml ascii with confidence 1.0
tests\Big5\0804.blogspot.com.xml</samp>
<samp class=traceback>Traceback (most recent call last):
File "test.py", line 10, in &lt;module>
u.feed(line)
File "C:\home\chardet\chardet\universaldetector.py", line 116, in feed
if prober.feed(aBuf) == constants.eFoundIt:
File "C:\home\chardet\chardet\charsetgroupprober.py", line 60, in feed
st = prober.feed(aBuf)
File "C:\home\chardet\chardet\utf8prober.py", line 53, in feed
codingState = self._mCodingSM.next_state(c)
File "C:\home\chardet\chardet\codingstatemachine.py", line 43, in next_state
byteCls = self._mModel['classTable'][ord(c)]
TypeError: ord() expected string of length 1, but int found</samp></pre>
<p id=skip-ord-expected-string>FIXME
<p class=skip><a href=#skip-next-state>skip over this code listing</a>
<pre><code># codingstatemachine.py
def next_state(self, c):
# for each byte we get its class
# if it is first byte, we also get byte length
byteCls = self._mModel['classTable'][ord(c)]</code></pre>
<p id=skip-next-state>FIXME [<var>aBuf</var> is a byte array, so <var>c</var> is an <code>int</code>, not a 1-character string. IOW, there's no need to call the <code>ord()</code> function because <var>c</var> is already an <code>int</code>!]
<p class=skip><a href=#skip-utf8prober-feed>skip over this code listing</a>
<pre><code># utf8prober.py
def feed(self, aBuf):
for c in aBuf:
codingState = self._mCodingSM.next_state(c)</code></pre>
<p id=skip-utf8prober-feed>FIXME [wrapup or deleteme]
<h3 id=unorderabletypes>TypeError: unorderable types: int() >= str()</h3>
<p>FIXME [let's go again]
<p class=skip><a href=#skip-unorderable-types-screen>skip over this command output listing</a>
<pre class=screen><samp class=prompt>C:\home\chardet> </samp><kbd>python test.py tests\*\*</kbd>
<samp>tests\ascii\howto.diveintomark.org.xml ascii with confidence 1.0
tests\Big5\0804.blogspot.com.xml</samp>
<samp class=traceback>Traceback (most recent call last):
File "test.py", line 10, in &lt;module>
u.feed(line)
File "C:\home\chardet\chardet\universaldetector.py", line 116, in feed
if prober.feed(aBuf) == constants.eFoundIt:
File "C:\home\chardet\chardet\charsetgroupprober.py", line 60, in feed
st = prober.feed(aBuf)
File "C:\home\chardet\chardet\sjisprober.py", line 68, in feed
self._mContextAnalyzer.feed(self._mLastChar[2 - charLen :], charLen)
File "C:\home\chardet\chardet\jpcntx.py", line 145, in feed
order, charLen = self.get_order(aBuf[i:i+2])
File "C:\home\chardet\chardet\jpcntx.py", line 176, in get_order
if ((aStr[0] >= '\x81') and (aStr[0] <= '\x9F')) or \
TypeError: unorderable types: int() >= str()</samp></pre>
<p id=skip-unorderable-types-screen>FIXME
<p class=c>&copy; 2001&ndash;4, 2009 <span>&#x2133;</span>ark Pilgrim, <a href=http://creativecommons.org/licenses/by-sa/3.0/ rel=license>CC-BY-SA-3.0</a> <p class=c>&copy; 2001&ndash;4, 2009 <span>&#x2133;</span>ark Pilgrim, <a href=http://creativecommons.org/licenses/by-sa/3.0/ rel=license>CC-BY-SA-3.0</a>
<script src=jquery.js></script> <script src=jquery.js></script>
<script src=dip3.js></script> <script src=dip3.js></script>
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+6 -4
View File
@@ -10,11 +10,9 @@
import sys import sys
* test.py: change file() to open() * test.py: change file() to open()
* universaldetector.py: change r'' strings to b'' byte arrays in self._highBitDetector, self._escDetector regular expressions * universaldetector.py: change r'' strings to b'' byte arrays in self._highBitDetector, self._escDetector regular expressions
- charsetprober.py: change regular expression-based replace to use b'' byte arrays instead of strings
- universaldetector.py: change self._mLastChar from a r'' string to a b'' byte array * universaldetector.py: change self._mLastChar from a '' string to a b'' byte array
- mbcharsetprober.py: change self._mLastChar from a list of two 1-character strings to a list of two ints * universaldetector.py: getting a single element from a byte array yields an integer, not a byte, so change syntax to make sure we self._mLastChar is always a byte
- universaldetector.py: getting a single element from a byte array yields an integer, not a byte, so change syntax to make sure we self._mLastChar is always a byte
old: old:
self._mLastChar = aBuf[-1] self._mLastChar = aBuf[-1]
new: new:
@@ -25,4 +23,8 @@
- jpcntx.py, chardistribution.py (editorial): global search-and-replace "aStr" --> "aBuf" to make it clear that we're passing around a byte array - jpcntx.py, chardistribution.py (editorial): global search-and-replace "aStr" --> "aBuf" to make it clear that we're passing around a byte array
- sbcharsetprober.py, latin1prober.py: change ord(c) to c since it's already an int (iterating through a byte array) - sbcharsetprober.py, latin1prober.py: change ord(c) to c since it's already an int (iterating through a byte array)
- (not sure where this fits) mbcharsetprober.py: change self._mLastChar from a list of two 1-character strings to a list of two ints
- (not sure where this fits) charsetprober.py: change regular expression-based replace to use b'' byte arrays instead of strings
- latin1prober.py: refactor reduce(operator.add, ...) to use a for loop instead - latin1prober.py: refactor reduce(operator.add, ...) to use a for loop instead
+225 -225
View File
File diff suppressed because it is too large Load Diff
+10 -3
View File
@@ -27,15 +27,19 @@ a:visited{color:darkorchid}
.skip a:active,.skip a:focus{position:static;width:auto;height:auto} .skip a:active,.skip a:focus{position:static;width:auto;height:auto}
/* code blocks */ /* code blocks */
pre{white-space:pre-wrap;padding-left:2.154em;line-height:2.154;border-left:1px dotted} pre{white-space:pre-wrap;padding-left:2.154em;line-height:2.154;border-left:1px solid gainsboro}
.widgets{float:left} .widgets{float:left}
.widgets,.widgets a,.download{font-size:small;line-height:2.154} .widgets,.widgets a,.download{font-size:small;line-height:2.154}
.block,ol{clear:left} .block,ol,p,blockquote{clear:left}
pre a,.widgets a{padding:0.4375em 0;border:0} pre a,.widgets a{padding:0.4375em 0;border:0}
.widgets a{text-decoration:underline} .widgets a{text-decoration:underline}
pre a:hover{border:0} pre a:hover{border:0}
kbd{font-weight:bold} kbd{font-weight:bold}
.prompt{color:#667} .prompt{color:#667}
ins,del,mark{text-decoration:none;font-style:normal;display:inline-block;width:100%;line-height:2.154}
del{background:salmon}
ins{background:palegreen}
mark{background:#ffff80}
/* tables */ /* tables */
table{width:100%;border-collapse:collapse} table{width:100%;border-collapse:collapse}
@@ -45,7 +49,7 @@ td{vertical-align:top}
th:first-child{width:10%;text-align:center} th:first-child{width:10%;text-align:center}
.simple th{font-family:inherit !important} .simple th{font-family:inherit !important}
.hover{background:#eee;color:inherit;cursor:default} .hover{background:#eee;color:inherit;cursor:default}
td pre{margin:0;padding:0;border:0} td pre{margin:0;padding:0;border:0;background:inherit}
/* headers */ /* headers */
h1,h2,h3,p,ul,ol{margin:1.75em 0;font-size:medium} h1,h2,h3,p,ul,ol{margin:1.75em 0;font-size:medium}
@@ -57,3 +61,6 @@ h1{counter-reset:h2}
h2:before{counter-increment:h2;content:counter(h1) "." counter(h2) ". "} h2:before{counter-increment:h2;content:counter(h1) "." counter(h2) ". "}
h2{counter-reset:h3} h2{counter-reset:h3}
h3:before{counter-increment:h3;content:counter(h1) "." counter(h2) "." counter(h3) ". "} h3:before{counter-increment:h3;content:counter(h1) "." counter(h2) "." counter(h3) ". "}
/* HTML 5 support */
article,aside,dialog,footer,header,section{display:block}
+18 -16
View File
@@ -3,6 +3,7 @@
<head> <head>
<meta charset=utf-8> <meta charset=utf-8>
<title>Dive Into Python 3</title> <title>Dive Into Python 3</title>
<!--[if IE]><script src="html5.js"></script><![endif]-->
<link rel=stylesheet type=text/css href=dip3.css> <link rel=stylesheet type=text/css href=dip3.css>
<link rel="shortcut icon" href=data:image/ico,> <link rel="shortcut icon" href=data:image/ico,>
<link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log> <link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log>
@@ -10,32 +11,33 @@
.first{clear:both;margin-top:0;padding-top:1.75em} .first{clear:both;margin-top:0;padding-top:1.75em}
li:last-child{list-style:none;margin:0 0 0 -1.7em} li:last-child{list-style:none;margin:0 0 0 -1.7em}
li:last-child:before{content:"A. \00a0 \00a0"} li:last-child:before{content:"A. \00a0 \00a0"}
li.todo{background:white;color:gainsboro}
</style> </style>
</head> </head>
<form action=http://www.google.com/cse id=search><div><input type=hidden name=cx value=014021643941856155761:l5eihuescdw><input type=hidden name=ie value=UTF-8><input name=q size=31>&nbsp;<input type=submit name=sa value=Search></div></form> <form action=http://www.google.com/cse id=search><div><input type=hidden name=cx value=014021643941856155761:l5eihuescdw><input type=hidden name=ie value=UTF-8><input name=q size=31>&nbsp;<input type=submit name=sa value=Search></div></form>
<p class=first><cite>Dive Into Python 3</cite> will cover Python 3 and its differences from Python 2. Compared to the original <cite><a href=http://diveintopython.org/>Dive Into Python</a></cite>, it will be about 50% revised and 50% new material. I will publish drafts online as I go. The final version will be published on paper by Apress. The book will remain online under the <a rel=license href=http://creativecommons.org/licenses/by-sa/3.0/>CC-BY-SA-3.0</a> license. <p class=first><cite>Dive Into Python 3</cite> will cover Python 3 and its differences from Python 2. Compared to the original <cite><a href=http://diveintopython.org/>Dive Into Python</a></cite>, it will be about 50% revised and 50% new material. I will publish drafts online as I go. The final version will be published on paper by Apress. The book will remain online under the <a rel=license href=http://creativecommons.org/licenses/by-sa/3.0/>CC-BY-SA-3.0</a> license.
<p>You can see the <a href=table-of-contents.html>full table of contents</a> (<strong>not finalized</strong>), or read what I&#8217;ve written so far:</p> <p>You can see the <a href=table-of-contents.html>full table of contents</a> (<strong>not finalized</strong>), or read what I&#8217;ve written so far:</p>
<ol start=0> <ol start=0>
<li> <li class=todo>Installing Python
<li><a href=your-first-python-program.html>Your first Python program</a> <li><a href=your-first-python-program.html>Your first Python program</a>
<li><a href=native-datatypes.html>Native datatypes</a> <li><a href=native-datatypes.html>Native datatypes</a>
<li> <li class=todo>Strings
<li><a href=regular-expressions.html>Regular expressions</a> <li><a href=regular-expressions.html>Regular expressions</a>
<li> <li class=todo>The power of introspection
<li> <li class=todo>Objects and object-orientation
<li><a href=unit-testing.html>Unit testing</a> <li><a href=unit-testing.html>Unit testing</a>
<li> <li class=todo>Test-first programming
<li> <li class=todo>Refactoring your code
<li> <li class=todo>Files
<li> <li class=todo>HTML processing
<li> <li class=todo>XML processing
<li> <li class=todo>Web services
<li> <li class=todo>Dynamic functions
<li> <li class=todo>Metaclasses
<li> <li class=todo>Performance tuning
<li> <li class=todo>Packaging Python libraries
<li> <li class=todo>Creating graphics with the Python Imaging Library
<li> <li class=todo>Where to go from here
<li><a href=case-study-porting-chardet-to-python-3.html>Case study: porting <code>chardet</code> to Python 3</a> <li><a href=case-study-porting-chardet-to-python-3.html>Case study: porting <code>chardet</code> to Python 3</a>
<li><a href=porting-code-to-python-3-with-2to3.html>Porting code to Python 3 with <code>2to3</code></a> <li><a href=porting-code-to-python-3-with-2to3.html>Porting code to Python 3 with <code>2to3</code></a>
</ol> </ol>
+1
View File
@@ -3,6 +3,7 @@
<head> <head>
<meta charset=utf-8> <meta charset=utf-8>
<title>Native datatypes - Dive into Python 3</title> <title>Native datatypes - Dive into Python 3</title>
<!--[if IE]><script src="html5.js"></script><![endif]-->
<link rel=stylesheet type=text/css href=dip3.css> <link rel=stylesheet type=text/css href=dip3.css>
<link rel="shortcut icon" href=data:image/ico,> <link rel="shortcut icon" href=data:image/ico,>
<link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log> <link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log>
+1
View File
@@ -3,6 +3,7 @@
<head> <head>
<meta charset=utf-8> <meta charset=utf-8>
<title>Porting code to Python 3 with 2to3 - Dive into Python 3</title> <title>Porting code to Python 3 with 2to3 - Dive into Python 3</title>
<!--[if IE]><script src="html5.js"></script><![endif]-->
<link rel=stylesheet type=text/css href=dip3.css> <link rel=stylesheet type=text/css href=dip3.css>
<link rel="shortcut icon" href=data:image/ico,> <link rel="shortcut icon" href=data:image/ico,>
<link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log> <link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log>
+1 -3
View File
@@ -14,8 +14,6 @@ sed -i -e "s|//}.; /\* google\..*|});|g" build/dip3.js
revision=`hg log|grep changeset|cut -d":" -f3|head -1` revision=`hg log|grep changeset|cut -d":" -f3|head -1`
java -jar yuicompressor-2.4.2.jar build/dip3.js > build/dip3.$revision.min.js java -jar yuicompressor-2.4.2.jar build/dip3.js > build/dip3.$revision.min.js
java -jar yuicompressor-2.4.2.jar build/dip3.css > build/dip3.$revision.min.css java -jar yuicompressor-2.4.2.jar build/dip3.css > build/dip3.$revision.min.css
#rm build/dip3.js
#rm build/dip3.css
sed -i -e "s|dip3\.js|http://wearehugh.com/dip3/dip3.${revision}.min.js|g" build/*.html sed -i -e "s|dip3\.js|http://wearehugh.com/dip3/dip3.${revision}.min.js|g" build/*.html
sed -i -e "s|dip3\.css|http://wearehugh.com/dip3/dip3.${revision}.min.css|g" build/*.html sed -i -e "s|dip3\.css|http://wearehugh.com/dip3/dip3.${revision}.min.css|g" build/*.html
@@ -23,5 +21,5 @@ sed -i -e "s|dip3\.css|http://wearehugh.com/dip3/dip3.${revision}.min.css|g" bui
chmod 644 build/*.html build/*.css build/*.js build/*.py build/*.txt build/.htaccess chmod 644 build/*.html build/*.css build/*.js build/*.py build/*.txt build/.htaccess
# and push to production # and push to production
rsync -essh -avzP --delete --delete-after build/*.min.css build/*.min.js diveintomark.org:~/web/wearehugh.com/dip3/ rsync -essh -avzP --delete --delete-after build/*.min.css build/*.min.js build/html5.js diveintomark.org:~/web/wearehugh.com/dip3/
rsync -essh -avzP build/*.html build/*.py build/*.txt build/.htaccess diveintomark.org:~/web/diveintopython3.org/ rsync -essh -avzP build/*.html build/*.py build/*.txt build/.htaccess diveintomark.org:~/web/diveintopython3.org/
+1
View File
@@ -3,6 +3,7 @@
<head> <head>
<meta charset=utf-8> <meta charset=utf-8>
<title>Regular expressions - Dive into Python 3</title> <title>Regular expressions - Dive into Python 3</title>
<!--[if IE]><script src="html5.js"></script><![endif]-->
<link rel=stylesheet type=text/css href=dip3.css> <link rel=stylesheet type=text/css href=dip3.css>
<link rel="shortcut icon" href=data:image/ico,> <link rel="shortcut icon" href=data:image/ico,>
<link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log> <link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log>
+1
View File
@@ -3,6 +3,7 @@
<head> <head>
<meta charset=utf-8> <meta charset=utf-8>
<title>Table of contents - Dive Into Python 3</title> <title>Table of contents - Dive Into Python 3</title>
<!--[if IE]><script src="html5.js"></script><![endif]-->
<link rel=stylesheet type=text/css href=dip3.css> <link rel=stylesheet type=text/css href=dip3.css>
<link rel="shortcut icon" href=data:image/ico,> <link rel="shortcut icon" href=data:image/ico,>
<link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log> <link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log>
+5 -4
View File
@@ -3,6 +3,7 @@
<head> <head>
<meta charset=utf-8> <meta charset=utf-8>
<title>Unit testing - Dive into Python 3</title> <title>Unit testing - Dive into Python 3</title>
<!--[if IE]><script src="html5.js"></script><![endif]-->
<link rel=stylesheet type=text/css href=dip3.css> <link rel=stylesheet type=text/css href=dip3.css>
<link rel="shortcut icon" href=data:image/ico,> <link rel="shortcut icon" href=data:image/ico,>
<link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log> <link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log>
@@ -134,7 +135,7 @@ if __name__ == "__main__":
<li>To write a test case, first subclass the <code>TestCase</code> class of the <code>unittest</code> module. This class provides many useful methods which you can use in your test case to test specific conditions. <li>To write a test case, first subclass the <code>TestCase</code> class of the <code>unittest</code> module. This class provides many useful methods which you can use in your test case to test specific conditions.
<li>This is a list of integer/numeral pairs that I verified manually. It includes the lowest ten numbers, the highest number, every number that translates to a single-character Roman numeral, and a random sampling of other valid numbers. The point of a unit test is not to test every possible input, but to test a representative sample. <li>This is a list of integer/numeral pairs that I verified manually. It includes the lowest ten numbers, the highest number, every number that translates to a single-character Roman numeral, and a random sampling of other valid numbers. The point of a unit test is not to test every possible input, but to test a representative sample.
<li>Every individual test is its own method, which must take no parameters and return no value. If the method exits normally without raising an exception, the test is considered passed; if the method raises an exception, the test is considered failed. <li>Every individual test is its own method, which must take no parameters and return no value. If the method exits normally without raising an exception, the test is considered passed; if the method raises an exception, the test is considered failed.
<li>Here you call the actual <code>to_roman()</code> function. (Well, the function hasn't been written yet, but once it is, this is the line that will call it.) Notice that you have now defined the <acronym>API</acronym> for the <code>to_roman()</code> function: it must take an integer (the number to convert) and return a string (the Roman numeral representation). If the <acronym>API</acronym> is different than that, this test is considered failed. Also notice that you are not trapping any exceptions when you call <code>to_roman()</code>. This is intentional. <code>to_roman()</code> shouldn't raise an exception when you call it with valid input, and these input values are all valid. If <code>to_roman()</code> raises an exception, this test is considered failed. <li>Here you call the actual <code>to_roman()</code> function. (Well, the function hasn't been written yet, but once it is, this is the line that will call it.) Notice that you have now defined the <abbr>API</abbr> for the <code>to_roman()</code> function: it must take an integer (the number to convert) and return a string (the Roman numeral representation). If the <abbr>API</abbr> is different than that, this test is considered failed. Also notice that you are not trapping any exceptions when you call <code>to_roman()</code>. This is intentional. <code>to_roman()</code> shouldn't raise an exception when you call it with valid input, and these input values are all valid. If <code>to_roman()</code> raises an exception, this test is considered failed.
<li>Assuming the <code>to_roman()</code> function was defined correctly, called correctly, completed successfully, and returned a value, the last step is to check whether it returned the <em>right</em> value. This is a common question, and the <code>TestCase</code> class provides a method, <code>assertEqual</code>, to check whether two values are equal. If the result returned from <code>to_roman()</code> (<var>result</var>) does not match the known value you were expecting (<var>numeral</var>), <code>assertEqual</code> will raise an exception and the test will fail. If the two values are equal, <code>assertEqual</code> will do nothing. If every value returned from <code>to_roman()</code> matches the known value you expect, <code>assertEqual</code> never raises an exception, so <code>testToRomanKnownValues</code> eventually exits normally, which means <code>to_roman()</code> has passed this test. <li>Assuming the <code>to_roman()</code> function was defined correctly, called correctly, completed successfully, and returned a value, the last step is to check whether it returned the <em>right</em> value. This is a common question, and the <code>TestCase</code> class provides a method, <code>assertEqual</code>, to check whether two values are equal. If the result returned from <code>to_roman()</code> (<var>result</var>) does not match the known value you were expecting (<var>numeral</var>), <code>assertEqual</code> will raise an exception and the test will fail. If the two values are equal, <code>assertEqual</code> will do nothing. If every value returned from <code>to_roman()</code> matches the known value you expect, <code>assertEqual</code> never raises an exception, so <code>testToRomanKnownValues</code> eventually exits normally, which means <code>to_roman()</code> has passed this test.
</ol> </ol>
<p>Once you have a test case, you can start coding the <code>to_roman()</code> function. First, you should stub it out as an empty function and make sure the tests fail. If the tests succeed before you've written any code, you're doing it wrong &mdash; your tests aren't testing your code at all! Write a test that fails, then code until it passes. <p>Once you have a test case, you can start coding the <code>to_roman()</code> function. First, you should stub it out as an empty function and make sure the tests fail. If the tests succeed before you've written any code, you're doing it wrong &mdash; your tests aren't testing your code at all! Write a test that fails, then code until it passes.
@@ -144,7 +145,7 @@ function to_roman(n):
"""convert integer to Roman numeral""" """convert integer to Roman numeral"""
<a> pass <span>&#x2460;</span></a></code></pre> <a> pass <span>&#x2460;</span></a></code></pre>
<ol> <ol>
<li>At this stage, you want to define the <acronym>API</acronym> of the <code>to_roman()</code> function, but you don't want to code it yet. (Your test needs to fail first.) To stub it out, use the Python reserved word <code>pass</code> [FIXME ref], which does precisely nothing.</a>. <li>At this stage, you want to define the <abbr>API</abbr> of the <code>to_roman()</code> function, but you don't want to code it yet. (Your test needs to fail first.) To stub it out, use the Python reserved word <code>pass</code> [FIXME ref], which does precisely nothing.
</ol> </ol>
<p>Execute <code>romantest1.py</code> on the command line to run the test. If you call it with the <code>-v</code> command-line option, it will give more verbose output so you can see exactly what's going on as each test case runs. With any luck, your output should look like this: <p>Execute <code>romantest1.py</code> on the command line to run the test. If you call it with the <code>-v</code> command-line option, it will give more verbose output so you can see exactly what's going on as each test case runs. With any luck, your output should look like this:
<pre class=screen> <pre class=screen>
@@ -289,7 +290,7 @@ FAILED (errors=1)</samp></pre>
<p>Now run the test suite again. <p>Now run the test suite again.
<pre class=screen> <pre class=screen>
<samp class=prompt>you@localhost:~$ </samp><kbd>python3 romantest2.py -v</kbd> <samp class=prompt>you@localhost:~$ </samp><kbd>python3 romantest2.py -v</kbd>
to_roman should give known result with known input ... ok <samp>to_roman should give known result with known input ... ok
<a>to_roman should fail with large input ... FAIL <span>&#x2460;</span></a> <a>to_roman should fail with large input ... FAIL <span>&#x2460;</span></a>
====================================================================== ======================================================================
@@ -360,7 +361,7 @@ For instance, the <code>testFromRomanCase</code> method (&#8220;<code>from_roman
<li>If you take a number, convert it to Roman numerals, then convert that back to a number, you should end up with the number <li>If you take a number, convert it to Roman numerals, then convert that back to a number, you should end up with the number
you started with. So <code>from_roman(to_roman(n)) == n</code> for all <var>n</var> in <code>1..3999</code>. you started with. So <code>from_roman(to_roman(n)) == n</code> for all <var>n</var> in <code>1..3999</code>.
<li><code>to_roman</code> should always return a Roman numeral using uppercase letters. <li><code>to_roman</code> should always return a Roman numeral using uppercase letters.
<li><code>from_roman</code> should only accept uppercase Roman numerals (<i class=foreignphrase><acronym>i.e.</acronym></i> it should fail when given lowercase input). <li><code>from_roman</code> should only accept uppercase Roman numerals (<i class=foreignphrase><abbr>i.e.</abbr></i> it should fail when given lowercase input).
</ol> </ol>
--> -->
<p class=c>&copy; 2001&ndash;4, 2009 <span>&#x2133;</span>ark Pilgrim, <a href=http://creativecommons.org/licenses/by-sa/3.0/ rel=license>CC-BY-SA-3.0</a> <p class=c>&copy; 2001&ndash;4, 2009 <span>&#x2133;</span>ark Pilgrim, <a href=http://creativecommons.org/licenses/by-sa/3.0/ rel=license>CC-BY-SA-3.0</a>
+18 -8
View File
@@ -3,6 +3,7 @@
<head> <head>
<meta charset=utf-8> <meta charset=utf-8>
<title>Your first Python program - Dive into Python 3</title> <title>Your first Python program - Dive into Python 3</title>
<!--[if IE]><script src="html5.js"></script><![endif]-->
<link rel=stylesheet type=text/css href=dip3.css> <link rel=stylesheet type=text/css href=dip3.css>
<link rel="shortcut icon" href=data:image/ico,> <link rel="shortcut icon" href=data:image/ico,>
<link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log> <link rel=alternate type=application/atom+xml href=http://hg.diveintopython3.org/atom-log>
@@ -42,6 +43,7 @@ body{counter-reset:h1 1}
<p class=fancy>Books about programming usually start with a bunch of boring chapters about fundamentals and eventually work up to building something useful. Let's skip all that. Here is a complete, working Python program. It probably makes absolutely no sense to you. Don't worry about that, because you're going to dissect it line by line. But read through it first and see what, if anything, you can make of it. <p class=fancy>Books about programming usually start with a bunch of boring chapters about fundamentals and eventually work up to building something useful. Let's skip all that. Here is a complete, working Python program. It probably makes absolutely no sense to you. Don't worry about that, because you're going to dissect it line by line. But read through it first and see what, if anything, you can make of it.
<p id=noscript>[The code examples will be easier to follow if you enable Javascript, but whatever.] <p id=noscript>[The code examples will be easier to follow if you enable Javascript, but whatever.]
<p class=download>[<a href=humansize.py>download <code>humansize.py</code></a>]</p> <p class=download>[<a href=humansize.py>download <code>humansize.py</code></a>]</p>
<p class=skip><a href=#skip-humansize-py>skip over this code listing</a>
<pre><code>SUFFIXES = {1000: ['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'], <pre><code>SUFFIXES = {1000: ['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'],
1024: ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']} 1024: ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']}
@@ -70,7 +72,8 @@ def approximate_size(size, a_kilobyte_is_1024_bytes=True):
if __name__ == "__main__": if __name__ == "__main__":
print(approximate_size(1000000000000, False)) print(approximate_size(1000000000000, False))
print(approximate_size(1000000000000))</code></pre> print(approximate_size(1000000000000))</code></pre>
<p>Now let's run this program on the command line. On Windows, it will look something like this: <p id=skip-humansize-py>Now let's run this program on the command line. On Windows, it will look something like this:
<p class=skip><a href=#skip-humansize-screen>skip over this command output listing</a>
<pre class=screen> <pre class=screen>
<samp class=prompt>c:\home\diveintopython3> </samp><kbd>c:\python30\python.exe humansize.py</kbd> <samp class=prompt>c:\home\diveintopython3> </samp><kbd>c:\python30\python.exe humansize.py</kbd>
<samp>1.0 TB <samp>1.0 TB
@@ -80,7 +83,7 @@ if __name__ == "__main__":
<samp class=prompt>you@localhost:~$ </samp><kbd>python3 humansize.py</kbd> <samp class=prompt>you@localhost:~$ </samp><kbd>python3 humansize.py</kbd>
<samp>1.0 TB <samp>1.0 TB
931.3 GiB</samp></pre> 931.3 GiB</samp></pre>
<!-- FIXME: this would be a good place to explain what the program, you know, actually does --> <p id=skip-humansize-screen>FIXME: this would be a good place to explain what the program, you know, actually does.
<h2 id=declaringfunctions>Declaring functions</h2> <h2 id=declaringfunctions>Declaring functions</h2>
<p>Python has functions like most other languages, but it does not have separate header files like <abbr>C++</abbr> or <code>interface</code>/<code>implementation</code> sections like Pascal. When you need a function, just declare it, like this: <p>Python has functions like most other languages, but it does not have separate header files like <abbr>C++</abbr> or <code>interface</code>/<code>implementation</code> sections like Pascal. When you need a function, just declare it, like this:
<pre><code>def approximate_size(size, a_kilobyte_is_1024_bytes=True):</code></pre> <pre><code>def approximate_size(size, a_kilobyte_is_1024_bytes=True):</code></pre>
@@ -120,6 +123,7 @@ if __name__ == "__main__":
<p>I won't bore you with a long finger-wagging speech about the importance of documenting your code. Just know that code is written once but read many times, and the most important audience for your code is yourself, six months after writing it (i.e. after you've forgotten everything but need to fix something). Python makes it easy to write readable code, so take advantage of it. You'll thank me in six months. <p>I won't bore you with a long finger-wagging speech about the importance of documenting your code. Just know that code is written once but read many times, and the most important audience for your code is yourself, six months after writing it (i.e. after you've forgotten everything but need to fix something). Python makes it easy to write readable code, so take advantage of it. You'll thank me in six months.
<h3 id=docstrings>Documentation strings</h3> <h3 id=docstrings>Documentation strings</h3>
<p>You can document a Python function by giving it a documentation string (<code>docstring</code> for short). In this program, the <code>approximate_size</code> function has a <code>docstring</code>: <p>You can document a Python function by giving it a documentation string (<code>docstring</code> for short). In this program, the <code>approximate_size</code> function has a <code>docstring</code>:
<p class=skip><a href=#skip-approximate-size>skip over this code listing</a>
<pre><code>def approximate_size(size, a_kilobyte_is_1024_bytes=True): <pre><code>def approximate_size(size, a_kilobyte_is_1024_bytes=True):
"""Convert a file size to human-readable form. """Convert a file size to human-readable form.
@@ -131,7 +135,7 @@ if __name__ == "__main__":
Returns: string Returns: string
"""</code></pre> """</code></pre>
<p>Triple quotes signify a multi-line string. Everything between the start and end quotes is part of a single string, including carriage returns, leading white space, and other quote characters. You can use them anywhere, but you'll see them most often used when defining a <code>docstring</code>. <p id=skip-approximate-size>Triple quotes signify a multi-line string. Everything between the start and end quotes is part of a single string, including carriage returns, leading white space, and other quote characters. You can use them anywhere, but you'll see them most often used when defining a <code>docstring</code>.
<blockquote class="note compare perl5"> <blockquote class="note compare perl5">
<p><span>&#x261E;</span>Triple quotes are also an easy way to define a string with both single and double quotes, like <code>qq/.../</code> in Perl 5. <p><span>&#x261E;</span>Triple quotes are also an easy way to define a string with both single and double quotes, like <code>qq/.../</code> in Perl 5.
</blockquote> </blockquote>
@@ -146,6 +150,7 @@ if __name__ == "__main__":
<h2 id=everythingisanobject>Everything is an object</h2> <h2 id=everythingisanobject>Everything is an object</h2>
<p>In case you missed it, I just said that Python functions have attributes, and that those attributes are available at runtime. A function, like everything else in Python, is an object. <p>In case you missed it, I just said that Python functions have attributes, and that those attributes are available at runtime. A function, like everything else in Python, is an object.
<p>Run the interactive Python shell and follow along: <p>Run the interactive Python shell and follow along:
<p class=skip><a href=#skip-everything-is-an-object-screen>skip over this interpreter listing</a>
<pre class=screen> <pre class=screen>
<a><samp class=prompt>>>> </samp><kbd>import humansize</kbd> <span>&#x2460;</span></a> <a><samp class=prompt>>>> </samp><kbd>import humansize</kbd> <span>&#x2460;</span></a>
<a><samp class=prompt>>>> </samp><kbd>print(humansize.approximate_size(4096, True))</kbd> <span>&#x2461;</span></a> <a><samp class=prompt>>>> </samp><kbd>print(humansize.approximate_size(4096, True))</kbd> <span>&#x2461;</span></a>
@@ -161,7 +166,7 @@ if __name__ == "__main__":
Returns: string Returns: string
</samp></pre> </samp></pre>
<ol> <ol id=skip-everything-is-an-object-screen>
<li>The first line imports the <code>humansize</code> program as a module -- a chunk of code that you can use interactively, or from a larger Python program. (You'll see examples of multi-module Python programs in [FIXME xref].) Once you import a module, you can reference any of its public functions, classes, or attributes. Modules can do this to access functionality in other modules, and you can do it in the Python interactive shell too. This is an important concept, and you'll see a lot more of it throughout this book. <li>The first line imports the <code>humansize</code> program as a module -- a chunk of code that you can use interactively, or from a larger Python program. (You'll see examples of multi-module Python programs in [FIXME xref].) Once you import a module, you can reference any of its public functions, classes, or attributes. Modules can do this to access functionality in other modules, and you can do it in the Python interactive shell too. This is an important concept, and you'll see a lot more of it throughout this book.
<li>When you want to use functions defined in imported modules, you need to include the module name. So you can't just say <code>approximate_size</code>; it must be <code>humansize.approximate_size</code>. If you've used classes in Java, this should feel vaguely familiar. <li>When you want to use functions defined in imported modules, you need to include the module name. So you can't just say <code>approximate_size</code>; it must be <code>humansize.approximate_size</code>. If you've used classes in Java, this should feel vaguely familiar.
<li>Instead of calling the function as you would expect to, you asked for one of the function's attributes, <code>__doc__</code>. <li>Instead of calling the function as you would expect to, you asked for one of the function's attributes, <code>__doc__</code>.
@@ -171,6 +176,7 @@ if __name__ == "__main__":
</blockquote> </blockquote>
<h3 id=importsearchpath>The <code>import</code> search path</h3> <h3 id=importsearchpath>The <code>import</code> search path</h3>
<p>Before this goes any further, I want to briefly mention the library search path. Python looks in several places when you try to import a module. Specifically, it looks in all the directories defined in <code>sys.path</code>. This is just a list, and you can easily view it or modify it with standard list methods. (You'll learn more about lists later in this chapter.) <p>Before this goes any further, I want to briefly mention the library search path. Python looks in several places when you try to import a module. Specifically, it looks in all the directories defined in <code>sys.path</code>. This is just a list, and you can easily view it or modify it with standard list methods. (You'll learn more about lists later in this chapter.)
<p class=skip><a href=#skip-import-search-path-screen>skip over this interpreter listing</a>
<pre class=screen> <pre class=screen>
<a><samp class=prompt>>>> </samp><kbd>import sys</kbd> <span>&#x2460;</span></a> <a><samp class=prompt>>>> </samp><kbd>import sys</kbd> <span>&#x2460;</span></a>
<a><samp class=prompt>>>> </samp><kbd>sys.path</kbd> <span>&#x2461;</span></a> <a><samp class=prompt>>>> </samp><kbd>sys.path</kbd> <span>&#x2461;</span></a>
@@ -178,7 +184,7 @@ if __name__ == "__main__":
<a><samp class=prompt>>>> </samp><kbd>sys</kbd> <span>&#x2462;</span></a> <a><samp class=prompt>>>> </samp><kbd>sys</kbd> <span>&#x2462;</span></a>
<samp>&lt;module 'sys' (built-in)></samp> <samp>&lt;module 'sys' (built-in)></samp>
<a><samp class=prompt>>>> </samp><kbd>sys.path.append('/my/new/path')</kbd> <span>&#x2463;</span></a></pre> <a><samp class=prompt>>>> </samp><kbd>sys.path.append('/my/new/path')</kbd> <span>&#x2463;</span></a></pre>
<ol> <ol id=skip-import-search-path-screen>
<li>Importing the <code>sys</code> module makes all of its functions and attributes available. <li>Importing the <code>sys</code> module makes all of its functions and attributes available.
<li><code>sys.path</code> is a list of directory names that constitute the current search path. (Yours will look different, depending on your operating system, what version of Python you're running, and where it was originally installed.) Python will look through these directories (in this order) for a <code>.py</code> file whose name matches what you're trying to import. <li><code>sys.path</code> is a list of directory names that constitute the current search path. (Yours will look different, depending on your operating system, what version of Python you're running, and where it was originally installed.) Python will look through these directories (in this order) for a <code>.py</code> file whose name matches what you're trying to import.
<li>Actually, I lied; the truth is more complicated than that, because not all modules are stored as <code>.py</code> files. Some, like the <code>sys</code> module, are "built-in modules"; they are actually baked right into Python itself. Built-in modules behave just like regular modules, but their Python source code is not available, because they are not written in Python! (The <code>sys</code> module is written in <abbr>C</abbr>.) <li>Actually, I lied; the truth is more complicated than that, because not all modules are stored as <code>.py</code> files. Some, like the <code>sys</code> module, are "built-in modules"; they are actually baked right into Python itself. Built-in modules behave just like regular modules, but their Python source code is not available, because they are not written in Python! (The <code>sys</code> module is written in <abbr>C</abbr>.)
@@ -190,6 +196,7 @@ if __name__ == "__main__":
<p>This is so important that I'm going to repeat it in case you missed it the first few times: <em>everything in Python is an object</em>. Strings are objects. Lists are objects. Functions are objects. Even modules are objects. <p>This is so important that I'm going to repeat it in case you missed it the first few times: <em>everything in Python is an object</em>. Strings are objects. Lists are objects. Functions are objects. Even modules are objects.
<h2 id=indentingcode>Indenting code</h2> <h2 id=indentingcode>Indenting code</h2>
<p>Python functions have no explicit <code>begin</code> or <code>end</code>, and no curly braces to mark where the function code starts and stops. The only delimiter is a colon (<code>:</code>) and the indentation of the code itself. <p>Python functions have no explicit <code>begin</code> or <code>end</code>, and no curly braces to mark where the function code starts and stops. The only delimiter is a colon (<code>:</code>) and the indentation of the code itself.
<p class=skip><a href=#skip-indenting-code>skip over this code listing</a>
<pre><code> <pre><code>
<a>def approximate_size(size, a_kilobyte_is_1024_bytes=True): <span>&#x2460;</span></a> <a>def approximate_size(size, a_kilobyte_is_1024_bytes=True): <span>&#x2460;</span></a>
<a> if size &lt; 0: <span>&#x2461;</span></a> <a> if size &lt; 0: <span>&#x2461;</span></a>
@@ -202,7 +209,7 @@ if __name__ == "__main__":
return "{0:.1f} {1}".format(size, suffix) return "{0:.1f} {1}".format(size, suffix)
raise ValueError('number too large')</code></pre> raise ValueError('number too large')</code></pre>
<ol> <ol id=skip-indenting-code>
<li>Code blocks are defined by their indentation. By "code block," I mean functions, <code>if</code> statements, <code>for</code> loops, <code>while</code> loops, and so forth. Indenting starts a block and unindenting ends it. There are no explicit braces, brackets, or keywords. This means that whitespace is significant, and must be consistent. In this example, the function code is indented four spaces. It doesn't need to be four spaces, it just needs to be consistent. The first line that is not indented marks the end of the function. <li>Code blocks are defined by their indentation. By "code block," I mean functions, <code>if</code> statements, <code>for</code> loops, <code>while</code> loops, and so forth. Indenting starts a block and unindenting ends it. There are no explicit braces, brackets, or keywords. This means that whitespace is significant, and must be consistent. In this example, the function code is indented four spaces. It doesn't need to be four spaces, it just needs to be consistent. The first line that is not indented marks the end of the function.
<li>In Python, an <code>if</code> statement is followed by a code block. If the <code>if</code> expression evaluates to true, the indented block is executed, otherwise it falls to the <code>else</code> block (if any). (Note the lack of parentheses around the expression.) <li>In Python, an <code>if</code> statement is followed by a code block. If the <code>if</code> expression evaluates to true, the indented block is executed, otherwise it falls to the <code>else</code> block (if any). (Note the lack of parentheses around the expression.)
<li>This line is inside the <code>if</code> code block. This <code>raise</code> statement will raise an exception (of type <code>ValueError</code>), but only if <code>size &lt; 0</code>. <li>This line is inside the <code>if</code> code block. This <code>raise</code> statement will raise an exception (of type <code>ValueError</code>), but only if <code>size &lt; 0</code>.
@@ -215,19 +222,22 @@ if __name__ == "__main__":
</blockquote> </blockquote>
<h2 id=runningscripts>Running scripts</h2> <h2 id=runningscripts>Running scripts</h2>
<p>Python modules are objects and have several useful attributes. You can use this to easily test your modules as you write them, by including a special block of code that executes when you run the Python file on the command line. Take the last few lines of <code>humansize.py</code>: <p>Python modules are objects and have several useful attributes. You can use this to easily test your modules as you write them, by including a special block of code that executes when you run the Python file on the command line. Take the last few lines of <code>humansize.py</code>:
<p class=skip><a href=#skip-running-scripts>skip over this code listing</a>
<pre><code> <pre><code>
if __name__ == "__main__": if __name__ == "__main__":
print(approximate_size(1000000000000, False)) print(approximate_size(1000000000000, False))
print(approximate_size(1000000000000))</code></pre> print(approximate_size(1000000000000))</code></pre>
<blockquote class="note compare clang"> <blockquote class="note compare clang" id=skip-running-scripts>
<p><span>&#x261E;</span>Like <abbr>C</abbr>, Python uses <code>==</code> for comparison and <code>=</code> for assignment. Unlike <abbr>C</abbr>, Python does not support in-line assignment, so there's no chance of accidentally assigning the value you thought you were comparing. <p><span>&#x261E;</span>Like <abbr>C</abbr>, Python uses <code>==</code> for comparison and <code>=</code> for assignment. Unlike <abbr>C</abbr>, Python does not support in-line assignment, so there's no chance of accidentally assigning the value you thought you were comparing.
</blockquote> </blockquote>
<p>So what makes this <code>if</code> statement special? Well, modules are objects, and all modules have a built-in attribute <code>__name__</code>. A module's <code>__name__</code> depends on how you're using the module. If you <code>import</code> the module, then <code>__name__</code> is the module's filename, without a directory path or file extension. <p>So what makes this <code>if</code> statement special? Well, modules are objects, and all modules have a built-in attribute <code>__name__</code>. A module's <code>__name__</code> depends on how you're using the module. If you <code>import</code> the module, then <code>__name__</code> is the module's filename, without a directory path or file extension.
<p class=skip><a href=#skip-import-humansize>skip over this interpreter listing</a>
<pre class=screen> <pre class=screen>
<samp class=prompt>>>> </samp><kbd>import humansize</kbd> <samp class=prompt>>>> </samp><kbd>import humansize</kbd>
<samp class=prompt>>>> </samp><kbd>humansize.__name__</kbd> <samp class=prompt>>>> </samp><kbd>humansize.__name__</kbd>
<samp>'humansize'</samp></pre> <samp>'humansize'</samp></pre>
<p>But you can also run the module directly as a standalone program, in which case <code>__name__</code> will be a special default value, <code>__main__</code>. Python will evaluate this <code>if</code> statement, find a true expression, and execute the <code>if</code> code block. In this case, it prints two values. <p id=skip-import-humansize>But you can also run the module directly as a standalone program, in which case <code>__name__</code> will be a special default value, <code>__main__</code>. Python will evaluate this <code>if</code> statement, find a true expression, and execute the <code>if</code> code block. In this case, it prints two values.
<p class=skip><a href=#furtherreading>skip over this command output listing</a>
<pre class=screen> <pre class=screen>
<samp class=prompt>c:\home\diveintopython3> </samp><kbd>c:\python30\python.exe humansize.py</kbd> <samp class=prompt>c:\home\diveintopython3> </samp><kbd>c:\python30\python.exe humansize.py</kbd>
<samp>1.0 TB <samp>1.0 TB