diff --git a/advanced-classes.html b/advanced-classes.html deleted file mode 100644 index 782bf58..0000000 --- a/advanced-classes.html +++ /dev/null @@ -1,242 +0,0 @@ - - - -Advanced Classes - Dive into Python 3 - - - - - - - -
  
-

You are here: Home Dive Into Python 3 -

Difficulty level: ♦♦♦♦♢ -

Advanced Classes

-
-

FIXME
— FIXME -

-

  -

Diving In

-

FIXME - -

Ordered Dictionary: Not An Oxymoron

- -

[FIXME here's why ordered dicts are useful: http://www.gossamer-threads.com/lists/python/dev/656556 ] - -

[download ordereddict.py] -

class OrderedDict(dict, MutableMapping):
-    'Dictionary that remembers insertion order'
-    # An inherited dict maps keys to values.
-    # The inherited dict provides __getitem__, __len__, __contains__, and get.
-    # The remaining methods are order-aware.
-    # Big-O running times for all methods are the same as for regular dictionaries.
-
-    # The internal self.__map dictionary maps keys to links in a doubly linked list.
-    # The circular doubly linked list starts and ends with a sentinel element.
-    # The sentinel element never gets deleted (this simplifies the algorithm).
-    # The prev/next links are weakref proxies (to prevent circular references).
-    # Individual links are kept alive by the hard reference in self.__map.
-    # Those hard references disappear when a key is deleted from an OrderedDict.
-
-    def __init__(self, *args, **kwds):
-        '''Initialize an ordered dictionary.  Signature is the same as for
-        regular dictionaries, but keyword arguments are not recommended
-        because their insertion order is arbitrary.
-
-        '''
-        if len(args) > 1:
-            raise TypeError('expected at most 1 arguments, got %d' % len(args))
-        try:
-            self.__root
-        except AttributeError:
-            self.__root = root = _Link()    # sentinel node for the doubly linked list
-            root.prev = root.next = root
-            self.__map = {}
-        self.update(*args, **kwds)
-
-    def clear(self):
-        'od.clear() -> None.  Remove all items from od.'
-        root = self.__root
-        root.prev = root.next = root
-        self.__map.clear()
-        dict.clear(self)
-
-    def __setitem__(self, key, value):
-        'od.__setitem__(i, y) <==> od[i]=y'
-        # Setting a new item creates a new link which goes at the end of the linked
-        # list, and the inherited dictionary is updated with the new key/value pair.
-        if key not in self:
-            self.__map[key] = link = _Link()
-            root = self.__root
-            last = root.prev
-            link.prev, link.next, link.key = last, root, key
-            last.next = root.prev = _proxy(link)
-        dict.__setitem__(self, key, value)
-
-    def __delitem__(self, key):
-        'od.__delitem__(y) <==> del od[y]'
-        # Deleting an existing item uses self.__map to find the link which is
-        # then removed by updating the links in the predecessor and successor nodes.
-        dict.__delitem__(self, key)
-        link = self.__map.pop(key)
-        link.prev.next = link.next
-        link.next.prev = link.prev
-
-    def __iter__(self):
-        'od.__iter__() <==> iter(od)'
-        # Traverse the linked list in order.
-        root = self.__root
-        curr = root.next
-        while curr is not root:
-            yield curr.key
-            curr = curr.next
-
-    def __reversed__(self):
-        'od.__reversed__() <==> reversed(od)'
-        # Traverse the linked list in reverse order.
-        root = self.__root
-        curr = root.prev
-        while curr is not root:
-            yield curr.key
-            curr = curr.prev
-
-    def __reduce__(self):
-        'Return state information for pickling'
-        items = [[k, self[k]] for k in self]
-        tmp = self.__map, self.__root
-        del self.__map, self.__root
-        inst_dict = vars(self).copy()
-        self.__map, self.__root = tmp
-        if inst_dict:
-            return (self.__class__, (items,), inst_dict)
-        return self.__class__, (items,)
-
-    setdefault = MutableMapping.setdefault
-    update = MutableMapping.update
-    pop = MutableMapping.pop
-    keys = MutableMapping.keys
-    values = MutableMapping.values
-    items = MutableMapping.items
-
-    def popitem(self, last=True):
-        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
-        Pairs are returned in LIFO order if last is true or FIFO order if false.
-
-        '''
-        if not self:
-            raise KeyError('dictionary is empty')
-        key = next(reversed(self) if last else iter(self))
-        value = self.pop(key)
-        return key, value
-
-    def __repr__(self):
-        'od.__repr__() <==> repr(od)'
-        if not self:
-            return '%s()' % (self.__class__.__name__,)
-        return '%s(%r)' % (self.__class__.__name__, list(self.items()))
-
-    def copy(self):
-        'od.copy() -> a shallow copy of od'
-        return self.__class__(self)
-
-    @classmethod
-    def fromkeys(cls, iterable, value=None):
-        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
-        and values equal to v (which defaults to None).
-
-        '''
-        d = cls()
-        for key in iterable:
-            d[key] = value
-        return d
-
-    def __eq__(self, other):
-        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
-        while comparison to a regular mapping is order-insensitive.
-
-        '''
-        if isinstance(other, OrderedDict):
-            return len(self)==len(other) and \
-                   all(p==q for p, q in zip(self.items(), other.items()))
-        return dict.__eq__(self, other)
-
-    def __ne__(self, other):
-        '''od.__ne__(y) <==> od!=y.  Comparison to another OD is order-sensitive
-        while comparison to a regular mapping is order-insensitive.
-
-        '''
-        return not self == other
- -

⁂ - -

Attributes of a Class Object

- -

FIXME - -

->>> import ordereddict
->>> od = ordereddict.OrderedDict()
->>> klass = od.__class__       
->>> type(klass)
-<class 'abc.ABCMeta'>
->>> klass.__name__
-'OrderedDict'
-
->>> klass.__module__
-'ordereddict'
->>> klass.__bases__
-(<class 'dict'>, <class '_abcoll.MutableMapping'>)
-
    -
  1. FIXME -
- -
-# continued from previous example
->>> klass.__dict__
-{'__abstractmethods__': frozenset(),
- '__delitem__': <function __delitem__ at 0x00DCB6A8>,
- '__dict__': <attribute '__dict__' of 'OrderedDict' objects>,
- '__doc__': None,
- '__eq__': <function __eq__ at 0x00DD2930>,
- '__hash__': None,
- '__init__': <function __init__ at 0x00DC41E0>,
- '__iter__': <function __iter__ at 0x00DCB618>,
- '__module__': 'ordereddict',
- '__reduce__': <function __reduce__ at 0x00DCB6F0>,
- '__repr__': <function __repr__ at 0x00DCB8E8>,
- '__reversed__': <function __reversed__ at 0x00DCB660>,
- '__setitem__': <function __setitem__ at 0x00DCB5D0>,
- '__weakref__': <attribute '__weakref__' of 'OrderedDict' objects>,
- '_abc_cache': <_weakrefset.WeakSet object at 0x00DCF950>,
- '_abc_negative_cache': <_weakrefset.WeakSet object at 0x00DCF990>,
- '_abc_negative_cache_version': 12,
- '_abc_registry': <_weakrefset.WeakSet object at 0x00DCF910>,
- 'clear': <function clear at 0x00DCB7C8>,
- 'copy': <function copy at 0x00DD28A0>,
- 'fromkeys': <classmethod object at 0x00DCF8F0>,
- 'items': <function items at 0x00D60150>,
- 'keys': <function keys at 0x00D60108>,
- 'pop': <function pop at 0x00D60978>,
- 'popitem': <function popitem at 0x00DCB780>,
- 'setdefault': <function setdefault at 0x00D60A98>,
- 'update': <function update at 0x00D60A50>,
- 'values': <function values at 0x00D60198>}
-
    -
  1. FIXME -
- -

⁂ - -

Implementing Fractions

- -

-

© 2001–9 Mark Pilgrim - - - diff --git a/advanced-iterators.html b/advanced-iterators.html index cbf4278..bdad90d 100755 --- a/advanced-iterators.html +++ b/advanced-iterators.html @@ -205,7 +205,7 @@ AssertionError: Only for very large values of 2 >>> tuple(ord(c) for c in unique_characters) (69, 68, 77, 79, 78, 83, 82, 89)

    -
  1. A generator expression is like an anonymous function that yields values. The expression itself looks like a list comprehension [FIXME xref], but it’s wrapped in parentheses instead of square brackets. +
  2. A generator expression is like an anonymous function that yields values. The expression itself looks like a list comprehension, but it’s wrapped in parentheses instead of square brackets.
  3. The generator expression returns… an iterator.
  4. Calling next(gen) returns the next value from the iterator.
  5. If you like, you can iterate through all the possible values and return a tuple, list, or set, by passing the generator expression to tuple(), list(), or set(). In these cases, you don’t need an extra set of parentheses — just pass the “bare” expression ord(c) for c in unique_characters to the tuple() function, and Python figures out that it’s a generator expression. @@ -408,7 +408,7 @@ Wesley 'N': '5', 'S': '1', 'R': '6', 'Y': '7'}
    1. Given a list of letters and a list of digits (each represented here as 1-character strings), the zip function will create a pairing of letters and digits, in order. -
    2. Why is that cool? Because that data structure happens to be exactly the right structure to pass to the dict() function to create a dictionary that uses letters as keys and their associated digits as values. (This isn’t the only way to do it, of course. You could use a dictionary comprehension [FIXME xref] to create the dictionary directly.) Although the printed representation of the dictionary lists the pairs in a different order (dictionaries have no “order” per se), you can see that each letter is associated with the digit, based on the ordering of the original characters and guess sequences. +
    3. Why is that cool? Because that data structure happens to be exactly the right structure to pass to the dict() function to create a dictionary that uses letters as keys and their associated digits as values. (This isn’t the only way to do it, of course. You could use a dictionary comprehension to create the dictionary directly.) Although the printed representation of the dictionary lists the pairs in a different order (dictionaries have no “order” per se), you can see that each letter is associated with the digit, based on the ordering of the original characters and guess sequences.

    The alphametics solver uses this technique to create a dictionary that maps letters in the puzzle to digits in the solution, for each possible solution. @@ -635,7 +635,7 @@ NameError: name '__import__' is not defined

    Many, many thanks to Raymond Hettinger for agreeing to relicense his code so I could port it to Python 3 and use it as the basis for this chapter. -

    +

    © 2001–9 Mark Pilgrim diff --git a/case-study-porting-chardet-to-python-3.html b/case-study-porting-chardet-to-python-3.html index 87f3c56..20afe38 100755 --- a/case-study-porting-chardet-to-python-3.html +++ b/case-study-porting-chardet-to-python-3.html @@ -5,7 +5,7 @@ @@ -259,6 +259,8 @@ ValueError: I/O operation on closed file. 9 Alex 10 Lizzie +

    ⁂ +

    Writing to Text Files

    You can write to files in much the same way that you read from them. First you open a file and get a file object, then you use methods on the file object to write data to the file, then you close the file. @@ -296,6 +298,8 @@ ValueError: I/O operation on closed file.

    Did you notice the encoding parameter that got passed in to the open() function while you were opening a file for writing? It’s important; don’t ever leave it out! As you saw in the beginning of this chapter, files don’t contain strings, they contain bytes. Reading a “string” from a text file only works because you told Python what encoding to use to read a stream of bytes and convert it to a string. Writing text to a file presents the same problem in reverse. You can’t write characters to a file; characters are an abstraction. In order to write to the file, Python needs to know how to convert your string into a sequence of bytes. The only way to be sure it’s performing the correct conversion is to specify the encoding parameter when you open the file for writing. +

    ⁂ +

    Binary Files

    my dog Beauregard @@ -343,6 +347,8 @@ AttributeError: '_io.BufferedReader' object has no attribute 'encoding'That means that there’s never an unexpected mismatch between the number you passed into the read() method and the position index you get out of the tell() method. The read() method reads bytes, and the seek() and tell() methods track the number of bytes read. For binary files, they’ll always agree.

+

⁂ +

File-like Objects

One of Python’s greatest strengths is its dynamic binding, and one powerful use of dynamic binding is the file-like object. @@ -403,6 +409,8 @@ AttributeError: '_io.BufferedReader' object has no attribute 'encoding'you@localhost:~$ cat out.log A nine mile walk is no joke, especially in the rain. +

⁂ +

Standard Input, Output, and Error

Command-line gurus are already familiar with the concept of standard input, standard output, and standard error. This section is for the rest of you. @@ -503,6 +511,8 @@ C

Redirecting standard error works exactly the same way, using sys.stderr instead of sys.stdout. +

⁂ +

Further Reading

-

+

© 2001–9 Mark Pilgrim diff --git a/generators.html b/generators.html index f85ad37..888ce72 100755 --- a/generators.html +++ b/generators.html @@ -5,7 +5,7 @@ @@ -411,7 +411,7 @@ def plural(noun, rules_filename='plural5-rules.txt'):

  • English Irregular Plural Nouns -

    +

    © 2001–9 Mark Pilgrim diff --git a/http-web-services.html b/http-web-services.html index 5d0d882..f7a1645 100755 --- a/http-web-services.html +++ b/http-web-services.html @@ -5,7 +5,7 @@ diff --git a/index.html b/index.html index 3bf469f..1cbc6bc 100644 --- a/index.html +++ b/index.html @@ -29,15 +29,14 @@ h1:before{content:''}

  • Installing Python
  • Your First Python Program
  • Native Datatypes +
  • Comprehensions
  • Strings
  • Regular Expressions
  • Closures & Generators
  • Classes & Iterators
  • Advanced Iterators
  • Unit Testing -
  • Advanced Unit Testing
  • Refactoring -
  • Advanced Classes
  • Files
  • XML
  • Serializing Python Objects diff --git a/iterators-and-generators.html b/iterators-and-generators.html deleted file mode 100755 index fa3eb43..0000000 --- a/iterators-and-generators.html +++ /dev/null @@ -1,33 +0,0 @@ - - - - -Secret Leftover Page - Dive into Python 3 - - - - - - -
      
    -

    You are here: Home Dive Into Python 3 -

    Secret Leftover Page

    -
    -

    You step in the stream / but the water has moved on. / This page is not here.
    — 404 Not Found haiku -

    -

      -

    Huh?

    -

    This book used to have a chapter called “Iterators & Generators,” but I split the chapter in half so I could introduce Python classes before talking about iterators. The content that used to be at this address is now in one of those two chapters: - -

    - -

    © 2001–9 Mark Pilgrim - - - diff --git a/iterators.html b/iterators.html index 8e2c49c..73cc60b 100755 --- a/iterators.html +++ b/iterators.html @@ -5,7 +5,7 @@ @@ -383,7 +383,7 @@ rules = LazyRules()

  • Generator Tricks for Systems Programmers -

    +

    © 2001–9 Mark Pilgrim diff --git a/native-datatypes.html b/native-datatypes.html index 4ad5139..21fd725 100755 --- a/native-datatypes.html +++ b/native-datatypes.html @@ -827,7 +827,7 @@ KeyError: 'db.diveintopython3.org'

  • PEP 237: Unifying Long Integers and Integers
  • PEP 238: Changing the Division Operator -

    +

    © 2001–9 Mark Pilgrim diff --git a/refactoring.html b/refactoring.html index 552a375..92a2339 100755 --- a/refactoring.html +++ b/refactoring.html @@ -26,7 +26,7 @@ body{counter-reset:h1 10} >>> roman7.from_roman('') 0

      -
    1. Remember in the [FIXME-xref] previous section when you kept seeing that an empty string would match the regular expression you were using to check for valid Roman numerals? Well, it turns out that this is still true for the final version of the regular expression. And that’s a bug; you want an empty string to raise an InvalidRomanNumeralError exception just like any other sequence of characters that don’t represent a valid Roman numeral. +
    2. This is a bug. An empty string should raise an InvalidRomanNumeralError exception, just like any other sequence of characters that don’t represent a valid Roman numeral.

    After reproducing the bug, and before fixing it, you should write a test case that fails, thus illustrating the bug. @@ -120,7 +120,7 @@ Ran 11 tests in 0.156s

    Handling Changing Requirements

    Despite your best efforts to pin your customers to the ground and extract exact requirements from them on pain of horrible nasty things involving scissors and hot wax, requirements will change. Most customers don’t know what they want until they see it, and even if they do, they aren’t that good at articulating what they want precisely enough to be useful. And even if they do, they’ll want more in the next release anyway. So be prepared to update your test cases as requirements change. -

    Suppose, for instance, that you wanted to expand the range of the Roman numeral conversion functions. Remember [FIXME-xref] the rule that said that no character could be repeated more than three times? Well, the Romans were willing to make an exception to that rule by having 4 M characters in a row to represent 4000. If you make this change, you’ll be able to expand the range of convertible numbers from 1..3999 to 1..4999. But first, you need to make some changes to your test cases. +

    Suppose, for instance, that you wanted to expand the range of the Roman numeral conversion functions. Normally, no character in a Roman numeral can be repeated more than three times in a row. But the Romans were willing to make an exception to that rule by having 4 M characters in a row to represent 4000. If you make this change, you’ll be able to expand the range of convertible numbers from 1..3999 to 1..4999. But first, you need to make some changes to your test cases.

    [download roman8.py]

    class KnownValues(unittest.TestCase):
    @@ -471,7 +471,7 @@ OK
  • Refactoring mercilessly to improve performance, scalability, readability, maintainability, or whatever other -ility you’re lacking -

    +

    © 2001–9 Mark Pilgrim diff --git a/regular-expressions.html b/regular-expressions.html index 8375780..7b6a542 100755 --- a/regular-expressions.html +++ b/regular-expressions.html @@ -5,7 +5,7 @@ @@ -259,13 +259,13 @@ body{counter-reset:h1 4}

     >>> pattern = '''
         ^                   # beginning of string
    -    M{0,3}              # thousands - 0 to 3 M's
    -    (CM|CD|D?C{0,3})    # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's),
    -                        #            or 500-800 (D, followed by 0 to 3 C's)
    -    (XC|XL|L?X{0,3})    # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's),
    -                        #        or 50-80 (L, followed by 0 to 3 X's)
    -    (IX|IV|V?I{0,3})    # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's),
    -                        #        or 5-8 (V, followed by 0 to 3 I's)
    +    M{0,3}              # thousands - 0 to 3 Ms
    +    (CM|CD|D?C{0,3})    # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 Cs),
    +                        #            or 500-800 (D, followed by 0 to 3 Cs)
    +    (XC|XL|L?X{0,3})    # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 Xs),
    +                        #        or 50-80 (L, followed by 0 to 3 Xs)
    +    (IX|IV|V?I{0,3})    # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 Is),
    +                        #        or 5-8 (V, followed by 0 to 3 Is)
         $                   # end of string
         '''
     >>> re.search(pattern, 'M', re.VERBOSE)                 
    @@ -437,7 +437,7 @@ AttributeError: 'NoneType' object has no attribute 'groups'
  • (x) in general is a remembered group. You can get the value of what matched by using the groups() method of the object returned by re.search.

    Regular expressions are extremely powerful, but they are not the correct solution for every problem. You should learn enough about them to know when they are appropriate, when they will solve your problems, and when they will cause more problems than they solve. -

    +

    © 2001–9 Mark Pilgrim diff --git a/strings.html b/strings.html index f854b32..6d0fca9 100755 --- a/strings.html +++ b/strings.html @@ -5,7 +5,7 @@ @@ -264,12 +264,10 @@ experience of years.

    1. The split() string method takes one argument, a delimiter, and split a string into a list of strings based on the delimiter. Here, the delimiter is an ampersand character, but it could be anything. -
    2. Now we have a list of strings, each with a key, followed by an equals sign, followed by a value. We want to iterate over the entire list and split each string into two strings based on the first equals sign. (In theory, a value could contain an equals sign too. If we just used 'key=value=foo'.split('='), we would end up with a three-item list ['key', 'value', 'foo'].) +
    3. Now we have a list of strings, each with a key, followed by an equals sign, followed by a value. We can use a list comprehension to iterate over the entire list and split each string into two strings based on the first equals sign. (In theory, a value could contain an equals sign too. If we just used 'key=value=foo'.split('='), we would end up with a three-item list ['key', 'value', 'foo'].)
    4. Finally, Python can turn that list-of-lists into a dictionary simply by passing it to the dict() function.
    -

    [FIXME - this is the first time we've seen a list comprehension. Add a forward or backward reference once we have a full section explaining them.] -

    The previous example looks a lot like parsing query parameters in a URL, but real-life URL parsing is actually more complicated than this. If you’re dealing with URL query parameters, you’re better off using the urllib.parse.parse_qs() function, which handles some non-obvious edge cases.

    @@ -459,7 +457,7 @@ TypeError: Can't convert 'bytes' object to str implicitly
  • PEP 3101: Advanced String Formatting -

    +

    © 2001–9 Mark Pilgrim diff --git a/table-of-contents.html b/table-of-contents.html index 6833f43..37a76fc 100755 --- a/table-of-contents.html +++ b/table-of-contents.html @@ -89,6 +89,10 @@ ul li ol{margin:0;padding:0 0 0 2.5em}

  • Further reading +
  • Comprehensions +
      +
    1. Diving In +
  • Strings
    1. Some Boring Stuff You Need To Understand Before You Can Dive In @@ -161,29 +165,23 @@ ul li ol{margin:0;padding:0 0 0 2.5em}
    2. Putting It All Together
    3. Further Reading
    -
  • Unit testing +
  • Unit Testing
      -
    1. (Not) diving in -
    2. A single question -
    3. “Halt and catch fire” -
    4. More halting, more fire -
    5. ... +
    6. (Not) Diving In +
    7. A Single Question +
    8. “Halt and Catch Fire” +
    9. More Halting, More Fire +
    10. And One More Thing… +
    11. A Pleasing Symmetry +
    12. More Bad Input
    -
  • Advanced Unit Testing -
      -
    1. ... -
    -
  • Refactoring your code +
  • Refactoring
    1. Diving in -
    2. Handling changing requirements +
    3. Handling Changing Requirements
    4. Refactoring
    5. Summary
    -
  • Advanced Classes -
      -
    1. Diving in -
  • Files
    1. Diving In diff --git a/unit-testing.html b/unit-testing.html index 0461829..3386dac 100755 --- a/unit-testing.html +++ b/unit-testing.html @@ -537,7 +537,7 @@ OK

      But first, the tests. We’ll need a “known values” test to spot-check for accuracy. Our test suite already contains a mapping of known values; let’s reuse that. -

          def test_from_roman_known_values(self):
      +
          def test_from_roman_known_values(self):
               '''from_roman should give known result with known input'''
               for integer, numeral in self.known_values:
                   result = roman5.from_roman(numeral)
      @@ -545,11 +545,11 @@ OK

      There’s a pleasing symmetry here. The to_roman() and from_roman() functions are inverses of each other. The first converts integers to specially-formatted strings, the second converts specially-formated strings to integers. In theory, we should be able to “round-trip” a number by passing to the to_roman() function to get a string, then passing that string to the from_roman() function to get an integer, and end up with the same number. In mathematical terms, -

      x = f(g(x)) for all values of x
      +
      x = f(g(x)) for all values of x

      In this case, “all values” means any number between 1..3999, since that is the valid range of inputs to the to_roman() function. We can express this symmetry in a test case that runs through all the values 1..3999, calls to_roman(), calls from_roman(), and checks that the output is the same as the original input. -

      class RoundtripCheck(unittest.TestCase):
      +
      class RoundtripCheck(unittest.TestCase):
           def test_roundtrip(self):
               '''from_roman(to_roman(n))==n for all n'''
               for integer in range(1, 4000):
      @@ -587,7 +587,7 @@ FAILED (errors=2)

      A quick stub function will solve that problem. -

      # roman5.py
      +
      # roman5.py
       def from_roman(s):
           '''convert Roman numeral to integer'''
      @@ -621,7 +621,7 @@ FAILED (failures=2)

      Now it’s time to write the from_roman() function. -

      def from_roman(s):
      +
      def from_roman(s):
           """convert Roman numeral to integer"""
           result = 0
           index = 0
      @@ -636,7 +636,7 @@ FAILED (failures=2)

      If you're not clear how from_roman() works, add a print statement to the end of the while loop: -

      def from_roman(s):
      +
      def from_roman(s):
           """convert Roman numeral to integer"""
           result = 0
           index = 0
      @@ -646,7 +646,7 @@ FAILED (failures=2)
      index += len(numeral) print('found', numeral, 'of length', len(numeral), ', adding', integer)
      -
      +
       >>> import roman5
       >>> roman5.from_roman('MCMLXXII')
       found M , of length 1, adding 1000
      @@ -670,6 +670,126 @@ OK

      Two pieces of exciting news here. The first is that the from_roman() function works for good input, at least for all the known values. The second is that the “round trip” test also passed. Combined with the known values tests, you can be reasonably sure that both the to_roman() and from_roman() functions work properly for all possible good values. (This is not guaranteed; it is theoretically possible that to_roman() has a bug that produces the wrong Roman numeral for some particular set of inputs, and that from_roman() has a reciprocal bug that produces the same wrong integer values for exactly that set of Roman numerals that to_roman() generated incorrectly. Depending on your application and your requirements, this possibility may bother you; if so, write more comprehensive test cases until it doesn't bother you.) +

      ⁂ + +

      More Bad Input

      + +

      Now that the from_roman() function works properly with good input, it's time to fit in the last piece of the puzzle: making it work properly with bad input. That means finding a way to look at a string and determine if it's a valid Roman numeral. This is inherently more difficult than validating numeric input in the to_roman() function, but you have a powerful tool at your disposal: regular expressions. (If you’re not familiar with regular expressions, now would be a good time to read the regular expressions chapter.) + +

      As you saw in Case Study: Roman Numerals, there are several simple rules for constructing a Roman numeral, using the letters M, D, C, L, X, V, and I. Let's review the rules: + +

        +
      1. Characters are additive. I is 1, II is 2, and III is 3. VI is 6 (literally, “5 and 1”), VII is 7, and VIII is 8. +
      2. The tens characters (I, X, C, and M) can be repeated up to three times. At 4, you need to subtract from the next highest fives character. You can't represent 4 as IIII; instead, it is represented as IV (“1 less than 5”). 40 is written as XL (“10 less than 50”), 41 as XLI, 42 as XLII, 43 as XLIII, and then 44 as XLIV (“10 less than 50, then 1 less than 5”). +
      3. Similarly, at 9, you need to subtract from the next highest tens character: 8 is VIII, but 9 is IX (“1 less than 10”), not VIIII (since the I character can not be repeated four times). 90 is XC, 900 is CM. +
      4. The fives characters can not be repeated. 10 is always represented as X, never as VV. 100 is always C, never LL. +
      5. Roman numerals are always written highest to lowest, and read left to right, so order of characters matters very much. DC is 600; CD is a completely different number (400, “100 less than 500”). CI is 101; IC is not even a valid Roman numeral (because you can't subtract 1 directly from 100; you would need to write it as XCIX, “10 less than 100, then 1 less than 10”). +
      + +

      Thus, one useful test would be to ensure that the from_roman() function fails when you pass it a string with too many repeated numerals. How many is “too many” depends on the numeral. + +

      class FromRomanBadInput(unittest.TestCase):
      +    def test_too_many_repeated_numerals(self):
      +        '''from_roman should fail with too many repeated numerals'''
      +        for s in ('MMMM', 'DD', 'CCCC', 'LL', 'XXXX', 'VV', 'IIII'):
      +            self.assertRaises(roman6.InvalidRomanNumeralError, roman6.from_roman, s)
      + +

      Another useful test would be to check that certain patterns aren’t repeated. For example, IX is 9, but IXIX is never valid. + +

          def test_repeated_pairs(self):
      +        '''from_roman should fail with repeated pairs of numerals'''
      +        for s in ('CMCM', 'CDCD', 'XCXC', 'XLXL', 'IXIX', 'IVIV'):
      +            self.assertRaises(roman6.InvalidRomanNumeralError, roman6.from_roman, s)
      + +

      A third test could check that numerals appear in the correct order, from highest to lowest value. For example, CL is 150, but LC is never valid, because the numeral for 50 can never come before the numeral for 100. + +

          def test_malformed_antecedents(self):
      +        '''from_roman should fail with malformed antecedents'''
      +        for s in ('IIMXCC', 'VX', 'DCM', 'CMM', 'IXIV',
      +                  'MCMC', 'XCX', 'IVI', 'LM', 'LD', 'LC'):
      +            self.assertRaises(roman6.InvalidRomanNumeralError, roman6.from_roman, s)
      + +

      Each of these tests relies on the from_roman() function raising a new exception, InvalidRomanNumeralError, which we haven’t defined yet. + +

      # roman6.py
      +class InvalidRomanNumeralError(ValueError): pass
      + +

      All three of these tests should fail, since the from_roman() function doesn’t currently have any validity checking. (If they don’t fail now, then what the heck are they testing?) + +

      +you@localhost:~/diveintopython3/examples$ python3 romantest6.py
      +FFF.......
      +======================================================================
      +FAIL: test_malformed_antecedents (__main__.FromRomanBadInput)
      +from_roman should fail with malformed antecedents
      +----------------------------------------------------------------------
      +Traceback (most recent call last):
      +  File "romantest6.py", line 113, in test_malformed_antecedents
      +    self.assertRaises(roman6.InvalidRomanNumeralError, roman6.from_roman, s)
      +AssertionError: InvalidRomanNumeralError not raised by from_roman
      +
      +======================================================================
      +FAIL: test_repeated_pairs (__main__.FromRomanBadInput)
      +from_roman should fail with repeated pairs of numerals
      +----------------------------------------------------------------------
      +Traceback (most recent call last):
      +  File "romantest6.py", line 107, in test_repeated_pairs
      +    self.assertRaises(roman6.InvalidRomanNumeralError, roman6.from_roman, s)
      +AssertionError: InvalidRomanNumeralError not raised by from_roman
      +
      +======================================================================
      +FAIL: test_too_many_repeated_numerals (__main__.FromRomanBadInput)
      +from_roman should fail with too many repeated numerals
      +----------------------------------------------------------------------
      +Traceback (most recent call last):
      +  File "romantest6.py", line 102, in test_too_many_repeated_numerals
      +    self.assertRaises(roman6.InvalidRomanNumeralError, roman6.from_roman, s)
      +AssertionError: InvalidRomanNumeralError not raised by from_roman
      +
      +----------------------------------------------------------------------
      +Ran 10 tests in 0.058s
      +
      +FAILED (failures=3)
      + +

      Good deal. Now, all we need to do is add the regular expression to test for valid Roman numerals into the from_roman() function. + +

      roman_numeral_pattern = re.compile('''
      +    ^                   # beginning of string
      +    M{0,3}              # thousands - 0 to 3 Ms
      +    (CM|CD|D?C{0,3})    # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 Cs),
      +                        #            or 500-800 (D, followed by 0 to 3 Cs)
      +    (XC|XL|L?X{0,3})    # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 Xs),
      +                        #        or 50-80 (L, followed by 0 to 3 Xs)
      +    (IX|IV|V?I{0,3})    # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 Is),
      +                        #        or 5-8 (V, followed by 0 to 3 Is)
      +    $                   # end of string
      +    ''', re.VERBOSE)
      +
      +def from_roman(s):
      +    '''convert Roman numeral to integer'''
      +    if not roman_numeral_pattern.search(s):
      +        raise InvalidRomanNumeralError('Invalid Roman numeral: {0}'.format(s))
      +
      +    result = 0
      +    index = 0
      +    for numeral, integer in roman_numeral_map:
      +        while s[index : index + len(numeral)] == numeral:
      +            result += integer
      +            index += len(numeral)
      +    return result
      + +

      And re-run the tests… + +

      +you@localhost:~/diveintopython3/examples$ python3 romantest7.py
      +..........
      +----------------------------------------------------------------------
      +Ran 10 tests in 0.066s
      +
      +OK
      + +

      And the anticlimax award of the year goes to… the word “OK”, which is printed by the unittest module when all the tests pass. +

      © 2001–9 Mark Pilgrim diff --git a/where-to-go-from-here.html b/where-to-go-from-here.html index 242aaac..5dace0b 100644 --- a/where-to-go-from-here.html +++ b/where-to-go-from-here.html @@ -5,7 +5,7 @@ diff --git a/xml.html b/xml.html index 1f8720f..c79c792 100755 --- a/xml.html +++ b/xml.html @@ -5,7 +5,7 @@