From 23b89610f7a991f98271d10dc31393f74a3ee3b8 Mon Sep 17 00:00:00 2001
From: Richard Jones <r1chardj0n3s@gmail.com>
Date: Fri, 18 Nov 2011 10:44:55 +1100
Subject: [PATCH] implement loads more of format() spec implement closer to the
 format() spec

---
 MANIFEST.in |   2 +-
 README.rst  |  88 +++++++++++
 README.txt  |  44 ------
 parse.py    | 417 +++++++++++++++++++++++++++++++++++++++-------------
 setup.py    |   5 +-
 5 files changed, 410 insertions(+), 146 deletions(-)
 create mode 100644 README.rst
 delete mode 100644 README.txt
diff --git a/MANIFEST.in b/MANIFEST.in
index 8f3b4b5..d68c40a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,2 @@
-include README.txt
+include README.rst
 include *.py
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..f3db1c8
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,88 @@
+Parse strings using a specification based on the Python format() syntax.
+
+   parse() is the opposite of format()
+
+The `Format String Syntax`_ is supported with anonymous (fixed-position),
+named and formatted values are supported::
+
+   {[field name]:[format spec]}
+
+Field names must be a single Python identifier word. No attributes or
+element indexes are supported (as they would make no sense.)
+
+Numbered fields are also not supported: the result of parsing will include
+the parsed fields in the order they are parsed.
+
+There conversion of values to types other than strings is not yet supported.
+
+Some simple parse() format string examples:
+
+ >>> parse("Bring me a {}", "Bring me a shrubbery")
+ <Result ('shrubbery',) {}>
+ >>> parse("The {} who say {}", "The knights who say Ni!")
+ <Result ('knights', 'Ni!') {}>
+ >>> parse("Bring out the holy {item}", "Bring out the holy hand grenade")
+ <Result () {'item': 'hand grenade'}>
+
+Most of the `Format Specification Mini-Language`_ is supported::
+
+   [[fill]align][sign][#][0][width][,][.precision][type]
+
+The align operators will cause spaces (or specified fill character)
+to be stripped from the value. The alignment character "=" is not yet
+supported.
+
+The comma "," separator is not yet supported.
+
+The types supported are the not the format() types but rather some of
+those types b, o, h, x, X and also regular expression character group types
+d, D, w, W, s, S and not the string format types. The format() types n, f,
+F, e, E, g and G are not yet supported.
+
+===== ==========================================
+Type  Characters Matched
+===== ==========================================
+ w    Letters and underscore
+ W    Non-letter and underscore
+ s    Whitespace
+ S    Non-whitespace
+ d    Digits (effectively integer numbers)
+ D    Non-digit
+ b    Binary numbers
+ o    Octal numbers
+ h    Hexadecimal numbers (lower and upper case)
+ x    Lower-case hexadecimal numbers
+ X    Upper-case hexadecimal numbers
+===== ==========================================
+
+Do remember though that most often a straight type-less {} will suffice
+where a more complex type specification might have been used.
+
+So, for example, some typed parsing, and None resulting if the typing
+does not match:
+
+ >>> parse('Hello {:d} {:w}', 'Hello 12 people')
+ <Result ('12', 'people') {}>
+ >>> print parse('Hello {:d} {:w}', 'Hello twelve people')
+ None
+
+And messing about with alignment:
+
+ >>> parse('hello {:<} world', 'hello there     world')
+ <Result ('there',) {}>
+ >>> parse('hello {:^} world', 'hello    there     world')
+ <Result ('there',) {}>
+
+Note that the "center" alignment does not test to make sure the value is
+actually centered. It just strips leading and trailing whitespace.
+
+See also the unit tests at the end of the module for some more
+examples. Run the tests with "python -m parse".
+
+.. _`Format String Syntax`: http://docs.python.org/library/string.html#format-string-syntax
+.. _`Format Specification Mini-Language`: http://docs.python.org/library/string.html#format-specification-mini-language
+
+----
+
+This code is copyright 2011 eKit.com Inc (http://www.ekit.com/)
+See the end of the source file for the license of use.
diff --git a/README.txt b/README.txt
deleted file mode 100644
index 465bb5d..0000000
--- a/README.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Parse strings using a specification based on the Python format() syntax.
-
-Anonymous (fixed-position), named and typed values are supported. Also the
-alignment operators will cause whitespace (or another alignment character)
-to be stripped from the value.
-
-You may not use both fixed and named values in your format string.
-
-The types supported in ":type" expressions are the regular expression
-character group types d, D, w, W, s, S and not the string format types.
-
-So, for example, some fixed-position parsing:
-
- >>> r = parse('hello {}', 'hello world')
- >>> r.fixed
- ('world', )
-
- >>> r = parse('hello {:d} {:w}', 'hello 12 people')
- >>> r.fixed
- ('12', 'people')
-
-And some named parsing:
-
- >>> r = parse('{greeting} {name}', 'hello world')
- >>> r.named
- {'greeting': 'hello', 'name': 'world'}
-
- >>> r = parse('hello {^} world', 'hello  there     world')
- >>> r.fixed
- ('there', )
-
-None will be returned if there is no match:
-
- >>> r = parse('hello {name:w}', 'hello 12')
- >>> print r
- None
-
-See also the unit tests at the end of the module for some more
-examples. Run those with "python -m parse".
-
-----
-
-This code is copyright 2011 eKit.com Inc (http://www.ekit.com/)
-See the end of the source file for the license of use.
diff --git a/parse.py b/parse.py
index 41e7f6c..f15664f 100644
--- a/parse.py
+++ b/parse.py
@@ -4,50 +4,94 @@
 #
 '''Parse strings using a specification based on the Python format() syntax.
 
-Anonymous (fixed-position), named and typed values are supported. Also the
-alignment operators will cause whitespace (or another alignment character)
-to be stripped from the value.
+   parse() is the opposite of format()
 
-You may not use both fixed and named values in your format string.
+The `Format String Syntax`_ is supported with anonymous (fixed-position),
+named and formatted values are supported::
 
-The types supported in ":type" expressions are the regular expression
-character group types d, D, w, W, s, S and not the string format types.
+   {[field name]:[format spec]}
 
-So, for example, some fixed-position parsing:
+Field names must be a single Python identifier word. No attributes or
+element indexes are supported (as they would make no sense.)
 
- >>> r = parse('hello {}', 'hello world')
- >>> r.fixed
- ('world', )
+Numbered fields are also not supported: the result of parsing will include
+the parsed fields in the order they are parsed.
 
- >>> r = parse('hello {:d} {:w}', 'hello 12 people')
- >>> r.fixed
- ('12', 'people')
+There conversion of values to types other than strings is not yet supported.
 
-And some named parsing:
+Some simple parse() format string examples:
 
- >>> r = parse('{greeting} {name}', 'hello world')
- >>> r.named
- {'greeting': 'hello', 'name': 'world'}
+ >>> parse("Bring me a {}", "Bring me a shrubbery")
+ <Result ('shrubbery',) {}>
+ >>> parse("The {} who say {}", "The knights who say Ni!")
+ <Result ('knights', 'Ni!') {}>
+ >>> parse("Bring out the holy {item}", "Bring out the holy hand grenade")
+ <Result () {'item': 'hand grenade'}>
 
- >>> r = parse('hello {^} world', 'hello  there     world')
- >>> r.fixed
- ('there', )
+Most of the `Format Specification Mini-Language`_ is supported::
 
-None will be returned if there is no match:
+   [[fill]align][sign][#][0][width][,][.precision][type]
 
- >>> r = parse('hello {name:w}', 'hello 12')
- >>> print r
+The align operators will cause spaces (or specified fill character)
+to be stripped from the value. The alignment character "=" is not yet
+supported.
+
+The comma "," separator is not yet supported.
+
+The types supported are the not the format() types but rather some of
+those types b, o, h, x, X and also regular expression character group types
+d, D, w, W, s, S and not the string format types. The format() types n, f,
+F, e, E, g and G are not yet supported.
+
+===== ==========================================
+Type  Characters Matched
+===== ==========================================
+ w    Letters and underscore
+ W    Non-letter and underscore
+ s    Whitespace
+ S    Non-whitespace
+ d    Digits (effectively integer numbers)
+ D    Non-digit
+ b    Binary numbers
+ o    Octal numbers
+ h    Hexadecimal numbers (lower and upper case)
+ x    Lower-case hexadecimal numbers
+ X    Upper-case hexadecimal numbers
+===== ==========================================
+
+Do remember though that most often a straight type-less {} will suffice
+where a more complex type specification might have been used.
+
+So, for example, some typed parsing, and None resulting if the typing
+does not match:
+
+ >>> parse('Hello {:d} {:w}', 'Hello 12 people')
+ <Result ('12', 'people') {}>
+ >>> print parse('Hello {:d} {:w}', 'Hello twelve people')
  None
 
+And messing about with alignment:
+
+ >>> parse('hello {:<} world', 'hello there     world')
+ <Result ('there',) {}>
+ >>> parse('hello {:^} world', 'hello    there     world')
+ <Result ('there',) {}>
+
+Note that the "center" alignment does not test to make sure the value is
+actually centered. It just strips leading and trailing whitespace.
+
 See also the unit tests at the end of the module for some more
-examples. Run those with "python -m parse".
+examples. Run the tests with "python -m parse".
+
+.. _`Format String Syntax`: http://docs.python.org/library/string.html#format-string-syntax
+.. _`Format Specification Mini-Language`: http://docs.python.org/library/string.html#format-specification-mini-language
 
 ----
 
 This code is copyright 2011 eKit.com Inc (http://www.ekit.com/)
 See the end of the source file for the license of use.
 '''
-__version__ = '1.0.0'
+__version__ = '1.1.0'
 
 import re
 import unittest
@@ -61,52 +105,159 @@ PARSE_RE = re.compile('''
 |
   (?P<closebrace>}})
 |
-  (?P<fixed>{(?P<falign>[^}]?[<>^])?(:[^}]+?)?})
+  (?P<fixed>{(:[^}]+?)?})
 |
-  {(?P<nalign>[^}]?[<>^])?(?P<named>\w+(:[^}]+?)?)}
+  {(?P<named>\w+(:[^}]+?)?)}
 )''', re.VERBOSE)
 
 
-class Format(object):
-    # we're an object so we can keep track of whether the user is trying to
-    # specify both fixed and named args
-    has_fixed = False
-    has_named = False
+# three problems?
+FORMAT_RE = re.compile('''
+    (?P<align>(?P<fill>[^}])?[<>^])?
+    (?P<sign>[-+ ])?
+    (?P<prefix>\#)?
+    (?P<width>(?P<zero>0)?[1-9]\d*)?
+    (\.(?P<precision>\d+))?
+    (?P<type>[bohxXwWdDsS])?
+''', re.VERBOSE)
+
+
+class Result(object):
+    def __init__(self):
+        self._fixed_args = []
+        self._groups = 0
+        self.fixed = ()
+        self.named = {}
+
+    def __repr__(self):
+        return '<Result %r %r>' % (self.fixed, self.named)
+
+    @classmethod
+    def parse(cls, format, string):
+        o = cls()
+        # first, turn the format into a regular expression
+        r = PARSE_RE.sub(o.replace, format)
+        m = re.match('^' + r + '$', string)
+        if m is None:
+            return None
+
+        l = m.groups()
+
+        o.named = m.groupdict()
+        o.fixed = tuple(l[n] for n in o._fixed_args)
+
+        return o
+
     def replace(self, match):
         d = match.groupdict()
-        if d['openbrace']: return '{{'
-        if d['closebrace']: return '}}'
-        align = None
+        if d['openbrace']: return '{'
+        if d['closebrace']: return '}'
+
+        format = ''
+
+        #print 'PARSE', d
 
         if d['fixed']:
-            if self.has_named:
-                raise ValueError("can't mix named and fixed")
-            self.has_fixed = True
+            self._fixed_args.append(self._groups)
+            wrap = '(%s)'
             if ':' in d['fixed']:
-                x, type = d['fixed'].split(':')
-                s = r'(\%s+?)' % type[:1]
+                format = d['fixed'][2:-1]
+        elif d['named']:
+            if ':' in d['named']:
+                name, format = d['named'].split(':')
             else:
-                s = r'(.+?)'
-            align = d['falign']
+                name = d['named']
+            wrap = '(?P<%s>%%s)' % name
+        else:
+            raise ValueError('format not recognised')
 
-        if d['named']:
-            if self.has_fixed:
-                raise ValueError("can't mix named and fixed")
-            self.has_named = True
-            if ':' not in d['named']:
-                s = r'(?P<%s>.+?)' % d['named']
+        self._groups += 1
+
+        if not format:
+            return wrap % '.+?'
+
+        m = FORMAT_RE.match(format)
+        if m is None:
+            raise ValueError('format %r not recognised' % format)
+
+        d = m.groupdict()
+        #print 'FORMAT', d
+
+        if d['type'] == 'o':
+            s = '[0-7]'
+        elif d['type'] == 'b':
+            s = '[01]'
+        elif d['type'] == 'h':
+            s = '[0-9a-fA-F]'
+        elif d['type'] == 'x':
+            s = '[0-9a-f]'
+        elif d['type'] == 'X':
+            s = '[0-9A-F]'
+        elif d['type']:
+            s = r'\%s' % d['type']
+        else:
+            s = '.'
+
+        # TODO: number types still to support:
+        # n    Number (with number separator characters)
+        # f    Floating-point numbers
+        # e    Exponent notation
+        # E    Exponent notation with upper-case E
+        # g    General number format with added nan, inf and -inf
+        # G    General number format with upper-case E, NAN, INF and -INF
+
+        if d['type'] and d['type'] in 'dobhxX':
+            if d['prefix']:
+                if d['type'] == 'b':
+                    s = '0b' + s
+                elif d['type'] == 'o':
+                    s = '0o' + s
+                elif d['type'] in 'hxX':
+                    s = '0x' + s
+                else:
+                    raise ValueError('prefix # not compatible with type %s' %
+                        d['type'])
+            if not d['sign']:
+                # default sign handling
+                s = r'-?' + s
+            elif d['sign'] == '+':
+                s = r'[-+]?' + s
+            elif d['sign'] == '-':
+                s = r'-?' + s
+            elif d['sign'] == ' ':
+                s = r'[- ]?' + s
             else:
-                name, type = d['named'].split(':')
-                s = r'(?P<%s>\%s+?)' % (name, type)
-            align = d['nalign']
+                raise ValueError('sign in format "%s" unrecognised' % d['sign'])
+        else:
+            if d['prefix']:
+                raise ValueError('prefix # in format must accompany numeric type')
+            if d['sign']:
+                raise ValueError('sign in format must accompany "d" type')
 
-        if not align:
-            return s
+        if d['width']:
+            if d['zero']:
+                s = s + '{%s}' % d['width'][1:]
+            else:
+                s = s + '{%s}' % d['width']
+        else:
+            s = s + '+?'
 
-        if len(align) == 2:
-            fill, align = align
+        s = wrap % s
+
+        if d['zero']:
+            s = '0*' + s
+
+        # TODO handle precision
+        #(\.(?P<precision>\d+))?
+
+        # TODO support '='
+        align = d['align']
+        fill = d['fill']
+        if fill:
+            align = align[1]
         else:
             fill = ' '
+
         if fill in '.\+?*[](){}^$':
             fill = '\\' + fill
         if align == '<':
@@ -118,9 +269,6 @@ class Format(object):
         return s
 
 
-Result = collections.namedtuple('Result', 'fixed named')
-
-
 def parse(format, string):
     '''Using "format" attempt to pull values from "string".
 
@@ -129,83 +277,98 @@ def parse(format, string):
      .fixed - tuple of fixed-position values from the string
      .named - dict of named values from the string
 
-    If the format is invalid (usually mixing fixed-position and named values
-    in the format) a ValueError will be raised.
+    If the format is invalid a ValueError will be raised.
 
     In the case there is no match parse() will return None.
     '''
-    # first, turn the format into a regular expression
-    r = PARSE_RE.sub(Format().replace, format)
-    m = re.match('^' + r + '$', string)
-    if m is None:
-        return None
-    d = m.groupdict()
-    if d:
-        return Result(None, d)
-    else:
-        return Result(m.groups(), None)
+    return Result().parse(format, string)
 
 
 # yes, I now unit test both of the problems
 class TestPattern(unittest.TestCase):
-    def test_mixed(self):
-        'check enforcement of fixed OR named'
-        self.assertRaises(ValueError, PARSE_RE.sub, Format().replace,
-            '{} {name}')
-
     def test_braces(self):
         'pull a simple string out of another string'
-        s = PARSE_RE.sub(Format().replace, '{{ }}')
-        self.assertEquals(s, '{{ }}')
+        s = PARSE_RE.sub(Result().replace, '{{ }}')
+        self.assertEquals(s, '{ }')
 
     def test_fixed(self):
         'pull a simple string out of another string'
-        s = PARSE_RE.sub(Format().replace, '{}')
+        s = PARSE_RE.sub(Result().replace, '{}')
         self.assertEquals(s, '(.+?)')
-        s = PARSE_RE.sub(Format().replace, '{} {}')
+        s = PARSE_RE.sub(Result().replace, '{} {}')
         self.assertEquals(s, '(.+?) (.+?)')
 
     def test_typed(self):
         'pull a named string out of another string'
-        s = PARSE_RE.sub(Format().replace, '{:d}')
-        self.assertEquals(s, '(\d+?)')
-        s = PARSE_RE.sub(Format().replace, '{:d} {:w}')
-        self.assertEquals(s, '(\d+?) (\w+?)')
+        s = PARSE_RE.sub(Result().replace, '{:d}')
+        self.assertEquals(s, '(-?\d+?)')
+        s = PARSE_RE.sub(Result().replace, '{:d} {:w}')
+        self.assertEquals(s, '(-?\d+?) (\w+?)')
 
     def test_named(self):
         'pull a named string out of another string'
-        s = PARSE_RE.sub(Format().replace, '{name}')
+        s = PARSE_RE.sub(Result().replace, '{name}')
         self.assertEquals(s, '(?P<name>.+?)')
-        s = PARSE_RE.sub(Format().replace, '{name} {other}')
+        s = PARSE_RE.sub(Result().replace, '{name} {other}')
         self.assertEquals(s, '(?P<name>.+?) (?P<other>.+?)')
 
     def test_named_typed(self):
         'pull a named string out of another string'
-        s = PARSE_RE.sub(Format().replace, '{name:d}')
-        self.assertEquals(s, '(?P<name>\d+?)')
-        s = PARSE_RE.sub(Format().replace, '{name:d} {other:w}')
-        self.assertEquals(s, '(?P<name>\d+?) (?P<other>\w+?)')
+        s = PARSE_RE.sub(Result().replace, '{name:d}')
+        self.assertEquals(s, '(?P<name>-?\d+?)')
+        s = PARSE_RE.sub(Result().replace, '{name:d} {other:w}')
+        self.assertEquals(s, '(?P<name>-?\d+?) (?P<other>\w+?)')
 
-    def test_left(self):
+    def test_beaker(self):
         'skip some trailing whitespace'
-        s = PARSE_RE.sub(Format().replace, '{<}')
+        s = PARSE_RE.sub(Result().replace, '{:<}')
         self.assertEquals(s, '(.+?) +')
 
     def test_left_fill(self):
         'skip some trailing periods'
-        s = PARSE_RE.sub(Format().replace, '{.<}')
+        s = PARSE_RE.sub(Result().replace, '{:.<}')
         self.assertEquals(s, '(.+?)\.+')
 
-    def test_right(self):
+    def test_bird(self):
         'skip some trailing whitespace'
-        s = PARSE_RE.sub(Format().replace, '{>}')
+        s = PARSE_RE.sub(Result().replace, '{:>}')
         self.assertEquals(s, ' +(.+?)')
 
     def test_center(self):
         'skip some surrounding whitespace'
-        s = PARSE_RE.sub(Format().replace, '{^}')
+        s = PARSE_RE.sub(Result().replace, '{:^}')
         self.assertEquals(s, ' +(.+?) +')
 
+    def test_format(self):
+        def _(fmt, matches):
+            m = FORMAT_RE.match(fmt)
+            self.assertNotEquals(m, None,
+                'FORMAT_RE failed to parse %r' % fmt)
+            d = m.groupdict()
+            for k in matches:
+                self.assertEquals(d.get(k), matches[k],
+                    'm["%s"]=%r, expect %r' % (k, d.get(k), matches[k]))
+
+        for t in 'obhdDwWsS':
+            _(t, dict(type=t))
+            _('10'+t, dict(type=t, width='10'))
+        _('05d', dict(type='d', width='05', zero='0'))
+        _('#d', dict(type='d', prefix='#'))
+        _('<', dict(align='<'))
+        _('.<', dict(align='.<', fill='.'))
+        _('>', dict(align='>'))
+        _('.>', dict(align='.>', fill='.'))
+        _('^', dict(align='^'))
+        _('.^', dict(align='.^', fill='.'))
+        _('d', dict(type='d'))
+        _('-d', dict(type='d', sign='-'))
+        _('+d', dict(type='d', sign='+'))
+        _(' d', dict(type='d', sign=' '))
+
+        _('.^+#010d', dict(type='d', width='010', align='.^', fill='.', prefix='#',
+            sign='+', zero='0'))
+
+        #(\.(?P<precision>\d+))?
 
 class TestParse(unittest.TestCase):
     def test_no_match(self):
@@ -214,9 +377,9 @@ class TestParse(unittest.TestCase):
 
     def test_nothing(self):
         'do no actual parsing'
-        r = parse('{{hello}}', '{{hello}}')
+        r = parse('{{hello}}', '{hello}')
         self.assertEquals(r.fixed, ())
-        self.assertEquals(r.named, None)
+        self.assertEquals(r.named, {})
 
     def test_fixed(self):
         'pull a fixed value out of string'
@@ -225,17 +388,17 @@ class TestParse(unittest.TestCase):
 
     def test_left(self):
         'pull left-aligned text out of string'
-        r = parse('{<} world', 'hello       world')
+        r = parse('{:<} world', 'hello       world')
         self.assertEquals(r.fixed, ('hello', ))
 
     def test_right(self):
         'pull right-aligned text out of string'
-        r = parse('hello {>}', 'hello       world')
+        r = parse('hello {:>}', 'hello       world')
         self.assertEquals(r.fixed, ('world', ))
 
     def test_center(self):
         'pull right-aligned text out of string'
-        r = parse('hello {^} world', 'hello  there     world')
+        r = parse('hello {:^} world', 'hello  there     world')
         self.assertEquals(r.fixed, ('there', ))
 
     def test_typed(self):
@@ -252,11 +415,64 @@ class TestParse(unittest.TestCase):
         r = parse('hello {name}', 'hello world')
         self.assertEquals(r.named, {'name': 'world'})
 
+    def test_mixed(self):
+        'pull a fixed and named values out of string'
+        r = parse('hello {} {name} {} {spam}', 'hello world and other beings')
+        self.assertEquals(r.fixed, ('world', 'other'))
+        self.assertEquals(r.named, dict(name='and', spam='beings'))
+
     def test_named_typed(self):
         'pull a named, typed values out of string'
         r = parse('hello {number:d} {things}', 'hello 12 people')
         self.assertEquals(r.named, dict(number='12', things='people'))
 
+    def test_named_aligned_typed(self):
+        'pull a named, typed values out of string'
+        r = parse('hello {number:<d} {things}', 'hello 12      people')
+        self.assertEquals(r.named, dict(number='12', things='people'))
+        r = parse('hello {number:>d} {things}', 'hello      12 people')
+        self.assertEquals(r.named, dict(number='12', things='people'))
+        r = parse('hello {number:^d} {things}', 'hello      12      people')
+        self.assertEquals(r.named, dict(number='12', things='people'))
+
+    def test_numbers(self):
+        'pull a numbers out of a string'
+        def y(fmt, s, e):
+            r = parse(fmt, s)
+            if r is None: self.fail('%r did not match %r' % (fmt, s))
+            self.assertEquals(r.fixed[0], e,
+                '%r found %r in %r, not %r' % (fmt, r.fixed[0], s, e))
+        def n(fmt, s, e):
+            if parse(fmt, s) is not None:
+                self.fail('%r matched %r' % (fmt, s))
+        y('a {:d} b', 'a 12 b', '12')
+        y('a {:d} b', 'a -12 b', '-12')
+        n('a {:d} b', 'a +12 b', None)
+        y('a {:-d} b', 'a -12 b', '-12')
+        n('a {:-d} b', 'a +12 b', None)
+        y('a {:+d} b', 'a -12 b', '-12')
+        y('a {:+d} b', 'a +12 b', '+12')
+        y('a {: d} b', 'a -12 b', '-12')
+        y('a {: d} b', 'a  12 b', ' 12')
+        n('a {: d} b', 'a +12 b', None)
+
+        y('a {:b} b', 'a 101101 b', '101101')
+        y('a {:#b} b', 'a 0b101101 b', '0b101101')
+        y('a {:o} b', 'a 12345670 b', '12345670')
+        y('a {:#o} b', 'a 0o12345670 b', '0o12345670')
+        y('a {:h} b', 'a 1234567890abcdef b', '1234567890abcdef')
+        y('a {:h} b', 'a 1234567890ABCDEF b', '1234567890ABCDEF')
+        y('a {:#h} b', 'a 0x1234567890abcdef b', '0x1234567890abcdef')
+        y('a {:#h} b', 'a 0x1234567890ABCDEF b', '0x1234567890ABCDEF')
+        y('a {:x} b', 'a 1234567890abcdef b', '1234567890abcdef')
+        y('a {:X} b', 'a 1234567890ABCDEF b', '1234567890ABCDEF')
+        y('a {:#x} b', 'a 0x1234567890abcdef b', '0x1234567890abcdef')
+        y('a {:#X} b', 'a 0x1234567890ABCDEF b', '0x1234567890ABCDEF')
+
+        y('a {:05d} b', 'a 00001 b', '00001')
+
+        # TODO this should pass
+        # y('a {:05d} b', 'a 0000001 b', None)
 
 if __name__ == '__main__':
     unittest.main()
@@ -281,3 +497,4 @@ if __name__ == '__main__':
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+# vim: set filetype=python ts=4 sw=4 et si tw=75
diff --git a/setup.py b/setup.py
index 81784af..07fb6de 100644
--- a/setup.py
+++ b/setup.py
@@ -4,11 +4,14 @@ from distutils.core import setup
 
 from parse import __version__, __doc__
 
+with open('README.rst', 'w') as f:
+    f.write(__doc__)
+
 # perform the setup action
 setup(
     name = "parse",
     version = __version__,
-    description = "Parse strings using a specification based on the Python format() syntax.",
+    description = "parse() is the opposite of format()",
     long_description = __doc__.decode('utf8'),
     author = "Richard Jones",
     author_email = "rjones@ekit-inc.com",