mirror of
https://github.com/kennethreitz-archive/parse.git
synced 2026-06-05 23:40:17 +00:00
refactored, added compile()
This commit is contained in:
+8
-8
@@ -3,7 +3,7 @@ Parse strings using a specification based on the Python format() syntax.
|
||||
parse() is the opposite of format()
|
||||
|
||||
The `Format String Syntax`_ is supported with anonymous (fixed-position),
|
||||
named and formatted fields are supported::
|
||||
named and formatted fields::
|
||||
|
||||
{[field name]:[format spec]}
|
||||
|
||||
@@ -67,17 +67,17 @@ where a more complex type specification might have been used.
|
||||
So, for example, some typed parsing, and None resulting if the typing
|
||||
does not match:
|
||||
|
||||
>>> parse('Hello {:d} {:w}', 'Hello 12 people')
|
||||
<Result ('12', 'people') {}>
|
||||
>>> print parse('Hello {:d} {:w}', 'Hello twelve people')
|
||||
>>> parse('Our {:d} {:w} are...', 'Our 3 weapons are...')
|
||||
<Result ('3', 'weapons') {}>
|
||||
>>> parse('Our {:d} {:w} are...', 'Our three weapons are...')
|
||||
None
|
||||
|
||||
And messing about with alignment:
|
||||
|
||||
>>> parse('hello {:<} world', 'hello there world')
|
||||
<Result ('there',) {}>
|
||||
>>> parse('hello {:^} world', 'hello there world')
|
||||
<Result ('there',) {}>
|
||||
>>> parse('with {:>} herring', 'with a herring')
|
||||
<Result ('a',) {}>
|
||||
>>> parse('spam {:^} spam', 'spam lovely spam')
|
||||
<Result ('lovely',) {}>
|
||||
|
||||
Note that the "center" alignment does not test to make sure the value is
|
||||
actually centered. It just strips leading and trailing whitespace.
|
||||
|
||||
@@ -6,8 +6,23 @@
|
||||
|
||||
parse() is the opposite of format()
|
||||
|
||||
The `Format String Syntax`_ is supported with anonymous (fixed-position),
|
||||
named and formatted fields::
|
||||
Basic usage:
|
||||
|
||||
>>> from parse import * # only exports parse() and compile()
|
||||
>>> parse("It's {}, I love it!", "It's spam, I love it!")
|
||||
<Result ('spam',) {}>
|
||||
>>> p = compile("It's {}, I love it!")
|
||||
>>> print p
|
||||
<Parser "It's {}, I love it!">
|
||||
>>> p.parse("It's spam, I love it!")
|
||||
<Result ('spam',) {}>
|
||||
|
||||
|
||||
Format Syntax
|
||||
-------------
|
||||
|
||||
Most of the `Format String Syntax`_ is supported with anonymous
|
||||
(fixed-position), named and formatted fields::
|
||||
|
||||
{[field name]:[format spec]}
|
||||
|
||||
@@ -34,6 +49,9 @@ Some simple parse() format string examples:
|
||||
>>> print r.named
|
||||
{'item': 'hand grenade'}
|
||||
|
||||
Format Specification
|
||||
--------------------
|
||||
|
||||
Most of the `Format Specification Mini-Language`_ is supported::
|
||||
|
||||
[[fill]align][sign][#][0][width][,][.precision][type]
|
||||
@@ -96,6 +114,7 @@ examples. Run the tests with "python -m parse".
|
||||
|
||||
**Version history (in brief)**:
|
||||
|
||||
- 1.1.2 refactored, added compile() and limited "from parse import *"
|
||||
- 1.1.1 documentation improvements
|
||||
- 1.1.0 implemented more of the `Format Specification Mini-Language`_
|
||||
and removed the restriction on mixing fixed-position and named fields
|
||||
@@ -104,11 +123,13 @@ examples. Run the tests with "python -m parse".
|
||||
This code is copyright 2011 eKit.com Inc (http://www.ekit.com/)
|
||||
See the end of the source file for the license of use.
|
||||
'''
|
||||
__version__ = '1.1.1'
|
||||
__version__ = '1.1.2'
|
||||
|
||||
import re
|
||||
import unittest
|
||||
import collections
|
||||
|
||||
|
||||
__all__ = 'parse compile'.split()
|
||||
|
||||
|
||||
# yes, I now have two problems
|
||||
@@ -135,31 +156,25 @@ FORMAT_RE = re.compile('''
|
||||
''', re.VERBOSE)
|
||||
|
||||
|
||||
class Result(object):
|
||||
def __init__(self):
|
||||
class Parser(object):
|
||||
def __init__(self, format):
|
||||
self._fixed_args = []
|
||||
self._groups = 0
|
||||
self.fixed = ()
|
||||
self.named = {}
|
||||
self._format = format
|
||||
self._expression = re.compile('^%s$' % PARSE_RE.sub(self.replace, format))
|
||||
|
||||
def __repr__(self):
|
||||
return '<Result %r %r>' % (self.fixed, self.named)
|
||||
if len(self._format) > 20:
|
||||
return '<%s %r>' % (self.__class__.__name__, self._format[:17] + '...')
|
||||
return '<%s %r>' % (self.__class__.__name__, self._format)
|
||||
|
||||
@classmethod
|
||||
def parse(cls, format, string):
|
||||
o = cls()
|
||||
# first, turn the format into a regular expression
|
||||
r = PARSE_RE.sub(o.replace, format)
|
||||
m = re.match('^' + r + '$', string)
|
||||
def parse(self, string):
|
||||
m = self._expression.match(string)
|
||||
if m is None:
|
||||
return None
|
||||
|
||||
l = m.groups()
|
||||
|
||||
o.named = m.groupdict()
|
||||
o.fixed = tuple(l[n] for n in o._fixed_args)
|
||||
|
||||
return o
|
||||
fixed = tuple(l[n] for n in self._fixed_args)
|
||||
return Result(fixed, m.groupdict())
|
||||
|
||||
def replace(self, match):
|
||||
d = match.groupdict()
|
||||
@@ -282,6 +297,16 @@ class Result(object):
|
||||
return s
|
||||
|
||||
|
||||
class Result(object):
|
||||
def __init__(self, fixed, named):
|
||||
self.fixed = fixed
|
||||
self.named = named
|
||||
|
||||
def __repr__(self):
|
||||
return '<%s %r %r>' % (self.__class__.__name__, self.fixed,
|
||||
self.named)
|
||||
|
||||
|
||||
def parse(format, string):
|
||||
'''Using "format" attempt to pull values from "string".
|
||||
|
||||
@@ -294,62 +319,77 @@ def parse(format, string):
|
||||
|
||||
In the case there is no match parse() will return None.
|
||||
'''
|
||||
return Result().parse(format, string)
|
||||
return Parser(format).parse(string)
|
||||
|
||||
|
||||
def compile(format):
|
||||
'''Create a Parser instance to parse "format".
|
||||
|
||||
The resultant Parser has a method .parse(string) which
|
||||
behaves in the same manner as parse(format, string).
|
||||
|
||||
Use this function if you intend to parse many strings
|
||||
with the same format.
|
||||
'''
|
||||
return Parser(format)
|
||||
|
||||
|
||||
# yes, I now unit test both of the problems
|
||||
class TestPattern(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.p = Parser('')
|
||||
|
||||
def test_braces(self):
|
||||
'pull a simple string out of another string'
|
||||
s = PARSE_RE.sub(Result().replace, '{{ }}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{{ }}')
|
||||
self.assertEquals(s, '{ }')
|
||||
|
||||
def test_fixed(self):
|
||||
'pull a simple string out of another string'
|
||||
s = PARSE_RE.sub(Result().replace, '{}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{}')
|
||||
self.assertEquals(s, '(.+?)')
|
||||
s = PARSE_RE.sub(Result().replace, '{} {}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{} {}')
|
||||
self.assertEquals(s, '(.+?) (.+?)')
|
||||
|
||||
def test_typed(self):
|
||||
'pull a named string out of another string'
|
||||
s = PARSE_RE.sub(Result().replace, '{:d}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{:d}')
|
||||
self.assertEquals(s, '(-?\d+?)')
|
||||
s = PARSE_RE.sub(Result().replace, '{:d} {:w}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{:d} {:w}')
|
||||
self.assertEquals(s, '(-?\d+?) (\w+?)')
|
||||
|
||||
def test_named(self):
|
||||
'pull a named string out of another string'
|
||||
s = PARSE_RE.sub(Result().replace, '{name}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{name}')
|
||||
self.assertEquals(s, '(?P<name>.+?)')
|
||||
s = PARSE_RE.sub(Result().replace, '{name} {other}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{name} {other}')
|
||||
self.assertEquals(s, '(?P<name>.+?) (?P<other>.+?)')
|
||||
|
||||
def test_named_typed(self):
|
||||
'pull a named string out of another string'
|
||||
s = PARSE_RE.sub(Result().replace, '{name:d}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{name:d}')
|
||||
self.assertEquals(s, '(?P<name>-?\d+?)')
|
||||
s = PARSE_RE.sub(Result().replace, '{name:d} {other:w}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{name:d} {other:w}')
|
||||
self.assertEquals(s, '(?P<name>-?\d+?) (?P<other>\w+?)')
|
||||
|
||||
def test_beaker(self):
|
||||
'skip some trailing whitespace'
|
||||
s = PARSE_RE.sub(Result().replace, '{:<}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{:<}')
|
||||
self.assertEquals(s, '(.+?) +')
|
||||
|
||||
def test_left_fill(self):
|
||||
'skip some trailing periods'
|
||||
s = PARSE_RE.sub(Result().replace, '{:.<}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{:.<}')
|
||||
self.assertEquals(s, '(.+?)\.+')
|
||||
|
||||
def test_bird(self):
|
||||
'skip some trailing whitespace'
|
||||
s = PARSE_RE.sub(Result().replace, '{:>}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{:>}')
|
||||
self.assertEquals(s, ' +(.+?)')
|
||||
|
||||
def test_center(self):
|
||||
'skip some surrounding whitespace'
|
||||
s = PARSE_RE.sub(Result().replace, '{:^}')
|
||||
s = PARSE_RE.sub(self.p.replace, '{:^}')
|
||||
self.assertEquals(s, ' +(.+?) +')
|
||||
|
||||
def test_format(self):
|
||||
@@ -383,6 +423,7 @@ class TestPattern(unittest.TestCase):
|
||||
|
||||
#(\.(?P<precision>\d+))?
|
||||
|
||||
|
||||
class TestParse(unittest.TestCase):
|
||||
def test_no_match(self):
|
||||
'string does not match format'
|
||||
@@ -487,6 +528,7 @@ class TestParse(unittest.TestCase):
|
||||
# TODO this should pass
|
||||
# y('a {:05d} b', 'a 0000001 b', None)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user