diff --git a/README.rst b/README.rst index d3ee1dc..a5cd2e6 100644 --- a/README.rst +++ b/README.rst @@ -2,17 +2,43 @@ Parse strings using a specification based on the Python format() syntax. ``parse()`` is the opposite of ``format()`` -Basic usage: +The module is set up to only export ``parse()``, ``search()`` and +``findall()`` when "import *" is used: + +>>> from parse import * + +From there it's a simple thing to parse a string: ->>> from parse import * # only exports parse() and compile() >>> parse("It's {}, I love it!", "It's spam, I love it!") ->>> p = compile("It's {}, I love it!") + +Or to search a string for some pattern: + +>>> search('Age: {:d} +', 'Name: Rufus +Age: 42 +Color: red +') + + +Or find all the occurrances of some pattern in a string: + +>>> ''.join(r.fixed[0] for r in findall(">{}<", "

some bold text

") +"some bold text" + +If you're going to use the same pattern to match lots of strings you can +compile it once: + +>>> import parse +>>> p = parse.compile("It's {}, I love it!") >>> print p >>> p.parse("It's spam, I love it!") +("compile" is not exported for "import *" usage as it would override the +built-in ``compile()`` function) + Format Syntax ------------- @@ -47,6 +73,7 @@ Some simple parse() format string examples: >>> print r.named {'item': 'hand grenade'} + Format Specification -------------------- @@ -109,15 +136,15 @@ Type Characters Matched Output e.g. 10:21:36 PM -5:30 ===== =========================================== ======== -So, for example, some typed parsing, and ``None`` resulting if the typing +Some examples of typed parsing with ``None`` returned if the typing does not match: >>> parse('Our {:d} {:w} are...', 'Our 3 weapons are...') >>> parse('Our {:d} {:w} are...', 'Our three weapons are...') None ->>> parse('Meet at {:tg}', 'Meet at 11/11/2011 11:11') - +>>> parse('Meet at {:tg}', 'Meet at 1/2/2011 11:00 PM') + And messing about with alignment: @@ -127,7 +154,7 @@ And messing about with alignment: Note that the "center" alignment does not test to make sure the value is -actually centered. It just strips leading and trailing whitespace. +centered - it just strips leading and trailing whitespace. Some notes for the date and time types: @@ -151,7 +178,8 @@ Some notes for the date and time types: Note: attempting to match too many datetime fields in a single parse() will currently result in a resource allocation issue. A TooManyFields exception -will be raised in this instance. The current limit is about 15. +will be raised in this instance. The current limit is about 15. It is hoped +that this limit will be removed one day. See also the unit tests at the end of the module for some more examples. Run the tests with "python -m parse". @@ -201,6 +229,8 @@ with the same identifier. **Version history (in brief)**: +- 1.3 added search() and findall(); removed compile() from "import *" export + as it overwrites builtin. - 1.2 added ability for custom and override type conversions to be provided; some cleanup - 1.1.9 to keep things simpler number sign is handled automatically; diff --git a/parse.py b/parse.py index 7748fa5..b6fba9b 100644 --- a/parse.py +++ b/parse.py @@ -2,18 +2,39 @@ ``parse()`` is the opposite of ``format()`` -Basic usage: +The module is set up to only export ``parse()``, ``search()`` and +``findall()`` when "import *" is used: + +>>> from parse import * + +From there it's a simple thing to parse a string: ->>> from parse import * # only exports parse() and compile() >>> parse("It's {}, I love it!", "It's spam, I love it!") ->>> p = compile("It's {}, I love it!") + +Or to search a string for some pattern: + +>>> search('Age: {:d}\n', 'Name: Rufus\nAge: 42\nColor: red\n') + + +Or find all the occurrances of some pattern in a string: + +>>> ''.join(r.fixed[0] for r in findall(">{}<", "

some bold text

") +"some bold text" + +If you're going to use the same pattern to match lots of strings you can +compile it once: + +>>> import parse +>>> p = parse.compile("It's {}, I love it!") >>> print p >>> p.parse("It's spam, I love it!") ->>> ''.join(findall(">{}<", "

some bold text

")) -"some bold text" + +("compile" is not exported for "import *" usage as it would override the +built-in ``compile()`` function) + Format Syntax ------------- @@ -48,6 +69,7 @@ Some simple parse() format string examples: >>> print r.named {'item': 'hand grenade'} + Format Specification -------------------- @@ -110,15 +132,15 @@ Type Characters Matched Output e.g. 10:21:36 PM -5:30 ===== =========================================== ======== -So, for example, some typed parsing, and ``None`` resulting if the typing +Some examples of typed parsing with ``None`` returned if the typing does not match: >>> parse('Our {:d} {:w} are...', 'Our 3 weapons are...') >>> parse('Our {:d} {:w} are...', 'Our three weapons are...') None ->>> parse('Meet at {:tg}', 'Meet at 11/11/2011 11:11') - +>>> parse('Meet at {:tg}', 'Meet at 1/2/2011 11:00 PM') + And messing about with alignment: @@ -128,7 +150,7 @@ And messing about with alignment: Note that the "center" alignment does not test to make sure the value is -actually centered. It just strips leading and trailing whitespace. +centered - it just strips leading and trailing whitespace. Some notes for the date and time types: @@ -152,7 +174,8 @@ Some notes for the date and time types: Note: attempting to match too many datetime fields in a single parse() will currently result in a resource allocation issue. A TooManyFields exception -will be raised in this instance. The current limit is about 15. +will be raised in this instance. The current limit is about 15. It is hoped +that this limit will be removed one day. See also the unit tests at the end of the module for some more examples. Run the tests with "python -m parse". @@ -202,7 +225,8 @@ with the same identifier. **Version history (in brief)**: -- 1.3 added search() and findall() +- 1.3 added search() and findall(); removed compile() from "import *" export + as it overwrites builtin. - 1.2 added ability for custom and override type conversions to be provided; some cleanup - 1.1.9 to keep things simpler number sign is handled automatically; @@ -228,14 +252,14 @@ with the same identifier. This code is copyright 2011 eKit.com Inc (http://www.ekit.com/) See the end of the source file for the license of use. ''' -__version__ = '1.2' +__version__ = '1.3' # yes, I now have two problems import re from datetime import datetime, time, tzinfo, timedelta from functools import partial -__all__ = 'parse compile'.split() +__all__ = 'parse search findall'.split() def int_convert(base): @@ -297,6 +321,7 @@ class FixedTzOffset(tzinfo): def __eq__(self, other): return self._name == other._name and self._offset == other._offset + MONTHS_MAP = dict( Jan=1, January=1, Feb=2, February=2, @@ -438,24 +463,6 @@ def extract_format(format, extra_types): PARSE_RE = re.compile('({{|}}|{}|{:[^}]+?}|{\w+?}|{\w+?:[^}]+?})') -class ResultIterator(object): - def __init__(self, parser, string, pos, endpos): - self.parser = parser - self.string = string - self.pos = pos - self.endpos = endpos - - def __iter__(self): - return self - - def next(self): - m = self.parser._search_re.search(self.string, self.pos, self.endpos) - if m is None: - raise StopIteration() - self.pos = m.end() - return self.parser._generate_result(m) - - class Parser(object): def __init__(self, format, extra_types={}): self._format = format @@ -464,7 +471,7 @@ class Parser(object): self._named_fields = [] self._group_index = 0 self._type_conversions = {} - self._expression = self.generate_expression() + self._expression = self._generate_expression() self._search_re = None self._match_re = None @@ -561,10 +568,10 @@ class Parser(object): # and that's our result return Result(fixed_fields, named_fields, spans) - def re_replace(self, match): + def _regex_replace(self, match): return '\\' + match.group(1) - def generate_expression(self): + def _generate_expression(self): # turn my _format attribute into the _expression attribute e = [] for part in PARSE_RE.split(self._format): @@ -576,13 +583,13 @@ class Parser(object): e.append(r'\}') elif part[0] == '{': # this will be a braces-delimited field to handle - e.append(self.handle_field(part)) + e.append(self._handle_field(part)) else: # just some text to match - e.append(REGEX_SAFETY.sub(self.re_replace, part)) + e.append(REGEX_SAFETY.sub(self._regex_replace, part)) return ''.join(e) - def handle_field(self, field): + def _handle_field(self, field): # first: lose the braces field = field[1:-1] @@ -764,6 +771,8 @@ class Parser(object): class Result(object): + '''The result of a parse() or search(). + ''' def __init__(self, fixed, named, spans): self.fixed = fixed self.named = named @@ -774,6 +783,28 @@ class Result(object): self.named) +class ResultIterator(object): + '''The result of a findall() operation. + + Each element is a Result instance. + ''' + def __init__(self, parser, string, pos, endpos): + self.parser = parser + self.string = string + self.pos = pos + self.endpos = endpos + + def __iter__(self): + return self + + def next(self): + m = self.parser._search_re.search(self.string, self.pos, self.endpos) + if m is None: + raise StopIteration() + self.pos = m.end() + return self.parser._generate_result(m) + + def parse(format, string, extra_types={}): '''Using "format" attempt to pull values from "string".