From 1d656b930be29f5031849033112116feb8e22fa1 Mon Sep 17 00:00:00 2001 From: Richard Jones Date: Thu, 17 Nov 2011 16:32:27 +1100 Subject: [PATCH] first commit --- MANIFEST.in | 2 + README.txt | 7 ++ parse.py | 282 ++++++++++++++++++++++++++++++++++++++++++++++++++++ setup.py | 30 ++++++ 4 files changed, 321 insertions(+) create mode 100644 MANIFEST.in create mode 100644 README.txt create mode 100644 parse.py create mode 100644 setup.py diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..8f3b4b5 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +include README.txt +include *.py diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..3b88f03 --- /dev/null +++ b/README.txt @@ -0,0 +1,7 @@ +Parse strings using a specification based on the Python format() syntax. + +For documentation please "pydoc parse". The same documentation is available +at http://pypi.python.org/pypi/parse + +To run its tests use "python -m html". + diff --git a/parse.py b/parse.py new file mode 100644 index 0000000..27e071c --- /dev/null +++ b/parse.py @@ -0,0 +1,282 @@ +# +# $Id$ +# $HeadURL$ +# +'''Parse strings using a specification based on the Python format() syntax. + +Anonymous (fixed-position), named and typed values are supported. Also the +alignment operators will cause whitespace (or another alignment character) +to be stripped from the value. + +You may not use both fixed and named values in your format string. + +The types supported in ":type" expressions are the regular expression +character group types d, D, w, W, s, S and not the string format types. + +So, for example, some fixed-position parsing: + + >>> r = parse('hello {}', 'hello world') + >>> r.fixed + ('world', ) + + >>> r = parse('hello {:d} {:w}', 'hello 12 people') + >>> r.fixed + ('12', 'people') + +And some named parsing: + + >>> r = parse('{greeting} {name}', 'hello world') + >>> r.named + {'greeting': 'hello', 'name': 'world'} + + >>> r = parse('hello {^} world', 'hello there world') + >>> r.fixed + ('there', ) + +A ValueError will be raised if there is no match: + + >>> r = parse('hello {name:w}', 'hello 12') + ValueError: ... + +See also the unit tests at the end of the module for some more +examples. + +---- + +This code is copyright 2009-2011 eKit.com Inc (http://www.ekit.com/) +See the end of the source file for the license of use. +''' +__version__ = '1.0.0' + +import re +import unittest +import collections + + +# yes, I now have two problems +PARSE_RE = re.compile(''' +( + (?P{{) +| + (?P}}) +| + (?P{(?P[^}]?[<>^])?(:[^}]+?)?}) +| + {(?P[^}]?[<>^])?(?P\w+(:[^}]+?)?)} +)''', re.VERBOSE) + + +class Format(object): + # we're an object so we can keep track of whether the user is trying to + # specify both fixed and named args + has_fixed = False + has_named = False + def replace(self, match): + d = match.groupdict() + if d['openbrace']: return '{{' + if d['closebrace']: return '}}' + align = None + + if d['fixed']: + if self.has_named: + raise ValueError("can't mix named and fixed") + self.has_fixed = True + if ':' in d['fixed']: + x, type = d['fixed'].split(':') + s = r'(\%s+?)' % type[:1] + else: + s = r'(.+?)' + align = d['falign'] + + if d['named']: + if self.has_fixed: + raise ValueError("can't mix named and fixed") + self.has_named = True + if ':' not in d['named']: + s = r'(?P<%s>.+?)' % d['named'] + else: + name, type = d['named'].split(':') + s = r'(?P<%s>\%s+?)' % (name, type) + align = d['nalign'] + + if not align: + return s + + if len(align) == 2: + fill, align = align + else: + fill = ' ' + if fill in '.\+?*[](){}^$': + fill = '\\' + fill + if align == '<': + s = '%s%s+' % (s, fill) + elif align == '>': + s = '%s+%s' % (fill, s) + elif align == '^': + s = '%s+%s%s+' % (fill, s, fill) + return s + + +Result = collections.namedtuple('Result', 'fixed named') + + +def parse(format, string): + '''Using "format" attempt to pull values from "string". + + The return value will be an object with two attributes: + + .fixed - tuple of fixed-position values from the string + .named - dict of named values from the string + + If the format is invalid (usually mixing fixed-position and named values + in the format) a ValueError will be raised. + + In the case there is no match parse() will return None. + ''' + # first, turn the format into a regular expression + r = PARSE_RE.sub(Format().replace, format) + m = re.match('^' + r + '$', string) + if m is None: + return None + d = m.groupdict() + if d: + return Result(None, d) + else: + return Result(m.groups(), None) + + +# yes, I now unit test both of the problems +class TestPattern(unittest.TestCase): + def test_mixed(self): + 'check enforcement of fixed OR named' + self.assertRaises(ValueError, PARSE_RE.sub, Format().replace, + '{} {name}') + + def test_braces(self): + 'pull a simple string out of another string' + s = PARSE_RE.sub(Format().replace, '{{ }}') + self.assertEquals(s, '{{ }}') + + def test_fixed(self): + 'pull a simple string out of another string' + s = PARSE_RE.sub(Format().replace, '{}') + self.assertEquals(s, '(.+?)') + s = PARSE_RE.sub(Format().replace, '{} {}') + self.assertEquals(s, '(.+?) (.+?)') + + def test_typed(self): + 'pull a named string out of another string' + s = PARSE_RE.sub(Format().replace, '{:d}') + self.assertEquals(s, '(\d+?)') + s = PARSE_RE.sub(Format().replace, '{:d} {:w}') + self.assertEquals(s, '(\d+?) (\w+?)') + + def test_named(self): + 'pull a named string out of another string' + s = PARSE_RE.sub(Format().replace, '{name}') + self.assertEquals(s, '(?P.+?)') + s = PARSE_RE.sub(Format().replace, '{name} {other}') + self.assertEquals(s, '(?P.+?) (?P.+?)') + + def test_named_typed(self): + 'pull a named string out of another string' + s = PARSE_RE.sub(Format().replace, '{name:d}') + self.assertEquals(s, '(?P\d+?)') + s = PARSE_RE.sub(Format().replace, '{name:d} {other:w}') + self.assertEquals(s, '(?P\d+?) (?P\w+?)') + + def test_left(self): + 'skip some trailing whitespace' + s = PARSE_RE.sub(Format().replace, '{<}') + self.assertEquals(s, '(.+?) +') + + def test_left_fill(self): + 'skip some trailing periods' + s = PARSE_RE.sub(Format().replace, '{.<}') + self.assertEquals(s, '(.+?)\.+') + + def test_right(self): + 'skip some trailing whitespace' + s = PARSE_RE.sub(Format().replace, '{>}') + self.assertEquals(s, ' +(.+?)') + + def test_center(self): + 'skip some surrounding whitespace' + s = PARSE_RE.sub(Format().replace, '{^}') + self.assertEquals(s, ' +(.+?) +') + + +class TestParse(unittest.TestCase): + def test_no_match(self): + 'string does not match format' + self.assertEquals(parse('{{hello}}', 'hello'), None) + + def test_nothing(self): + 'do no actual parsing' + r = parse('{{hello}}', '{{hello}}') + self.assertEquals(r.fixed, ()) + self.assertEquals(r.named, None) + + def test_fixed(self): + 'pull a fixed value out of string' + r = parse('hello {}', 'hello world') + self.assertEquals(r.fixed, ('world', )) + + def test_left(self): + 'pull left-aligned text out of string' + r = parse('{<} world', 'hello world') + self.assertEquals(r.fixed, ('hello', )) + + def test_right(self): + 'pull right-aligned text out of string' + r = parse('hello {>}', 'hello world') + self.assertEquals(r.fixed, ('world', )) + + def test_center(self): + 'pull right-aligned text out of string' + r = parse('hello {^} world', 'hello there world') + self.assertEquals(r.fixed, ('there', )) + + def test_typed(self): + 'pull a named, typed values out of string' + r = parse('hello {:d} {:w}', 'hello 12 people') + self.assertEquals(r.fixed, ('12', 'people')) + + def test_typed_fail(self): + 'pull a named, typed values out of string' + self.assertEquals(parse('hello {:d} {:w}', 'hello people 12'), None) + + def test_named(self): + 'pull a named value out of string' + r = parse('hello {name}', 'hello world') + self.assertEquals(r.named, {'name': 'world'}) + + def test_named_typed(self): + 'pull a named, typed values out of string' + r = parse('hello {number:d} {things}', 'hello 12 people') + self.assertEquals(r.named, dict(number='12', things='people')) + + +if __name__ == '__main__': + unittest.main() + +# Copyright (c) 2011 eKit.com Inc (http://www.ekit.com/) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b201a7b --- /dev/null +++ b/setup.py @@ -0,0 +1,30 @@ +#! /usr/bin/env python + +from distutils.core import setup + +from html import __version__, __doc__ + +# perform the setup action +setup( + name = "html", + version = __version__, + description = "simple, elegant HTML, XHTML and XML generation", + long_description = __doc__.decode('utf8'), + author = "Richard Jones", + author_email = "rjones@ekit-inc.com", + py_modules = ['html'], + url = 'http://pypi.python.org/pypi/html', + classifiers = [ + 'Environment :: Web Environment', + 'Intended Audience :: Developers', + 'Programming Language :: Python :: 2.5', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 3', + 'Topic :: Software Development :: Code Generators', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Text Processing :: Markup :: HTML', + 'License :: OSI Approved :: BSD License', + ], +) + +# vim: set filetype=python ts=4 sw=4 et si