From 25da44f56980ac8ddc7aa4b034ab0ec9a1ebd5e7 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Sun, 21 Nov 2010 13:00:56 +0100 Subject: [PATCH 01/18] Support for HTML (export only). Unit-tested. Depends on the "markup.py" package(http://markup.sourceforge.net) which is included in packages/ Notice that the tests now depend on the presence of markup.py. --- tablib/core.py | 8 + tablib/formats/__init__.py | 3 +- tablib/formats/_html.py | 38 +++ tablib/packages/markup.py | 484 +++++++++++++++++++++++++++++++++++++ test_tablib.py | 23 ++ 5 files changed, 555 insertions(+), 1 deletion(-) create mode 100644 tablib/formats/_html.py create mode 100644 tablib/packages/markup.py diff --git a/tablib/core.py b/tablib/core.py index bd2d4ba..88d99a0 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -425,6 +425,14 @@ class Dataset(object): Import assumes (for now) that headers exist. """ + @property + def html(): + """A HTML table representation of the :class:`Dataset` object. If + headers have been set, they will be used as table headers. + + ..notice:: This method can be used for export only. + """ + pass def append(self, row=None, col=None, header=None, tags=list()): """Adds a row or column to the :class:`Dataset`. diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index f5960b8..147df31 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -8,5 +8,6 @@ import _json as json import _xls as xls import _yaml as yaml import _tsv as tsv +import _html as html -available = (json, xls, yaml, csv, tsv) +available = (json, xls, yaml, csv, tsv, html) diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py new file mode 100644 index 0000000..c68eb37 --- /dev/null +++ b/tablib/formats/_html.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +""" Tablib - HTML export support. +""" + +import cStringIO + +import markup +import tablib + +title = 'html' +extentions = ('html', ) + +def export_set(dataset): + + """HTML representation of a Dataset.""" + + stream = cStringIO.StringIO() + + page = markup.page() + page.table.open() + + if dataset.headers is not None: + page.thead.open() + headers = markup.oneliner.th(dataset.headers) + page.tr(headers) + page.thead.close() + + for row in dataset: + html_row = markup.oneliner.td(row) + page.tr(html_row) + + page.table.close() + + stream.writelines(str(page)) + + return stream.getvalue() + diff --git a/tablib/packages/markup.py b/tablib/packages/markup.py new file mode 100644 index 0000000..98d9b1d --- /dev/null +++ b/tablib/packages/markup.py @@ -0,0 +1,484 @@ +# This code is in the public domain, it comes +# with absolutely no warranty and you can do +# absolutely whatever you want with it. + +__date__ = '17 May 2007' +__version__ = '1.7' +__doc__= """ +This is markup.py - a Python module that attempts to +make it easier to generate HTML/XML from a Python program +in an intuitive, lightweight, customizable and pythonic way. + +The code is in the public domain. + +Version: %s as of %s. + +Documentation and further info is at http://markup.sourceforge.net/ + +Please send bug reports, feature requests, enhancement +ideas or questions to nogradi at gmail dot com. + +Installation: drop markup.py somewhere into your Python path. +""" % ( __version__, __date__ ) + +import string + +class element: + """This class handles the addition of a new element.""" + + def __init__( self, tag, case='lower', parent=None ): + self.parent = parent + + if case == 'lower': + self.tag = tag.lower( ) + else: + self.tag = tag.upper( ) + + def __call__( self, *args, **kwargs ): + if len( args ) > 1: + raise ArgumentError( self.tag ) + + # if class_ was defined in parent it should be added to every element + if self.parent is not None and self.parent.class_ is not None: + if 'class_' not in kwargs: + kwargs['class_'] = self.parent.class_ + + if self.parent is None and len( args ) == 1: + x = [ self.render( self.tag, False, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ] + return '\n'.join( x ) + elif self.parent is None and len( args ) == 0: + x = [ self.render( self.tag, True, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ] + return '\n'.join( x ) + + if self.tag in self.parent.twotags: + for myarg, mydict in _argsdicts( args, kwargs ): + self.render( self.tag, False, myarg, mydict ) + elif self.tag in self.parent.onetags: + if len( args ) == 0: + for myarg, mydict in _argsdicts( args, kwargs ): + self.render( self.tag, True, myarg, mydict ) # here myarg is always None, because len( args ) = 0 + else: + raise ClosingError( self.tag ) + elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + else: + raise InvalidElementError( self.tag, self.parent.mode ) + + def render( self, tag, single, between, kwargs ): + """Append the actual tags to content.""" + + out = "<%s" % tag + for key, value in kwargs.iteritems( ): + if value is not None: # when value is None that means stuff like <... checked> + key = key.strip('_') # strip this so class_ will mean class, etc. + if key == 'http_equiv': # special cases, maybe change _ to - overall? + key = 'http-equiv' + elif key == 'accept_charset': + key = 'accept-charset' + out = "%s %s=\"%s\"" % ( out, key, escape( value ) ) + else: + out = "%s %s" % ( out, key ) + if between is not None: + out = "%s>%s" % ( out, between, tag ) + else: + if single: + out = "%s />" % out + else: + out = "%s>" % out + if self.parent is not None: + self.parent.content.append( out ) + else: + return out + + def close( self ): + """Append a closing tag unless element has only opening tag.""" + + if self.tag in self.parent.twotags: + self.parent.content.append( "" % self.tag ) + elif self.tag in self.parent.onetags: + raise ClosingError( self.tag ) + elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + + def open( self, **kwargs ): + """Append an opening tag.""" + + if self.tag in self.parent.twotags or self.tag in self.parent.onetags: + self.render( self.tag, False, None, kwargs ) + elif self.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + +class page: + """This is our main class representing a document. Elements are added + as attributes of an instance of this class.""" + + def __init__( self, mode='strict_html', case='lower', onetags=None, twotags=None, separator='\n', class_=None ): + """Stuff that effects the whole document. + + mode -- 'strict_html' for HTML 4.01 (default) + 'html' alias for 'strict_html' + 'loose_html' to allow some deprecated elements + 'xml' to allow arbitrary elements + + case -- 'lower' element names will be printed in lower case (default) + 'upper' they will be printed in upper case + + onetags -- list or tuple of valid elements with opening tags only + twotags -- list or tuple of valid elements with both opening and closing tags + these two keyword arguments may be used to select + the set of valid elements in 'xml' mode + invalid elements will raise appropriate exceptions + + separator -- string to place between added elements, defaults to newline + + class_ -- a class that will be added to every element if defined""" + + valid_onetags = [ "AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM" ] + valid_twotags = [ "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON", + "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET", + "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS", + "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP", + "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE", + "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR", + "TT", "UL", "VAR" ] + deprecated_onetags = [ "BASEFONT", "ISINDEX" ] + deprecated_twotags = [ "APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U" ] + + self.header = [ ] + self.content = [ ] + self.footer = [ ] + self.case = case + self.separator = separator + + # init( ) sets it to True so we know that has to be printed at the end + self._full = False + self.class_= class_ + + if mode == 'strict_html' or mode == 'html': + self.onetags = valid_onetags + self.onetags += map( string.lower, self.onetags ) + self.twotags = valid_twotags + self.twotags += map( string.lower, self.twotags ) + self.deptags = deprecated_onetags + deprecated_twotags + self.deptags += map( string.lower, self.deptags ) + self.mode = 'strict_html' + elif mode == 'loose_html': + self.onetags = valid_onetags + deprecated_onetags + self.onetags += map( string.lower, self.onetags ) + self.twotags = valid_twotags + deprecated_twotags + self.twotags += map( string.lower, self.twotags ) + self.mode = mode + elif mode == 'xml': + if onetags and twotags: + self.onetags = onetags + self.twotags = twotags + elif ( onetags and not twotags ) or ( twotags and not onetags ): + raise CustomizationError( ) + else: + self.onetags = russell( ) + self.twotags = russell( ) + self.mode = mode + else: + raise ModeError( mode ) + + def __getattr__( self, attr ): + if attr.startswith("__") and attr.endswith("__"): + raise AttributeError, attr + return element( attr, case=self.case, parent=self ) + + def __str__( self ): + + if self._full and ( self.mode == 'strict_html' or self.mode == 'loose_html' ): + end = [ '', '' ] + else: + end = [ ] + + return self.separator.join( self.header + self.content + self.footer + end ) + + def __call__( self, escape=False ): + """Return the document as a string. + + escape -- False print normally + True replace < and > by < and > + the default escape sequences in most browsers""" + + if escape: + return _escape( self.__str__( ) ) + else: + return self.__str__( ) + + def add( self, text ): + """This is an alias to addcontent.""" + self.addcontent( text ) + + def addfooter( self, text ): + """Add some text to the bottom of the document""" + self.footer.append( text ) + + def addheader( self, text ): + """Add some text to the top of the document""" + self.header.append( text ) + + def addcontent( self, text ): + """Add some text to the main part of the document""" + self.content.append( text ) + + + def init( self, lang='en', css=None, metainfo=None, title=None, header=None, + footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None ): + """This method is used for complete documents with appropriate + doctype, encoding, title, etc information. For an HTML/XML snippet + omit this method. + + lang -- language, usually a two character string, will appear + as in html mode (ignored in xml mode) + + css -- Cascading Style Sheet filename as a string or a list of + strings for multiple css files (ignored in xml mode) + + metainfo -- a dictionary in the form { 'name':'content' } to be inserted + into meta element(s) as + (ignored in xml mode) + + bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added + as attributes of the element as + (ignored in xml mode) + + script -- dictionary containing src:type pairs, + + title -- the title of the document as a string to be inserted into + a title element as my title (ignored in xml mode) + + header -- some text to be inserted right after the element + (ignored in xml mode) + + footer -- some text to be inserted right before the element + (ignored in xml mode) + + charset -- a string defining the character set, will be inserted into a + + element (ignored in xml mode) + + encoding -- a string defining the encoding, will be put into to first line of + the document as in + xml mode (ignored in html mode) + + doctype -- the document type string, defaults to + + in html mode (ignored in xml mode)""" + + self._full = True + + if self.mode == 'strict_html' or self.mode == 'loose_html': + if doctype is None: + doctype = "" + self.header.append( doctype ) + self.html( lang=lang ) + self.head( ) + if charset is not None: + self.meta( http_equiv='Content-Type', content="text/html; charset=%s" % charset ) + if metainfo is not None: + self.metainfo( metainfo ) + if css is not None: + self.css( css ) + if title is not None: + self.title( title ) + if script is not None: + self.scripts( script ) + self.head.close() + if bodyattrs is not None: + self.body( **bodyattrs ) + else: + self.body( ) + if header is not None: + self.content.append( header ) + if footer is not None: + self.footer.append( footer ) + + elif self.mode == 'xml': + if doctype is None: + if encoding is not None: + doctype = "" % encoding + else: + doctype = "" + self.header.append( doctype ) + + def css( self, filelist ): + """This convenience function is only useful for html. + It adds css stylesheet(s) to the document via the element.""" + + if isinstance( filelist, basestring ): + self.link( href=filelist, rel='stylesheet', type='text/css', media='all' ) + else: + for file in filelist: + self.link( href=file, rel='stylesheet', type='text/css', media='all' ) + + def metainfo( self, mydict ): + """This convenience function is only useful for html. + It adds meta information via the element, the argument is + a dictionary of the form { 'name':'content' }.""" + + if isinstance( mydict, dict ): + for name, content in mydict.iteritems( ): + self.meta( name=name, content=content ) + else: + raise TypeError, "Metainfo should be called with a dictionary argument of name:content pairs." + + def scripts( self, mydict ): + """Only useful in html, mydict is dictionary of src:type pairs will + be rendered as """ + + if isinstance( mydict, dict ): + for src, type in mydict.iteritems( ): + self.script( '', src=src, type='text/%s' % type ) + else: + raise TypeError, "Script should be given a dictionary of src:type pairs." + + +class _oneliner: + """An instance of oneliner returns a string corresponding to one element. + This class can be used to write 'oneliners' that return a string + immediately so there is no need to instantiate the page class.""" + + def __init__( self, case='lower' ): + self.case = case + + def __getattr__( self, attr ): + if attr.startswith("__") and attr.endswith("__"): + raise AttributeError, attr + return element( attr, case=self.case, parent=None ) + +oneliner = _oneliner( case='lower' ) +upper_oneliner = _oneliner( case='upper' ) + +def _argsdicts( args, mydict ): + """A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1.""" + + if len( args ) == 0: + args = None, + elif len( args ) == 1: + args = _totuple( args[0] ) + else: + raise Exception, "We should have never gotten here." + + mykeys = mydict.keys( ) + myvalues = map( _totuple, mydict.values( ) ) + + maxlength = max( map( len, [ args ] + myvalues ) ) + + for i in xrange( maxlength ): + thisdict = { } + for key, value in zip( mykeys, myvalues ): + try: + thisdict[ key ] = value[i] + except IndexError: + thisdict[ key ] = value[-1] + try: + thisarg = args[i] + except IndexError: + thisarg = args[-1] + + yield thisarg, thisdict + +def _totuple( x ): + """Utility stuff to convert string, int, float, None or anything to a usable tuple.""" + + if isinstance( x, basestring ): + out = x, + elif isinstance( x, ( int, float ) ): + out = str( x ), + elif x is None: + out = None, + else: + out = tuple( x ) + + return out + +def escape( text, newline=False ): + """Escape special html characters.""" + + if isinstance( text, basestring ): + if '&' in text: + text = text.replace( '&', '&' ) + if '>' in text: + text = text.replace( '>', '>' ) + if '<' in text: + text = text.replace( '<', '<' ) + if '\"' in text: + text = text.replace( '\"', '"' ) + if '\'' in text: + text = text.replace( '\'', '"' ) + if newline: + if '\n' in text: + text = text.replace( '\n', '
' ) + + return text + +_escape = escape + +def unescape( text ): + """Inverse of escape.""" + + if isinstance( text, basestring ): + if '&' in text: + text = text.replace( '&', '&' ) + if '>' in text: + text = text.replace( '>', '>' ) + if '<' in text: + text = text.replace( '<', '<' ) + if '"' in text: + text = text.replace( '"', '\"' ) + + return text + +class dummy: + """A dummy class for attaching attributes.""" + pass + +doctype = dummy( ) +doctype.frameset = "" +doctype.strict = "" +doctype.loose = "" + +class russell: + """A dummy class that contains anything.""" + + def __contains__( self, item ): + return True + + +class MarkupError( Exception ): + """All our exceptions subclass this.""" + def __str__( self ): + return self.message + +class ClosingError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag + +class OpeningError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' can not be opened." % tag + +class ArgumentError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' was called with more than one non-keyword argument." % tag + +class InvalidElementError( MarkupError ): + def __init__( self, tag, mode ): + self.message = "The element '%s' is not valid for your mode '%s'." % ( tag, mode ) + +class DeprecationError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag + +class ModeError( MarkupError ): + def __init__( self, mode ): + self.message = "Mode '%s' is invalid, possible values: strict_html, loose_html, xml." % mode + +class CustomizationError( MarkupError ): + def __init__( self ): + self.message = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'." + +if __name__ == '__main__': + print __doc__ diff --git a/test_tablib.py b/test_tablib.py index 8e2454f..bc660fe 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -5,6 +5,7 @@ import unittest +import markup import tablib @@ -182,6 +183,27 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(tsv, self.founders.tsv) + def test_html_export(self): + + """HTML export""" + + html = markup.page() + html.table.open() + html.thead.open() + + html.tr(markup.oneliner.th(self.founders.headers)) + html.thead.close() + + for founder in self.founders: + + html.tr(markup.oneliner.td(founder)) + + html.table.close() + html = str(html) + + self.assertEqual(html, self.founders.html) + + def test_unicode_append(self): """Passes in a single unicode charecter and exports.""" @@ -403,6 +425,7 @@ class TablibTestCase(unittest.TestCase): ("John", "Adams", 90, "John", "Adams", 90)) + def test_wipe(self): """Purge a dataset.""" From 99896a5f28c7c451446ec68bf0215615032b4b57 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Sun, 21 Nov 2010 13:14:47 +0100 Subject: [PATCH 02/18] Fix Databook data leaks. --- tablib/core.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index bd2d4ba..e732db9 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -615,11 +615,16 @@ class Databook(object): """A book of :class:`Dataset` objects. """ - def __init__(self, sets=[]): + def __init__(self, sets=None): + + if sets is None: + self._datasets = list() + else: + self._datasets = sets + self._datasets = sets self._register_formats() - def __repr__(self): try: return '<%s databook>' % (self.title.lower()) From e3e6b656e32c64b7f4d5713ede350bb77506650b Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Sun, 21 Nov 2010 13:17:36 +0100 Subject: [PATCH 03/18] Fix the stupid mistake. --- tablib/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index e732db9..c3013ed 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -622,7 +622,6 @@ class Databook(object): else: self._datasets = sets - self._datasets = sets self._register_formats() def __repr__(self): From 6a7c6851111dcbe280c6b358c6136d1f298fda17 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 21 Nov 2010 18:49:02 -0500 Subject: [PATCH 04/18] Import path fix. --- tablib/formats/_html.py | 2 +- test_tablib.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index c68eb37..60969f6 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -5,7 +5,7 @@ import cStringIO -import markup +from tablib.packages import markup import tablib title = 'html' diff --git a/test_tablib.py b/test_tablib.py index bc660fe..2d1f6b4 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -5,7 +5,8 @@ import unittest -import markup +from tablib.packages import markup + import tablib From 7055d18a2e02f02f3eea7a770e87113d177a7a38 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 21 Nov 2010 18:53:18 -0500 Subject: [PATCH 05/18] History update. --- HISTORY.rst | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index c64d7b5..43a8cda 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,19 +1,26 @@ History ------- +0.9.3 (2010-11-2?) +++++++++++++++++++ + +* Databook duplication leak fix. +* HTML Table output. + + 0.9.2 (2010-11-17) ++++++++++++++++++ -* Tanspose method added to Datasets -* New frozen top row in Excel output -* Pickling support for Datasets and Rows -* Support for row/column stacking +* Tanspose method added to Datasets. +* New frozen top row in Excel output. +* Pickling support for Datasets and Rows. +* Support for row/column stacking. 0.9.1 (2010-11-04) ++++++++++++++++++ -* Minor reference shadowing bugfix +* Minor reference shadowing bugfix. 0.9.0 (2010-11-04) From 0784d4b32c109515d6d919caf2b07e90caf5ae49 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 21 Nov 2010 18:55:45 -0500 Subject: [PATCH 06/18] Updated todo w/ new html output feature --- TODO.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TODO.rst b/TODO.rst index 231f03e..4d48313 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,8 +1,8 @@ +* Add seperator support to HTML out * Backwards-compatible OrderedDict support * Write more exhausive unit-tests. * Write stress tests. * Make CSV write customizable. -* HTML Table exports. * Integrate django-tablib * Mention django-tablib in Documention -* Dataset title usage in documentation (#17) \ No newline at end of file +* Dataset title usage in documentation (#17) From 22d337790acc82e8760e1dd261c47c93297f1d21 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 21 Nov 2010 18:58:30 -0500 Subject: [PATCH 07/18] small changes to html output --- tablib/formats/_html.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index 60969f6..786d5a3 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -3,7 +3,7 @@ """ Tablib - HTML export support. """ -import cStringIO +from StringIO import StringIO from tablib.packages import markup import tablib @@ -12,10 +12,9 @@ title = 'html' extentions = ('html', ) def export_set(dataset): - """HTML representation of a Dataset.""" - stream = cStringIO.StringIO() + stream = StringIO() page = markup.page() page.table.open() From e3b3659ea40151eb849e3f53555900b7ba2c7d43 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 21 Nov 2010 21:32:00 -0500 Subject: [PATCH 08/18] whitespace fix --- tablib/formats/_json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py index da31b23..7f31ee5 100644 --- a/tablib/formats/_json.py +++ b/tablib/formats/_json.py @@ -26,11 +26,11 @@ def export_set(dataset): def export_book(databook): """Returns JSON representation of Databook.""" return json.dumps(databook._package()) - + def import_set(dset, in_stream): """Returns dataset from JSON stream.""" - + dset.wipe() dset.dict = json.loads(in_stream) @@ -52,4 +52,4 @@ def detect(stream): json.loads(stream) return True except ValueError: - return False \ No newline at end of file + return False From 22c4d185e122da6ea1c71d17af009a6c55217c39 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 21 Nov 2010 21:33:01 -0500 Subject: [PATCH 09/18] Export HTML for Databooks. --- tablib/formats/_html.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index 786d5a3..13dc055 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -8,9 +8,12 @@ from StringIO import StringIO from tablib.packages import markup import tablib +BOOK_ENDINGS = 'h3' + title = 'html' extentions = ('html', ) + def export_set(dataset): """HTML representation of a Dataset.""" @@ -35,3 +38,16 @@ def export_set(dataset): return stream.getvalue() + +def export_book(databook): + """HTML representation of a Databook.""" + + stream = StringIO() + + for i, dset in enumerate(databook._datasets): + title = (dset.title if dset.title else 'Set %s' % (i)) + stream.write('<%s>%s\n' % (BOOK_ENDINGS, title, BOOK_ENDINGS)) + stream.write(dset.html) + stream.write('\n') + + return stream.getvalue() From d25655588b13657c253f9c216d1cc2360a5d5e57 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 13 Dec 2010 17:08:11 -0500 Subject: [PATCH 10/18] TODO update. --- TODO.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/TODO.rst b/TODO.rst index 4d48313..9f8c99f 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,4 +1,9 @@ * Add seperator support to HTML out +* Hooks System + - pre/post-append + - pre/post-import + - pre/post-export +* Big Data * Backwards-compatible OrderedDict support * Write more exhausive unit-tests. * Write stress tests. From 34415b89b858075323b7a2d034c06b57dd26bdc7 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 10 Jan 2011 19:28:12 -0500 Subject: [PATCH 11/18] New Year! --- LICENSE | 2 +- docs/_themes/LICENSE | 2 +- docs/conf.py | 2 +- fabfile.py | 2 +- tablib/core.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/LICENSE b/LICENSE index 717ff16..ea8c217 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2010 Kenneth Reitz. +Copyright (c) 2011 Kenneth Reitz. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/docs/_themes/LICENSE b/docs/_themes/LICENSE index 81f4d30..b160a8e 100644 --- a/docs/_themes/LICENSE +++ b/docs/_themes/LICENSE @@ -1,6 +1,6 @@ Modifications: -Copyright (c) 2010 Kenneth Reitz. +Copyright (c) 2011 Kenneth Reitz. Original Project: diff --git a/docs/conf.py b/docs/conf.py index 325002c..2a642c9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. project = u'Tablib' -copyright = u'2010, Kenneth Reitz. Styles (modified) © Armin Ronacher' +copyright = u'2011, Kenneth Reitz. Styles (modified) © Armin Ronacher' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/fabfile.py b/fabfile.py index 391bf92..6e72092 100644 --- a/fabfile.py +++ b/fabfile.py @@ -1,7 +1,7 @@ import os from fabric.api import * - +os.f def scrub(): """ Death to the bytecode! """ local('rm -fr dist build') diff --git a/tablib/core.py b/tablib/core.py index da49788..c7e9dd5 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -5,7 +5,7 @@ This module implements the central tablib objects. - :copyright: (c) 2010 by Kenneth Reitz. + :copyright: (c) 2011 by Kenneth Reitz. :license: MIT, see LICENSE for more details. """ @@ -19,7 +19,7 @@ __version__ = '0.9.2' __build__ = 0x000902 __author__ = 'Kenneth Reitz' __license__ = 'MIT' -__copyright__ = 'Copyright 2010 Kenneth Reitz' +__copyright__ = 'Copyright 2011 Kenneth Reitz' class Row(object): From f81dc41a57e19926868efa036bdb9e862cd3bbbb Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Tue, 11 Jan 2011 20:53:59 +0100 Subject: [PATCH 12/18] Support for sorting. Unit-tested. --- tablib/core.py | 20 ++++++++++++++++++++ test_tablib.py | 15 +++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/tablib/core.py b/tablib/core.py index c7e9dd5..5727a69 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -10,6 +10,7 @@ """ from copy import copy +from operator import itemgetter from tablib import formats @@ -528,6 +529,25 @@ class Dataset(object): return _dset + def sort(self, col, reverse=False): + + """Sort a :class:`Dataset` by a specific column. The order can be + reversed by setting ``reverse`` to ``True``. Requires headers to be + set. Returns a new :class:`Dataset` instance where columns have been + sorted.""" + + if not self.headers: + raise HeadersNeeded + + _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) + _dset = Dataset(headers=self.headers) + + for item in _sorted: + row = [item[key] for key in self.headers] + _dset.append(row=row) + + return _dset + def transpose(self): """Transpose a :class:`Dataset`, turning rows into columns and vice versa, returning a new ``Dataset`` instance. The first row of the diff --git a/test_tablib.py b/test_tablib.py index 2d1f6b4..15630f2 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -425,7 +425,22 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(column_stacked[0], ("John", "Adams", 90, "John", "Adams", 90)) + def test_sorting(self): + """Sort columns.""" + + sorted_data = self.founders.sort(col="first_name") + + first_row = sorted_data[0] + second_row = sorted_data[2] + third_row = sorted_data[1] + expected_first = self.founders[1] + expected_second = self.founders[2] + expected_third = self.founders[0] + + self.assertEqual(first_row, expected_first) + self.assertEqual(second_row, expected_second) + self.assertEqual(third_row, expected_third) def test_wipe(self): """Purge a dataset.""" From 0797ec67d4a4c6c145d86d0b7dca9d3b03c6d8e6 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 31 Jan 2011 00:58:16 -0500 Subject: [PATCH 13/18] Prepping for new release (0.9.3) --- HISTORY.rst | 3 ++- README.rst | 1 + tablib/core.py | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 43a8cda..95b9328 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,11 +1,12 @@ History ------- -0.9.3 (2010-11-2?) +0.9.3 (2011-01-31) ++++++++++++++++++ * Databook duplication leak fix. * HTML Table output. +* Added column sorting. 0.9.2 (2010-11-17) diff --git a/README.rst b/README.rst index 00b6345..f974248 100644 --- a/README.rst +++ b/README.rst @@ -18,6 +18,7 @@ Output formats supported: - Excel (Sets + Books) - JSON (Sets + Books) - YAML (Sets + Books) +- HTML (Sets) - TSV (Sets) - CSV (Sets) diff --git a/tablib/core.py b/tablib/core.py index c7e9dd5..6eb57db 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -15,8 +15,8 @@ from tablib import formats __title__ = 'tablib' -__version__ = '0.9.2' -__build__ = 0x000902 +__version__ = '0.9.3' +__build__ = 0x000903 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2011 Kenneth Reitz' From 89b431213bc74f219da48489fbaabdd02bcfe56a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 31 Jan 2011 01:28:10 -0500 Subject: [PATCH 14/18] Sorting update for headerless datasets. --- tablib/core.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 19eec3e..0de2b28 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -535,16 +535,32 @@ class Dataset(object): reversed by setting ``reverse`` to ``True``. Requires headers to be set. Returns a new :class:`Dataset` instance where columns have been sorted.""" + if isinstance(col, basestring): - if not self.headers: - raise HeadersNeeded + if not self.headers: + raise HeadersNeeded - _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) - _dset = Dataset(headers=self.headers) + _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) + _dset = Dataset(headers=self.headers) + + for item in _sorted: + row = [item[key] for key in self.headers] + _dset.append(row=row) + + else: + if self.headers: + col = self.headers[col] + + _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) + _dset = Dataset(headers=self.headers) + + for item in _sorted: + if self.headers: + row = [item[key] for key in self.headers] + else: + row = item + _dset.append(row=row) - for item in _sorted: - row = [item[key] for key in self.headers] - _dset.append(row=row) return _dset From a0822bc9b091ca6b613aac71afc39acf0665a1e1 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 31 Jan 2011 01:29:41 -0500 Subject: [PATCH 15/18] sorting update. --- tablib/core.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 0de2b28..9d36970 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -520,6 +520,7 @@ class Dataset(object): else: self._data = [Row([row]) for row in col] + def filter(self, tag): """Returns a new instance of the :class:`Dataset`, excluding any rows that do not contain the given :ref:`tags `. @@ -529,12 +530,14 @@ class Dataset(object): return _dset - def sort(self, col, reverse=False): - """Sort a :class:`Dataset` by a specific column. The order can be - reversed by setting ``reverse`` to ``True``. Requires headers to be - set. Returns a new :class:`Dataset` instance where columns have been + def sort(self, col, reverse=False): + """Sort a :class:`Dataset` by a specific column, given string (for + header) or integer (for column index). The order can be reversed by + setting ``reverse`` to ``True``. + Returns a new :class:`Dataset` instance where columns have been sorted.""" + if isinstance(col, basestring): if not self.headers: From e8b44b57779b6374fbee3e5dc2b7dc1091e1cf5c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 31 Jan 2011 01:33:00 -0500 Subject: [PATCH 16/18] Version bump. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e779457..c851751 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ required = [] setup( name='tablib', - version='0.9.2', + version='0.9.3', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), From 5379c5683d9cb7c07f7619e544c0c2fc4efa80b1 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 31 Jan 2011 01:33:12 -0500 Subject: [PATCH 17/18] Markup license notice. PD? Really? --- NOTICE | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/NOTICE b/NOTICE index 88d5d2d..c8428e8 100644 --- a/NOTICE +++ b/NOTICE @@ -1,6 +1,12 @@ Tablib includes some vendorized python libraries: ordereddict, pyyaml, simplejson, and xlwt. +Markup License +============== + +Markup is in the public domain. + + OrderedDict License =================== From 140736ff332ff164f18821ec150488b1a2092898 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 31 Jan 2011 01:34:40 -0500 Subject: [PATCH 18/18] fabfile typo. --- fabfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fabfile.py b/fabfile.py index 6e72092..391bf92 100644 --- a/fabfile.py +++ b/fabfile.py @@ -1,7 +1,7 @@ import os from fabric.api import * -os.f + def scrub(): """ Death to the bytecode! """ local('rm -fr dist build')