From 25da44f56980ac8ddc7aa4b034ab0ec9a1ebd5e7 Mon Sep 17 00:00:00 2001
From: Luca Beltrame <einar@heavensinferno.net>
Date: Sun, 21 Nov 2010 13:00:56 +0100
Subject: [PATCH 01/18] Support for HTML (export only). Unit-tested. Depends on
 the "markup.py" package(http://markup.sourceforge.net) which is included in
 packages/ Notice that the tests now depend on the presence of markup.py.

---
 tablib/core.py             |   8 +
 tablib/formats/__init__.py |   3 +-
 tablib/formats/_html.py    |  38 +++
 tablib/packages/markup.py  | 484 +++++++++++++++++++++++++++++++++++++
 test_tablib.py             |  23 ++
 5 files changed, 555 insertions(+), 1 deletion(-)
 create mode 100644 tablib/formats/_html.py
 create mode 100644 tablib/packages/markup.py

diff --git a/tablib/core.py b/tablib/core.py
index bd2d4ba..88d99a0 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -425,6 +425,14 @@ class Dataset(object):
 	    Import assumes (for now) that headers exist.
 		"""
 
+	@property
+	def html(self):
+		"""A HTML table representation of the :class:`Dataset` object. If
+		headers have been set, they will be used as table headers.
+
+		.. note:: This method can be used for export only.
+		"""
+		pass
 
 	def append(self, row=None, col=None, header=None, tags=list()):
 		"""Adds a row or column to the :class:`Dataset`.
diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py
index f5960b8..147df31 100644
--- a/tablib/formats/__init__.py
+++ b/tablib/formats/__init__.py
@@ -8,5 +8,6 @@ import _json as json
 import _xls as xls
 import _yaml as yaml
 import _tsv as tsv
+import _html as html
 
-available = (json, xls, yaml, csv, tsv)
+available = (json, xls, yaml, csv, tsv, html)
diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
new file mode 100644
index 0000000..c68eb37
--- /dev/null
+++ b/tablib/formats/_html.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+
+""" Tablib - HTML export support.
+"""
+
+import cStringIO
+
+import markup
+import tablib
+
+title = 'html'
+extentions = ('html', )
+
+def export_set(dataset):
+
+	"""HTML representation of a Dataset."""
+
+	stream = cStringIO.StringIO()
+
+	page = markup.page()
+	page.table.open()
+
+	if dataset.headers is not None:
+		page.thead.open()
+		headers = markup.oneliner.th(dataset.headers)
+		page.tr(headers)
+		page.thead.close()
+
+	for row in dataset:
+		html_row = markup.oneliner.td(row)
+		page.tr(html_row)
+
+	page.table.close()
+	# write(), not writelines(): the latter would iterate the string char by char
+	stream.write(str(page))
+
+	return stream.getvalue()
+
diff --git a/tablib/packages/markup.py b/tablib/packages/markup.py
new file mode 100644
index 0000000..98d9b1d
--- /dev/null
+++ b/tablib/packages/markup.py
@@ -0,0 +1,484 @@
+# This code is in the public domain, it comes
+# with absolutely no warranty and you can do
+# absolutely whatever you want with it.
+
+__date__ = '17 May 2007'
+__version__ = '1.7'
+__doc__= """
+This is markup.py - a Python module that attempts to
+make it easier to generate HTML/XML from a Python program
+in an intuitive, lightweight, customizable and pythonic way.
+
+The code is in the public domain.
+
+Version: %s as of %s.
+
+Documentation and further info is at http://markup.sourceforge.net/
+
+Please send bug reports, feature requests, enhancement
+ideas or questions to nogradi at gmail dot com.
+
+Installation: drop markup.py somewhere into your Python path.
+""" % ( __version__, __date__ )
+
+import string
+
+class element:
+    """This class handles the addition of a new element."""
+
+    def __init__( self, tag, case='lower', parent=None ):
+        self.parent = parent
+
+	if case == 'lower':
+	    self.tag = tag.lower( )
+	else:
+	    self.tag = tag.upper( )
+    
+    def __call__( self, *args, **kwargs ):
+        if len( args ) > 1:
+            raise ArgumentError( self.tag )
+
+        # if class_ was defined in parent it should be added to every element
+        if self.parent is not None and self.parent.class_ is not None:
+            if 'class_' not in kwargs:
+                kwargs['class_'] = self.parent.class_
+            
+        if self.parent is None and len( args ) == 1:
+            x = [ self.render( self.tag, False, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ]
+            return '\n'.join( x )
+        elif self.parent is None and len( args ) == 0:
+            x = [ self.render( self.tag, True, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ]
+            return '\n'.join( x )
+            
+        if self.tag in self.parent.twotags:
+            for myarg, mydict in _argsdicts( args, kwargs ):
+                self.render( self.tag, False, myarg, mydict )
+        elif self.tag in self.parent.onetags:
+            if len( args ) == 0:
+                for myarg, mydict in _argsdicts( args, kwargs ):
+                    self.render( self.tag, True, myarg, mydict )    # here myarg is always None, because len( args ) = 0
+            else:
+                raise ClosingError( self.tag )
+        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
+            raise DeprecationError( self.tag )
+        else:
+            raise InvalidElementError( self.tag, self.parent.mode )
+    
+    def render( self, tag, single, between, kwargs ):
+        """Append the actual tags to content."""
+
+	out = "<%s" % tag
+	for key, value in kwargs.iteritems( ):
+            if value is not None:               # when value is None that means stuff like <... checked>
+                key = key.strip('_')            # strip this so class_ will mean class, etc.
+                if key == 'http_equiv':         # special cases, maybe change _ to - overall?
+                    key = 'http-equiv'
+                elif key == 'accept_charset':
+                    key = 'accept-charset'
+                out = "%s %s=\"%s\"" % ( out, key, escape( value ) )
+            else:
+                out = "%s %s" % ( out, key )
+	if between is not None:
+	    out = "%s>%s</%s>" % ( out, between, tag )
+	else:
+	    if single:
+		out = "%s />" % out
+	    else:
+		out = "%s>" % out
+        if self.parent is not None:
+            self.parent.content.append( out )
+        else:
+            return out
+    
+    def close( self ):
+        """Append a closing tag unless element has only opening tag."""
+
+        if self.tag in self.parent.twotags:
+            self.parent.content.append( "</%s>" % self.tag )
+        elif self.tag in self.parent.onetags:
+            raise ClosingError( self.tag )
+        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
+            raise DeprecationError( self.tag )
+
+    def open( self, **kwargs ):
+        """Append an opening tag to the parent page; kwargs become its attributes.
+        Raises DeprecationError for a deprecated tag in 'strict_html' mode."""
+        if self.tag in self.parent.twotags or self.tag in self.parent.onetags:
+            self.render( self.tag, False, None, kwargs )
+        elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
+            raise DeprecationError( self.tag )
+
+class page:
+    """This is our main class representing a document. Elements are added
+    as attributes of an instance of this class."""
+
+    def __init__( self, mode='strict_html', case='lower', onetags=None, twotags=None, separator='\n', class_=None ):
+        """Stuff that effects the whole document.
+
+        mode -- 'strict_html'   for HTML 4.01 (default)
+                'html'          alias for 'strict_html'
+                'loose_html'    to allow some deprecated elements
+                'xml'           to allow arbitrary elements
+
+        case -- 'lower'         element names will be printed in lower case (default)
+                'upper'         they will be printed in upper case
+
+        onetags --              list or tuple of valid elements with opening tags only
+        twotags --              list or tuple of valid elements with both opening and closing tags
+                                these two keyword arguments may be used to select
+                                the set of valid elements in 'xml' mode
+                                invalid elements will raise appropriate exceptions
+        
+        separator --            string to place between added elements, defaults to newline
+        
+        class_ --               a class that will be added to every element if defined"""
+        
+        valid_onetags = [ "AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM" ]
+        valid_twotags = [ "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
+                "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
+                "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
+                "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
+                "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
+                "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
+                "TT", "UL", "VAR" ]
+        deprecated_onetags = [ "BASEFONT", "ISINDEX" ]
+        deprecated_twotags = [ "APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U" ]
+
+        self.header = [ ]
+	self.content = [ ]
+        self.footer = [ ]
+	self.case = case
+        self.separator = separator
+
+        # init( ) sets it to True so we know that </body></html> has to be printed at the end
+        self._full = False
+        self.class_= class_
+
+	if mode == 'strict_html' or mode == 'html':
+	    self.onetags = valid_onetags
+	    self.onetags += map( string.lower, self.onetags )
+	    self.twotags = valid_twotags
+	    self.twotags += map( string.lower, self.twotags )
+	    self.deptags = deprecated_onetags + deprecated_twotags
+	    self.deptags += map( string.lower, self.deptags )
+	    self.mode = 'strict_html'
+	elif mode == 'loose_html':
+	    self.onetags = valid_onetags + deprecated_onetags 
+	    self.onetags += map( string.lower, self.onetags )
+	    self.twotags = valid_twotags + deprecated_twotags
+	    self.twotags += map( string.lower, self.twotags )
+	    self.mode = mode
+	elif mode == 'xml':
+            if onetags and twotags:
+                self.onetags = onetags
+                self.twotags = twotags
+            elif ( onetags and not twotags ) or ( twotags and not onetags ):
+                raise CustomizationError( )
+            else:
+                self.onetags = russell( )
+                self.twotags = russell( )
+            self.mode = mode
+	else:
+	    raise ModeError( mode )
+
+    def __getattr__( self, attr ):
+        if attr.startswith("__") and attr.endswith("__"):
+            raise AttributeError, attr
+        return element( attr, case=self.case, parent=self )
+
+    def __str__( self ):
+        
+        if self._full and ( self.mode == 'strict_html' or self.mode == 'loose_html' ):
+            end = [ '</body>', '</html>' ]
+        else:
+            end = [ ]
+	
+        return self.separator.join( self.header + self.content + self.footer + end )
+
+    def __call__( self, escape=False ):
+        """Return the document as a string.
+
+        escape --   False   print normally
+                    True    replace < and > by &lt; and &gt;
+                            the default escape sequences in most browsers"""
+
+        if escape:
+            return _escape( self.__str__( ) )
+        else:
+            return self.__str__( )
+
+    def add( self, text ):
+        """This is an alias to addcontent."""
+        self.addcontent( text )
+
+    def addfooter( self, text ):
+        """Add some text to the bottom of the document"""
+        self.footer.append( text )
+
+    def addheader( self, text ):
+        """Add some text to the top of the document"""
+        self.header.append( text )
+
+    def addcontent( self, text ):
+        """Add some text to the main part of the document"""
+        self.content.append( text )
+
+
+    def init( self, lang='en', css=None, metainfo=None, title=None, header=None,
+              footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None ):
+        """This method is used for complete documents with appropriate
+        doctype, encoding, title, etc information. For an HTML/XML snippet
+        omit this method.
+
+        lang --     language, usually a two character string, will appear
+                    as <html lang='en'> in html mode (ignored in xml mode)
+        
+        css --      Cascading Style Sheet filename as a string or a list of
+                    strings for multiple css files (ignored in xml mode)
+
+        metainfo -- a dictionary in the form { 'name':'content' } to be inserted
+                    into meta element(s) as <meta name='name' content='content'>
+                    (ignored in xml mode)
+
+        bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added
+                    as attributes of the <body> element as <body key='value' ... >
+                    (ignored in xml mode)
+
+        script --   dictionary containing src:type pairs, <script type='text/type' src=src></script>
+
+        title --    the title of the document as a string to be inserted into
+                    a title element as <title>my title</title> (ignored in xml mode)
+
+        header --   some text to be inserted right after the <body> element
+                    (ignored in xml mode)
+
+        footer --   some text to be inserted right before the </body> element
+                    (ignored in xml mode)
+
+        charset --  a string defining the character set, will be inserted into a
+                    <meta http-equiv='Content-Type' content='text/html; charset=myset'>
+                    element (ignored in xml mode)
+
+        encoding -- a string defining the encoding, will be put into to first line of
+                    the document as <?xml version='1.0' encoding='myencoding' ?> in
+                    xml mode (ignored in html mode)
+
+        doctype --  the document type string, defaults to
+                    <!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
+                    in html mode (ignored in xml mode)"""
+
+        self._full = True
+
+        if self.mode == 'strict_html' or self.mode == 'loose_html':
+            if doctype is None:
+                doctype = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>"
+            self.header.append( doctype )
+            self.html( lang=lang )
+            self.head( )
+            if charset is not None:
+                self.meta( http_equiv='Content-Type', content="text/html; charset=%s" % charset )
+            if metainfo is not None:
+                self.metainfo( metainfo )
+            if css is not None:
+                self.css( css )
+            if title is not None:
+                self.title( title )
+            if script is not None:
+                self.scripts( script )
+            self.head.close()
+            if bodyattrs is not None:
+                self.body( **bodyattrs )
+            else:
+                self.body( )
+            if header is not None:
+                self.content.append( header )
+            if footer is not None:
+                self.footer.append( footer )
+
+        elif self.mode == 'xml':
+            if doctype is None:
+                if encoding is not None:
+                    doctype = "<?xml version='1.0' encoding='%s' ?>" % encoding
+                else:
+                    doctype = "<?xml version='1.0' ?>"
+            self.header.append( doctype )
+
+    def css( self, filelist ):
+        """This convenience function is only useful for html.
+        It adds css stylesheet(s) to the document via the <link> element."""
+      
+        if isinstance( filelist, basestring ):
+            self.link( href=filelist, rel='stylesheet', type='text/css', media='all' )
+        else:
+            for file in filelist:
+                self.link( href=file, rel='stylesheet', type='text/css', media='all' )
+
+    def metainfo( self, mydict ):
+        """This convenience function is only useful for html.
+        It adds meta information via the <meta> element, the argument is
+        a dictionary of the form { 'name':'content' }."""
+
+        if isinstance( mydict, dict ):
+            for name, content in mydict.iteritems( ):
+                self.meta( name=name, content=content )
+        else:
+            raise TypeError, "Metainfo should be called with a dictionary argument of name:content pairs."
+
+    def scripts( self, mydict ):
+        """Only useful in html, mydict is dictionary of src:type pairs will
+        be rendered as <script type='text/type' src=src></script>"""
+
+        if isinstance( mydict, dict ):
+            for src, type in mydict.iteritems( ):
+                self.script( '', src=src, type='text/%s' % type )
+        else:
+            raise TypeError, "Script should be given a dictionary of src:type pairs."
+
+
+class _oneliner:
+    """An instance of oneliner returns a string corresponding to one element.
+    This class can be used to write 'oneliners' that return a string
+    immediately so there is no need to instantiate the page class."""
+    
+    def __init__( self, case='lower' ):
+        self.case = case
+    
+    def __getattr__( self, attr ):
+        if attr.startswith("__") and attr.endswith("__"):
+            raise AttributeError, attr
+        return element( attr, case=self.case, parent=None )
+
+oneliner = _oneliner( case='lower' )
+upper_oneliner = _oneliner( case='upper' )
+
+def _argsdicts( args, mydict ):
+    """A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1."""
+    
+    if len( args ) == 0:
+        args = None, 
+    elif len( args ) == 1:
+        args = _totuple( args[0] )
+    else:
+        raise Exception, "We should have never gotten here."
+
+    mykeys = mydict.keys( )
+    myvalues = map( _totuple, mydict.values( ) )
+
+    maxlength = max( map( len, [ args ] + myvalues ) )
+
+    for i in xrange( maxlength ):
+        thisdict = { }
+        for key, value in zip( mykeys, myvalues ):
+            try:
+                thisdict[ key ] = value[i]
+            except IndexError:
+                thisdict[ key ] = value[-1]
+        try:
+            thisarg = args[i]
+        except IndexError:
+            thisarg = args[-1]
+
+        yield thisarg, thisdict
+
+def _totuple( x ):
+    """Utility stuff to convert string, int, float, None or anything to a usable tuple."""
+
+    if isinstance( x, basestring ):
+        out = x,
+    elif isinstance( x, ( int, float ) ):
+        out = str( x ),
+    elif x is None:
+        out = None,
+    else:
+        out = tuple( x )
+
+    return out
+
+def escape( text, newline=False ):
+    """Escape special html characters in text.
+
+    Anything that is not a string is returned unchanged. With newline
+    set, line breaks are additionally replaced by <br> elements."""
+
+    if isinstance( text, basestring ):
+        # '&' must be handled first, otherwise the entities produced by
+        # the replacements below would themselves be escaped again.
+        text = text.replace( '&', '&amp;' )
+        text = text.replace( '>', '&gt;' )
+        text = text.replace( '<', '&lt;' )
+        text = text.replace( '\"', '&quot;' )
+        # a single quote gets its own reference; &quot; would turn it into "
+        text = text.replace( '\'', '&#39;' )
+        if newline:
+            text = text.replace( '\n', '<br>' )
+
+    return text
+
+_escape = escape
+
+def unescape( text ):
+    """Inverse of escape (a newline -> <br> conversion is not undone)."""
+
+    if isinstance( text, basestring ):
+        # '&amp;' is decoded last: decoding it first would expose a new
+        # entity that the following replacements decode a second time,
+        # e.g. '&amp;lt;' would come out as '<' instead of '&lt;'.
+        text = text.replace( '&gt;', '>' )
+        text = text.replace( '&lt;', '<' )
+        text = text.replace( '&quot;', '\"' )
+        text = text.replace( '&#39;', '\'' )
+        text = text.replace( '&amp;', '&' )
+
+    return text
+
+class dummy:
+    """A dummy class for attaching attributes."""
+    pass
+
+doctype = dummy( )
+doctype.frameset = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Frameset//EN' 'http://www.w3.org/TR/html4/frameset.dtd'>"
+doctype.strict = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01//EN' 'http://www.w3.org/TR/html4/strict.dtd'>"
+doctype.loose = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>"
+
+class russell:
+    """A dummy class that contains anything."""
+
+    def __contains__( self, item ):
+	return True
+
+
+class MarkupError( Exception ):
+    """All our exceptions subclass this."""
+    def __str__( self ):
+	return self.message
+
+class ClosingError( MarkupError ):
+    def __init__( self, tag ):
+	self.message = "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag
+
+class OpeningError( MarkupError ):
+    def __init__( self, tag ):
+	self.message = "The element '%s' can not be opened." % tag
+
+class ArgumentError( MarkupError ):
+    def __init__( self, tag ):
+	self.message = "The element '%s' was called with more than one non-keyword argument." % tag
+
+class InvalidElementError( MarkupError ):
+    def __init__( self, tag, mode ):
+	self.message = "The element '%s' is not valid for your mode '%s'." % ( tag, mode )
+
+class DeprecationError( MarkupError ):
+    def __init__( self, tag ):
+	self.message = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag
+
+class ModeError( MarkupError ):
+    def __init__( self, mode ):
+	self.message = "Mode '%s' is invalid, possible values: strict_html, loose_html, xml." % mode
+
+class CustomizationError( MarkupError ):
+    def __init__( self ):
+        self.message = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'."
+
+if __name__ == '__main__':
+    print __doc__
diff --git a/test_tablib.py b/test_tablib.py
index 8e2454f..bc660fe 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -5,6 +5,7 @@
 
 import unittest
 
+import markup
 import tablib
 
 
@@ -182,6 +183,27 @@ class TablibTestCase(unittest.TestCase):
 
 		self.assertEqual(tsv, self.founders.tsv)
 
+	def test_html_export(self):
+
+		"""HTML export"""
+
+		html = markup.page()
+		html.table.open()
+		html.thead.open()
+
+		html.tr(markup.oneliner.th(self.founders.headers))
+		html.thead.close()
+
+		for founder in self.founders:
+
+			html.tr(markup.oneliner.td(founder))
+
+		html.table.close()
+		html = str(html)
+
+		self.assertEqual(html, self.founders.html)
+
+
 	def test_unicode_append(self):
 		"""Passes in a single unicode charecter and exports."""
 
@@ -403,6 +425,7 @@ class TablibTestCase(unittest.TestCase):
 				   ("John", "Adams", 90, "John", "Adams", 90))
 
 
+
 	def test_wipe(self):
 		"""Purge a dataset."""
 

From 99896a5f28c7c451446ec68bf0215615032b4b57 Mon Sep 17 00:00:00 2001
From: Luca Beltrame <einar@heavensinferno.net>
Date: Sun, 21 Nov 2010 13:14:47 +0100
Subject: [PATCH 02/18] Fix Databook data leaks.

---
 tablib/core.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tablib/core.py b/tablib/core.py
index bd2d4ba..e732db9 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -615,11 +615,16 @@ class Databook(object):
 	"""A book of :class:`Dataset` objects.
 	"""
 
-	def __init__(self, sets=[]):
+	def __init__(self, sets=None):
+
+		if sets is None:
+			self._datasets = list()
+		else:
+			self._datasets = sets
+
 		self._datasets = sets
 		self._register_formats()
 
-
 	def __repr__(self):
 		try:
 			return '<%s databook>' % (self.title.lower())

From e3e6b656e32c64b7f4d5713ede350bb77506650b Mon Sep 17 00:00:00 2001
From: Luca Beltrame <einar@heavensinferno.net>
Date: Sun, 21 Nov 2010 13:17:36 +0100
Subject: [PATCH 03/18] Fix the stupid mistake.

---
 tablib/core.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tablib/core.py b/tablib/core.py
index e732db9..c3013ed 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -622,7 +622,6 @@ class Databook(object):
 		else:
 			self._datasets = sets
 
-		self._datasets = sets
 		self._register_formats()
 
 	def __repr__(self):

From 6a7c6851111dcbe280c6b358c6136d1f298fda17 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Sun, 21 Nov 2010 18:49:02 -0500
Subject: [PATCH 04/18] Import path fix.

---
 tablib/formats/_html.py | 2 +-
 test_tablib.py          | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
index c68eb37..60969f6 100644
--- a/tablib/formats/_html.py
+++ b/tablib/formats/_html.py
@@ -5,7 +5,7 @@
 
 import cStringIO
 
-import markup
+from tablib.packages import markup
 import tablib
 
 title = 'html'
diff --git a/test_tablib.py b/test_tablib.py
index bc660fe..2d1f6b4 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -5,7 +5,8 @@
 
 import unittest
 
-import markup
+from tablib.packages import markup
+
 import tablib
 
 

From 7055d18a2e02f02f3eea7a770e87113d177a7a38 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Sun, 21 Nov 2010 18:53:18 -0500
Subject: [PATCH 05/18] History update.

---
 HISTORY.rst | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index c64d7b5..43a8cda 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -1,19 +1,26 @@
 History
 -------
 
+0.9.3 (2010-11-2?)
+++++++++++++++++++
+
+* Databook duplication leak fix.
+* HTML Table output.
+
+
 0.9.2 (2010-11-17)
 ++++++++++++++++++
 
-* Tanspose method added to Datasets
-* New frozen top row in Excel output
-* Pickling support for Datasets and Rows
-* Support for row/column stacking
+* Tanspose method added to Datasets.
+* New frozen top row in Excel output.
+* Pickling support for Datasets and Rows.
+* Support for row/column stacking.
 
 
 0.9.1 (2010-11-04)
 ++++++++++++++++++
 
-* Minor reference shadowing bugfix 
+* Minor reference shadowing bugfix.
 
 
 0.9.0 (2010-11-04)

From 0784d4b32c109515d6d919caf2b07e90caf5ae49 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Sun, 21 Nov 2010 18:55:45 -0500
Subject: [PATCH 06/18] Updated todo w/ new html output feature

---
 TODO.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/TODO.rst b/TODO.rst
index 231f03e..4d48313 100644
--- a/TODO.rst
+++ b/TODO.rst
@@ -1,8 +1,8 @@
+* Add seperator support to HTML out
 * Backwards-compatible OrderedDict support
 * Write more exhausive unit-tests.
 * Write stress tests.
 * Make CSV write customizable.
-* HTML Table exports.
 * Integrate django-tablib
 * Mention django-tablib in Documention
-* Dataset title usage in documentation (#17)
\ No newline at end of file
+* Dataset title usage in documentation (#17)

From 22d337790acc82e8760e1dd261c47c93297f1d21 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Sun, 21 Nov 2010 18:58:30 -0500
Subject: [PATCH 07/18] small changes to html output

---
 tablib/formats/_html.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
index 60969f6..786d5a3 100644
--- a/tablib/formats/_html.py
+++ b/tablib/formats/_html.py
@@ -3,7 +3,7 @@
 """ Tablib - HTML export support.
 """
 
-import cStringIO
+from StringIO import StringIO
 
 from tablib.packages import markup
 import tablib
@@ -12,10 +12,9 @@ title = 'html'
 extentions = ('html', )
 
 def export_set(dataset):
-
 	"""HTML representation of a Dataset."""
 
-	stream = cStringIO.StringIO()
+	stream = StringIO()
 
 	page = markup.page()
 	page.table.open()

From e3b3659ea40151eb849e3f53555900b7ba2c7d43 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Sun, 21 Nov 2010 21:32:00 -0500
Subject: [PATCH 08/18] whitespace fix

---
 tablib/formats/_json.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py
index da31b23..7f31ee5 100644
--- a/tablib/formats/_json.py
+++ b/tablib/formats/_json.py
@@ -26,11 +26,11 @@ def export_set(dataset):
 def export_book(databook):
 	"""Returns JSON representation of Databook."""
 	return json.dumps(databook._package())
-	
+
 
 def import_set(dset, in_stream):
 	"""Returns dataset from JSON stream."""
-	
+
 	dset.wipe()
 	dset.dict = json.loads(in_stream)
 
@@ -52,4 +52,4 @@ def detect(stream):
 		json.loads(stream)
 		return True
 	except ValueError:
-		return False
\ No newline at end of file
+		return False

From 22c4d185e122da6ea1c71d17af009a6c55217c39 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Sun, 21 Nov 2010 21:33:01 -0500
Subject: [PATCH 09/18] Export HTML for Databooks.

---
 tablib/formats/_html.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
index 786d5a3..13dc055 100644
--- a/tablib/formats/_html.py
+++ b/tablib/formats/_html.py
@@ -8,9 +8,12 @@ from StringIO import StringIO
 from tablib.packages import markup
 import tablib
 
+BOOK_ENDINGS = 'h3'
+
 title = 'html'
 extentions = ('html', )
 
+
 def export_set(dataset):
 	"""HTML representation of a Dataset."""
 
@@ -35,3 +38,16 @@ def export_set(dataset):
 
 	return stream.getvalue()
 
+
+def export_book(databook):
+	"""HTML representation of a Databook."""
+
+	stream = StringIO()
+
+	for i, dset in enumerate(databook._datasets):
+		title = (dset.title if dset.title else 'Set %s' % (i))
+		stream.write('<%s>%s</%s>\n' % (BOOK_ENDINGS, title, BOOK_ENDINGS))
+		stream.write(dset.html)
+		stream.write('\n')
+
+	return stream.getvalue()

From d25655588b13657c253f9c216d1cc2360a5d5e57 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Mon, 13 Dec 2010 17:08:11 -0500
Subject: [PATCH 10/18] TODO update.

---
 TODO.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/TODO.rst b/TODO.rst
index 4d48313..9f8c99f 100644
--- a/TODO.rst
+++ b/TODO.rst
@@ -1,4 +1,9 @@
 * Add seperator support to HTML out
+* Hooks System
+  - pre/post-append
+  - pre/post-import
+  - pre/post-export
+* Big Data
 * Backwards-compatible OrderedDict support
 * Write more exhausive unit-tests.
 * Write stress tests.

From 34415b89b858075323b7a2d034c06b57dd26bdc7 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Mon, 10 Jan 2011 19:28:12 -0500
Subject: [PATCH 11/18] New Year!

---
 LICENSE              | 2 +-
 docs/_themes/LICENSE | 2 +-
 docs/conf.py         | 2 +-
 fabfile.py           | 2 +-
 tablib/core.py       | 4 ++--
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/LICENSE b/LICENSE
index 717ff16..ea8c217 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2010 Kenneth Reitz.
+Copyright (c) 2011 Kenneth Reitz.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/docs/_themes/LICENSE b/docs/_themes/LICENSE
index 81f4d30..b160a8e 100644
--- a/docs/_themes/LICENSE
+++ b/docs/_themes/LICENSE
@@ -1,6 +1,6 @@
 Modifications: 
 
-Copyright (c) 2010 Kenneth Reitz.
+Copyright (c) 2011 Kenneth Reitz.
 
 
 Original Project: 
diff --git a/docs/conf.py b/docs/conf.py
index 325002c..2a642c9 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -42,7 +42,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'Tablib'
-copyright = u'2010, Kenneth Reitz. Styles (modified) &copy; Armin Ronacher'
+copyright = u'2011, Kenneth Reitz. Styles (modified) &copy; Armin Ronacher'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
diff --git a/fabfile.py b/fabfile.py
index 391bf92..6e72092 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -1,7 +1,7 @@
 import os
 from fabric.api import *
 
-
+
 def scrub():
 	""" Death to the bytecode! """
 	local('rm -fr dist build')
diff --git a/tablib/core.py b/tablib/core.py
index da49788..c7e9dd5 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -5,7 +5,7 @@
 
     This module implements the central tablib objects.
 
-    :copyright: (c) 2010 by Kenneth Reitz.
+    :copyright: (c) 2011 by Kenneth Reitz.
     :license: MIT, see LICENSE for more details.
 """
 
@@ -19,7 +19,7 @@ __version__ = '0.9.2'
 __build__ = 0x000902
 __author__ = 'Kenneth Reitz'
 __license__ = 'MIT'
-__copyright__ = 'Copyright 2010 Kenneth Reitz'
+__copyright__ = 'Copyright 2011 Kenneth Reitz'
 
 
 class Row(object):

From f81dc41a57e19926868efa036bdb9e862cd3bbbb Mon Sep 17 00:00:00 2001
From: Luca Beltrame <einar@heavensinferno.net>
Date: Tue, 11 Jan 2011 20:53:59 +0100
Subject: [PATCH 12/18] Support for sorting. Unit-tested.

---
 tablib/core.py | 20 ++++++++++++++++++++
 test_tablib.py | 15 +++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/tablib/core.py b/tablib/core.py
index c7e9dd5..5727a69 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -10,6 +10,7 @@
 """
 
 from copy import copy
+from operator import itemgetter
 
 from tablib import formats
 
@@ -528,6 +529,25 @@ class Dataset(object):
 
 		return _dset
 
+	def sort(self, col, reverse=False):
+
+		"""Sort a :class:`Dataset` by a specific column. The order can be
+		reversed by setting ``reverse`` to ``True``. Requires headers to be
+		set. Returns a new :class:`Dataset` instance where columns have been
+		sorted."""
+
+		if not self.headers:
+			raise HeadersNeeded
+
+		_sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
+		_dset = Dataset(headers=self.headers)
+
+		for item in _sorted:
+			row = [item[key] for key in self.headers]
+			_dset.append(row=row)
+
+		return _dset
+
 	def transpose(self):
 		"""Transpose a :class:`Dataset`, turning rows into columns and vice
 		versa, returning a new ``Dataset`` instance. The first row of the
diff --git a/test_tablib.py b/test_tablib.py
index 2d1f6b4..15630f2 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -425,7 +425,22 @@ class TablibTestCase(unittest.TestCase):
 		self.assertEqual(column_stacked[0],
 				   ("John", "Adams", 90, "John", "Adams", 90))
 
+	def test_sorting(self):
 
+		"""Sort columns."""
+
+		sorted_data = self.founders.sort(col="first_name")
+
+		first_row = sorted_data[0]
+		second_row = sorted_data[2]
+		third_row = sorted_data[1]
+		expected_first = self.founders[1]
+		expected_second = self.founders[2]
+		expected_third = self.founders[0]
+
+		self.assertEqual(first_row, expected_first)
+		self.assertEqual(second_row, expected_second)
+		self.assertEqual(third_row, expected_third)
 
 	def test_wipe(self):
 		"""Purge a dataset."""

From 0797ec67d4a4c6c145d86d0b7dca9d3b03c6d8e6 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Mon, 31 Jan 2011 00:58:16 -0500
Subject: [PATCH 13/18] Prepping for new release (0.9.3)

---
 HISTORY.rst    | 3 ++-
 README.rst     | 1 +
 tablib/core.py | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 43a8cda..95b9328 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -1,11 +1,12 @@
 History
 -------
 
-0.9.3 (2010-11-2?)
+0.9.3 (2011-01-31)
 ++++++++++++++++++
 
 * Databook duplication leak fix.
 * HTML Table output.
+* Added column sorting.
 
 
 0.9.2 (2010-11-17)
diff --git a/README.rst b/README.rst
index 00b6345..f974248 100644
--- a/README.rst
+++ b/README.rst
@@ -18,6 +18,7 @@ Output formats supported:
 - Excel (Sets + Books)
 - JSON (Sets + Books)
 - YAML (Sets + Books)
+- HTML (Sets)
 - TSV (Sets)
 - CSV (Sets)
 
diff --git a/tablib/core.py b/tablib/core.py
index c7e9dd5..6eb57db 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -15,8 +15,8 @@ from tablib import formats
 
 
 __title__ = 'tablib'
-__version__ = '0.9.2'
-__build__ = 0x000902
+__version__ = '0.9.3'
+__build__ = 0x000903
 __author__ = 'Kenneth Reitz'
 __license__ = 'MIT'
 __copyright__ = 'Copyright 2011 Kenneth Reitz'

From 89b431213bc74f219da48489fbaabdd02bcfe56a Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Mon, 31 Jan 2011 01:28:10 -0500
Subject: [PATCH 14/18] Sorting update for headerless datasets.

---
 tablib/core.py | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/tablib/core.py b/tablib/core.py
index 19eec3e..0de2b28 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -535,16 +535,32 @@ class Dataset(object):
 		reversed by setting ``reverse`` to ``True``. Requires headers to be
 		set. Returns a new :class:`Dataset` instance where columns have been
 		sorted."""
+		if isinstance(col, basestring):
 
-		if not self.headers:
-			raise HeadersNeeded
+			if not self.headers:
+				raise HeadersNeeded
 
-		_sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
-		_dset = Dataset(headers=self.headers)
+			_sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
+			_dset = Dataset(headers=self.headers)
+
+			for item in _sorted:
+				row = [item[key] for key in self.headers]
+				_dset.append(row=row)
+
+		else:
+			if self.headers:
+				col = self.headers[col]
+
+			_sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
+			_dset = Dataset(headers=self.headers)
+
+			for item in _sorted:
+				if self.headers:
+					row = [item[key] for key in self.headers]
+				else:
+					row = item
+				_dset.append(row=row)
 
-		for item in _sorted:
-			row = [item[key] for key in self.headers]
-			_dset.append(row=row)
 
 		return _dset
 

From a0822bc9b091ca6b613aac71afc39acf0665a1e1 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Mon, 31 Jan 2011 01:29:41 -0500
Subject: [PATCH 15/18] sorting update.

---
 tablib/core.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tablib/core.py b/tablib/core.py
index 0de2b28..9d36970 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -520,6 +520,7 @@ class Dataset(object):
 			else:
 				self._data = [Row([row]) for row in col]
 
+
 	def filter(self, tag):
 		"""Returns a new instance of the :class:`Dataset`, excluding any rows
 		that do not contain the given :ref:`tags <tags>`.
@@ -529,12 +530,14 @@ class Dataset(object):
 
 		return _dset
 
-	def sort(self, col, reverse=False):
 
-		"""Sort a :class:`Dataset` by a specific column. The order can be
-		reversed by setting ``reverse`` to ``True``. Requires headers to be
-		set. Returns a new :class:`Dataset` instance where columns have been
+	def sort(self, col, reverse=False):
+		"""Sort a :class:`Dataset` by a specific column, given string (for
+		header) or integer (for column index). The order can be reversed by
+		setting ``reverse`` to ``True``.
+		Returns a new :class:`Dataset` instance whose rows have been
 		sorted."""
+		
 		if isinstance(col, basestring):
 
 			if not self.headers:

From e8b44b57779b6374fbee3e5dc2b7dc1091e1cf5c Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Mon, 31 Jan 2011 01:33:00 -0500
Subject: [PATCH 16/18] Version bump.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e779457..c851751 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ required = []
 
 setup(
 	name='tablib',
-	version='0.9.2',
+	version='0.9.3',
 	description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)',
 	long_description=open('README.rst').read() + '\n\n' +
 	                 open('HISTORY.rst').read(),

From 5379c5683d9cb7c07f7619e544c0c2fc4efa80b1 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Mon, 31 Jan 2011 01:33:12 -0500
Subject: [PATCH 17/18] Markup license notice. PD? Really?

---
 NOTICE | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/NOTICE b/NOTICE
index 88d5d2d..c8428e8 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,6 +1,12 @@
 Tablib includes some vendorized python libraries: ordereddict, pyyaml,
 simplejson, and xlwt.
 
+Markup License
+==============
+
+Markup is in the public domain.
+
+
 
 OrderedDict License
 ===================

From 140736ff332ff164f18821ec150488b1a2092898 Mon Sep 17 00:00:00 2001
From: Kenneth Reitz <me@kennethreitz.com>
Date: Mon, 31 Jan 2011 01:34:40 -0500
Subject: [PATCH 18/18] fabfile typo.

---
 fabfile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fabfile.py b/fabfile.py
index 6e72092..391bf92 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -1,7 +1,7 @@
 import os
 from fabric.api import *
 
-os.f
+
 def scrub():
 	""" Death to the bytecode! """
 	local('rm -fr dist build')