diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..018f9b7 --- /dev/null +++ b/HACKING @@ -0,0 +1,14 @@ +Where possible, please follow PEP8 with regard to coding style. Sometimes the line +length restriction is too hard to follow, so don't bend over backwards there. + +Triple-quotes should always be """, single quotes are ' unless using " +would result in less escaping within the string. + +All modules, functions, and methods should be well documented reStructuredText for +Sphinx AutoDoc. + +All functionality should be available in pure Python. Optional C (via Cython) +implementations may be written for performance reasons, but should never +replace the Python implementation. + +Lastly, don't take yourself too seriously :) \ No newline at end of file diff --git a/docs/development.rst b/docs/development.rst index 77af330..6255d5e 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -87,7 +87,7 @@ Adding New Formats Tablib welcomes new format additions! Format suggestions include: -* Tab Seperated Values +* Tab Separated Values * MySQL Dump * HTML Table @@ -178,7 +178,7 @@ Every commit made to the **develop** branch is automatically tested and inspecte Anyone may view the build status and history at any time. - http://git.kennethreitz.com/ci/ + http://ci.kennethreitz.com/ If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server. diff --git a/docs/index.rst b/docs/index.rst index 4726d7b..77ce7cf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,8 +3,10 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Tablib: Pythonic Tabular Data -============================= +Tablib: Pythonic Tabular Datasets +================================= + +Release |version|. .. Contents: .. diff --git a/docs/install.rst b/docs/install.rst index 9b9a519..b6c3f31 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -55,7 +55,7 @@ However, if performance is important to you (and it should be), you can install $ pip install PyYAML -If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. If you're using Python 2.6+, the built-in **json** module is already optimized and in use. :: +If you're using Python 2.5, you should also install the **simplejson** module (pip will do this for you). If you're using Python 2.6+, the built-in **json** module is already optimized and in use. :: $ pip install simplejson diff --git a/docs/intro.rst b/docs/intro.rst index c2d75b2..bc4256a 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -36,6 +36,31 @@ Tablib is released under terms of `The MIT License`_. .. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php +.. _license: + +Tablib License +-------------- + +Copyright (c) 2011 Kenneth Reitz. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + .. _pythonsupport: @@ -44,8 +69,10 @@ Pythons Supported At this time, the following Python platforms are officially supported: -* Python 2.6 -* Python 2.7 +* cPython 2.5 +* cPython 2.6 +* cPython 2.7 +* PyPy-c 1.4 Support for other Pythons will be rolled out soon. diff --git a/tablib/core.py b/tablib/core.py index dfbcb6a..8ef6312 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -13,6 +13,7 @@ from copy import copy from operator import itemgetter from tablib import formats +import collections try: from collections import OrderedDict @@ -26,6 +27,7 @@ __build__ = 0x000904 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2011 Kenneth Reitz' +__docformat__ = 'restructuredtext' class Row(object): @@ -62,7 +64,7 @@ class Row(object): return {slot: [getattr(self, slot) for slot in self.__slots__]} def __setstate__(self, state): - for (k, v) in state.items(): setattr(self, k, v) + for (k, v) in list(state.items()): setattr(self, k, v) def append(self, value): self._row.append(value) @@ -88,7 +90,7 @@ class Row(object): if tag == None: return False - elif isinstance(tag, basestring): + elif isinstance(tag, str): return (tag in self.tags) else: return bool(len(set(tag) & set(self.tags))) @@ -136,6 +138,9 @@ class Dataset(object): # ('title', index) tuples self._separators = [] + + # (column, callback) tuples + self._formatters = [] try: self.headers = kwargs['headers'] @@ -155,7 +160,7 @@ class Dataset(object): def __getitem__(self, key): - if isinstance(key, basestring): + if isinstance(key, str): if key in self.headers: pos = self.headers.index(key) # get 'key' index from each data return [row[pos] for row in self._data] @@ -175,7 +180,7 @@ class Dataset(object): def __delitem__(self, key): - if isinstance(key, basestring): + if isinstance(key, str): if key in self.headers: @@ -236,13 +241,29 @@ class Dataset(object): def _package(self, dicts=True): """Packages Dataset into lists of dictionaries for transmission.""" + _data = list(self._data) + + # Execute formatters + if self._formatters: + for row_i, row in enumerate(_data): + for col, callback in self._formatters: + try: + if col is None: + for j, c in enumerate(row): + _data[row_i][j] = callback(c) + else: + _data[row_i][col] = callback(row[col]) + except IndexError: + raise InvalidDatasetIndex + + if self.headers: if dicts: - data = [OrderedDict(zip(self.headers, data_row)) for data_row in self ._data] + data = [OrderedDict(list(zip(self.headers, data_row))) for data_row in _data] else: - data = [list(self.headers)] + list(self._data) + data = [list(self.headers)] + list(_data) else: - data = [list(row) for row in self._data] + data = [list(row) for row in _data] return data @@ -257,8 +278,8 @@ class Dataset(object): else: header = [] - if len(col) == 1 and callable(col[0]): - col = map(col[0], self._data) + if len(col) == 1 and isinstance(col[0], collections.Callable): + col = list(map(col[0], self._data)) col = tuple(header + col) return col @@ -347,9 +368,9 @@ class Dataset(object): # if list of objects elif isinstance(pickle[0], dict): self.wipe() - self.headers = pickle[0].keys() + self.headers = list(pickle[0].keys()) for row in pickle: - self.append(Row(row.values())) + self.append(Row(list(row.values()))) else: raise UnsupportedFormat @@ -385,6 +406,7 @@ class Dataset(object): """ pass + @property def tsv(): """A TSV representation of the :class:`Dataset` object. The top row will contain @@ -469,6 +491,29 @@ class Dataset(object): self.insert_separator(index, text) + def add_formatter(self, col, handler): + """Adds a :ref:`formatter` to the :class:`Dataset`. + + .. versionadded:: 0.9.5 + :param col: column to. Accepts index int or header str. + :param handler: reference to callback function to execute + against each cell value. + """ + + if isinstance(col, str): + if col in self.headers: + col = self.headers.index(col) # get 'key' index from each data + else: + raise KeyError + + if not col > self.width: + self._formatters.append((col, handler)) + else: + raise InvalidDatasetIndex + + return True + + def insert(self, index, row=None, col=None, header=None, tags=list()): """Inserts a row or column to the :class:`Dataset` at the given index. @@ -504,8 +549,8 @@ class Dataset(object): col = list(col) # Callable Columns... - if len(col) == 1 and callable(col[0]): - col = map(col[0], self._data) + if len(col) == 1 and isinstance(col[0], collections.Callable): + col = list(map(col[0], self._data)) col = self._clean_col(col) self._validate(col=col) @@ -543,7 +588,7 @@ class Dataset(object): Returns a new :class:`Dataset` instance where columns have been sorted.""" - if isinstance(col, basestring): + if isinstance(col, str): if not self.headers: raise HeadersNeeded @@ -658,12 +703,14 @@ class Dataset(object): return _dset + def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() self.__headers = None + class Databook(object): """A book of :class:`Dataset` objects. """ @@ -748,7 +795,7 @@ def import_set(stream): format.import_set(data, stream) return data - except AttributeError, e: + except AttributeError as e: return None @@ -758,6 +805,9 @@ class InvalidDatasetType(Exception): class InvalidDimensions(Exception): "Invalid size" + +class InvalidDatasetIndex(Exception): + "Outside of Dataset size" class HeadersNeeded(Exception): "Header parameter must be given when appending a column in this Dataset." diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 147df31..305026d 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -3,11 +3,11 @@ """ Tablib - formats """ -import _csv as csv -import _json as json -import _xls as xls -import _yaml as yaml -import _tsv as tsv -import _html as html +from . import _csv as csv +from . import _json as json +from . import _xls as xls +from . import _yaml as yaml +from . import _tsv as tsv +from . import _html as html available = (json, xls, yaml, csv, tsv, html) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 2c74a1c..46a26d5 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -3,7 +3,13 @@ """ Tablib - CSV Support. """ -import cStringIO +import sys +if sys.version_info.major > 2: + from io import StringIO +else: + from cStringIO import StringIO + + import csv import os @@ -17,7 +23,7 @@ extentions = ('csv',) def export_set(dataset): """Returns CSV representation of Dataset.""" - stream = cStringIO.StringIO() + stream = StringIO() _csv = csv.writer(stream) for row in dataset._package(dicts=False): @@ -31,7 +37,7 @@ def import_set(dset, in_stream, headers=True): dset.wipe() - rows = csv.reader(in_stream.split()) + rows = csv.reader(in_stream.splitlines()) for i, row in enumerate(rows): if (i == 0) and (headers): diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index 13dc055..64be2af 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -3,9 +3,16 @@ """ Tablib - HTML export support. """ -from StringIO import StringIO +import sys + + +if sys.version_info.major > 2: + from io import StringIO + from tablib.packages import markup3 as markup +else: + from cStringIO import StringIO + from tablib.packages import markup -from tablib.packages import markup import tablib BOOK_ENDINGS = 'h3' diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 76a5f07..ab10d32 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -3,7 +3,12 @@ """ Tablib - TSV (Tab Separated Values) Support. """ -import cStringIO +import sys +if sys.version_info.major > 2: + from io import StringIO +else: + from cStringIO import StringIO + import csv import os @@ -17,7 +22,7 @@ extentions = ('tsv',) def export_set(dataset): """Returns a TSV representation of Dataset.""" - stream = cStringIO.StringIO() + stream = StringIO() _tsv = csv.writer(stream, delimiter='\t') for row in dataset._package(dicts=False): diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 717a6d5..6f59da8 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -3,12 +3,19 @@ """ Tablib - XLS Support. """ -import cStringIO +import io +import sys try: - import xlwt + if sys.version_info.major > 2: + import xlwt3 as xlwt + else: + import xlwt except ImportError: - import tablib.packages.xlwt as xlwt + if sys.version_info.major > 2: + import tablib.packages.xlwt3 as xlwt + else: + import tablib.packages.xlwt as xlwt title = 'xls' @@ -23,11 +30,11 @@ def export_set(dataset): """Returns XLS representation of Dataset.""" wb = xlwt.Workbook(encoding='utf8') - ws = wb.add_sheet(dataset.title if dataset.title else 'Tabbed Dataset') + ws = wb.add_sheet(dataset.title if dataset.title else 'Tablib Dataset') dset_sheet(dataset, ws) - stream = cStringIO.StringIO() + stream = io.BytesIO() wb.save(stream) return stream.getvalue() @@ -43,7 +50,7 @@ def export_book(databook): dset_sheet(dset, ws) - stream = cStringIO.StringIO() + stream = io.BytesIO() wb.save(stream) return stream.getvalue() diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index 3f2f8b7..0124c94 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -3,10 +3,16 @@ """ Tablib - YAML Support. """ +import sys + try: import yaml except ImportError: - import tablib.packages.yaml as yaml + if sys.version_info.major > 2: + import tablib.packages.yaml3 as yaml + else: + import tablib.packages.yaml as yaml + import tablib diff --git a/tablib/helpers.py b/tablib/helpers.py deleted file mode 100644 index 718d159..0000000 --- a/tablib/helpers.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- - -""" Tablib - General Helpers. -""" - -import sys - - -class Struct(object): - """Your attributes are belong to us.""" - - def __init__(self, **entries): - self.__dict__.update(entries) - - def __getitem__(self, key): - return getattr(self, key, None) - - def dictionary(self): - """Returns dictionary representation of object.""" - return self.__dict__ - - def items(self): - """Returns items within object.""" - return self.__dict__.items() - - def keys(self): - """Returns keys within object.""" - return self.__dict__.keys() - - - -def piped(): - """Returns piped input via stdin, else False.""" - with sys.stdin as stdin: - # TTY is only way to detect if stdin contains data - return stdin.read() if not stdin.isatty() else None - diff --git a/tablib/packages/anyjson.py b/tablib/packages/anyjson.py index 6603751..a7d1a5f 100644 --- a/tablib/packages/anyjson.py +++ b/tablib/packages/anyjson.py @@ -53,7 +53,7 @@ class _JsonImplementation(object): """Incapsulates a JSON implementation""" def __init__(self, modspec): - modinfo = dict(zip(_fields, modspec)) + modinfo = dict(list(zip(_fields, modspec))) # No try block. We want importerror to end up at caller module = self._attempt_load(modinfo["modname"]) @@ -64,9 +64,9 @@ class _JsonImplementation(object): self._encode_error = modinfo["encerror"] self._decode_error = modinfo["decerror"] - if isinstance(modinfo["encerror"], basestring): + if isinstance(modinfo["encerror"], str): self._encode_error = getattr(module, modinfo["encerror"]) - if isinstance(modinfo["decerror"], basestring): + if isinstance(modinfo["decerror"], str): self._decode_error = getattr(module, modinfo["decerror"]) self.name = modinfo["modname"] @@ -82,7 +82,7 @@ class _JsonImplementation(object): TypeError if the object could not be serialized.""" try: return self._encode(data) - except self._encode_error, exc: + except self._encode_error as exc: raise TypeError(*exc.args) def deserialize(self, s): @@ -90,7 +90,7 @@ class _JsonImplementation(object): ValueError if the string vould not be parsed.""" try: return self._decode(s) - except self._decode_error, exc: + except self._decode_error as exc: raise ValueError(*exc.args) diff --git a/tablib/packages/markup3.py b/tablib/packages/markup3.py new file mode 100644 index 0000000..1973c00 --- /dev/null +++ b/tablib/packages/markup3.py @@ -0,0 +1,484 @@ +# This code is in the public domain, it comes +# with absolutely no warranty and you can do +# absolutely whatever you want with it. + +__date__ = '17 May 2007' +__version__ = '1.7' +__doc__= """ +This is markup.py - a Python module that attempts to +make it easier to generate HTML/XML from a Python program +in an intuitive, lightweight, customizable and pythonic way. + +The code is in the public domain. + +Version: %s as of %s. + +Documentation and further info is at http://markup.sourceforge.net/ + +Please send bug reports, feature requests, enhancement +ideas or questions to nogradi at gmail dot com. + +Installation: drop markup.py somewhere into your Python path. +""" % ( __version__, __date__ ) + +import string + +class element: + """This class handles the addition of a new element.""" + + def __init__( self, tag, case='lower', parent=None ): + self.parent = parent + + if case == 'lower': + self.tag = tag.lower( ) + else: + self.tag = tag.upper( ) + + def __call__( self, *args, **kwargs ): + if len( args ) > 1: + raise ArgumentError( self.tag ) + + # if class_ was defined in parent it should be added to every element + if self.parent is not None and self.parent.class_ is not None: + if 'class_' not in kwargs: + kwargs['class_'] = self.parent.class_ + + if self.parent is None and len( args ) == 1: + x = [ self.render( self.tag, False, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ] + return '\n'.join( x ) + elif self.parent is None and len( args ) == 0: + x = [ self.render( self.tag, True, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ] + return '\n'.join( x ) + + if self.tag in self.parent.twotags: + for myarg, mydict in _argsdicts( args, kwargs ): + self.render( self.tag, False, myarg, mydict ) + elif self.tag in self.parent.onetags: + if len( args ) == 0: + for myarg, mydict in _argsdicts( args, kwargs ): + self.render( self.tag, True, myarg, mydict ) # here myarg is always None, because len( args ) = 0 + else: + raise ClosingError( self.tag ) + elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + else: + raise InvalidElementError( self.tag, self.parent.mode ) + + def render( self, tag, single, between, kwargs ): + """Append the actual tags to content.""" + + out = "<%s" % tag + for key, value in kwargs.items( ): + if value is not None: # when value is None that means stuff like <... checked> + key = key.strip('_') # strip this so class_ will mean class, etc. + if key == 'http_equiv': # special cases, maybe change _ to - overall? + key = 'http-equiv' + elif key == 'accept_charset': + key = 'accept-charset' + out = "%s %s=\"%s\"" % ( out, key, escape( value ) ) + else: + out = "%s %s" % ( out, key ) + if between is not None: + out = "%s>%s" % ( out, between, tag ) + else: + if single: + out = "%s />" % out + else: + out = "%s>" % out + if self.parent is not None: + self.parent.content.append( out ) + else: + return out + + def close( self ): + """Append a closing tag unless element has only opening tag.""" + + if self.tag in self.parent.twotags: + self.parent.content.append( "" % self.tag ) + elif self.tag in self.parent.onetags: + raise ClosingError( self.tag ) + elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + + def open( self, **kwargs ): + """Append an opening tag.""" + + if self.tag in self.parent.twotags or self.tag in self.parent.onetags: + self.render( self.tag, False, None, kwargs ) + elif self.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + +class page: + """This is our main class representing a document. Elements are added + as attributes of an instance of this class.""" + + def __init__( self, mode='strict_html', case='lower', onetags=None, twotags=None, separator='\n', class_=None ): + """Stuff that effects the whole document. + + mode -- 'strict_html' for HTML 4.01 (default) + 'html' alias for 'strict_html' + 'loose_html' to allow some deprecated elements + 'xml' to allow arbitrary elements + + case -- 'lower' element names will be printed in lower case (default) + 'upper' they will be printed in upper case + + onetags -- list or tuple of valid elements with opening tags only + twotags -- list or tuple of valid elements with both opening and closing tags + these two keyword arguments may be used to select + the set of valid elements in 'xml' mode + invalid elements will raise appropriate exceptions + + separator -- string to place between added elements, defaults to newline + + class_ -- a class that will be added to every element if defined""" + + valid_onetags = [ "AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM" ] + valid_twotags = [ "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON", + "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET", + "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS", + "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP", + "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE", + "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR", + "TT", "UL", "VAR" ] + deprecated_onetags = [ "BASEFONT", "ISINDEX" ] + deprecated_twotags = [ "APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U" ] + + self.header = [ ] + self.content = [ ] + self.footer = [ ] + self.case = case + self.separator = separator + + # init( ) sets it to True so we know that has to be printed at the end + self._full = False + self.class_= class_ + + if mode == 'strict_html' or mode == 'html': + self.onetags = valid_onetags + self.onetags += list(map( str.lower, self.onetags )) + self.twotags = valid_twotags + self.twotags += list(map( str.lower, self.twotags )) + self.deptags = deprecated_onetags + deprecated_twotags + self.deptags += list(map( str.lower, self.deptags )) + self.mode = 'strict_html' + elif mode == 'loose_html': + self.onetags = valid_onetags + deprecated_onetags + self.onetags += list(map( str.lower, self.onetags )) + self.twotags = valid_twotags + deprecated_twotags + self.twotags += list(map( str.lower, self.twotags )) + self.mode = mode + elif mode == 'xml': + if onetags and twotags: + self.onetags = onetags + self.twotags = twotags + elif ( onetags and not twotags ) or ( twotags and not onetags ): + raise CustomizationError( ) + else: + self.onetags = russell( ) + self.twotags = russell( ) + self.mode = mode + else: + raise ModeError( mode ) + + def __getattr__( self, attr ): + if attr.startswith("__") and attr.endswith("__"): + raise AttributeError(attr) + return element( attr, case=self.case, parent=self ) + + def __str__( self ): + + if self._full and ( self.mode == 'strict_html' or self.mode == 'loose_html' ): + end = [ '', '' ] + else: + end = [ ] + + return self.separator.join( self.header + self.content + self.footer + end ) + + def __call__( self, escape=False ): + """Return the document as a string. + + escape -- False print normally + True replace < and > by < and > + the default escape sequences in most browsers""" + + if escape: + return _escape( self.__str__( ) ) + else: + return self.__str__( ) + + def add( self, text ): + """This is an alias to addcontent.""" + self.addcontent( text ) + + def addfooter( self, text ): + """Add some text to the bottom of the document""" + self.footer.append( text ) + + def addheader( self, text ): + """Add some text to the top of the document""" + self.header.append( text ) + + def addcontent( self, text ): + """Add some text to the main part of the document""" + self.content.append( text ) + + + def init( self, lang='en', css=None, metainfo=None, title=None, header=None, + footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None ): + """This method is used for complete documents with appropriate + doctype, encoding, title, etc information. For an HTML/XML snippet + omit this method. + + lang -- language, usually a two character string, will appear + as in html mode (ignored in xml mode) + + css -- Cascading Style Sheet filename as a string or a list of + strings for multiple css files (ignored in xml mode) + + metainfo -- a dictionary in the form { 'name':'content' } to be inserted + into meta element(s) as + (ignored in xml mode) + + bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added + as attributes of the element as + (ignored in xml mode) + + script -- dictionary containing src:type pairs, + + title -- the title of the document as a string to be inserted into + a title element as my title (ignored in xml mode) + + header -- some text to be inserted right after the element + (ignored in xml mode) + + footer -- some text to be inserted right before the element + (ignored in xml mode) + + charset -- a string defining the character set, will be inserted into a + + element (ignored in xml mode) + + encoding -- a string defining the encoding, will be put into to first line of + the document as in + xml mode (ignored in html mode) + + doctype -- the document type string, defaults to + + in html mode (ignored in xml mode)""" + + self._full = True + + if self.mode == 'strict_html' or self.mode == 'loose_html': + if doctype is None: + doctype = "" + self.header.append( doctype ) + self.html( lang=lang ) + self.head( ) + if charset is not None: + self.meta( http_equiv='Content-Type', content="text/html; charset=%s" % charset ) + if metainfo is not None: + self.metainfo( metainfo ) + if css is not None: + self.css( css ) + if title is not None: + self.title( title ) + if script is not None: + self.scripts( script ) + self.head.close() + if bodyattrs is not None: + self.body( **bodyattrs ) + else: + self.body( ) + if header is not None: + self.content.append( header ) + if footer is not None: + self.footer.append( footer ) + + elif self.mode == 'xml': + if doctype is None: + if encoding is not None: + doctype = "" % encoding + else: + doctype = "" + self.header.append( doctype ) + + def css( self, filelist ): + """This convenience function is only useful for html. + It adds css stylesheet(s) to the document via the element.""" + + if isinstance( filelist, str ): + self.link( href=filelist, rel='stylesheet', type='text/css', media='all' ) + else: + for file in filelist: + self.link( href=file, rel='stylesheet', type='text/css', media='all' ) + + def metainfo( self, mydict ): + """This convenience function is only useful for html. + It adds meta information via the element, the argument is + a dictionary of the form { 'name':'content' }.""" + + if isinstance( mydict, dict ): + for name, content in mydict.items( ): + self.meta( name=name, content=content ) + else: + raise TypeError("Metainfo should be called with a dictionary argument of name:content pairs.") + + def scripts( self, mydict ): + """Only useful in html, mydict is dictionary of src:type pairs will + be rendered as """ + + if isinstance( mydict, dict ): + for src, type in mydict.items( ): + self.script( '', src=src, type='text/%s' % type ) + else: + raise TypeError("Script should be given a dictionary of src:type pairs.") + + +class _oneliner: + """An instance of oneliner returns a string corresponding to one element. + This class can be used to write 'oneliners' that return a string + immediately so there is no need to instantiate the page class.""" + + def __init__( self, case='lower' ): + self.case = case + + def __getattr__( self, attr ): + if attr.startswith("__") and attr.endswith("__"): + raise AttributeError(attr) + return element( attr, case=self.case, parent=None ) + +oneliner = _oneliner( case='lower' ) +upper_oneliner = _oneliner( case='upper' ) + +def _argsdicts( args, mydict ): + """A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1.""" + + if len( args ) == 0: + args = None, + elif len( args ) == 1: + args = _totuple( args[0] ) + else: + raise Exception("We should have never gotten here.") + + mykeys = list(mydict.keys( )) + myvalues = list(map( _totuple, list(mydict.values( )) )) + + maxlength = max( list(map( len, [ args ] + myvalues )) ) + + for i in range( maxlength ): + thisdict = { } + for key, value in zip( mykeys, myvalues ): + try: + thisdict[ key ] = value[i] + except IndexError: + thisdict[ key ] = value[-1] + try: + thisarg = args[i] + except IndexError: + thisarg = args[-1] + + yield thisarg, thisdict + +def _totuple( x ): + """Utility stuff to convert string, int, float, None or anything to a usable tuple.""" + + if isinstance( x, str ): + out = x, + elif isinstance( x, ( int, float ) ): + out = str( x ), + elif x is None: + out = None, + else: + out = tuple( x ) + + return out + +def escape( text, newline=False ): + """Escape special html characters.""" + + if isinstance( text, str ): + if '&' in text: + text = text.replace( '&', '&' ) + if '>' in text: + text = text.replace( '>', '>' ) + if '<' in text: + text = text.replace( '<', '<' ) + if '\"' in text: + text = text.replace( '\"', '"' ) + if '\'' in text: + text = text.replace( '\'', '"' ) + if newline: + if '\n' in text: + text = text.replace( '\n', '
' ) + + return text + +_escape = escape + +def unescape( text ): + """Inverse of escape.""" + + if isinstance( text, str ): + if '&' in text: + text = text.replace( '&', '&' ) + if '>' in text: + text = text.replace( '>', '>' ) + if '<' in text: + text = text.replace( '<', '<' ) + if '"' in text: + text = text.replace( '"', '\"' ) + + return text + +class dummy: + """A dummy class for attaching attributes.""" + pass + +doctype = dummy( ) +doctype.frameset = "" +doctype.strict = "" +doctype.loose = "" + +class russell: + """A dummy class that contains anything.""" + + def __contains__( self, item ): + return True + + +class MarkupError( Exception ): + """All our exceptions subclass this.""" + def __str__( self ): + return self.message + +class ClosingError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag + +class OpeningError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' can not be opened." % tag + +class ArgumentError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' was called with more than one non-keyword argument." % tag + +class InvalidElementError( MarkupError ): + def __init__( self, tag, mode ): + self.message = "The element '%s' is not valid for your mode '%s'." % ( tag, mode ) + +class DeprecationError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag + +class ModeError( MarkupError ): + def __init__( self, mode ): + self.message = "Mode '%s' is invalid, possible values: strict_html, loose_html, xml." % mode + +class CustomizationError( MarkupError ): + def __init__( self ): + self.message = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'." + +if __name__ == '__main__': + print(__doc__) diff --git a/tablib/packages/ordereddict.py b/tablib/packages/ordereddict.py index 5b0303f..a5b896d 100644 --- a/tablib/packages/ordereddict.py +++ b/tablib/packages/ordereddict.py @@ -1,127 +1,127 @@ -# Copyright (c) 2009 Raymond Hettinger -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. - -from UserDict import DictMixin - -class OrderedDict(dict, DictMixin): - - def __init__(self, *args, **kwds): - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__end - except AttributeError: - self.clear() - self.update(*args, **kwds) - - def clear(self): - self.__end = end = [] - end += [None, end, end] # sentinel node for doubly linked list - self.__map = {} # key --> [key, prev, next] - dict.clear(self) - - def __setitem__(self, key, value): - if key not in self: - end = self.__end - curr = end[1] - curr[2] = end[1] = self.__map[key] = [key, curr, end] - dict.__setitem__(self, key, value) - - def __delitem__(self, key): - dict.__delitem__(self, key) - key, prev, next = self.__map.pop(key) - prev[2] = next - next[1] = prev - - def __iter__(self): - end = self.__end - curr = end[2] - while curr is not end: - yield curr[0] - curr = curr[2] - - def __reversed__(self): - end = self.__end - curr = end[1] - while curr is not end: - yield curr[0] - curr = curr[1] - - def popitem(self, last=True): - if not self: - raise KeyError('dictionary is empty') - if last: - key = reversed(self).next() - else: - key = iter(self).next() - value = self.pop(key) - return key, value - - def __reduce__(self): - items = [[k, self[k]] for k in self] - tmp = self.__map, self.__end - del self.__map, self.__end - inst_dict = vars(self).copy() - self.__map, self.__end = tmp - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def keys(self): - return list(self) - - setdefault = DictMixin.setdefault - update = DictMixin.update - pop = DictMixin.pop - values = DictMixin.values - items = DictMixin.items - iterkeys = DictMixin.iterkeys - itervalues = DictMixin.itervalues - iteritems = DictMixin.iteritems - - def __repr__(self): - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) - - def copy(self): - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - d = cls() - for key in iterable: - d[key] = value - return d - - def __eq__(self, other): - if isinstance(other, OrderedDict): - if len(self) != len(other): - return False - for p, q in zip(self.items(), other.items()): - if p != q: - return False - return True - return dict.__eq__(self, other) - - def __ne__(self, other): - return not self == other +# Copyright (c) 2009 Raymond Hettinger +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +from UserDict import DictMixin + +class OrderedDict(dict, DictMixin): + + def __init__(self, *args, **kwds): + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__end + except AttributeError: + self.clear() + self.update(*args, **kwds) + + def clear(self): + self.__end = end = [] + end += [None, end, end] # sentinel node for doubly linked list + self.__map = {} # key --> [key, prev, next] + dict.clear(self) + + def __setitem__(self, key, value): + if key not in self: + end = self.__end + curr = end[1] + curr[2] = end[1] = self.__map[key] = [key, curr, end] + dict.__setitem__(self, key, value) + + def __delitem__(self, key): + dict.__delitem__(self, key) + key, prev, next = self.__map.pop(key) + prev[2] = next + next[1] = prev + + def __iter__(self): + end = self.__end + curr = end[2] + while curr is not end: + yield curr[0] + curr = curr[2] + + def __reversed__(self): + end = self.__end + curr = end[1] + while curr is not end: + yield curr[0] + curr = curr[1] + + def popitem(self, last=True): + if not self: + raise KeyError('dictionary is empty') + if last: + key = next(reversed(self)) + else: + key = next(iter(self)) + value = self.pop(key) + return key, value + + def __reduce__(self): + items = [[k, self[k]] for k in self] + tmp = self.__map, self.__end + del self.__map, self.__end + inst_dict = vars(self).copy() + self.__map, self.__end = tmp + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def keys(self): + return list(self) + + setdefault = DictMixin.setdefault + update = DictMixin.update + pop = DictMixin.pop + values = DictMixin.values + items = DictMixin.items + iterkeys = DictMixin.iterkeys + itervalues = DictMixin.itervalues + iteritems = DictMixin.iteritems + + def __repr__(self): + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) + + def copy(self): + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + if isinstance(other, OrderedDict): + if len(self) != len(other): + return False + for p, q in zip(list(self.items()), list(other.items())): + if p != q: + return False + return True + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other diff --git a/tablib/packages/xlwt3/BIFFRecords.py b/tablib/packages/xlwt3/BIFFRecords.py new file mode 100644 index 0000000..79be326 --- /dev/null +++ b/tablib/packages/xlwt3/BIFFRecords.py @@ -0,0 +1,2392 @@ +from struct import pack +from .UnicodeUtils import upack1, upack2 +import sys + +class SharedStringTable(object): + _SST_ID = 0x00FC + _CONTINUE_ID = 0x003C + + def __init__(self, encoding): + self.encoding = encoding + self._str_indexes = {} + self._tally = [] + self._add_calls = 0 + # Following 3 attrs are used for temporary storage in the + # get_biff_record() method and methods called by it. The pseudo- + # initialisation here is for documentation purposes only. + self._sst_record = None + self._continues = None + self._current_piece = None + + def add_str(self, s): + if self.encoding != 'ascii' and not isinstance(s, str): + s = str(s, self.encoding) + self._add_calls += 1 + if s not in self._str_indexes: + idx = len(self._str_indexes) + self._str_indexes[s] = idx + self._tally.append(1) + else: + idx = self._str_indexes[s] + self._tally[idx] += 1 + return idx + + def del_str(self, idx): + # This is called when we are replacing the contents of a string cell. + assert self._tally[idx] > 0 + self._tally[idx] -= 1 + self._add_calls -= 1 + + def str_index(self, s): + return self._str_indexes[s] + + def get_biff_record(self): + self._sst_record = b'' + self._continues = [None, None] + self._current_piece = pack(' 0x2020: # limit for BIFF7/8 + chunks = [] + pos = 0 + while pos < len(data): + chunk_pos = pos + 0x2020 + chunk = data[pos:chunk_pos] + chunks.append(chunk) + pos = chunk_pos + continues = pack('<2H', self._REC_ID, len(chunks[0])) + chunks[0] + for chunk in chunks[1:]: + continues += pack('<2H%ds'%len(chunk), 0x003C, len(chunk), chunk) + # 0x003C -- CONTINUE record id + return continues + else: + return self.get_rec_header() + data + + +class Biff8BOFRecord(BiffRecord): + """ + Offset Size Contents + 0 2 Version, contains 0600H for BIFF8 and BIFF8X + 2 2 Type of the following data: + 0005H = Workbook globals + 0006H = Visual Basic module + 0010H = Worksheet + 0020H = Chart + 0040H = Macro sheet + 0100H = Workspace file + 4 2 Build identifier + 6 2 Build year + 8 4 File history flags + 12 4 Lowest Excel version that can read all records in this file + """ + _REC_ID = 0x0809 + # stream types + BOOK_GLOBAL = 0x0005 + VB_MODULE = 0x0006 + WORKSHEET = 0x0010 + CHART = 0x0020 + MACROSHEET = 0x0040 + WORKSPACE = 0x0100 + + def __init__(self, rec_type): + version = 0x0600 + build = 0x0DBB + year = 0x07CC + file_hist_flags = 0x00 + ver_can_read = 0x06 + + self._rec_data = pack('<4H2I', version, rec_type, build, year, file_hist_flags, ver_can_read) + + +class InteraceHdrRecord(BiffRecord): + _REC_ID = 0x00E1 + + def __init__(self): + self._rec_data = pack('BB', 0xB0, 0x04) + + +class InteraceEndRecord(BiffRecord): + _REC_ID = 0x00E2 + + def __init__(self): + self._rec_data = b'' + + +class MMSRecord(BiffRecord): + _REC_ID = 0x00C1 + + def __init__(self): + self._rec_data = pack('> 15 + c = low_15 | high_15 + passwd_hash ^= c + passwd_hash ^= len(plaintext) + passwd_hash ^= 0xCE4B + return passwd_hash + + def __init__(self, passwd = b""): + self._rec_data = pack('