diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..018f9b7 --- /dev/null +++ b/HACKING @@ -0,0 +1,14 @@ +Where possible, please follow PEP8 with regard to coding style. Sometimes the line +length restriction is too hard to follow, so don't bend over backwards there. + +Triple-quotes should always be """, single quotes are ' unless using " +would result in less escaping within the string. + +All modules, functions, and methods should be well documented in reStructuredText for +Sphinx AutoDoc. + +All functionality should be available in pure Python. Optional C (via Cython) +implementations may be written for performance reasons, but should never +replace the Python implementation. + +Lastly, don't take yourself too seriously :) \ No newline at end of file diff --git a/HISTORY.rst b/HISTORY.rst index 4842a45..78b8581 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,15 @@ History ------- +0.9.5 (2011-03-24) +++++++++++++++++++ + +* Python 3.1, Python 3.2 Support (same code base!) +* Formatter callback support +* Various bug fixes + + + 0.9.4 (2011-02-18) ++++++++++++++++++ diff --git a/docs/development.rst b/docs/development.rst index 77af330..6255d5e 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -87,7 +87,7 @@ Adding New Formats Tablib welcomes new format additions! Format suggestions include: -* Tab Seperated Values +* Tab Separated Values * MySQL Dump * HTML Table @@ -178,7 +178,7 @@ Every commit made to the **develop** branch is automatically tested and inspecte Anyone may view the build status and history at any time. - http://git.kennethreitz.com/ci/ + http://ci.kennethreitz.com/ If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server. 
diff --git a/docs/index.rst b/docs/index.rst index 4726d7b..77ce7cf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,8 +3,10 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Tablib: Pythonic Tabular Data -============================= +Tablib: Pythonic Tabular Datasets +================================= + +Release |version|. .. Contents: .. diff --git a/docs/install.rst b/docs/install.rst index 9b9a519..b6c3f31 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -55,7 +55,7 @@ However, if performance is important to you (and it should be), you can install $ pip install PyYAML -If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. If you're using Python 2.6+, the built-in **json** module is already optimized and in use. :: +If you're using Python 2.5, you should also install the **simplejson** module (pip will do this for you). If you're using Python 2.6+, the built-in **json** module is already optimized and in use. :: $ pip install simplejson diff --git a/docs/intro.rst b/docs/intro.rst index c2d75b2..719133d 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -36,6 +36,31 @@ Tablib is released under terms of `The MIT License`_. .. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php +.. _license: + +Tablib License +-------------- + +Copyright (c) 2011 Kenneth Reitz. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + .. _pythonsupport: @@ -44,8 +69,12 @@ Pythons Supported At this time, the following Python platforms are officially supported: -* Python 2.6 -* Python 2.7 +* CPython 2.5 +* CPython 2.6 +* CPython 2.7 +* CPython 3.1 +* CPython 3.2 +* PyPy-c 1.4 Support for other Pythons will be rolled out soon. 
diff --git a/setup.py b/setup.py index c9974b0..2aa532a 100644 --- a/setup.py +++ b/setup.py @@ -17,12 +17,12 @@ if sys.argv[-1] == "publish": required = [] -if sys.version_info < (2,6): +if sys.version_info[:2] < (2,6): required.append('simplejson') setup( name='tablib', - version='0.9.4', + version='0.9.5', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), @@ -46,7 +46,8 @@ setup( 'Programming Language :: Python :: 2.5', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', - # 'Programming Language :: Python :: 3.0', - # 'Programming Language :: Python :: 3.1', + 'Programming Language :: Python :: 3.0', + 'Programming Language :: Python :: 3.1', + 'Programming Language :: Python :: 3.2', ), ) diff --git a/tablib/__init__.py b/tablib/__init__.py index c2205b6..dc85527 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -1,8 +1,16 @@ """ Tablib. 
""" -from tablib.core import ( - Databook, Dataset, detect, import_set, - InvalidDatasetType, InvalidDimensions, UnsupportedFormat -) +import sys +if sys.version_info[0:1] > (2, 5): + from tablib.core import ( + Databook, Dataset, detect, import_set, + InvalidDatasetType, InvalidDimensions, UnsupportedFormat + ) + +else: + from tablib.core25 import ( + Databook, Dataset, detect, import_set, + InvalidDatasetType, InvalidDimensions, UnsupportedFormat + ) diff --git a/tablib/core.py b/tablib/core.py index dfbcb6a..8ef6312 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -13,6 +13,7 @@ from copy import copy from operator import itemgetter from tablib import formats +import collections try: from collections import OrderedDict @@ -26,6 +27,7 @@ __build__ = 0x000904 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2011 Kenneth Reitz' +__docformat__ = 'restructuredtext' class Row(object): @@ -62,7 +64,7 @@ class Row(object): return {slot: [getattr(self, slot) for slot in self.__slots__]} def __setstate__(self, state): - for (k, v) in state.items(): setattr(self, k, v) + for (k, v) in list(state.items()): setattr(self, k, v) def append(self, value): self._row.append(value) @@ -88,7 +90,7 @@ class Row(object): if tag == None: return False - elif isinstance(tag, basestring): + elif isinstance(tag, str): return (tag in self.tags) else: return bool(len(set(tag) & set(self.tags))) @@ -136,6 +138,9 @@ class Dataset(object): # ('title', index) tuples self._separators = [] + + # (column, callback) tuples + self._formatters = [] try: self.headers = kwargs['headers'] @@ -155,7 +160,7 @@ class Dataset(object): def __getitem__(self, key): - if isinstance(key, basestring): + if isinstance(key, str): if key in self.headers: pos = self.headers.index(key) # get 'key' index from each data return [row[pos] for row in self._data] @@ -175,7 +180,7 @@ class Dataset(object): def __delitem__(self, key): - if isinstance(key, basestring): + if isinstance(key, str): if 
key in self.headers: @@ -236,13 +241,29 @@ class Dataset(object): def _package(self, dicts=True): """Packages Dataset into lists of dictionaries for transmission.""" + _data = list(self._data) + + # Execute formatters + if self._formatters: + for row_i, row in enumerate(_data): + for col, callback in self._formatters: + try: + if col is None: + for j, c in enumerate(row): + _data[row_i][j] = callback(c) + else: + _data[row_i][col] = callback(row[col]) + except IndexError: + raise InvalidDatasetIndex + + if self.headers: if dicts: - data = [OrderedDict(zip(self.headers, data_row)) for data_row in self ._data] + data = [OrderedDict(list(zip(self.headers, data_row))) for data_row in _data] else: - data = [list(self.headers)] + list(self._data) + data = [list(self.headers)] + list(_data) else: - data = [list(row) for row in self._data] + data = [list(row) for row in _data] return data @@ -257,8 +278,8 @@ class Dataset(object): else: header = [] - if len(col) == 1 and callable(col[0]): - col = map(col[0], self._data) + if len(col) == 1 and isinstance(col[0], collections.Callable): + col = list(map(col[0], self._data)) col = tuple(header + col) return col @@ -347,9 +368,9 @@ class Dataset(object): # if list of objects elif isinstance(pickle[0], dict): self.wipe() - self.headers = pickle[0].keys() + self.headers = list(pickle[0].keys()) for row in pickle: - self.append(Row(row.values())) + self.append(Row(list(row.values()))) else: raise UnsupportedFormat @@ -385,6 +406,7 @@ class Dataset(object): """ pass + @property def tsv(): """A TSV representation of the :class:`Dataset` object. The top row will contain @@ -469,6 +491,29 @@ class Dataset(object): self.insert_separator(index, text) + def add_formatter(self, col, handler): + """Adds a :ref:`formatter` to the :class:`Dataset`. + + .. versionadded:: 0.9.5 + :param col: column to. Accepts index int or header str. + :param handler: reference to callback function to execute + against each cell value. 
+ """ + + if isinstance(col, str): + if col in self.headers: + col = self.headers.index(col) # get 'key' index from each data + else: + raise KeyError + + if not col > self.width: + self._formatters.append((col, handler)) + else: + raise InvalidDatasetIndex + + return True + + def insert(self, index, row=None, col=None, header=None, tags=list()): """Inserts a row or column to the :class:`Dataset` at the given index. @@ -504,8 +549,8 @@ class Dataset(object): col = list(col) # Callable Columns... - if len(col) == 1 and callable(col[0]): - col = map(col[0], self._data) + if len(col) == 1 and isinstance(col[0], collections.Callable): + col = list(map(col[0], self._data)) col = self._clean_col(col) self._validate(col=col) @@ -543,7 +588,7 @@ class Dataset(object): Returns a new :class:`Dataset` instance where columns have been sorted.""" - if isinstance(col, basestring): + if isinstance(col, str): if not self.headers: raise HeadersNeeded @@ -658,12 +703,14 @@ class Dataset(object): return _dset + def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() self.__headers = None + class Databook(object): """A book of :class:`Dataset` objects. """ @@ -748,7 +795,7 @@ def import_set(stream): format.import_set(data, stream) return data - except AttributeError, e: + except AttributeError as e: return None @@ -758,6 +805,9 @@ class InvalidDatasetType(Exception): class InvalidDimensions(Exception): "Invalid size" + +class InvalidDatasetIndex(Exception): + "Outside of Dataset size" class HeadersNeeded(Exception): "Header parameter must be given when appending a column in this Dataset." diff --git a/tablib/core25.py b/tablib/core25.py new file mode 100644 index 0000000..c8352a6 --- /dev/null +++ b/tablib/core25.py @@ -0,0 +1,818 @@ +# -*- coding: utf-8 -*- +u""" + tablib.core + ~~~~~~~~~~~ + + This module implements the central Tablib objects. + + :copyright: (c) 2011 by Kenneth Reitz. + :license: MIT, see LICENSE for more details. 
+""" + +from copy import copy +from operator import itemgetter + +from tablib import formats +import collections +from itertools import izip +from itertools import imap + +try: + from collections import OrderedDict +except ImportError: + from tablib.packages.ordereddict import OrderedDict + + +__title__ = u'tablib' +__version__ = u'0.9.4' +__build__ = 0x000904 +__author__ = u'Kenneth Reitz' +__license__ = u'MIT' +__copyright__ = u'Copyright 2011 Kenneth Reitz' +__docformat__ = u'restructuredtext' + + +class Row(object): + u"""Internal Row object. Mainly used for filtering.""" + + __slots__ = [u'tuple', u'_row', u'tags'] + + def __init__(self, row=list(), tags=list()): + self._row = list(row) + self.tags = list(tags) + + def __iter__(self): + return (col for col in self._row) + + def __len__(self): + return len(self._row) + + def __repr__(self): + return repr(self._row) + + def __getslice__(self, i, j): + return self._row[i,j] + + def __getitem__(self, i): + return self._row[i] + + def __setitem__(self, i, value): + self._row[i] = value + + def __delitem__(self, i): + del self._row[i] + + def __getstate__(self): + return {slot: [getattr(self, slot) for slot in self.__slots__]} + + def __setstate__(self, state): + for (k, v) in list(state.items()): setattr(self, k, v) + + def append(self, value): + self._row.append(value) + + def insert(self, index, value): + self._row.insert(index, value) + + def __contains__(self, item): + return (item in self._row) + + @property + def tuple(self): + u'''Tuple representation of :class:`Row`.''' + return tuple(self._row) + + @property + def list(self): + u'''List representation of :class:`Row`.''' + return list(self._row) + + def has_tag(self, tag): + u"""Returns true if current row contains tag.""" + + if tag == None: + return False + elif isinstance(tag, basestring): + return (tag in self.tags) + else: + return bool(len(set(tag) & set(self.tags))) + + + + +class Dataset(object): + u"""The :class:`Dataset` object is the heart of 
Tablib. It provides all core + functionality. + + Usually you create a :class:`Dataset` instance in your main module, and append + rows and columns as you collect data. :: + + data = tablib.Dataset() + data.headers = ('name', 'age') + + for (name, age) in some_collector(): + data.append((name, age)) + + You can also set rows and headers upon instantiation. This is useful if dealing + with dozens or hundres of :class:`Dataset` objects. :: + + headers = ('first_name', 'last_name') + data = [('John', 'Adams'), ('George', 'Washington')] + + data = tablib.Dataset(*data, headers=headers) + + + :param \*args: (optional) list of rows to populate Dataset + :param headers: (optional) list strings for Dataset header row + + + .. admonition:: Format Attributes Definition + + If you look at the code, the various output/import formats are not + defined within the :class:`Dataset` object. To add support for a new format, see + :ref:`Adding New Formats `. + + """ + + def __init__(self, *args, **kwargs): + self._data = list(Row(arg) for arg in args) + self.__headers = None + + # ('title', index) tuples + self._separators = [] + + # (column, callback) tuples + self._formatters = [] + + try: + self.headers = kwargs[u'headers'] + except KeyError: + self.headers = None + + try: + self.title = kwargs[u'title'] + except KeyError: + self.title = None + + self._register_formats() + + + def __len__(self): + return self.height + + + def __getitem__(self, key): + if isinstance(key, basestring): + if key in self.headers: + pos = self.headers.index(key) # get 'key' index from each data + return [row[pos] for row in self._data] + else: + raise KeyError + else: + _results = self._data[key] + if isinstance(_results, Row): + return _results.tuple + else: + return [result.tuple for result in _results] + + + def __setitem__(self, key, value): + self._validate(value) + self._data[key] = Row(value) + + + def __delitem__(self, key): + if isinstance(key, basestring): + + if key in self.headers: + + pos = 
self.headers.index(key) + del self.headers[pos] + + for i, row in enumerate(self._data): + + del row[pos] + self._data[i] = row + else: + raise KeyError + else: + del self._data[key] + + + def __repr__(self): + try: + return u'<%s dataset>' % (self.title.lower()) + except AttributeError: + return u'' + + + @classmethod + def _register_formats(cls): + u"""Adds format properties.""" + for fmt in formats.available: + try: + try: + setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set)) + except AttributeError: + setattr(cls, fmt.title, property(fmt.export_set)) + + except AttributeError: + pass + + + def _validate(self, row=None, col=None, safety=False): + u"""Assures size of every row in dataset is of proper proportions.""" + if row: + is_valid = (len(row) == self.width) if self.width else True + elif col: + if len(col) < 1: + is_valid = True + else: + is_valid = (len(col) == self.height) if self.height else True + else: + is_valid = all((len(x) == self.width for x in self._data)) + + if is_valid: + return True + else: + if not safety: + raise InvalidDimensions + return False + + + def _package(self, dicts=True): + u"""Packages Dataset into lists of dictionaries for transmission.""" + + _data = list(self._data) + + # Execute formatters + if self._formatters: + for row_i, row in enumerate(_data): + for col, callback in self._formatters: + try: + if col is None: + for j, c in enumerate(row): + _data[row_i][j] = callback(c) + else: + _data[row_i][col] = callback(row[col]) + except IndexError: + raise InvalidDatasetIndex + + + if self.headers: + if dicts: + data = [OrderedDict(list(izip(self.headers, data_row))) for data_row in _data] + else: + data = [list(self.headers)] + list(_data) + else: + data = [list(row) for row in _data] + + return data + + + def _clean_col(self, col): + u"""Prepares the given column for insert/append.""" + + col = list(col) + + if self.headers: + header = [col.pop(0)] + else: + header = [] + + if len(col) == 1 and hasattr(col[0], 
'__call__'): + col = list(imap(col[0], self._data)) + col = tuple(header + col) + + return col + + + @property + def height(self): + u"""The number of rows currently in the :class:`Dataset`. + Cannot be directly modified. + """ + return len(self._data) + + + @property + def width(self): + u"""The number of columns currently in the :class:`Dataset`. + Cannot be directly modified. + """ + + try: + return len(self._data[0]) + except IndexError: + try: + return len(self.headers) + except TypeError: + return 0 + + + def _get_headers(self): + u"""An *optional* list of strings to be used for header rows and attribute names. + + This must be set manually. The given list length must equal :class:`Dataset.width`. + + """ + return self.__headers + + + def _set_headers(self, collection): + u"""Validating headers setter.""" + self._validate(collection) + if collection: + try: + self.__headers = list(collection) + except TypeError: + raise TypeError + else: + self.__headers = None + + headers = property(_get_headers, _set_headers) + + def _get_dict(self): + u"""A native Python representation of the :class:`Dataset` object. If headers have + been set, a list of Python dictionaries will be returned. If no headers have been set, + a list of tuples (rows) will be returned instead. + + A dataset object can also be imported by setting the `Dataset.dict` attribute: :: + + data = tablib.Dataset() + data.json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + + """ + return self._package() + + + def _set_dict(self, pickle): + u"""A native Python representation of the Dataset object. If headers have been + set, a list of Python dictionaries will be returned. If no headers have been + set, a list of tuples (rows) will be returned instead. + + A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. 
:: + + data = tablib.Dataset() + data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] + + """ + + if not len(pickle): + return + + # if list of rows + if isinstance(pickle[0], list): + self.wipe() + for row in pickle: + self.append(Row(row)) + + # if list of objects + elif isinstance(pickle[0], dict): + self.wipe() + self.headers = list(pickle[0].keys()) + for row in pickle: + self.append(Row(list(row.values()))) + else: + raise UnsupportedFormat + + dict = property(_get_dict, _set_dict) + + + @property + def xls(): + u"""An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`seperators`. Cannot be set. + + .. admonition:: Binary Warning + + :class:`Dataset.xls` contains binary data, so make sure to write in binary mode:: + + with open('output.xls', 'wb') as f: + f.write(data.xls)' + """ + pass + + + @property + def csv(): + u"""A CSV representation of the :class:`Dataset` object. The top row will contain + headers, if they have been set. Otherwise, the top row will contain + the first row of the dataset. + + A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. :: + + data = tablib.Dataset() + data.csv = 'age, first_name, last_name\\n90, John, Adams' + + Import assumes (for now) that headers exist. + """ + pass + + + @property + def tsv(): + u"""A TSV representation of the :class:`Dataset` object. The top row will contain + headers, if they have been set. Otherwise, the top row will contain + the first row of the dataset. + + A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. :: + + data = tablib.Dataset() + data.tsv = 'age\tfirst_name\tlast_name\\n90\tJohn\tAdams' + + Import assumes (for now) that headers exist. + """ + + @property + def yaml(): + u"""A YAML representation of the :class:`Dataset` object. If headers have been + set, a YAML list of objects will be returned. If no headers have + been set, a YAML list of lists (rows) will be returned instead. 
+ + A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: + + data = tablib.Dataset() + data.yaml = '- {age: 90, first_name: John, last_name: Adams}' + + Import assumes (for now) that headers exist. + """ + pass + + + @property + def json(): + u"""A JSON representation of the :class:`Dataset` object. If headers have been + set, a JSON list of objects will be returned. If no headers have + been set, a JSON list of lists (rows) will be returned instead. + + A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: + + data = tablib.Dataset() + data.json = '[{age: 90, first_name: "John", liast_name: "Adams"}]' + + Import assumes (for now) that headers exist. + """ + + @property + def html(): + u"""A HTML table representation of the :class:`Dataset` object. If + headers have been set, they will be used as table headers. + + ..notice:: This method can be used for export only. + """ + pass + + + def append(self, row=None, col=None, header=None, tags=list()): + u"""Adds a row or column to the :class:`Dataset`. + Usage is :class:`Dataset.insert` for documentation. + """ + + if row is not None: + self.insert(self.height, row=row, tags=tags) + elif col is not None: + self.insert(self.width, col=col, header=header) + + + def insert_separator(self, index, text=u'-'): + u"""Adds a separator to :class:`Dataset` at given index.""" + + sep = (index, text) + self._separators.append(sep) + + + def append_separator(self, text=u'-'): + u"""Adds a :ref:`seperator ` to the :class:`Dataset`.""" + + # change offsets if headers are or aren't defined + if not self.headers: + index = self.height if self.height else 0 + else: + index = (self.height + 1) if self.height else 1 + + self.insert_separator(index, text) + + + def add_formatter(self, col, handler): + u"""Adds a :ref:`formatter` to the :class:`Dataset`. + + .. versionadded:: 0.9.5 + :param col: column to. Accepts index int or header str. 
+ :param handler: reference to callback function to execute + against each cell value. + """ + + if isinstance(col, basestring): + if col in self.headers: + col = self.headers.index(col) # get 'key' index from each data + else: + raise KeyError + + if not col > self.width: + self._formatters.append((col, handler)) + else: + raise InvalidDatasetIndex + + return True + + + def insert(self, index, row=None, col=None, header=None, tags=list()): + u"""Inserts a row or column to the :class:`Dataset` at the given index. + + Rows and columns inserted must be the correct size (height or width). + + The default behaviour is to insert the given row to the :class:`Dataset` + object at the given index. If the ``col`` parameter is given, however, + a new column will be insert to the :class:`Dataset` object instead. + + You can also insert a column of a single callable object, which will + add a new column with the return values of the callable each as an + item in the column. :: + + data.append(col=random.randint) + + See :ref:`dyncols` for an in-depth example. + + .. versionchanged:: 0.9.0 + If inserting a column, and :class:`Dataset.headers` is set, the + header attribute must be set, and will be considered the header for + that row. + + .. versionadded:: 0.9.0 + If inserting a row, you can add :ref:`tags ` to the row you are inserting. + This gives you the ability to :class:`filter ` your + :class:`Dataset` later. + + """ + if row: + self._validate(row) + self._data.insert(index, Row(row, tags=tags)) + elif col: + col = list(col) + + # Callable Columns... 
+ if len(col) == 1 and hasattr(col[0], '__call__'): + col = list(imap(col[0], self._data)) + + col = self._clean_col(col) + self._validate(col=col) + + if self.headers: + # pop the first item off, add to headers + if not header: + raise HeadersNeeded() + self.headers.insert(index, header) + + if self.height and self.width: + + for i, row in enumerate(self._data): + + row.insert(index, col[i]) + self._data[i] = row + else: + self._data = [Row([row]) for row in col] + + + def filter(self, tag): + u"""Returns a new instance of the :class:`Dataset`, excluding any rows + that do not contain the given :ref:`tags `. + """ + _dset = copy(self) + _dset._data = [row for row in _dset._data if row.has_tag(tag)] + + return _dset + + + def sort(self, col, reverse=False): + u"""Sort a :class:`Dataset` by a specific column, given string (for + header) or integer (for column index). The order can be reversed by + setting ``reverse`` to ``True``. + Returns a new :class:`Dataset` instance where columns have been + sorted.""" + + if isinstance(col, basestring): + + if not self.headers: + raise HeadersNeeded + + _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) + _dset = Dataset(headers=self.headers) + + for item in _sorted: + row = [item[key] for key in self.headers] + _dset.append(row=row) + + else: + if self.headers: + col = self.headers[col] + + _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) + _dset = Dataset(headers=self.headers) + + for item in _sorted: + if self.headers: + row = [item[key] for key in self.headers] + else: + row = item + _dset.append(row=row) + + + return _dset + + + def transpose(self): + u"""Transpose a :class:`Dataset`, turning rows into columns and vice + versa, returning a new ``Dataset`` instance. 
The first row of the + original instance becomes the new header row.""" + + # Don't transpose if there is no data + if not self: + return + + _dset = Dataset() + # The first element of the headers stays in the headers, + # it is our "hinge" on which we rotate the data + new_headers = [self.headers[0]] + self[self.headers[0]] + + _dset.headers = new_headers + for column in self.headers: + + if column == self.headers[0]: + # It's in the headers, so skip it + continue + + # Adding the column name as now they're a regular column + row_data = [column] + self[column] + row_data = Row(row_data) + _dset.append(row=row_data) + + return _dset + + + def stack_rows(self, other): + u"""Stack two :class:`Dataset` instances together by + joining at the row level, and return new combined + ``Dataset`` instance.""" + + if not isinstance(other, Dataset): + return + + if self.width != other.width: + raise InvalidDimensions + + # Copy the source data + _dset = copy(self) + + rows_to_stack = [row for row in _dset._data] + other_rows = [row for row in other._data] + + rows_to_stack.extend(other_rows) + _dset._data = rows_to_stack + + return _dset + + + def stack_columns(self, other): + u"""Stack two :class:`Dataset` instances together by + joining at the column level, and return a new + combined ``Dataset`` instance. 
If either ``Dataset`` + has headers set, than the other must as well.""" + + if not isinstance(other, Dataset): + return + + if self.headers or other.headers: + if not self.headers or not other.headers: + raise HeadersNeeded + + if self.height != other.height: + raise InvalidDimensions + + try: + new_headers = self.headers + other.headers + except TypeError: + new_headers = None + + _dset = Dataset() + + for column in self.headers: + _dset.append(col=self[column]) + + for column in other.headers: + _dset.append(col=other[column]) + + _dset.headers = new_headers + + return _dset + + + def wipe(self): + u"""Removes all content and headers from the :class:`Dataset` object.""" + self._data = list() + self.__headers = None + + + +class Databook(object): + u"""A book of :class:`Dataset` objects. + """ + + def __init__(self, sets=None): + + if sets is None: + self._datasets = list() + else: + self._datasets = sets + + self._register_formats() + + def __repr__(self): + try: + return u'<%s databook>' % (self.title.lower()) + except AttributeError: + return u'' + + + def wipe(self): + u"""Removes all :class:`Dataset` objects from the :class:`Databook`.""" + self._datasets = [] + + + @classmethod + def _register_formats(cls): + u"""Adds format properties.""" + for fmt in formats.available: + try: + try: + setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book)) + except AttributeError: + setattr(cls, fmt.title, property(fmt.export_book)) + + except AttributeError: + pass + + + def add_sheet(self, dataset): + u"""Adds given :class:`Dataset` to the :class:`Databook`.""" + if type(dataset) is Dataset: + self._datasets.append(dataset) + else: + raise InvalidDatasetType + + + def _package(self): + u"""Packages :class:`Databook` for delivery.""" + collector = [] + for dset in self._datasets: + collector.append(OrderedDict( + title = dset.title, + data = dset.dict + )) + return collector + + + @property + def size(self): + u"""The number of the :class:`Dataset` objects 
within :class:`Databook`.""" + return len(self._datasets) + + +def detect(stream): + u"""Return (format, stream) of given stream.""" + for fmt in formats.available: + try: + if fmt.detect(stream): + return (fmt, stream) + except AttributeError: + pass + return (None, stream) + + +def import_set(stream): + u"""Return dataset of given stream.""" + (format, stream) = detect(stream) + + try: + data = Dataset() + format.import_set(data, stream) + return data + + except AttributeError, e: + return None + + +class InvalidDatasetType(Exception): + u"Only Datasets can be added to a DataBook" + + +class InvalidDimensions(Exception): + u"Invalid size" + +class InvalidDatasetIndex(Exception): + u"Outside of Dataset size" + +class HeadersNeeded(Exception): + u"Header parameter must be given when appending a column in this Dataset." + +class UnsupportedFormat(NotImplementedError): + u"Format is not supported" diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 147df31..305026d 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -3,11 +3,11 @@ """ Tablib - formats """ -import _csv as csv -import _json as json -import _xls as xls -import _yaml as yaml -import _tsv as tsv -import _html as html +from . import _csv as csv +from . import _json as json +from . import _xls as xls +from . import _yaml as yaml +from . import _tsv as tsv +from . import _html as html available = (json, xls, yaml, csv, tsv, html) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 2c74a1c..4b1dc02 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -3,7 +3,13 @@ """ Tablib - CSV Support. 
""" -import cStringIO +import sys +if sys.version_info[0] > 2: + from io import StringIO +else: + from cStringIO import StringIO + + import csv import os @@ -17,7 +23,7 @@ extentions = ('csv',) def export_set(dataset): """Returns CSV representation of Dataset.""" - stream = cStringIO.StringIO() + stream = StringIO() _csv = csv.writer(stream) for row in dataset._package(dicts=False): @@ -31,7 +37,7 @@ def import_set(dset, in_stream, headers=True): dset.wipe() - rows = csv.reader(in_stream.split()) + rows = csv.reader(in_stream.splitlines()) for i, row in enumerate(rows): if (i == 0) and (headers): diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index 13dc055..485536c 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -3,9 +3,16 @@ """ Tablib - HTML export support. """ -from StringIO import StringIO +import sys + + +if sys.version_info[0] > 2: + from io import StringIO + from tablib.packages import markup3 as markup +else: + from cStringIO import StringIO + from tablib.packages import markup -from tablib.packages import markup import tablib BOOK_ENDINGS = 'h3' diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py index 262c627..8d498df 100644 --- a/tablib/formats/_json.py +++ b/tablib/formats/_json.py @@ -3,8 +3,13 @@ """ Tablib - JSON Support """ -import tablib.core -from tablib.packages import anyjson +import tablib + +import sys +if sys.version_info[:2] > (2, 5): + from tablib.packages import anyjson +else: + from tablib.packages import anyjson25 as anyjson @@ -34,7 +39,7 @@ def import_book(dbook, in_stream): dbook.wipe() for sheet in anyjson.deserialize(in_stream): - data = tablib.core.Dataset() + data = tablib.Dataset() data.title = sheet['title'] data.dict = sheet['data'] dbook.add_sheet(data) diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 76a5f07..acf28da 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -3,7 +3,12 @@ """ Tablib - TSV (Tab Separated Values) Support. 
""" -import cStringIO +import sys +if sys.version_info[0] > 2: + from io import StringIO +else: + from cStringIO import StringIO + import csv import os @@ -17,7 +22,7 @@ extentions = ('tsv',) def export_set(dataset): """Returns a TSV representation of Dataset.""" - stream = cStringIO.StringIO() + stream = StringIO() _tsv = csv.writer(stream, delimiter='\t') for row in dataset._package(dicts=False): diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 717a6d5..d820250 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -3,14 +3,19 @@ """ Tablib - XLS Support. """ -import cStringIO +import sys -try: - import xlwt -except ImportError: + +if sys.version_info[0] > 2: + from io import BytesIO + import tablib.packages.xlwt3 as xlwt + +else: + from cStringIO import StringIO as BytesIO import tablib.packages.xlwt as xlwt + title = 'xls' extentions = ('xls',) @@ -23,11 +28,11 @@ def export_set(dataset): """Returns XLS representation of Dataset.""" wb = xlwt.Workbook(encoding='utf8') - ws = wb.add_sheet(dataset.title if dataset.title else 'Tabbed Dataset') + ws = wb.add_sheet(dataset.title if dataset.title else 'Tablib Dataset') dset_sheet(dataset, ws) - stream = cStringIO.StringIO() + stream = BytesIO() wb.save(stream) return stream.getvalue() @@ -43,7 +48,7 @@ def export_book(databook): dset_sheet(dset, ws) - stream = cStringIO.StringIO() + stream = BytesIO() wb.save(stream) return stream.getvalue() diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index 3f2f8b7..66800a7 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -3,10 +3,16 @@ """ Tablib - YAML Support. 
""" +import sys + try: import yaml except ImportError: - import tablib.packages.yaml as yaml + if sys.version_info[0] > 2: + import tablib.packages.yaml3 as yaml + else: + import tablib.packages.yaml as yaml + import tablib @@ -40,7 +46,7 @@ def import_book(dbook, in_stream): dbook.wipe() for sheet in yaml.load(in_stream): - data = tablib.core.Dataset() + data = tablib.Dataset() data.title = sheet['title'] data.dict = sheet['data'] dbook.add_sheet(data) diff --git a/tablib/helpers.py b/tablib/helpers.py deleted file mode 100644 index 718d159..0000000 --- a/tablib/helpers.py +++ /dev/null @@ -1,37 +0,0 @@ -# -*- coding: utf-8 -*- - -""" Tablib - General Helpers. -""" - -import sys - - -class Struct(object): - """Your attributes are belong to us.""" - - def __init__(self, **entries): - self.__dict__.update(entries) - - def __getitem__(self, key): - return getattr(self, key, None) - - def dictionary(self): - """Returns dictionary representation of object.""" - return self.__dict__ - - def items(self): - """Returns items within object.""" - return self.__dict__.items() - - def keys(self): - """Returns keys within object.""" - return self.__dict__.keys() - - - -def piped(): - """Returns piped input via stdin, else False.""" - with sys.stdin as stdin: - # TTY is only way to detect if stdin contains data - return stdin.read() if not stdin.isatty() else None - diff --git a/tablib/packages/anyjson.py b/tablib/packages/anyjson.py index 6603751..a7d1a5f 100644 --- a/tablib/packages/anyjson.py +++ b/tablib/packages/anyjson.py @@ -53,7 +53,7 @@ class _JsonImplementation(object): """Incapsulates a JSON implementation""" def __init__(self, modspec): - modinfo = dict(zip(_fields, modspec)) + modinfo = dict(list(zip(_fields, modspec))) # No try block. 
We want importerror to end up at caller module = self._attempt_load(modinfo["modname"]) @@ -64,9 +64,9 @@ class _JsonImplementation(object): self._encode_error = modinfo["encerror"] self._decode_error = modinfo["decerror"] - if isinstance(modinfo["encerror"], basestring): + if isinstance(modinfo["encerror"], str): self._encode_error = getattr(module, modinfo["encerror"]) - if isinstance(modinfo["decerror"], basestring): + if isinstance(modinfo["decerror"], str): self._decode_error = getattr(module, modinfo["decerror"]) self.name = modinfo["modname"] @@ -82,7 +82,7 @@ class _JsonImplementation(object): TypeError if the object could not be serialized.""" try: return self._encode(data) - except self._encode_error, exc: + except self._encode_error as exc: raise TypeError(*exc.args) def deserialize(self, s): @@ -90,7 +90,7 @@ class _JsonImplementation(object): ValueError if the string vould not be parsed.""" try: return self._decode(s) - except self._decode_error, exc: + except self._decode_error as exc: raise ValueError(*exc.args) diff --git a/tablib/packages/anyjson25.py b/tablib/packages/anyjson25.py new file mode 100644 index 0000000..ad6fc40 --- /dev/null +++ b/tablib/packages/anyjson25.py @@ -0,0 +1,118 @@ +u""" +Wraps the best available JSON implementation available in a common interface +""" + +__version__ = u"0.2.0" +__author__ = u"Rune Halvorsen " +__homepage__ = u"http://bitbucket.org/runeh/anyjson/" +__docformat__ = u"restructuredtext" + +u""" + +.. function:: serialize(obj) + + Serialize the object to JSON. + +.. function:: deserialize(str) + + Deserialize JSON-encoded object to a Python object. + +.. function:: force_implementation(name) + + Load a specific json module. This is useful for testing and not much else + +.. attribute:: implementation + + The json implementation object. This is probably not useful to you, + except to get the name of the implementation in use. The name is + available through `implementation.name`. 
+""" + +import sys +from itertools import izip + +implementation = None + +u""" +.. data:: _modules + + List of known json modules, and the names of their serialize/unserialize + methods, as well as the exception they throw. Exception can be either + an exception class or a string. +""" +_modules = [(u"cjson", u"encode", u"EncodeError", u"decode", u"DecodeError"), + (u"jsonlib2", u"write", u"WriteError", u"read", u"ReadError"), + (u"jsonlib", u"write", u"WriteError", u"read", u"ReadError"), + (u"simplejson", u"dumps", TypeError, u"loads", ValueError), + (u"json", u"dumps", TypeError, u"loads", ValueError), + (u"django.utils.simplejson", u"dumps", TypeError, u"loads", + ValueError)] +_fields = (u"modname", u"encoder", u"encerror", u"decoder", u"decerror") + + +class _JsonImplementation(object): + u"""Incapsulates a JSON implementation""" + + def __init__(self, modspec): + modinfo = dict(list(izip(_fields, modspec))) + + # No try block. We want importerror to end up at caller + module = self._attempt_load(modinfo[u"modname"]) + + self.implementation = modinfo[u"modname"] + self._encode = getattr(module, modinfo[u"encoder"]) + self._decode = getattr(module, modinfo[u"decoder"]) + self._encode_error = modinfo[u"encerror"] + self._decode_error = modinfo[u"decerror"] + + if isinstance(modinfo[u"encerror"], unicode): + self._encode_error = getattr(module, modinfo[u"encerror"]) + if isinstance(modinfo[u"decerror"], unicode): + self._decode_error = getattr(module, modinfo[u"decerror"]) + + self.name = modinfo[u"modname"] + + def _attempt_load(self, modname): + u"""Attempt to load module name modname, returning it on success, + throwing ImportError if module couldn't be imported""" + __import__(modname) + return sys.modules[modname] + + def serialize(self, data): + u"""Serialize the datastructure to json. Returns a string. 
Raises + TypeError if the object could not be serialized.""" + try: + return self._encode(data) + except self._encode_error, exc: + raise TypeError(*exc.args) + + def deserialize(self, s): + u"""deserialize the string to python data types. Raises + ValueError if the string vould not be parsed.""" + try: + return self._decode(s) + except self._decode_error, exc: + raise ValueError(*exc.args) + + +def force_implementation(modname): + u"""Forces anyjson to use a specific json module if it's available""" + global implementation + for name, spec in [(e[0], e) for e in _modules]: + if name == modname: + implementation = _JsonImplementation(spec) + return + raise ImportError(u"No module named: %s" % modname) + + +for modspec in _modules: + try: + implementation = _JsonImplementation(modspec) + break + except ImportError: + pass +else: + raise ImportError(u"No supported JSON module found") + +serialize = lambda value: implementation.serialize(value) +deserialize = lambda value: implementation.deserialize(value) diff --git a/tablib/packages/markup3.py b/tablib/packages/markup3.py new file mode 100644 index 0000000..1973c00 --- /dev/null +++ b/tablib/packages/markup3.py @@ -0,0 +1,484 @@ +# This code is in the public domain, it comes +# with absolutely no warranty and you can do +# absolutely whatever you want with it. + +__date__ = '17 May 2007' +__version__ = '1.7' +__doc__= """ +This is markup.py - a Python module that attempts to +make it easier to generate HTML/XML from a Python program +in an intuitive, lightweight, customizable and pythonic way. + +The code is in the public domain. + +Version: %s as of %s. + +Documentation and further info is at http://markup.sourceforge.net/ + +Please send bug reports, feature requests, enhancement +ideas or questions to nogradi at gmail dot com. + +Installation: drop markup.py somewhere into your Python path. 
+""" % ( __version__, __date__ ) + +import string + +class element: + """This class handles the addition of a new element.""" + + def __init__( self, tag, case='lower', parent=None ): + self.parent = parent + + if case == 'lower': + self.tag = tag.lower( ) + else: + self.tag = tag.upper( ) + + def __call__( self, *args, **kwargs ): + if len( args ) > 1: + raise ArgumentError( self.tag ) + + # if class_ was defined in parent it should be added to every element + if self.parent is not None and self.parent.class_ is not None: + if 'class_' not in kwargs: + kwargs['class_'] = self.parent.class_ + + if self.parent is None and len( args ) == 1: + x = [ self.render( self.tag, False, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ] + return '\n'.join( x ) + elif self.parent is None and len( args ) == 0: + x = [ self.render( self.tag, True, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ] + return '\n'.join( x ) + + if self.tag in self.parent.twotags: + for myarg, mydict in _argsdicts( args, kwargs ): + self.render( self.tag, False, myarg, mydict ) + elif self.tag in self.parent.onetags: + if len( args ) == 0: + for myarg, mydict in _argsdicts( args, kwargs ): + self.render( self.tag, True, myarg, mydict ) # here myarg is always None, because len( args ) = 0 + else: + raise ClosingError( self.tag ) + elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + else: + raise InvalidElementError( self.tag, self.parent.mode ) + + def render( self, tag, single, between, kwargs ): + """Append the actual tags to content.""" + + out = "<%s" % tag + for key, value in kwargs.items( ): + if value is not None: # when value is None that means stuff like <... checked> + key = key.strip('_') # strip this so class_ will mean class, etc. + if key == 'http_equiv': # special cases, maybe change _ to - overall? 
+ key = 'http-equiv' + elif key == 'accept_charset': + key = 'accept-charset' + out = "%s %s=\"%s\"" % ( out, key, escape( value ) ) + else: + out = "%s %s" % ( out, key ) + if between is not None: + out = "%s>%s" % ( out, between, tag ) + else: + if single: + out = "%s />" % out + else: + out = "%s>" % out + if self.parent is not None: + self.parent.content.append( out ) + else: + return out + + def close( self ): + """Append a closing tag unless element has only opening tag.""" + + if self.tag in self.parent.twotags: + self.parent.content.append( "" % self.tag ) + elif self.tag in self.parent.onetags: + raise ClosingError( self.tag ) + elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + + def open( self, **kwargs ): + """Append an opening tag.""" + + if self.tag in self.parent.twotags or self.tag in self.parent.onetags: + self.render( self.tag, False, None, kwargs ) + elif self.mode == 'strict_html' and self.tag in self.parent.deptags: + raise DeprecationError( self.tag ) + +class page: + """This is our main class representing a document. Elements are added + as attributes of an instance of this class.""" + + def __init__( self, mode='strict_html', case='lower', onetags=None, twotags=None, separator='\n', class_=None ): + """Stuff that effects the whole document. 
+ + mode -- 'strict_html' for HTML 4.01 (default) + 'html' alias for 'strict_html' + 'loose_html' to allow some deprecated elements + 'xml' to allow arbitrary elements + + case -- 'lower' element names will be printed in lower case (default) + 'upper' they will be printed in upper case + + onetags -- list or tuple of valid elements with opening tags only + twotags -- list or tuple of valid elements with both opening and closing tags + these two keyword arguments may be used to select + the set of valid elements in 'xml' mode + invalid elements will raise appropriate exceptions + + separator -- string to place between added elements, defaults to newline + + class_ -- a class that will be added to every element if defined""" + + valid_onetags = [ "AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM" ] + valid_twotags = [ "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON", + "CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET", + "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS", + "KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP", + "OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE", + "SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR", + "TT", "UL", "VAR" ] + deprecated_onetags = [ "BASEFONT", "ISINDEX" ] + deprecated_twotags = [ "APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U" ] + + self.header = [ ] + self.content = [ ] + self.footer = [ ] + self.case = case + self.separator = separator + + # init( ) sets it to True so we know that has to be printed at the end + self._full = False + self.class_= class_ + + if mode == 'strict_html' or mode == 'html': + self.onetags = valid_onetags + self.onetags += list(map( str.lower, self.onetags )) + self.twotags = valid_twotags + self.twotags += list(map( 
str.lower, self.twotags )) + self.deptags = deprecated_onetags + deprecated_twotags + self.deptags += list(map( str.lower, self.deptags )) + self.mode = 'strict_html' + elif mode == 'loose_html': + self.onetags = valid_onetags + deprecated_onetags + self.onetags += list(map( str.lower, self.onetags )) + self.twotags = valid_twotags + deprecated_twotags + self.twotags += list(map( str.lower, self.twotags )) + self.mode = mode + elif mode == 'xml': + if onetags and twotags: + self.onetags = onetags + self.twotags = twotags + elif ( onetags and not twotags ) or ( twotags and not onetags ): + raise CustomizationError( ) + else: + self.onetags = russell( ) + self.twotags = russell( ) + self.mode = mode + else: + raise ModeError( mode ) + + def __getattr__( self, attr ): + if attr.startswith("__") and attr.endswith("__"): + raise AttributeError(attr) + return element( attr, case=self.case, parent=self ) + + def __str__( self ): + + if self._full and ( self.mode == 'strict_html' or self.mode == 'loose_html' ): + end = [ '', '' ] + else: + end = [ ] + + return self.separator.join( self.header + self.content + self.footer + end ) + + def __call__( self, escape=False ): + """Return the document as a string. 
+ + escape -- False print normally + True replace < and > by < and > + the default escape sequences in most browsers""" + + if escape: + return _escape( self.__str__( ) ) + else: + return self.__str__( ) + + def add( self, text ): + """This is an alias to addcontent.""" + self.addcontent( text ) + + def addfooter( self, text ): + """Add some text to the bottom of the document""" + self.footer.append( text ) + + def addheader( self, text ): + """Add some text to the top of the document""" + self.header.append( text ) + + def addcontent( self, text ): + """Add some text to the main part of the document""" + self.content.append( text ) + + + def init( self, lang='en', css=None, metainfo=None, title=None, header=None, + footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None ): + """This method is used for complete documents with appropriate + doctype, encoding, title, etc information. For an HTML/XML snippet + omit this method. + + lang -- language, usually a two character string, will appear + as in html mode (ignored in xml mode) + + css -- Cascading Style Sheet filename as a string or a list of + strings for multiple css files (ignored in xml mode) + + metainfo -- a dictionary in the form { 'name':'content' } to be inserted + into meta element(s) as + (ignored in xml mode) + + bodyattrs --a dictionary in the form { 'key':'value', ... 
} which will be added + as attributes of the element as + (ignored in xml mode) + + script -- dictionary containing src:type pairs, + + title -- the title of the document as a string to be inserted into + a title element as my title (ignored in xml mode) + + header -- some text to be inserted right after the element + (ignored in xml mode) + + footer -- some text to be inserted right before the element + (ignored in xml mode) + + charset -- a string defining the character set, will be inserted into a + + element (ignored in xml mode) + + encoding -- a string defining the encoding, will be put into to first line of + the document as in + xml mode (ignored in html mode) + + doctype -- the document type string, defaults to + + in html mode (ignored in xml mode)""" + + self._full = True + + if self.mode == 'strict_html' or self.mode == 'loose_html': + if doctype is None: + doctype = "" + self.header.append( doctype ) + self.html( lang=lang ) + self.head( ) + if charset is not None: + self.meta( http_equiv='Content-Type', content="text/html; charset=%s" % charset ) + if metainfo is not None: + self.metainfo( metainfo ) + if css is not None: + self.css( css ) + if title is not None: + self.title( title ) + if script is not None: + self.scripts( script ) + self.head.close() + if bodyattrs is not None: + self.body( **bodyattrs ) + else: + self.body( ) + if header is not None: + self.content.append( header ) + if footer is not None: + self.footer.append( footer ) + + elif self.mode == 'xml': + if doctype is None: + if encoding is not None: + doctype = "" % encoding + else: + doctype = "" + self.header.append( doctype ) + + def css( self, filelist ): + """This convenience function is only useful for html. 
+ It adds css stylesheet(s) to the document via the element.""" + + if isinstance( filelist, str ): + self.link( href=filelist, rel='stylesheet', type='text/css', media='all' ) + else: + for file in filelist: + self.link( href=file, rel='stylesheet', type='text/css', media='all' ) + + def metainfo( self, mydict ): + """This convenience function is only useful for html. + It adds meta information via the element, the argument is + a dictionary of the form { 'name':'content' }.""" + + if isinstance( mydict, dict ): + for name, content in mydict.items( ): + self.meta( name=name, content=content ) + else: + raise TypeError("Metainfo should be called with a dictionary argument of name:content pairs.") + + def scripts( self, mydict ): + """Only useful in html, mydict is dictionary of src:type pairs will + be rendered as """ + + if isinstance( mydict, dict ): + for src, type in mydict.items( ): + self.script( '', src=src, type='text/%s' % type ) + else: + raise TypeError("Script should be given a dictionary of src:type pairs.") + + +class _oneliner: + """An instance of oneliner returns a string corresponding to one element. 
+ This class can be used to write 'oneliners' that return a string + immediately so there is no need to instantiate the page class.""" + + def __init__( self, case='lower' ): + self.case = case + + def __getattr__( self, attr ): + if attr.startswith("__") and attr.endswith("__"): + raise AttributeError(attr) + return element( attr, case=self.case, parent=None ) + +oneliner = _oneliner( case='lower' ) +upper_oneliner = _oneliner( case='upper' ) + +def _argsdicts( args, mydict ): + """A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1.""" + + if len( args ) == 0: + args = None, + elif len( args ) == 1: + args = _totuple( args[0] ) + else: + raise Exception("We should have never gotten here.") + + mykeys = list(mydict.keys( )) + myvalues = list(map( _totuple, list(mydict.values( )) )) + + maxlength = max( list(map( len, [ args ] + myvalues )) ) + + for i in range( maxlength ): + thisdict = { } + for key, value in zip( mykeys, myvalues ): + try: + thisdict[ key ] = value[i] + except IndexError: + thisdict[ key ] = value[-1] + try: + thisarg = args[i] + except IndexError: + thisarg = args[-1] + + yield thisarg, thisdict + +def _totuple( x ): + """Utility stuff to convert string, int, float, None or anything to a usable tuple.""" + + if isinstance( x, str ): + out = x, + elif isinstance( x, ( int, float ) ): + out = str( x ), + elif x is None: + out = None, + else: + out = tuple( x ) + + return out + +def escape( text, newline=False ): + """Escape special html characters.""" + + if isinstance( text, str ): + if '&' in text: + text = text.replace( '&', '&' ) + if '>' in text: + text = text.replace( '>', '>' ) + if '<' in text: + text = text.replace( '<', '<' ) + if '\"' in text: + text = text.replace( '\"', '"' ) + if '\'' in text: + text = text.replace( '\'', '"' ) + if newline: + if '\n' in text: + text = text.replace( '\n', '
' ) + + return text + +_escape = escape + +def unescape( text ): + """Inverse of escape.""" + + if isinstance( text, str ): + if '&' in text: + text = text.replace( '&', '&' ) + if '>' in text: + text = text.replace( '>', '>' ) + if '<' in text: + text = text.replace( '<', '<' ) + if '"' in text: + text = text.replace( '"', '\"' ) + + return text + +class dummy: + """A dummy class for attaching attributes.""" + pass + +doctype = dummy( ) +doctype.frameset = "" +doctype.strict = "" +doctype.loose = "" + +class russell: + """A dummy class that contains anything.""" + + def __contains__( self, item ): + return True + + +class MarkupError( Exception ): + """All our exceptions subclass this.""" + def __str__( self ): + return self.message + +class ClosingError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag + +class OpeningError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' can not be opened." % tag + +class ArgumentError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' was called with more than one non-keyword argument." % tag + +class InvalidElementError( MarkupError ): + def __init__( self, tag, mode ): + self.message = "The element '%s' is not valid for your mode '%s'." % ( tag, mode ) + +class DeprecationError( MarkupError ): + def __init__( self, tag ): + self.message = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag + +class ModeError( MarkupError ): + def __init__( self, mode ): + self.message = "Mode '%s' is invalid, possible values: strict_html, loose_html, xml." % mode + +class CustomizationError( MarkupError ): + def __init__( self ): + self.message = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'." 
+ +if __name__ == '__main__': + print(__doc__) diff --git a/tablib/packages/ordereddict.py b/tablib/packages/ordereddict.py index 5b0303f..a5b896d 100644 --- a/tablib/packages/ordereddict.py +++ b/tablib/packages/ordereddict.py @@ -1,127 +1,127 @@ -# Copyright (c) 2009 Raymond Hettinger -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. 
- -from UserDict import DictMixin - -class OrderedDict(dict, DictMixin): - - def __init__(self, *args, **kwds): - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__end - except AttributeError: - self.clear() - self.update(*args, **kwds) - - def clear(self): - self.__end = end = [] - end += [None, end, end] # sentinel node for doubly linked list - self.__map = {} # key --> [key, prev, next] - dict.clear(self) - - def __setitem__(self, key, value): - if key not in self: - end = self.__end - curr = end[1] - curr[2] = end[1] = self.__map[key] = [key, curr, end] - dict.__setitem__(self, key, value) - - def __delitem__(self, key): - dict.__delitem__(self, key) - key, prev, next = self.__map.pop(key) - prev[2] = next - next[1] = prev - - def __iter__(self): - end = self.__end - curr = end[2] - while curr is not end: - yield curr[0] - curr = curr[2] - - def __reversed__(self): - end = self.__end - curr = end[1] - while curr is not end: - yield curr[0] - curr = curr[1] - - def popitem(self, last=True): - if not self: - raise KeyError('dictionary is empty') - if last: - key = reversed(self).next() - else: - key = iter(self).next() - value = self.pop(key) - return key, value - - def __reduce__(self): - items = [[k, self[k]] for k in self] - tmp = self.__map, self.__end - del self.__map, self.__end - inst_dict = vars(self).copy() - self.__map, self.__end = tmp - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def keys(self): - return list(self) - - setdefault = DictMixin.setdefault - update = DictMixin.update - pop = DictMixin.pop - values = DictMixin.values - items = DictMixin.items - iterkeys = DictMixin.iterkeys - itervalues = DictMixin.itervalues - iteritems = DictMixin.iteritems - - def __repr__(self): - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) - - def copy(self): - return self.__class__(self) 
- - @classmethod - def fromkeys(cls, iterable, value=None): - d = cls() - for key in iterable: - d[key] = value - return d - - def __eq__(self, other): - if isinstance(other, OrderedDict): - if len(self) != len(other): - return False - for p, q in zip(self.items(), other.items()): - if p != q: - return False - return True - return dict.__eq__(self, other) - - def __ne__(self, other): - return not self == other +# Copyright (c) 2009 Raymond Hettinger +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +from UserDict import DictMixin + +class OrderedDict(dict, DictMixin): + + def __init__(self, *args, **kwds): + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__end + except AttributeError: + self.clear() + self.update(*args, **kwds) + + def clear(self): + self.__end = end = [] + end += [None, end, end] # sentinel node for doubly linked list + self.__map = {} # key --> [key, prev, next] + dict.clear(self) + + def __setitem__(self, key, value): + if key not in self: + end = self.__end + curr = end[1] + curr[2] = end[1] = self.__map[key] = [key, curr, end] + dict.__setitem__(self, key, value) + + def __delitem__(self, key): + dict.__delitem__(self, key) + key, prev, next = self.__map.pop(key) + prev[2] = next + next[1] = prev + + def __iter__(self): + end = self.__end + curr = end[2] + while curr is not end: + yield curr[0] + curr = curr[2] + + def __reversed__(self): + end = self.__end + curr = end[1] + while curr is not end: + yield curr[0] + curr = curr[1] + + def popitem(self, last=True): + if not self: + raise KeyError('dictionary is empty') + if last: + key = next(reversed(self)) + else: + key = next(iter(self)) + value = self.pop(key) + return key, value + + def __reduce__(self): + items = [[k, self[k]] for k in self] + tmp = self.__map, self.__end + del self.__map, self.__end + inst_dict = vars(self).copy() + self.__map, self.__end = tmp + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def keys(self): + return list(self) + + setdefault = DictMixin.setdefault + update = DictMixin.update + pop = DictMixin.pop + values = DictMixin.values + items = DictMixin.items + iterkeys = DictMixin.iterkeys + itervalues = DictMixin.itervalues + iteritems = DictMixin.iteritems + + def __repr__(self): + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) + + def copy(self): + return 
self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + if isinstance(other, OrderedDict): + if len(self) != len(other): + return False + for p, q in zip(list(self.items()), list(other.items())): + if p != q: + return False + return True + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other diff --git a/tablib/packages/xlwt/__init__.py b/tablib/packages/xlwt/__init__.py index dcc23f0..cb65687 100644 --- a/tablib/packages/xlwt/__init__.py +++ b/tablib/packages/xlwt/__init__.py @@ -3,9 +3,6 @@ __VERSION__ = '0.7.2' import sys -if sys.version_info[:2] < (2, 3): - print >> sys.stderr, "Sorry, xlwt requires Python 2.3 or later" - sys.exit(1) from Workbook import Workbook from Worksheet import Worksheet diff --git a/tablib/packages/xlwt3/BIFFRecords.py b/tablib/packages/xlwt3/BIFFRecords.py new file mode 100644 index 0000000..39a9554 --- /dev/null +++ b/tablib/packages/xlwt3/BIFFRecords.py @@ -0,0 +1,2392 @@ +from struct import pack +from .UnicodeUtils import upack1, upack2 +import sys + +class SharedStringTable(object): + _SST_ID = 0x00FC + _CONTINUE_ID = 0x003C + + def __init__(self, encoding): + self.encoding = encoding + self._str_indexes = {} + self._tally = [] + self._add_calls = 0 + # Following 3 attrs are used for temporary storage in the + # get_biff_record() method and methods called by it. The pseudo- + # initialisation here is for documentation purposes only. 
+ self._sst_record = None + self._continues = None + self._current_piece = None + + def add_str(self, s): + if self.encoding != 'ascii' and not isinstance(s, str): + s = str(s, self.encoding) + self._add_calls += 1 + if s not in self._str_indexes: + idx = len(self._str_indexes) + self._str_indexes[s] = idx + self._tally.append(1) + else: + idx = self._str_indexes[s] + self._tally[idx] += 1 + return idx + + def del_str(self, idx): + # This is called when we are replacing the contents of a string cell. + assert self._tally[idx] > 0 + self._tally[idx] -= 1 + self._add_calls -= 1 + + def str_index(self, s): + return self._str_indexes[s] + + def get_biff_record(self): + self._sst_record = b'' + self._continues = [None, None] + self._current_piece = pack(' 0x2020: # limit for BIFF7/8 + chunks = [] + pos = 0 + while pos < len(data): + chunk_pos = pos + 0x2020 + chunk = data[pos:chunk_pos] + chunks.append(chunk) + pos = chunk_pos + continues = pack('<2H', self._REC_ID, len(chunks[0])) + chunks[0] + for chunk in chunks[1:]: + continues += pack('<2H%ds'%len(chunk), 0x003C, len(chunk), chunk) + # 0x003C -- CONTINUE record id + return continues + else: + return self.get_rec_header() + data + + +class Biff8BOFRecord(BiffRecord): + """ + Offset Size Contents + 0 2 Version, contains 0600H for BIFF8 and BIFF8X + 2 2 Type of the following data: + 0005H = Workbook globals + 0006H = Visual Basic module + 0010H = Worksheet + 0020H = Chart + 0040H = Macro sheet + 0100H = Workspace file + 4 2 Build identifier + 6 2 Build year + 8 4 File history flags + 12 4 Lowest Excel version that can read all records in this file + """ + _REC_ID = 0x0809 + # stream types + BOOK_GLOBAL = 0x0005 + VB_MODULE = 0x0006 + WORKSHEET = 0x0010 + CHART = 0x0020 + MACROSHEET = 0x0040 + WORKSPACE = 0x0100 + + def __init__(self, rec_type): + version = 0x0600 + build = 0x0DBB + year = 0x07CC + file_hist_flags = 0x00 + ver_can_read = 0x06 + + self._rec_data = pack('<4H2I', version, rec_type, build, year, 
file_hist_flags, ver_can_read) + + +class InteraceHdrRecord(BiffRecord): + _REC_ID = 0x00E1 + + def __init__(self): + self._rec_data = pack('BB', 0xB0, 0x04) + + +class InteraceEndRecord(BiffRecord): + _REC_ID = 0x00E2 + + def __init__(self): + self._rec_data = b'' + + +class MMSRecord(BiffRecord): + _REC_ID = 0x00C1 + + def __init__(self): + self._rec_data = pack('> 15 + c = low_15 | high_15 + passwd_hash ^= c + passwd_hash ^= len(plaintext) + passwd_hash ^= 0xCE4B + return passwd_hash + + def __init__(self, passwd = b""): + self._rec_data = pack('