From c5bbc74b96cc04254c39f8586e4d976a98b8d02a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 23 Mar 2011 03:55:23 -0400 Subject: [PATCH] import magic --- tablib/__init__.py | 16 +- tablib/core25.py | 816 +++++++++++++++++++++++++++++++++++ tablib/formats/_json.py | 11 +- tablib/packages/anyjson25.py | 118 +++++ tox.ini | 9 +- 5 files changed, 961 insertions(+), 9 deletions(-) create mode 100644 tablib/core25.py create mode 100644 tablib/packages/anyjson25.py diff --git a/tablib/__init__.py b/tablib/__init__.py index c2205b6..dc85527 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -1,8 +1,16 @@ """ Tablib. """ -from tablib.core import ( - Databook, Dataset, detect, import_set, - InvalidDatasetType, InvalidDimensions, UnsupportedFormat -) +import sys +if sys.version_info[0:1] > (2, 5): + from tablib.core import ( + Databook, Dataset, detect, import_set, + InvalidDatasetType, InvalidDimensions, UnsupportedFormat + ) + +else: + from tablib.core25 import ( + Databook, Dataset, detect, import_set, + InvalidDatasetType, InvalidDimensions, UnsupportedFormat + ) diff --git a/tablib/core25.py b/tablib/core25.py new file mode 100644 index 0000000..4ccf229 --- /dev/null +++ b/tablib/core25.py @@ -0,0 +1,816 @@ +# -*- coding: utf-8 -*- +""" + tablib.core + ~~~~~~~~~~~ + + This module implements the central Tablib objects. + + :copyright: (c) 2011 by Kenneth Reitz. + :license: MIT, see LICENSE for more details. +""" + +from copy import copy +from operator import itemgetter + +from tablib import formats +import collections + +try: + from collections import OrderedDict +except ImportError: + from tablib.packages.ordereddict import OrderedDict + + +__title__ = 'tablib' +__version__ = '0.9.4' +__build__ = 0x000904 +__author__ = 'Kenneth Reitz' +__license__ = 'MIT' +__copyright__ = 'Copyright 2011 Kenneth Reitz' +__docformat__ = 'restructuredtext' + + +class Row(object): + """Internal Row object. 
Mainly used for filtering.""" + + __slots__ = ['tuple', '_row', 'tags'] + + def __init__(self, row=list(), tags=list()): + self._row = list(row) + self.tags = list(tags) + + def __iter__(self): + return (col for col in self._row) + + def __len__(self): + return len(self._row) + + def __repr__(self): + return repr(self._row) + + def __getslice__(self, i, j): + return self._row[i,j] + + def __getitem__(self, i): + return self._row[i] + + def __setitem__(self, i, value): + self._row[i] = value + + def __delitem__(self, i): + del self._row[i] + + def __getstate__(self): + return {slot: [getattr(self, slot) for slot in self.__slots__]} + + def __setstate__(self, state): + for (k, v) in list(state.items()): setattr(self, k, v) + + def append(self, value): + self._row.append(value) + + def insert(self, index, value): + self._row.insert(index, value) + + def __contains__(self, item): + return (item in self._row) + + @property + def tuple(self): + '''Tuple representation of :class:`Row`.''' + return tuple(self._row) + + @property + def list(self): + '''List representation of :class:`Row`.''' + return list(self._row) + + def has_tag(self, tag): + """Returns true if current row contains tag.""" + + if tag == None: + return False + elif isinstance(tag, str): + return (tag in self.tags) + else: + return bool(len(set(tag) & set(self.tags))) + + + + +class Dataset(object): + """The :class:`Dataset` object is the heart of Tablib. It provides all core + functionality. + + Usually you create a :class:`Dataset` instance in your main module, and append + rows and columns as you collect data. :: + + data = tablib.Dataset() + data.headers = ('name', 'age') + + for (name, age) in some_collector(): + data.append((name, age)) + + You can also set rows and headers upon instantiation. This is useful if dealing + with dozens or hundres of :class:`Dataset` objects. 
:: + + headers = ('first_name', 'last_name') + data = [('John', 'Adams'), ('George', 'Washington')] + + data = tablib.Dataset(*data, headers=headers) + + + :param \*args: (optional) list of rows to populate Dataset + :param headers: (optional) list strings for Dataset header row + + + .. admonition:: Format Attributes Definition + + If you look at the code, the various output/import formats are not + defined within the :class:`Dataset` object. To add support for a new format, see + :ref:`Adding New Formats `. + + """ + + def __init__(self, *args, **kwargs): + self._data = list(Row(arg) for arg in args) + self.__headers = None + + # ('title', index) tuples + self._separators = [] + + # (column, callback) tuples + self._formatters = [] + + try: + self.headers = kwargs['headers'] + except KeyError: + self.headers = None + + try: + self.title = kwargs['title'] + except KeyError: + self.title = None + + self._register_formats() + + + def __len__(self): + return self.height + + + def __getitem__(self, key): + if isinstance(key, str): + if key in self.headers: + pos = self.headers.index(key) # get 'key' index from each data + return [row[pos] for row in self._data] + else: + raise KeyError + else: + _results = self._data[key] + if isinstance(_results, Row): + return _results.tuple + else: + return [result.tuple for result in _results] + + + def __setitem__(self, key, value): + self._validate(value) + self._data[key] = Row(value) + + + def __delitem__(self, key): + if isinstance(key, str): + + if key in self.headers: + + pos = self.headers.index(key) + del self.headers[pos] + + for i, row in enumerate(self._data): + + del row[pos] + self._data[i] = row + else: + raise KeyError + else: + del self._data[key] + + + def __repr__(self): + try: + return '<%s dataset>' % (self.title.lower()) + except AttributeError: + return '' + + + @classmethod + def _register_formats(cls): + """Adds format properties.""" + for fmt in formats.available: + try: + try: + setattr(cls, fmt.title, 
property(fmt.export_set, fmt.import_set)) + except AttributeError: + setattr(cls, fmt.title, property(fmt.export_set)) + + except AttributeError: + pass + + + def _validate(self, row=None, col=None, safety=False): + """Assures size of every row in dataset is of proper proportions.""" + if row: + is_valid = (len(row) == self.width) if self.width else True + elif col: + if len(col) < 1: + is_valid = True + else: + is_valid = (len(col) == self.height) if self.height else True + else: + is_valid = all((len(x) == self.width for x in self._data)) + + if is_valid: + return True + else: + if not safety: + raise InvalidDimensions + return False + + + def _package(self, dicts=True): + """Packages Dataset into lists of dictionaries for transmission.""" + + _data = list(self._data) + + # Execute formatters + if self._formatters: + for row_i, row in enumerate(_data): + for col, callback in self._formatters: + try: + if col is None: + for j, c in enumerate(row): + _data[row_i][j] = callback(c) + else: + _data[row_i][col] = callback(row[col]) + except IndexError: + raise InvalidDatasetIndex + + + if self.headers: + if dicts: + data = [OrderedDict(list(zip(self.headers, data_row))) for data_row in _data] + else: + data = [list(self.headers)] + list(_data) + else: + data = [list(row) for row in _data] + + return data + + + def _clean_col(self, col): + """Prepares the given column for insert/append.""" + + col = list(col) + + if self.headers: + header = [col.pop(0)] + else: + header = [] + + if len(col) == 1 and isinstance(col[0], collections.Callable): + col = list(map(col[0], self._data)) + col = tuple(header + col) + + return col + + + @property + def height(self): + """The number of rows currently in the :class:`Dataset`. + Cannot be directly modified. + """ + return len(self._data) + + + @property + def width(self): + """The number of columns currently in the :class:`Dataset`. + Cannot be directly modified. 
+ """ + + try: + return len(self._data[0]) + except IndexError: + try: + return len(self.headers) + except TypeError: + return 0 + + + def _get_headers(self): + """An *optional* list of strings to be used for header rows and attribute names. + + This must be set manually. The given list length must equal :class:`Dataset.width`. + + """ + return self.__headers + + + def _set_headers(self, collection): + """Validating headers setter.""" + self._validate(collection) + if collection: + try: + self.__headers = list(collection) + except TypeError: + raise TypeError + else: + self.__headers = None + + headers = property(_get_headers, _set_headers) + + def _get_dict(self): + """A native Python representation of the :class:`Dataset` object. If headers have + been set, a list of Python dictionaries will be returned. If no headers have been set, + a list of tuples (rows) will be returned instead. + + A dataset object can also be imported by setting the `Dataset.dict` attribute: :: + + data = tablib.Dataset() + data.json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + + """ + return self._package() + + + def _set_dict(self, pickle): + """A native Python representation of the Dataset object. If headers have been + set, a list of Python dictionaries will be returned. If no headers have been + set, a list of tuples (rows) will be returned instead. + + A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. 
:: + + data = tablib.Dataset() + data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] + + """ + + if not len(pickle): + return + + # if list of rows + if isinstance(pickle[0], list): + self.wipe() + for row in pickle: + self.append(Row(row)) + + # if list of objects + elif isinstance(pickle[0], dict): + self.wipe() + self.headers = list(pickle[0].keys()) + for row in pickle: + self.append(Row(list(row.values()))) + else: + raise UnsupportedFormat + + dict = property(_get_dict, _set_dict) + + + @property + def xls(): + """An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`seperators`. Cannot be set. + + .. admonition:: Binary Warning + + :class:`Dataset.xls` contains binary data, so make sure to write in binary mode:: + + with open('output.xls', 'wb') as f: + f.write(data.xls)' + """ + pass + + + @property + def csv(): + """A CSV representation of the :class:`Dataset` object. The top row will contain + headers, if they have been set. Otherwise, the top row will contain + the first row of the dataset. + + A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. :: + + data = tablib.Dataset() + data.csv = 'age, first_name, last_name\\n90, John, Adams' + + Import assumes (for now) that headers exist. + """ + pass + + + @property + def tsv(): + """A TSV representation of the :class:`Dataset` object. The top row will contain + headers, if they have been set. Otherwise, the top row will contain + the first row of the dataset. + + A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. :: + + data = tablib.Dataset() + data.tsv = 'age\tfirst_name\tlast_name\\n90\tJohn\tAdams' + + Import assumes (for now) that headers exist. + """ + + @property + def yaml(): + """A YAML representation of the :class:`Dataset` object. If headers have been + set, a YAML list of objects will be returned. If no headers have + been set, a YAML list of lists (rows) will be returned instead. 
+ + A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: + + data = tablib.Dataset() + data.yaml = '- {age: 90, first_name: John, last_name: Adams}' + + Import assumes (for now) that headers exist. + """ + pass + + + @property + def json(): + """A JSON representation of the :class:`Dataset` object. If headers have been + set, a JSON list of objects will be returned. If no headers have + been set, a JSON list of lists (rows) will be returned instead. + + A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: + + data = tablib.Dataset() + data.json = '[{age: 90, first_name: "John", liast_name: "Adams"}]' + + Import assumes (for now) that headers exist. + """ + + @property + def html(): + """A HTML table representation of the :class:`Dataset` object. If + headers have been set, they will be used as table headers. + + ..notice:: This method can be used for export only. + """ + pass + + + def append(self, row=None, col=None, header=None, tags=list()): + """Adds a row or column to the :class:`Dataset`. + Usage is :class:`Dataset.insert` for documentation. + """ + + if row is not None: + self.insert(self.height, row=row, tags=tags) + elif col is not None: + self.insert(self.width, col=col, header=header) + + + def insert_separator(self, index, text='-'): + """Adds a separator to :class:`Dataset` at given index.""" + + sep = (index, text) + self._separators.append(sep) + + + def append_separator(self, text='-'): + """Adds a :ref:`seperator ` to the :class:`Dataset`.""" + + # change offsets if headers are or aren't defined + if not self.headers: + index = self.height if self.height else 0 + else: + index = (self.height + 1) if self.height else 1 + + self.insert_separator(index, text) + + + def add_formatter(self, col, handler): + """Adds a :ref:`formatter` to the :class:`Dataset`. + + .. versionadded:: 0.9.5 + :param col: column to. Accepts index int or header str. 
+ :param handler: reference to callback function to execute + against each cell value. + """ + + if isinstance(col, str): + if col in self.headers: + col = self.headers.index(col) # get 'key' index from each data + else: + raise KeyError + + if not col > self.width: + self._formatters.append((col, handler)) + else: + raise InvalidDatasetIndex + + return True + + + def insert(self, index, row=None, col=None, header=None, tags=list()): + """Inserts a row or column to the :class:`Dataset` at the given index. + + Rows and columns inserted must be the correct size (height or width). + + The default behaviour is to insert the given row to the :class:`Dataset` + object at the given index. If the ``col`` parameter is given, however, + a new column will be insert to the :class:`Dataset` object instead. + + You can also insert a column of a single callable object, which will + add a new column with the return values of the callable each as an + item in the column. :: + + data.append(col=random.randint) + + See :ref:`dyncols` for an in-depth example. + + .. versionchanged:: 0.9.0 + If inserting a column, and :class:`Dataset.headers` is set, the + header attribute must be set, and will be considered the header for + that row. + + .. versionadded:: 0.9.0 + If inserting a row, you can add :ref:`tags ` to the row you are inserting. + This gives you the ability to :class:`filter ` your + :class:`Dataset` later. + + """ + if row: + self._validate(row) + self._data.insert(index, Row(row, tags=tags)) + elif col: + col = list(col) + + # Callable Columns... 
+ if len(col) == 1 and isinstance(col[0], collections.Callable): + col = list(map(col[0], self._data)) + + col = self._clean_col(col) + self._validate(col=col) + + if self.headers: + # pop the first item off, add to headers + if not header: + raise HeadersNeeded() + self.headers.insert(index, header) + + if self.height and self.width: + + for i, row in enumerate(self._data): + + row.insert(index, col[i]) + self._data[i] = row + else: + self._data = [Row([row]) for row in col] + + + def filter(self, tag): + """Returns a new instance of the :class:`Dataset`, excluding any rows + that do not contain the given :ref:`tags `. + """ + _dset = copy(self) + _dset._data = [row for row in _dset._data if row.has_tag(tag)] + + return _dset + + + def sort(self, col, reverse=False): + """Sort a :class:`Dataset` by a specific column, given string (for + header) or integer (for column index). The order can be reversed by + setting ``reverse`` to ``True``. + Returns a new :class:`Dataset` instance where columns have been + sorted.""" + + if isinstance(col, str): + + if not self.headers: + raise HeadersNeeded + + _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) + _dset = Dataset(headers=self.headers) + + for item in _sorted: + row = [item[key] for key in self.headers] + _dset.append(row=row) + + else: + if self.headers: + col = self.headers[col] + + _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) + _dset = Dataset(headers=self.headers) + + for item in _sorted: + if self.headers: + row = [item[key] for key in self.headers] + else: + row = item + _dset.append(row=row) + + + return _dset + + + def transpose(self): + """Transpose a :class:`Dataset`, turning rows into columns and vice + versa, returning a new ``Dataset`` instance. 
The first row of the + original instance becomes the new header row.""" + + # Don't transpose if there is no data + if not self: + return + + _dset = Dataset() + # The first element of the headers stays in the headers, + # it is our "hinge" on which we rotate the data + new_headers = [self.headers[0]] + self[self.headers[0]] + + _dset.headers = new_headers + for column in self.headers: + + if column == self.headers[0]: + # It's in the headers, so skip it + continue + + # Adding the column name as now they're a regular column + row_data = [column] + self[column] + row_data = Row(row_data) + _dset.append(row=row_data) + + return _dset + + + def stack_rows(self, other): + """Stack two :class:`Dataset` instances together by + joining at the row level, and return new combined + ``Dataset`` instance.""" + + if not isinstance(other, Dataset): + return + + if self.width != other.width: + raise InvalidDimensions + + # Copy the source data + _dset = copy(self) + + rows_to_stack = [row for row in _dset._data] + other_rows = [row for row in other._data] + + rows_to_stack.extend(other_rows) + _dset._data = rows_to_stack + + return _dset + + + def stack_columns(self, other): + """Stack two :class:`Dataset` instances together by + joining at the column level, and return a new + combined ``Dataset`` instance. 
If either ``Dataset`` + has headers set, than the other must as well.""" + + if not isinstance(other, Dataset): + return + + if self.headers or other.headers: + if not self.headers or not other.headers: + raise HeadersNeeded + + if self.height != other.height: + raise InvalidDimensions + + try: + new_headers = self.headers + other.headers + except TypeError: + new_headers = None + + _dset = Dataset() + + for column in self.headers: + _dset.append(col=self[column]) + + for column in other.headers: + _dset.append(col=other[column]) + + _dset.headers = new_headers + + return _dset + + + def wipe(self): + """Removes all content and headers from the :class:`Dataset` object.""" + self._data = list() + self.__headers = None + + + +class Databook(object): + """A book of :class:`Dataset` objects. + """ + + def __init__(self, sets=None): + + if sets is None: + self._datasets = list() + else: + self._datasets = sets + + self._register_formats() + + def __repr__(self): + try: + return '<%s databook>' % (self.title.lower()) + except AttributeError: + return '' + + + def wipe(self): + """Removes all :class:`Dataset` objects from the :class:`Databook`.""" + self._datasets = [] + + + @classmethod + def _register_formats(cls): + """Adds format properties.""" + for fmt in formats.available: + try: + try: + setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book)) + except AttributeError: + setattr(cls, fmt.title, property(fmt.export_book)) + + except AttributeError: + pass + + + def add_sheet(self, dataset): + """Adds given :class:`Dataset` to the :class:`Databook`.""" + if type(dataset) is Dataset: + self._datasets.append(dataset) + else: + raise InvalidDatasetType + + + def _package(self): + """Packages :class:`Databook` for delivery.""" + collector = [] + for dset in self._datasets: + collector.append(OrderedDict( + title = dset.title, + data = dset.dict + )) + return collector + + + @property + def size(self): + """The number of the :class:`Dataset` objects within 
:class:`Databook`.""" + return len(self._datasets) + + +def detect(stream): + """Return (format, stream) of given stream.""" + for fmt in formats.available: + try: + if fmt.detect(stream): + return (fmt, stream) + except AttributeError: + pass + return (None, stream) + + +def import_set(stream): + """Return dataset of given stream.""" + (format, stream) = detect(stream) + + try: + data = Dataset() + format.import_set(data, stream) + return data + + except AttributeError: + return None + + +class InvalidDatasetType(Exception): + "Only Datasets can be added to a DataBook" + + +class InvalidDimensions(Exception): + "Invalid size" + +class InvalidDatasetIndex(Exception): + "Outside of Dataset size" + +class HeadersNeeded(Exception): + "Header parameter must be given when appending a column in this Dataset." + +class UnsupportedFormat(NotImplementedError): + "Format is not supported" diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py index 262c627..8d498df 100644 --- a/tablib/formats/_json.py +++ b/tablib/formats/_json.py @@ -3,8 +3,13 @@ """ Tablib - JSON Support """ -import tablib.core -from tablib.packages import anyjson +import tablib + +import sys +if sys.version_info[:2] > (2, 5): + from tablib.packages import anyjson +else: + from tablib.packages import anyjson25 as anyjson @@ -34,7 +39,7 @@ def import_book(dbook, in_stream): dbook.wipe() for sheet in anyjson.deserialize(in_stream): - data = tablib.core.Dataset() + data = tablib.Dataset() data.title = sheet['title'] data.dict = sheet['data'] dbook.add_sheet(data) diff --git a/tablib/packages/anyjson25.py b/tablib/packages/anyjson25.py new file mode 100644 index 0000000..ad6fc40 --- /dev/null +++ b/tablib/packages/anyjson25.py @@ -0,0 +1,118 @@ +u""" +Wraps the best available JSON implementation available in a common interface +""" + +__version__ = u"0.2.0" +__author__ = u"Rune Halvorsen " +__homepage__ = u"http://bitbucket.org/runeh/anyjson/" +__docformat__ = u"restructuredtext" + +u""" + +.. 
function:: serialize(obj) + + Serialize the object to JSON. + +.. function:: deserialize(str) + + Deserialize JSON-encoded object to a Python object. + +.. function:: force_implementation(name) + + Load a specific json module. This is useful for testing and not much else + +.. attribute:: implementation + + The json implementation object. This is probably not useful to you, + except to get the name of the implementation in use. The name is + available through `implementation.name`. +""" + +import sys +from itertools import izip + +implementation = None + +u""" +.. data:: _modules + + List of known json modules, and the names of their serialize/unserialize + methods, as well as the exception they throw. Exception can be either + an exception class or a string. +""" +_modules = [(u"cjson", u"encode", u"EncodeError", u"decode", u"DecodeError"), + (u"jsonlib2", u"write", u"WriteError", u"read", u"ReadError"), + (u"jsonlib", u"write", u"WriteError", u"read", u"ReadError"), + (u"simplejson", u"dumps", TypeError, u"loads", ValueError), + (u"json", u"dumps", TypeError, u"loads", ValueError), + (u"django.utils.simplejson", u"dumps", TypeError, u"loads", + ValueError)] +_fields = (u"modname", u"encoder", u"encerror", u"decoder", u"decerror") + + +class _JsonImplementation(object): + u"""Encapsulates a JSON implementation""" + + def __init__(self, modspec): + modinfo = dict(list(izip(_fields, modspec))) + + # No try block. 
We want importerror to end up at caller + module = self._attempt_load(modinfo[u"modname"]) + + self.implementation = modinfo[u"modname"] + self._encode = getattr(module, modinfo[u"encoder"]) + self._decode = getattr(module, modinfo[u"decoder"]) + self._encode_error = modinfo[u"encerror"] + self._decode_error = modinfo[u"decerror"] + + if isinstance(modinfo[u"encerror"], unicode): + self._encode_error = getattr(module, modinfo[u"encerror"]) + if isinstance(modinfo[u"decerror"], unicode): + self._decode_error = getattr(module, modinfo[u"decerror"]) + + self.name = modinfo[u"modname"] + + def _attempt_load(self, modname): + u"""Attempt to load module name modname, returning it on success, + throwing ImportError if module couldn't be imported""" + __import__(modname) + return sys.modules[modname] + + def serialize(self, data): + u"""Serialize the datastructure to json. Returns a string. Raises + TypeError if the object could not be serialized.""" + try: + return self._encode(data) + except self._encode_error, exc: + raise TypeError(*exc.args) + + def deserialize(self, s): + u"""deserialize the string to python data types. 
Raises + ValueError if the string could not be parsed.""" + try: + return self._decode(s) + except self._decode_error, exc: + raise ValueError(*exc.args) + + +def force_implementation(modname): + u"""Forces anyjson to use a specific json module if it's available""" + global implementation + for name, spec in [(e[0], e) for e in _modules]: + if name == modname: + implementation = _JsonImplementation(spec) + return + raise ImportError(u"No module named: %s" % modname) + + +for modspec in _modules: + try: + implementation = _JsonImplementation(modspec) + break + except ImportError: + pass +else: + raise ImportError(u"No supported JSON module found") + +serialize = lambda value: implementation.serialize(value) +deserialize = lambda value: implementation.deserialize(value) diff --git a/tox.ini b/tox.ini index ce52a01..764ee3c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,12 +1,17 @@ [tox] -envlist = py24,py25,py26,py27, py3 +envlist = py25,py26,py27,py3 [testenv] commands=py.test --junitxml=junit-{envname}.xml deps = pytest +[testenv:py25] +simplejson = pytest simplejson + [testenv:pypy] basepython=/usr/bin/pypy-c +simplejson = pytest simplejson [testenv:py3] -basepython=/usr/bin/python3 \ No newline at end of file +basepython=/usr/bin/python3 +simplejson = pytest \ No newline at end of file