From 2bb052599031f883ec220921c45d23eee6ae32f3 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Fri, 5 Nov 2010 09:46:14 -0400 Subject: [PATCH 01/23] Optimized set intersection for tag checking. --- tablib/core.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index b5c2526..1417000 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -76,12 +76,9 @@ class Row(object): if tag == None: return False elif isinstance(tag, basestring): - return tag in self.tags - else: - for t in tag: - if t in self.tags: - return True - return False + return (tag in self.tags) + else: + return True if len(set(tag) & set(self.tags)) else False class Dataset(object): From cabab73045943f28a230bfa74ff270fbebf083c2 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 9 Nov 2010 08:42:51 -0500 Subject: [PATCH 02/23] Spacing fixes. --- test_tablib.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test_tablib.py b/test_tablib.py index 342fd45..8687473 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -8,6 +8,7 @@ import unittest import tablib + class TablibTestCase(unittest.TestCase): """Tablib test cases.""" @@ -15,6 +16,7 @@ class TablibTestCase(unittest.TestCase): """Create simple data set with headers.""" global data, book + data = tablib.Dataset() book = tablib.Databook() @@ -352,20 +354,20 @@ class TablibTestCase(unittest.TestCase): def test_wipe(self): """Purge a dataset.""" - + new_row = (1, 2, 3) data.append(new_row) # Verify width/data self.assertTrue(data.width == len(new_row)) self.assertTrue(data[0] == new_row) - + data.wipe() new_row = (1, 2, 3, 4) data.append(new_row) self.assertTrue(data.width == len(new_row)) self.assertTrue(data[0] == new_row) - - + + if __name__ == '__main__': unittest.main() From 5fad80a540d95c7ba5e7dfa55f885509c992736b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 9 Nov 2010 08:43:34 -0500 Subject: [PATCH 03/23] Update column append examples. 
--- README.rst | 5 ++--- docs/tutorial.rst | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 3d01ed8..00b6345 100644 --- a/README.rst +++ b/README.rst @@ -61,7 +61,7 @@ Intelligently add new rows: :: Intelligently add new columns: :: - >>> data.append(col=('age', 90, 67, 83)) + >>> data.append(col=(90, 67, 83), header='age') Slice rows: :: @@ -171,8 +171,7 @@ To install tablib, simply: :: Or, if you absolutely must: :: $ easy_install tablib - - + Contribute ---------- diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 774158d..d89c8c1 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -87,7 +87,7 @@ Adding Columns Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. :: - data.append(col=['Age', 22, 20]) + data.append(col=[22, 20], header='Age') Let's view the data now. :: @@ -350,4 +350,4 @@ The resulting **tests.xls** will have the following layout: ---- -Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `. \ No newline at end of file +Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `. From 9ddb4de9420dc3ae3eb5133e95083a22b0f6000c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 9 Nov 2010 12:27:15 -0500 Subject: [PATCH 04/23] Documentation typo. --- docs/tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index d89c8c1..dfe10ff 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -243,7 +243,7 @@ Filtering Datasets with Tags .. versionadded:: 0.9.0 -When constructing a :class:`Dataset` object, you can add tags to rows by speficying the ``tags`` parameter. +When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter. This allows you to filter your :class:`Dataset` later. 
This can be useful so seperate rows of data based on arbitrary criteria (*e.g.* origin) that you don't want to include in your :class:`Dataset`. From 657ab98d04d7d32b36eeae63c4da48e6b0a9c100 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Thu, 11 Nov 2010 09:00:06 +0100 Subject: [PATCH 05/23] Support for Dataset transposition. Unit-tested. --- tablib/core.py | 30 +++++++++++++++++++++++++++++- test_tablib.py | 13 +++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index 1417000..23026a2 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -507,7 +507,35 @@ class Dataset(object): _dset._data = [row for row in _dset._data if row.has_tag(tag)] return _dset - + + def transpose(self): + """Transpose a :class:`Dataset`, turning rows into columns and vice + versa, returning a new ``Dataset`` instance. The first row of the + original instance becomes the new header row.""" + + # Don't transpose if there is no data + if not self: + return + + _dset = Dataset() + # The first element of the headers stays in the headers, + # it is our "hinge" on which we rotate the data + new_headers = [self.headers[0]] + self[self.headers[0]] + + _dset.headers = new_headers + for column in self.headers: + + if column == self.headers[0]: + # It's in the headers, so skip it + continue + + # Adding the column name as now they're a regular column + row_data = [column] + self[column] + row_data = Row(row_data) + _dset.append(row=row_data) + + return _dset + def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() diff --git a/test_tablib.py b/test_tablib.py index 342fd45..55546b5 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -349,6 +349,19 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(tablib.detect(_json)[0], tablib.formats.json) self.assertEqual(tablib.detect(_bunk)[0], None) + def test_transpose(self): + """Transpose a dataset.""" + + transposed_founders = 
self.founders.transpose() + first_row = transposed_founders[0] + second_row = transposed_founders[1] + + self.assertEqual(transposed_founders.headers, + ["first_name","John", "George", "Thomas"]) + self.assertEqual(first_row, + ("last_name","Adams", "Washington", "Jefferson")) + self.assertEqual(second_row, + ("gpa",90, 67, 50)) def test_wipe(self): """Purge a dataset.""" From 83a8346e8f7e3a267dfaab93951c13ac96c08528 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 11 Nov 2010 10:58:48 -0500 Subject: [PATCH 06/23] Added ordered dict license. --- NOTICE | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/NOTICE b/NOTICE index f072112..88d5d2d 100644 --- a/NOTICE +++ b/NOTICE @@ -1,4 +1,31 @@ -Tablib includes some vendorized python libraries: pyyaml, simplejson, and xlwt. +Tablib includes some vendorized python libraries: ordereddict, pyyaml, +simplejson, and xlwt. + + +OrderedDict License +=================== + +Copyright (c) 2009 Raymond Hettinger + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation files +(the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. From a91254117caeca06df7ff54345deb64f10450b7b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 11 Nov 2010 11:02:07 -0500 Subject: [PATCH 07/23] Added ordered dict library. --- tablib/packages/ordereddict.py | 127 +++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 tablib/packages/ordereddict.py diff --git a/tablib/packages/ordereddict.py b/tablib/packages/ordereddict.py new file mode 100644 index 0000000..5b0303f --- /dev/null +++ b/tablib/packages/ordereddict.py @@ -0,0 +1,127 @@ +# Copyright (c) 2009 Raymond Hettinger +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+ +from UserDict import DictMixin + +class OrderedDict(dict, DictMixin): + + def __init__(self, *args, **kwds): + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__end + except AttributeError: + self.clear() + self.update(*args, **kwds) + + def clear(self): + self.__end = end = [] + end += [None, end, end] # sentinel node for doubly linked list + self.__map = {} # key --> [key, prev, next] + dict.clear(self) + + def __setitem__(self, key, value): + if key not in self: + end = self.__end + curr = end[1] + curr[2] = end[1] = self.__map[key] = [key, curr, end] + dict.__setitem__(self, key, value) + + def __delitem__(self, key): + dict.__delitem__(self, key) + key, prev, next = self.__map.pop(key) + prev[2] = next + next[1] = prev + + def __iter__(self): + end = self.__end + curr = end[2] + while curr is not end: + yield curr[0] + curr = curr[2] + + def __reversed__(self): + end = self.__end + curr = end[1] + while curr is not end: + yield curr[0] + curr = curr[1] + + def popitem(self, last=True): + if not self: + raise KeyError('dictionary is empty') + if last: + key = reversed(self).next() + else: + key = iter(self).next() + value = self.pop(key) + return key, value + + def __reduce__(self): + items = [[k, self[k]] for k in self] + tmp = self.__map, self.__end + del self.__map, self.__end + inst_dict = vars(self).copy() + self.__map, self.__end = tmp + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def keys(self): + return list(self) + + setdefault = DictMixin.setdefault + update = DictMixin.update + pop = DictMixin.pop + values = DictMixin.values + items = DictMixin.items + iterkeys = DictMixin.iterkeys + itervalues = DictMixin.itervalues + iteritems = DictMixin.iteritems + + def __repr__(self): + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + + def copy(self): + return self.__class__(self) 
+ + @classmethod + def fromkeys(cls, iterable, value=None): + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + if isinstance(other, OrderedDict): + if len(self) != len(other): + return False + for p, q in zip(self.items(), other.items()): + if p != q: + return False + return True + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other From 10ce000d31f57c70753b797342c139e2f37a1872 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 11 Nov 2010 11:02:14 -0500 Subject: [PATCH 08/23] Updated changelog. --- HISTORY.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index d467712..668f201 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,12 @@ History ------- +0.9.2 (?) ++++++++++ + +* Added transpose method to Datasets +* Backwards-compatible OrderedDict support. + 0.9.1 (2010-11-04) ++++++++++++++++++ From 1427be2901cb53c52d60a1438492b01cee39b867 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Mon, 15 Nov 2010 08:59:49 +0100 Subject: [PATCH 09/23] Support for row and column stacking. Unit-tested. 
--- tablib/core.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ test_tablib.py | 38 ++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/tablib/core.py b/tablib/core.py index 23026a2..47d2e13 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -535,6 +535,59 @@ class Dataset(object): _dset.append(row=row_data) return _dset + + def row_stack(self, other): + + """Stack two :class:`Dataset` instances together by + joining them at the row level, and return a new + combined ``Dataset`` instance.""" + + if not isinstance(other, Dataset): + return + + if self.width != other.width: + raise InvalidDimensions + + # Copy the source data + _dset = copy(self) + + rows_to_stack = [row for row in _dset._data] + other_rows = [row for row in other._data] + + rows_to_stack.extend(other_rows) + _dset._data = rows_to_stack + + return _dset + + def column_stack(self, other): + + """Stack two :class:`Dataset` instances together by + joining at the column level, and return a new + combined ``Dataset`` instance. 
Requires headers + to be set.""" + + if not isinstance(other, Dataset): + return + + if not self.headers or not other.headers: + raise HeadersNeeded + + if self.height != other.height: + raise InvalidDimensions + + new_headers = self.headers + other.headers + + _dset = Dataset() + + for column in self.headers: + _dset.append(col=self[column]) + + for column in other.headers: + _dset.append(col=other[column]) + + _dset.headers = new_headers + + return _dset def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" diff --git a/test_tablib.py b/test_tablib.py index f203179..126ee84 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -3,6 +3,7 @@ """Tests for Tablib.""" +from copy import copy import unittest import tablib @@ -364,7 +365,44 @@ class TablibTestCase(unittest.TestCase): ("last_name","Adams", "Washington", "Jefferson")) self.assertEqual(second_row, ("gpa",90, 67, 50)) + + def test_row_stacking(self): + """Row stacking.""" + + to_join = tablib.Dataset(headers=self.founders.headers) + + for row in self.founders: + to_join.append(row=row) + + row_stacked = self.founders.row_stack(to_join) + + for column in row_stacked.headers: + + original_data = self.founders[column] + expected_data = original_data + original_data + self.assertEqual(row_stacked[column], expected_data) + + def test_column_stacking(self): + + """Column stacking""" + + to_join = tablib.Dataset(headers=self.founders.headers) + + for row in self.founders: + to_join.append(row=row) + + column_stacked = self.founders.column_stack(to_join) + + for index, row in enumerate(column_stacked): + + original_data = self.founders[index] + expected_data = original_data + original_data + self.assertEqual(row, expected_data) + + self.assertEqual(column_stacked[0], + ("John", "Adams", 90, "John", "Adams", 90)) + def test_wipe(self): """Purge a dataset.""" From 36bbe2726bb014188241164956273096ac795c05 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Mon, 15 Nov 2010 09:00:57 +0100 
Subject: [PATCH 10/23] Remove unneded import --- test_tablib.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_tablib.py b/test_tablib.py index 126ee84..e9ef5af 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -3,7 +3,6 @@ """Tests for Tablib.""" -from copy import copy import unittest import tablib From d8136ab6138104583aee6d304eb2d91f809a6ea0 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Wed, 17 Nov 2010 22:51:43 +0100 Subject: [PATCH 11/23] Whitespace --- tablib/core.py | 130 ++++++++++++++++++++++++------------------------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 23026a2..3a80e60 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -77,40 +77,40 @@ class Row(object): return False elif isinstance(tag, basestring): return (tag in self.tags) - else: + else: return True if len(set(tag) & set(self.tags)) else False class Dataset(object): - """The :class:`Dataset` object is the heart of Tablib. It provides all core + """The :class:`Dataset` object is the heart of Tablib. It provides all core functionality. - + Usually you create a :class:`Dataset` instance in your main module, and append rows and columns as you collect data. :: - + data = tablib.Dataset() data.headers = ('name', 'age') - + for (name, age) in some_collector(): data.append((name, age)) - + You can also set rows and headers upon instantiation. This is useful if dealing with dozens or hundres of :class:`Dataset` objects. :: - + headers = ('first_name', 'last_name') data = [('John', 'Adams'), ('George', 'Washington')] - + data = tablib.Dataset(*data, headers=headers) - - + + :param \*args: (optional) list of rows to populate Dataset :param headers: (optional) list strings for Dataset header row .. admonition:: Format Attributes Definition - If you look at the code, the various output/import formats are not - defined within the :class:`Dataset` object. 
To add support for a new format, see + If you look at the code, the various output/import formats are not + defined within the :class:`Dataset` object. To add support for a new format, see :ref:`Adding New Formats `. """ @@ -118,7 +118,7 @@ class Dataset(object): def __init__(self, *args, **kwargs): self._data = list(Row(arg) for arg in args) self.__headers = None - + # ('title', index) tuples self._separators = [] @@ -134,7 +134,7 @@ class Dataset(object): self._register_formats() - + def __len__(self): return self.height @@ -153,7 +153,7 @@ class Dataset(object): else: return [result.tuple for result in _results] - + def __setitem__(self, key, value): self._validate(value) self._data[key] = Row(value) @@ -163,10 +163,10 @@ class Dataset(object): if isinstance(key, basestring): if key in self.headers: - + pos = self.headers.index(key) del self.headers[pos] - + for i, row in enumerate(self._data): del row[pos] @@ -183,7 +183,7 @@ class Dataset(object): except AttributeError: return '' - + @classmethod def _register_formats(cls): """Adds format properties.""" @@ -193,7 +193,7 @@ class Dataset(object): setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set)) except AttributeError: setattr(cls, fmt.title, property(fmt.export_set)) - + except AttributeError: pass @@ -234,21 +234,21 @@ class Dataset(object): def _clean_col(self, col): """Prepares the given column for insert/append.""" - + col = list(col) - + if self.headers: header = [col.pop(0)] else: header = [] - + if len(col) == 1 and callable(col[0]): col = map(col[0], self._data) col = tuple(header + col) - + return col - + @property def height(self): """The number of rows currently in the :class:`Dataset`. @@ -262,7 +262,7 @@ class Dataset(object): """The number of columns currently in the :class:`Dataset`. Cannot be directly modified. 
""" - + try: return len(self._data[0]) except IndexError: @@ -275,7 +275,7 @@ class Dataset(object): @property def headers(self): """An *optional* list of strings to be used for header rows and attribute names. - + This must be set manually. The given list length must equal :class:`Dataset.width`. """ @@ -297,9 +297,9 @@ class Dataset(object): @property def dict(self): - """A JSON representation of the :class:`Dataset` object. If headers have been - set, a JSON list of objects will be returned. If no headers have - been set, a JSON list of lists (rows) will be returned instead. + """A JSON representation of the :class:`Dataset` object. If headers have been + set, a JSON list of objects will be returned. If no headers have + been set, a JSON list of lists (rows) will be returned instead. A dataset object can also be imported by setting the `Dataset.json` attribute: :: @@ -309,10 +309,10 @@ class Dataset(object): """ return self._package() - + @dict.setter def dict(self, pickle): - """A native Python representation of the Dataset object. If headers have been + """A native Python representation of the Dataset object. If headers have been set, a list of Python dictionaries will be returned. If no headers have been set, a list of tuples (rows) will be returned instead. @@ -320,7 +320,7 @@ class Dataset(object): data = tablib.Dataset() data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] - + """ if not len(pickle): return @@ -330,7 +330,7 @@ class Dataset(object): self.wipe() for row in pickle: self.append(Row(row)) - + # if list of objects elif isinstance(pickle[0], dict): self.wipe() @@ -353,11 +353,11 @@ class Dataset(object): """ pass - + @property def csv(): - """A CSV representation of the :class:`Dataset` object. The top row will contain - headers, if they have been set. Otherwise, the top row will contain + """A CSV representation of the :class:`Dataset` object. The top row will contain + headers, if they have been set. 
Otherwise, the top row will contain the first row of the dataset. A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. :: @@ -371,8 +371,8 @@ class Dataset(object): @property def tsv(): - """A TSV representation of the :class:`Dataset` object. The top row will contain - headers, if they have been set. Otherwise, the top row will contain + """A TSV representation of the :class:`Dataset` object. The top row will contain + headers, if they have been set. Otherwise, the top row will contain the first row of the dataset. A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. :: @@ -385,9 +385,9 @@ class Dataset(object): @property def yaml(): - """A YAML representation of the :class:`Dataset` object. If headers have been - set, a YAML list of objects will be returned. If no headers have - been set, a YAML list of lists (rows) will be returned instead. + """A YAML representation of the :class:`Dataset` object. If headers have been + set, a YAML list of objects will be returned. If no headers have + been set, a YAML list of lists (rows) will be returned instead. A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: @@ -398,12 +398,12 @@ class Dataset(object): """ pass - + @property def json(): - """A JSON representation of the :class:`Dataset` object. If headers have been - set, a JSON list of objects will be returned. If no headers have - been set, a JSON list of lists (rows) will be returned instead. + """A JSON representation of the :class:`Dataset` object. If headers have been + set, a JSON list of objects will be returned. If no headers have + been set, a JSON list of lists (rows) will be returned instead. 
A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: @@ -444,18 +444,18 @@ class Dataset(object): def insert(self, index, row=None, col=None, header=None, tags=list()): - """Inserts a row or column to the :class:`Dataset` at the given index. - - Rows and columns inserted must be the correct size (height or width). - + """Inserts a row or column to the :class:`Dataset` at the given index. + + Rows and columns inserted must be the correct size (height or width). + The default behaviour is to insert the given row to the :class:`Dataset` object at the given index. If the ``col`` parameter is given, however, a new column will be insert to the :class:`Dataset` object instead. You can also insert a column of a single callable object, which will - add a new column with the return values of the callable each as an + add a new column with the return values of the callable each as an item in the column. :: - + data.append(col=random.randint) See :ref:`dyncols` for an in-depth example. @@ -469,7 +469,7 @@ class Dataset(object): If inserting a row, you can add :ref:`tags ` to the row you are inserting. This gives you the ability to :class:`filter ` your :class:`Dataset` later. - + """ if row: self._validate(row) @@ -477,7 +477,7 @@ class Dataset(object): elif col: col = list(col) - # Callable Columns... + # Callable Columns... if len(col) == 1 and callable(col[0]): col = map(col[0], self._data) @@ -489,7 +489,7 @@ class Dataset(object): if not header: raise HeadersNeeded() self.headers.insert(index, header) - + if self.height and self.width: for i, row in enumerate(self._data): @@ -501,11 +501,11 @@ class Dataset(object): def filter(self, tag): """Returns a new instance of the :class:`Dataset`, excluding any rows - that do not contain the given :ref:`tags `. + that do not contain the given :ref:`tags `. 
""" _dset = copy(self) _dset._data = [row for row in _dset._data if row.has_tag(tag)] - + return _dset def transpose(self): @@ -521,7 +521,7 @@ class Dataset(object): # The first element of the headers stays in the headers, # it is our "hinge" on which we rotate the data new_headers = [self.headers[0]] + self[self.headers[0]] - + _dset.headers = new_headers for column in self.headers: @@ -562,7 +562,7 @@ class Databook(object): """Removes all :class:`Dataset` objects from the :class:`Databook`.""" self._datasets = [] - + @classmethod def _register_formats(cls): """Adds format properties.""" @@ -572,7 +572,7 @@ class Databook(object): setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book)) except AttributeError: setattr(cls, fmt.title, property(fmt.export_book)) - + except AttributeError: pass @@ -583,7 +583,7 @@ class Databook(object): self._datasets.append(dataset) else: raise InvalidDatasetType - + def _package(self): """Packages :class:`Databook` for delivery.""" @@ -607,12 +607,12 @@ def detect(stream): for fmt in formats.available: try: if fmt.detect(stream): - return (fmt, stream) + return (fmt, stream) except AttributeError: - pass + pass return (None, stream) - - + + def import_set(stream): """Return dataset of given stream.""" (format, stream) = detect(stream) @@ -621,7 +621,7 @@ def import_set(stream): data = Dataset() format.import_set(data, stream) return data - + except AttributeError, e: return None From 24d800fac3da85a1231e3cb7a30ddd98342c3af6 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Wed, 17 Nov 2010 23:03:43 +0100 Subject: [PATCH 12/23] Support for pickling/unpickling Row objects. Makes Datasets pickleable. 
--- tablib/core.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tablib/core.py b/tablib/core.py index 3a80e60..4b92777 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -51,6 +51,19 @@ class Row(object): def __delitem__(self, i): del self._row[i] + def __getstate__(self): + + result = dict() + result["_row"] = self._row + result["tags"] = self.tags + + return result + + def __setstate__(self, state): + + self._row = state["_row"] + self.tags = state["tags"] + def append(self, value): self._row.append(value) From ea4aef88b6b8f0f374395a1ac39b785e1f595f37 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 19:15:36 -0500 Subject: [PATCH 13/23] Subtle format fixes. --- tablib/core.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 4b92777..8b5d31f 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -52,17 +52,15 @@ class Row(object): del self._row[i] def __getstate__(self): - result = dict() - result["_row"] = self._row - result["tags"] = self.tags + result['_row'] = self._row + result['tags'] = self.tags return result def __setstate__(self, state): - - self._row = state["_row"] - self.tags = state["tags"] + self._row = state['_row'] + self.tags = state['tags'] def append(self, value): self._row.append(value) From eaed0e48c282041ffb80990c978672817787ec12 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 19:50:05 -0500 Subject: [PATCH 14/23] Formating. --- tablib/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tablib/core.py b/tablib/core.py index 8b5d31f..816e0e1 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -21,6 +21,7 @@ __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' + class Row(object): """Internal Row object. 
Mainly used for filtering.""" From 9e3ab4c13f850871975512d19b538a7e667a37e1 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 19:50:22 -0500 Subject: [PATCH 15/23] Support for locked header row. --- tablib/formats/_xls.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 97a9580..08bc0f6 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -26,7 +26,7 @@ def export_set(dataset): ws = wb.add_sheet(dataset.title if dataset.title else 'Tabbed Dataset') dset_sheet(dataset, ws) - + stream = cStringIO.StringIO() wb.save(stream) return stream.getvalue() @@ -63,6 +63,11 @@ def dset_sheet(dataset, ws): if (i == 0) and dataset.headers: ws.write(i, j, col, bold) + # frozen header row + ws.panes_frozen = True + ws.horz_split_pos = 1 + + # bold separators elif len(row) < dataset.width: ws.write(i, j, col, bold) @@ -77,4 +82,4 @@ def dset_sheet(dataset, ws): except TypeError: ws.write(i, j, col) - \ No newline at end of file + From 46f302255dc4b807114eaca3c9584323f1913f81 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 19:54:50 -0500 Subject: [PATCH 16/23] Updated prophesy. --- HISTORY.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index 668f201..1346379 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -4,8 +4,11 @@ History 0.9.2 (?) +++++++++ -* Added transpose method to Datasets +* Tanspose method added to Datasets +* New frozen top row in Excel output * Backwards-compatible OrderedDict support. +* Pickling support for Datasets and Rows +* ? Support for row/column stacking 0.9.1 (2010-11-04) ++++++++++++++++++ From a2990d5852206ac0ab11fa87bb256b3924871169 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 20:01:31 -0500 Subject: [PATCH 17/23] Change stacking method names. 
--- tablib/core.py | 10 +++++----- test_tablib.py | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 4f86b41..26923ea 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -547,8 +547,8 @@ class Dataset(object): _dset.append(row=row_data) return _dset - - def row_stack(self, other): + + def stack_rows(self, other): """Stack two :class:`Dataset` instances together by joining them at the row level, and return a new @@ -562,16 +562,16 @@ class Dataset(object): # Copy the source data _dset = copy(self) - + rows_to_stack = [row for row in _dset._data] other_rows = [row for row in other._data] rows_to_stack.extend(other_rows) _dset._data = rows_to_stack - + return _dset - def column_stack(self, other): + def stack_columns(self, other): """Stack two :class:`Dataset` instances together by joining at the column level, and return a new diff --git a/test_tablib.py b/test_tablib.py index e9ef5af..40e83ce 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -374,7 +374,7 @@ class TablibTestCase(unittest.TestCase): for row in self.founders: to_join.append(row=row) - row_stacked = self.founders.row_stack(to_join) + row_stacked = self.founders.stack_rows(to_join) for column in row_stacked.headers: @@ -391,7 +391,7 @@ class TablibTestCase(unittest.TestCase): for row in self.founders: to_join.append(row=row) - column_stacked = self.founders.column_stack(to_join) + column_stacked = self.founders.stack_columns(to_join) for index, row in enumerate(column_stacked): @@ -402,6 +402,7 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(column_stacked[0], ("John", "Adams", 90, "John", "Adams", 90)) + def test_wipe(self): """Purge a dataset.""" From 196edb82ccbfbca2fe6696f69973384cf570c7f9 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 20:02:08 -0500 Subject: [PATCH 18/23] trailing whitespae --- test_tablib.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git 
a/test_tablib.py b/test_tablib.py index 40e83ce..8e2454f 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -194,7 +194,7 @@ class TablibTestCase(unittest.TestCase): data.tsv data.xls - + def test_book_export_no_exceptions(self): """Test that varoius exports don't error out.""" @@ -245,7 +245,7 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_yaml, data.yaml) - + def test_yaml_import_book(self): """Generate and import YAML book serialization.""" data.append(self.john) @@ -258,7 +258,7 @@ class TablibTestCase(unittest.TestCase): book.yaml = _yaml self.assertEqual(_yaml, book.yaml) - + def test_csv_import_set(self): """Generate and import CSV set serialization.""" @@ -286,7 +286,7 @@ class TablibTestCase(unittest.TestCase): def test_csv_format_detect(self): """Test CSV format detection.""" - + _csv = ( '1,2,3\n' '4,5,6\n' @@ -295,13 +295,13 @@ class TablibTestCase(unittest.TestCase): _bunk = ( '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' ) - + self.assertTrue(tablib.formats.csv.detect(_csv)) self.assertFalse(tablib.formats.csv.detect(_bunk)) def test_tsv_format_detect(self): """Test TSV format detection.""" - + _tsv = ( '1\t2\t3\n' '4\t5\t6\n' @@ -310,7 +310,7 @@ class TablibTestCase(unittest.TestCase): _bunk = ( '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' ) - + self.assertTrue(tablib.formats.tsv.detect(_tsv)) self.assertFalse(tablib.formats.tsv.detect(_bunk)) @@ -357,23 +357,23 @@ class TablibTestCase(unittest.TestCase): transposed_founders = self.founders.transpose() first_row = transposed_founders[0] second_row = transposed_founders[1] - + self.assertEqual(transposed_founders.headers, ["first_name","John", "George", "Thomas"]) self.assertEqual(first_row, ("last_name","Adams", "Washington", "Jefferson")) self.assertEqual(second_row, ("gpa",90, 67, 50)) - + def test_row_stacking(self): """Row stacking.""" to_join = tablib.Dataset(headers=self.founders.headers) - + for row in 
self.founders: to_join.append(row=row) - + row_stacked = self.founders.stack_rows(to_join) for column in row_stacked.headers: @@ -387,7 +387,7 @@ class TablibTestCase(unittest.TestCase): """Column stacking""" to_join = tablib.Dataset(headers=self.founders.headers) - + for row in self.founders: to_join.append(row=row) @@ -401,7 +401,7 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(column_stacked[0], ("John", "Adams", 90, "John", "Adams", 90)) - + def test_wipe(self): """Purge a dataset.""" From 1a9aee928970ecaaba40529fd2def1e47059b828 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 20:48:50 -0500 Subject: [PATCH 19/23] Column stacking only requires headers if headers exist. --- tablib/core.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 26923ea..7d3c94c 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -548,11 +548,11 @@ class Dataset(object): return _dset - def stack_rows(self, other): + def stack_rows(self, other): """Stack two :class:`Dataset` instances together by - joining them at the row level, and return a new - combined ``Dataset`` instance.""" + joining at the row level, and return new combined + ``Dataset`` instance.""" if not isinstance(other, Dataset): return @@ -571,23 +571,27 @@ class Dataset(object): return _dset - def stack_columns(self, other): + def stack_columns(self, other): """Stack two :class:`Dataset` instances together by joining at the column level, and return a new - combined ``Dataset`` instance. Requires headers - to be set.""" + combined ``Dataset`` instance. 
If either ``Dataset`` + has headers set, then the other must as well.""" if not isinstance(other, Dataset): return - if not self.headers or not other.headers: - raise HeadersNeeded + if self.headers or other.headers: + if not self.headers or not other.headers: + raise HeadersNeeded if self.height != other.height: raise InvalidDimensions - new_headers = self.headers + other.headers + try: + new_headers = self.headers + other.headers + except TypeError: + new_headers = None _dset = Dataset() From 1aa275bf998f79bb4cc2573130cbb51dedeb230f Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 20:55:38 -0500 Subject: [PATCH 20/23] Updated TODO. --- TODO.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/TODO.rst b/TODO.rst index c28c8b7..c974634 100644 --- a/TODO.rst +++ b/TODO.rst @@ -2,8 +2,7 @@ * Write more exhausive unit-tests. * Write stress tests. * Make CSV write customizable. -* HTML Table exports. -* ``Dataset.traspose()`` support? - - - +* HTML Table exports. 
+* Integrate django-tablib +* Mention django-tablib in Documentation +* Dataset title usage in documentation (#17) \ No newline at end of file From 823a543f41eec8e08c195140c149eb5f41565f1d Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 20:58:50 -0500 Subject: [PATCH 21/23] Version bump (v0.9.2) --- setup.py | 2 +- tablib/core.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 577e4d0..e779457 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ required = [] setup( name='tablib', - version='0.9.1', + version='0.9.2', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), diff --git a/tablib/core.py b/tablib/core.py index 7d3c94c..bd2d4ba 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -15,8 +15,8 @@ from tablib import formats __title__ = 'tablib' -__version__ = '0.9.1' -__build__ = 0x000901 +__version__ = '0.9.2' +__build__ = 0x000902 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' From 9b74b139fda16271e4accd3d1a675614056fedd0 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 21:00:01 -0500 Subject: [PATCH 22/23] Ordered dict in TODO --- TODO.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TODO.rst b/TODO.rst index c974634..231f03e 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,4 +1,4 @@ -* Polish *&* announce http://tablib.org. +* Backwards-compatible OrderedDict support * Write more exhausive unit-tests. * Write stress tests. * Make CSV write customizable. 
From 91bd4eb9c736270ab5b5eb88f7ebaa790c0ddcfc Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 17 Nov 2010 21:00:13 -0500 Subject: [PATCH 23/23] Updated history --- HISTORY.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 1346379..c64d7b5 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,14 +1,14 @@ History ------- -0.9.2 (?) -+++++++++ +0.9.2 (2010-11-17) +++++++++++++++++++ * Tanspose method added to Datasets * New frozen top row in Excel output -* Backwards-compatible OrderedDict support. * Pickling support for Datasets and Rows -* ? Support for row/column stacking +* Support for row/column stacking + 0.9.1 (2010-11-04) ++++++++++++++++++