diff --git a/AUTHORS b/AUTHORS index 722a130..3d48743 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,4 +10,4 @@ Development Lead Patches and Suggestions ``````````````````````` -- A Lucky Someone \ No newline at end of file +- Luke Lee \ No newline at end of file diff --git a/HISTORY.rst b/HISTORY.rst index d4b58d4..06d8257 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,13 @@ History ======= +0.8.0 (2010-09-25) +------------------ +* New format plugin system! +* Imports! ELEGANT Imports! +* Tests. Lots of tests. + + 0.7.1 (2010-09-20) ------------------ diff --git a/README.rst b/README.rst index c0333e7..df59abb 100644 --- a/README.rst +++ b/README.rst @@ -15,15 +15,27 @@ Tablib is a format-agnostic tabular dataset library, written in Python. Output formats supported: -- Excel -- JSON -- YAML -- CSV +- Excel (Sets + Books) +- JSON (Sets + Books) +- YAML (Sets + Books) +- CSV (Sets) -At this time, Tablib supports the **export** of it's powerful Dataset object instances into any of the above formats. Import is underway. +Import formats supported: + +- JSON (Sets + Books) +- YAML (Sets + Books) +- CSV (Sets) Note that tablib *purposefully* excludes XML support. It always will. +Overview +-------- + +`tablib.Dataset()` + A Dataset is a table of tabular data. It may or may not have a header row. They can be built and manipulated as raw Python datatypes (Lists of tuples|dictionaries). Datasets can be imported from JSON, YAML, and CSV; they can be exported to Excel (XLS), JSON, YAML, and CSV. + +`tablib.Databook()` + A Databook is a set of Datasets. The most common form of a Databook is an Excel file with multiple spreadsheets. Databooks can be imported from JSON and YAML; they can be exported to Excel (XLS), JSON, and YAML. Usage ----- @@ -64,6 +76,9 @@ Easily delete rows: :: >>> del data[1] +Exports +------- + Drumroll please........... JSON! @@ -109,8 +124,41 @@ EXCEL! >>> open('people.xls', 'wb').write(data.xls) It's that easy. + +Imports! 
+-------- + +JSON +++++ + +:: + + >>> data.json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + >>> print data[0] + ('John', 'Adams', 90) + + +YAML +++++ +:: + + >>> data.yaml = '- {age: 90, first_name: John, last_name: Adams}' + >>> print data[0] + ('John', 'Adams', 90) + +CSV ++++ +:: + + >>> data.csv = 'age, first_name, last_name\n90, John, Adams' + >>> print data[0] + ('John', 'Adams', 90) - + >>> print data.yaml + - {age: 90, first_name: John, last_name: Adams} + + + Installation ------------ @@ -131,13 +179,10 @@ If you'd like to contribute, simply fork `the repository`_, commit your changes Roadmap ------- -- Import datasets from CSV, JSON, YAML - Release CLI Interface - Auto-detect import format - Add possible other exports (SQL?) -- Possibly plugin-ify format architecture - Ability to assign types to rows (set, regex=, &c.) -- Plugin support .. _`the repository`: http://github.com/kennethreitz/tablib .. _AUTHORS: http://github.com/kennethreitz/tablib/blob/master/AUTHORS diff --git a/setup.py b/setup.py index 8816512..0c95cb4 100644 --- a/setup.py +++ b/setup.py @@ -11,14 +11,13 @@ def publish(): """Publish to PyPi""" os.system("python setup.py sdist upload") - if sys.argv[-1] == "publish": publish() sys.exit() setup( name='tablib', - version='0.7.1', + version='0.8.0', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), diff --git a/tablib/__init__.py b/tablib/__init__.py index 5d20240..fadd8dd 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -1 +1,8 @@ -from core import * \ No newline at end of file +""" Tablib. 
+""" + +from tablib.core import ( + Databook, Dataset, InvalidDatasetType, + InvalidDimensions, UnsupportedFormat +) + diff --git a/tablib/core.py b/tablib/core.py index 0167c34..cdaf0d5 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -1,28 +1,14 @@ # -*- coding: utf-8 -*- -# _____ ______ ______ _________ -# __ /_______ ____ /_ ___ /_ _____ ______ / -# _ __/_ __ `/__ __ \__ __ \_ _ \_ __ / -# / /_ / /_/ / _ /_/ /_ /_/ // __// /_/ / -# \__/ \__,_/ /_.___/ /_.___/ \___/ \__,_/ +""" Tablib - Core Library. +""" + +from tablib.formats import FORMATS as formats -import csv -import cStringIO -import random - -import simplejson as json -import xlwt -import yaml - -from helpers import * - - -# __all__ = ['Dataset', 'DataBook'] - -__name__ = 'tablib' -__version__ = '0.7.1' -__build__ = 0x000701 +__title__ = 'tablib' +__version__ = '0.8.0' +__build__ = 0x000800 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' @@ -32,22 +18,20 @@ class Dataset(object): """Epic Tabular-Dataset object. 
""" def __init__(self, *args, **kwargs): - self._data = None - self._saved_file = None - self._saved_format = None self._data = list(args) self.__headers = None try: self.headers = kwargs['headers'] - except KeyError, why: + except KeyError: self.headers = None try: self.title = kwargs['title'] - except KeyError, why: + except KeyError: self.title = None + self._register_formats() def __len__(self): return self.height @@ -79,6 +63,20 @@ class Dataset(object): except AttributeError: return '' + + @classmethod + def _register_formats(cls): + """Adds format properties.""" + for fmt in formats: + try: + try: + setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set)) + except AttributeError: + setattr(cls, fmt.title, property(fmt.export_set)) + + except AttributeError: + pass + def _validate(self, row=None, col=None, safety=False): """Assures size of every row in dataset is of proper proportions.""" @@ -113,6 +111,7 @@ class Dataset(object): return data + @property def height(self): """Returns the height of the Dataset.""" @@ -124,12 +123,13 @@ class Dataset(object): """Returns the width of the Dataset.""" try: return len(self._data[0]) - except IndexError, why: + except IndexError: try: return len(self.headers) - except TypeError, e: + except TypeError: return 0 - + + @property def headers(self): """Headers property.""" @@ -143,7 +143,7 @@ class Dataset(object): if collection: try: self.__headers = list(collection) - except TypeError, why: + except TypeError: raise TypeError else: self.__headers = None @@ -154,42 +154,21 @@ class Dataset(object): """Returns python dict of Dataset.""" return self._package() - - @property - def json(self): - """Returns JSON representation of Dataset.""" - return json.dumps(self.dict) - - @property - def yaml(self): - """Returns YAML representation of Dataset.""" - return yaml.dump(self.dict) - - @property - def csv(self): - """Returns CSV representation of Dataset.""" - stream = cStringIO.StringIO() - _csv = csv.writer(stream) - 
- for row in self._package(dicts=False): - _csv.writerow(row) - - return stream.getvalue() - - @property - def xls(self): - """Returns XLS representation of Dataset.""" - - wb = xlwt.Workbook(encoding='utf8') - ws = wb.add_sheet(self.title if self.title else 'Tabbed Dataset') - - for i, row in enumerate(self._package(dicts=False)): - for j, col in enumerate(row): - ws.write(i, j, col) - - stream = cStringIO.StringIO() - wb.save(stream) - return stream.getvalue() + + @dict.setter + def dict(self, pickle): + """Returns python dict of Dataset.""" + if not len(pickle): + return + if isinstance(pickle[0], list): + for row in pickle: + self.append(row) + elif isinstance(pickle[0], dict): + self.headers = pickle[0].keys() + for row in pickle: + self.append(row.values()) + else: + raise UnsupportedFormat def append(self, row=None, col=None): @@ -222,6 +201,12 @@ class Dataset(object): self._data.insert(i, tuple(row)) elif col: pass + + + def wipe(self): + """Erases all data from Dataset.""" + self._data = list() + self.__headers = None class Databook(object): @@ -231,6 +216,7 @@ class Databook(object): def __init__(self, sets=[]): self._datasets = sets + self._register_formats() def __repr__(self): @@ -239,9 +225,26 @@ class Databook(object): except AttributeError: return '' + def wipe(self): + """Wipe book clean.""" + self._datasets = [] + + @classmethod + def _register_formats(cls): + """Adds format properties.""" + for fmt in formats: + try: + try: + setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book)) + except AttributeError: + setattr(cls, fmt.title, property(fmt.export_book)) + + except AttributeError: + pass + def add_sheet(self, dataset): - """Add given dataset .""" + """Adds given dataset.""" if type(dataset) is Dataset: self._datasets.append(dataset) else: @@ -249,6 +252,7 @@ class Databook(object): def _package(self): + """Packages Databook for delivery.""" collector = [] for dset in self._datasets: collector.append(dict( @@ -263,40 +267,6 @@ 
class Databook(object): """The number of the Datasets within DataBook.""" return len(self._datasets) - @property - def xls(self): - """Returns XLS representation of DataBook.""" - - - wb = xlwt.Workbook(encoding='utf8') - - for i, dset in enumerate(self._datasets): - ws = wb.add_sheet(dset.title if dset.title else 'Sheet%s' % (i)) - - #for row in self._package(dicts=False): - for i, row in enumerate(dset._package(dicts=False)): - for j, col in enumerate(row): - ws.write(i, j, col) - - - stream = cStringIO.StringIO() - wb.save(stream) - return stream.getvalue() - - - @property - def json(self): - """Returns JSON representation of Databook.""" - - return json.dumps(self._package()) - - - @property - def yaml(self): - """Returns YAML representation of Databook.""" - - return yaml.dump(self._package()) - class InvalidDatasetType(Exception): diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py new file mode 100644 index 0000000..b22a959 --- /dev/null +++ b/tablib/formats/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- + +""" Tablib - formats +""" + +import _csv as csv +import _json as json +import _xls as xls +import _yaml as yaml + +FORMATS = (csv, json, xls, yaml) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py new file mode 100644 index 0000000..8b19da7 --- /dev/null +++ b/tablib/formats/_csv.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +""" Tablib - CSV Support. 
+""" + +import cStringIO +import csv +import os + +import simplejson as json + +import tablib + + +title = 'csv' +extentions = ('csv',) + + + +def export_set(dataset): + """Returns CSV representation of Dataset.""" + stream = cStringIO.StringIO() + _csv = csv.writer(stream) + + for row in dataset._package(dicts=False): + _csv.writerow(row) + + return stream.getvalue() + + +def import_set(dset, in_stream, headers=True): + """Returns dataset from CSV stream.""" + + dset.wipe() + + rows = csv.reader(in_stream.split()) + for i, row in enumerate(rows): + + if (i == 0) and (headers): + dset.headers = row + else: + dset.append(row) diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py new file mode 100644 index 0000000..1f92b58 --- /dev/null +++ b/tablib/formats/_json.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +""" Tablib - JSON Support +""" + +import simplejson as json +import tablib.core + +title = 'json' +extentions = ('json', 'jsn') + + +def export_set(dataset): + """Returns JSON representation of Dataset.""" + return json.dumps(dataset.dict) + + +def export_book(databook): + """Returns JSON representation of Databook.""" + return json.dumps(databook._package()) + + +def import_set(dset, in_stream): + """Returns dataset from JSON stream.""" + + dset.wipe() + dset.dict = json.loads(in_stream) + + +def import_book(dbook, in_stream): + """Returns databook from JSON stream.""" + + dbook.wipe() + for sheet in json.loads(in_stream): + data = tablib.core.Dataset() + data.title = sheet['title'] + data.dict = sheet['data'] + dbook.add_sheet(data) diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py new file mode 100644 index 0000000..1a739af --- /dev/null +++ b/tablib/formats/_xls.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- + +""" Tablib - XLS Support. 
+""" + +import xlwt +import cStringIO + + +title = 'xls' +extentions = ('xls',) + + +def export_set(dataset): + """Returns XLS representation of Dataset.""" + + wb = xlwt.Workbook(encoding='utf8') + ws = wb.add_sheet(dataset.title if dataset.title else 'Tabbed Dataset') + + for i, row in enumerate(dataset._package(dicts=False)): + for j, col in enumerate(row): + ws.write(i, j, col) + + stream = cStringIO.StringIO() + wb.save(stream) + return stream.getvalue() + + +def export_book(databook): + """Returns XLS representation of DataBook.""" + + wb = xlwt.Workbook(encoding='utf8') + + for i, dset in enumerate(databook._datasets): + ws = wb.add_sheet(dset.title if dset.title else 'Sheet%s' % (i)) + + #for row in self._package(dicts=False): + for i, row in enumerate(dset._package(dicts=False)): + for j, col in enumerate(row): + ws.write(i, j, col) + + + stream = cStringIO.StringIO() + wb.save(stream) + return stream.getvalue() \ No newline at end of file diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py new file mode 100644 index 0000000..4cac8aa --- /dev/null +++ b/tablib/formats/_yaml.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +""" Tablib - YAML Support. 
+""" + +import yaml +import tablib + + + +title = 'yaml' +extentions = ('yaml', 'yml') + + + +def export_set(dataset): + """Returns YAML representation of Dataset.""" + return yaml.dump(dataset.dict) + + +def export_book(databook): + """Returns YAML representation of Databook.""" + return yaml.dump(databook._package()) + + +def import_set(dset, in_stream): + """Returns dataset from YAML stream.""" + + dset.wipe() + dset.dict = yaml.load(in_stream) + + +def import_book(dbook, in_stream): + """Returns databook from YAML stream.""" + + dbook.wipe() + + for sheet in yaml.load(in_stream): + data = tablib.core.Dataset() + data.title = sheet['title'] + data.dict = sheet['data'] + dbook.add_sheet(data) \ No newline at end of file diff --git a/tablib/helpers.py b/tablib/helpers.py index 0a91e56..f75c466 100644 --- a/tablib/helpers.py +++ b/tablib/helpers.py @@ -1,5 +1,8 @@ # -*- coding: utf-8 -*- +""" Tablib - General Helpers. +""" + import sys @@ -12,6 +15,19 @@ class Struct(object): def __getitem__(self, key): return getattr(self, key, None) + def dictionary(self): + """Returns dictionary representation of object.""" + return self.__dict__ + + def items(self): + """Returns items within object.""" + return self.__dict__.items() + + def keys(self): + """Returns keys within object.""" + return self.__dict__.keys() + + def piped(): """Returns piped input via stdin, else False.""" diff --git a/test_tablib.py b/test_tablib.py index 959a01c..67b693d 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -13,8 +13,10 @@ class TablibTestCase(unittest.TestCase): def setUp(self): """Create simple data set with headers.""" - global data + + global data, book data = tablib.Dataset() + book = tablib.Databook() self.headers = ('first_name', 'last_name', 'gpa') self.john = ('John', 'Adams', 90) @@ -181,6 +183,101 @@ class TablibTestCase(unittest.TestCase): data.csv data.xls + + def test_book_export_no_exceptions(self): + """Test that varoius exports don't error out.""" + + book = 
tablib.Databook() + book.add_sheet(data) + + book.json + book.yaml + book.xls + + + def test_json_import_set(self): + """Generate and import JSON set serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _json = data.json + + data.json = _json + + self.assertEqual(_json, data.json) + + + def test_json_import_book(self): + """Generate and import JSON book serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + book.add_sheet(data) + _json = book.json + + book.json = _json + + self.assertEqual(_json, book.json) + + + def test_yaml_import_set(self): + """Generate and import YAML set serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _yaml = data.yaml + + data.yaml = _yaml + + self.assertEqual(_yaml, data.yaml) + + + def test_yaml_import_book(self): + """Generate and import YAML book serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + book.add_sheet(data) + _yaml = book.yaml + + book.yaml = _yaml + + self.assertEqual(_yaml, book.yaml) + + + def test_csv_import_set(self): + """Generate and import CSV set serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _csv = data.csv + + data.csv = _csv + + self.assertEqual(_csv, data.csv) + + + def test_wipe(self): + """Purge a dataset.""" + + new_row = (1, 2, 3) + data.append(new_row) + + # Verify width/data + self.assertTrue(data.width == len(new_row)) + self.assertTrue(data[0] == new_row) + + data.wipe() + new_row = (1, 2, 3, 4) + data.append(new_row) + self.assertTrue(data.width == len(new_row)) + self.assertTrue(data[0] == new_row) + if __name__ == '__main__': unittest.main()