diff --git a/.gitignore b/.gitignore index 68f4c75..7b0b8c5 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ MANIFEST # python skin *.pyc -.pyo +*.pyo # osx noise .DS_Store @@ -15,3 +15,5 @@ profile .idea .idea/* +# vi noise +*.swp diff --git a/AUTHORS b/AUTHORS index 722a130..3d48743 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,4 +10,4 @@ Development Lead Patches and Suggestions ``````````````````````` -- A Lucky Someone \ No newline at end of file +- Luke Lee \ No newline at end of file diff --git a/HISTORY.rst b/HISTORY.rst index 2bf7a3b..9f86fe2 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,7 +1,42 @@ History ======= -0.6.2 (2010-09-14) +0.8.1 (2010-09-28) +------------------ +* Packaging Fix + + +0.8.0 (2010-09-25) +------------------ +* New format plugin system! +* Imports! ELEGANT Imports! +* Tests. Lots of tests. + + +0.7.1 (2010-09-20) +------------------ + +* Reverting methods back to properties. +* Windows bug compensated in documentation. + + +0.7.0 (2010-09-20) +------------------ + +* Renamed DataBook to Databook for consistency. +* Export properties changed to methods (XLS filename / StringIO bug). +* Optional Dataset.xls(path='filename') support (for writing on windows). +* Added utf-8 on the worksheet level. + + +0.6.4 (2010-09-19) +------------------ + +* Updated unicode export for XLS. +* More exhaustive unit tests. + + +0.6.3 (2010-09-14) ------------------ * Added Dataset.append() support for columns. diff --git a/README.rst b/README.rst index 32d477f..df59abb 100644 --- a/README.rst +++ b/README.rst @@ -15,15 +15,27 @@ Tablib is a format-agnostic tabular dataset library, written in Python. Output formats supported: -- Excel -- JSON -- YAML -- CSV +- Excel (Sets + Books) +- JSON (Sets + Books) +- YAML (Sets + Books) +- CSV (Sets) -At this time, Tablib supports the **export** of it's powerful Dataset object instances into any of the above formats. Import is underway. 
+Import formats supported: + +- JSON (Sets + Books) +- YAML (Sets + Books) +- CSV (Sets) Note that tablib *purposefully* excludes XML support. It always will. +Overview +-------- + +`tablib.Dataset()` + A Dataset is a table of tabular data. It may or may not have a header row. They can be built and manipulated as raw Python datatypes (Lists of tuples|dictionaries). Datasets can be imported from JSON, YAML, and CSV; they can be exported to Excel (XLS), JSON, YAML, and CSV. + +`tablib.Databook()` + A Databook is a set of Datasets. The most common form of a Databook is an Excel file with multiple spreadsheets. Databooks can be imported from JSON and YAML; they can be exported to Excel (XLS), JSON, and YAML. Usage ----- @@ -64,6 +76,9 @@ Easily delete rows: :: >>> del data[1] +Exports +------- + Drumroll please........... JSON! @@ -106,11 +121,44 @@ EXCEL! ++++++ :: - >>> open('people.xls').write(data.xls) - + >>> open('people.xls', 'wb').write(data.xls) + It's that easy. + +Imports! +-------- + +JSON +++++ + +:: + + >>> data.json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + >>> print data[0] + ('John', 'Adams', 90) + + +YAML +++++ +:: + + >>> data.yaml = '- {age: 90, first_name: John, last_name: Adams}' + >>> print data[0] + ('John', 'Adams', 90) + +CSV ++++ +:: + + >>> data.csv = 'age, first_name, last_name\n90, John, Adams' + >>> print data[0] + ('John', 'Adams', 90) - + >>> print data.yaml + - {age: 90, first_name: John, last_name: Adams} + + + Installation ------------ @@ -131,14 +179,10 @@ If you'd like to contribute, simply fork `the repository`_, commit your changes Roadmap ------- -- Add ability to add/remove full columns -- Import datasets from CSV, JSON, YAML - Release CLI Interface - Auto-detect import format - Add possible other exports (SQL?) -- Possibly plugin-ify format architecture - Ability to assign types to rows (set, regex=, &c.) -- Plugin support .. _`the repository`: http://github.com/kennethreitz/tablib -.. 
_AUTHORS: http://github.com/kennethreitz/tablib/blob/master/AUTHORS \ No newline at end of file +.. _AUTHORS: http://github.com/kennethreitz/tablib/blob/master/AUTHORS diff --git a/setup.py b/setup.py index 02c84d3..920c148 100644 --- a/setup.py +++ b/setup.py @@ -11,21 +11,20 @@ def publish(): """Publish to PyPi""" os.system("python setup.py sdist upload") - if sys.argv[-1] == "publish": publish() sys.exit() setup( name='tablib', - version='0.6.3', + version='0.8.1', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', - packages=['tablib'], + packages=['tablib', 'tablib.formats'], install_requires=['xlwt', 'simplejson', 'PyYAML'], license='MIT', classifiers=( diff --git a/tablib/__init__.py b/tablib/__init__.py index 5d20240..fadd8dd 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -1 +1,8 @@ -from core import * \ No newline at end of file +""" Tablib. +""" + +from tablib.core import ( + Databook, Dataset, InvalidDatasetType, + InvalidDimensions, UnsupportedFormat +) + diff --git a/tablib/core.py b/tablib/core.py index e4ceee1..ddb9769 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -1,28 +1,14 @@ # -*- coding: utf-8 -*- -# _____ ______ ______ _________ -# __ /_______ ____ /_ ___ /_ _____ ______ / -# _ __/_ __ `/__ __ \__ __ \_ _ \_ __ / -# / /_ / /_/ / _ /_/ /_ /_/ // __// /_/ / -# \__/ \__,_/ /_.___/ /_.___/ \___/ \__,_/ +""" Tablib - Core Library. 
+""" + +from tablib.formats import FORMATS as formats -import csv -import cStringIO -import random - -import simplejson as json -import xlwt -import yaml - -from helpers import * - - -# __all__ = ['Dataset', 'DataBook'] - -__name__ = 'tablib' -__version__ = '0.6.3' -__build__ = 0x000603 +__title__ = 'tablib' +__version__ = '0.8.1' +__build__ = 0x000801 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' @@ -32,29 +18,27 @@ class Dataset(object): """Epic Tabular-Dataset object. """ def __init__(self, *args, **kwargs): - self._data = None - self._saved_file = None - self._saved_format = None self._data = list(args) self.__headers = None try: self.headers = kwargs['headers'] - except KeyError, why: + except KeyError: self.headers = None try: self.title = kwargs['title'] - except KeyError, why: + except KeyError: self.title = None + self._register_formats() def __len__(self): return self.height def __getitem__(self, key): - if is_string(key): + if isinstance(key, basestring): if key in self.headers: pos = self.headers.index(key) # get 'key' index from each data return [row[pos] for row in self._data] @@ -79,6 +63,20 @@ class Dataset(object): except AttributeError: return '' + + @classmethod + def _register_formats(cls): + """Adds format properties.""" + for fmt in formats: + try: + try: + setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set)) + except AttributeError: + setattr(cls, fmt.title, property(fmt.export_set)) + + except AttributeError: + pass + def _validate(self, row=None, col=None, safety=False): """Assures size of every row in dataset is of proper proportions.""" @@ -113,6 +111,7 @@ class Dataset(object): return data + @property def height(self): """Returns the height of the Dataset.""" @@ -124,12 +123,13 @@ class Dataset(object): """Returns the width of the Dataset.""" try: return len(self._data[0]) - except IndexError, why: + except IndexError: try: return len(self.headers) - except TypeError, e: + 
except TypeError: return 0 - + + @property def headers(self): """Headers property.""" @@ -143,56 +143,32 @@ class Dataset(object): if collection: try: self.__headers = list(collection) - except TypeError, why: + except TypeError: raise TypeError else: self.__headers = None - @property def dict(self): """Returns python dict of Dataset.""" return self._package() - - @property - def json(self): - """Returns JSON representation of Dataset.""" - return json.dumps(self.dict) - - - @property - def yaml(self): - """Returns YAML representation of Dataset.""" - return yaml.dump(self.dict) - - - @property - def csv(self): - """Returns CSV representation of Dataset.""" - stream = cStringIO.StringIO() - _csv = csv.writer(stream) - - for row in self._package(dicts=False): - _csv.writerow(row) - - return stream.getvalue() - - - @property - def xls(self): - """Returns XLS representation of Dataset.""" - stream = cStringIO.StringIO() - - wb = xlwt.Workbook() - ws = wb.add_sheet(self.title if self.title else 'Tabbed Dataset') - for i, row in enumerate(self._package(dicts=False)): - for j, col in enumerate(row): - ws.write(i, j, str(col)) - - wb.save(stream) - return stream.getvalue() + + @dict.setter + def dict(self, pickle): + """Returns python dict of Dataset.""" + if not len(pickle): + return + if isinstance(pickle[0], list): + for row in pickle: + self.append(row) + elif isinstance(pickle[0], dict): + self.headers = pickle[0].keys() + for row in pickle: + self.append(row.values()) + else: + raise UnsupportedFormat def append(self, row=None, col=None): @@ -225,15 +201,22 @@ class Dataset(object): self._data.insert(i, tuple(row)) elif col: pass + + + def wipe(self): + """Erases all data from Dataset.""" + self._data = list() + self.__headers = None -class DataBook(object): +class Databook(object): """A book of Dataset objects. Currently, this exists only for XLS workbook support. 
""" def __init__(self, sets=[]): self._datasets = sets + self._register_formats() def __repr__(self): @@ -242,9 +225,26 @@ class DataBook(object): except AttributeError: return '' + def wipe(self): + """Wipe book clean.""" + self._datasets = [] + + @classmethod + def _register_formats(cls): + """Adds format properties.""" + for fmt in formats: + try: + try: + setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book)) + except AttributeError: + setattr(cls, fmt.title, property(fmt.export_book)) + + except AttributeError: + pass + def add_sheet(self, dataset): - """Add given dataset .""" + """Adds given dataset.""" if type(dataset) is Dataset: self._datasets.append(dataset) else: @@ -252,6 +252,7 @@ class DataBook(object): def _package(self): + """Packages Databook for delivery.""" collector = [] for dset in self._datasets: collector.append(dict( @@ -267,39 +268,6 @@ class DataBook(object): return len(self._datasets) - @property - def xls(self): - """Returns XLS representation of DataBook.""" - - stream = cStringIO.StringIO() - wb = xlwt.Workbook() - - for i, dset in enumerate(self._datasets): - ws = wb.add_sheet(dset.title if dset.title else 'Sheet%s' % (i)) - - #for row in self._package(dicts=False): - for i, row in enumerate(dset._package(dicts=False)): - for j, col in enumerate(row): - ws.write(i, j, str(col)) - - wb.save(stream) - return stream.getvalue() - - - @property - def json(self): - """Returns JSON representation of Databook.""" - - return json.dumps(self._package()) - - - @property - def yaml(self): - """Returns YAML representation of Databook.""" - - return yaml.dump(self._package()) - - class InvalidDatasetType(Exception): "Only Datasets can be added to a DataBook" diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py new file mode 100644 index 0000000..b22a959 --- /dev/null +++ b/tablib/formats/__init__.py @@ -0,0 +1,11 @@ +# -*- coding: utf-8 -*- + +""" Tablib - formats +""" + +import _csv as csv +import _json as json +import 
_xls as xls +import _yaml as yaml + +FORMATS = (csv, json, xls, yaml) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py new file mode 100644 index 0000000..8b19da7 --- /dev/null +++ b/tablib/formats/_csv.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +""" Tablib - CSV Support. +""" + +import cStringIO +import csv +import os + +import simplejson as json + +import tablib + + +title = 'csv' +extentions = ('csv',) + + + +def export_set(dataset): + """Returns CSV representation of Dataset.""" + stream = cStringIO.StringIO() + _csv = csv.writer(stream) + + for row in dataset._package(dicts=False): + _csv.writerow(row) + + return stream.getvalue() + + +def import_set(dset, in_stream, headers=True): + """Returns dataset from CSV stream.""" + + dset.wipe() + + rows = csv.reader(in_stream.split()) + for i, row in enumerate(rows): + + if (i == 0) and (headers): + dset.headers = row + else: + dset.append(row) diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py new file mode 100644 index 0000000..1f92b58 --- /dev/null +++ b/tablib/formats/_json.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +""" Tablib - JSON Support +""" + +import simplejson as json +import tablib.core + +title = 'json' +extentions = ('json', 'jsn') + + +def export_set(dataset): + """Returns JSON representation of Dataset.""" + return json.dumps(dataset.dict) + + +def export_book(databook): + """Returns JSON representation of Databook.""" + return json.dumps(databook._package()) + + +def import_set(dset, in_stream): + """Returns dataset from JSON stream.""" + + dset.wipe() + dset.dict = json.loads(in_stream) + + +def import_book(dbook, in_stream): + """Returns databook from JSON stream.""" + + dbook.wipe() + for sheet in json.loads(in_stream): + data = tablib.core.Dataset() + data.title = sheet['title'] + data.dict = sheet['data'] + dbook.add_sheet(data) diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py new file mode 100644 index 0000000..1a739af --- /dev/null +++ 
b/tablib/formats/_xls.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- + +""" Tablib - XLS Support. +""" + +import xlwt +import cStringIO + + +title = 'xls' +extentions = ('xls',) + + +def export_set(dataset): + """Returns XLS representation of Dataset.""" + + wb = xlwt.Workbook(encoding='utf8') + ws = wb.add_sheet(dataset.title if dataset.title else 'Tabbed Dataset') + + for i, row in enumerate(dataset._package(dicts=False)): + for j, col in enumerate(row): + ws.write(i, j, col) + + stream = cStringIO.StringIO() + wb.save(stream) + return stream.getvalue() + + +def export_book(databook): + """Returns XLS representation of DataBook.""" + + wb = xlwt.Workbook(encoding='utf8') + + for i, dset in enumerate(databook._datasets): + ws = wb.add_sheet(dset.title if dset.title else 'Sheet%s' % (i)) + + #for row in self._package(dicts=False): + for i, row in enumerate(dset._package(dicts=False)): + for j, col in enumerate(row): + ws.write(i, j, col) + + + stream = cStringIO.StringIO() + wb.save(stream) + return stream.getvalue() \ No newline at end of file diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py new file mode 100644 index 0000000..4cac8aa --- /dev/null +++ b/tablib/formats/_yaml.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +""" Tablib - YAML Support. 
+""" + +import yaml +import tablib + + + +title = 'yaml' +extentions = ('yaml', 'yml') + + + +def export_set(dataset): + """Returns YAML representation of Dataset.""" + return yaml.dump(dataset.dict) + + +def export_book(databook): + """Returns YAML representation of Databook.""" + return yaml.dump(databook._package()) + + +def import_set(dset, in_stream): + """Returns dataset from YAML stream.""" + + dset.wipe() + dset.dict = yaml.load(in_stream) + + +def import_book(dbook, in_stream): + """Returns databook from YAML stream.""" + + dbook.wipe() + + for sheet in yaml.load(in_stream): + data = tablib.core.Dataset() + data.title = sheet['title'] + data.dict = sheet['data'] + dbook.add_sheet(data) \ No newline at end of file diff --git a/tablib/helpers.py b/tablib/helpers.py index b64d4b6..f75c466 100644 --- a/tablib/helpers.py +++ b/tablib/helpers.py @@ -1,5 +1,8 @@ # -*- coding: utf-8 -*- +""" Tablib - General Helpers. +""" + import sys @@ -12,14 +15,23 @@ class Struct(object): def __getitem__(self, key): return getattr(self, key, None) + def dictionary(self): + """Returns dictionary representation of object.""" + return self.__dict__ + + def items(self): + """Returns items within object.""" + return self.__dict__.items() + + def keys(self): + """Returns keys within object.""" + return self.__dict__.keys() + + def piped(): - """Returns piped input via stdin, else False""" + """Returns piped input via stdin, else False.""" with sys.stdin as stdin: + # TTY is only way to detect if stdin contains data return stdin.read() if not stdin.isatty() else None - -def is_string(obj): - """Tests if an object is a string""" - - return True if type(obj).__name__ == 'str' else False \ No newline at end of file diff --git a/test_tablib.py b/test_tablib.py old mode 100644 new mode 100755 index 93ff252..67b693d --- a/test_tablib.py +++ b/test_tablib.py @@ -1,36 +1,61 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +"""Tests for tablib.""" + import unittest import tablib + class 
TablibTestCase(unittest.TestCase): + """Tablib test cases.""" + def setUp(self): - global data + """Create simple data set with headers.""" + + global data, book data = tablib.Dataset() + book = tablib.Databook() + + self.headers = ('first_name', 'last_name', 'gpa') + self.john = ('John', 'Adams', 90) + self.george = ('George', 'Washington', 67) + self.tom = ('Thomas', 'Jefferson', 50) + + self.founders = tablib.Dataset(headers=self.headers) + self.founders.append(self.john) + self.founders.append(self.george) + self.founders.append(self.tom) + def tearDown(self): + """Teardown.""" pass - + def test_empty_append(self): - new_row = (1,2,3) + """Verify append() correctly adds tuple with no headers.""" + new_row = (1, 2, 3) data.append(new_row) + # Verify width/data self.assertTrue(data.width == len(new_row)) + self.assertTrue(data[0] == new_row) def test_empty_append_with_headers(self): - + """Verify append() correctly detects mismatch of number of + headers and data. + """ data.headers = ['first', 'second'] - new_row = (1,2,3,4) - + new_row = (1, 2, 3, 4) + self.assertRaises(tablib.InvalidDimensions, data.append, new_row) def test_add_column(self): - # No Headers + """Verify adding column works with/without headers.""" data.append(['kenneth']) data.append(['bessie']) @@ -48,12 +73,10 @@ class TablibTestCase(unittest.TestCase): data.append(col=new_col) self.assertEquals(data[new_col[0]], new_col[1:]) - - + def test_add_column_no_data_no_headers(self): - - # no headers + """Verify adding new column with no headers.""" new_col = ('reitz', 'monke') @@ -65,8 +88,7 @@ class TablibTestCase(unittest.TestCase): def test_add_column_no_data_with_headers(self): - - # no headers + """Verify adding new column with headers.""" data.headers = ('first', 'last') @@ -77,11 +99,185 @@ class TablibTestCase(unittest.TestCase): self.assertEquals(data.width, 3) new_col = ('foo', 'bar') - + self.assertRaises(tablib.InvalidDimensions, data.append, col=new_col) - def tuple_check(self): - 
data.append(col=(1,2,3)) + + def test_header_slicing(self): + """Verify slicing by headers.""" + + self.assertEqual(self.founders['first_name'], + [self.john[0], self.george[0], self.tom[0]]) + self.assertEqual(self.founders['last_name'], + [self.john[1], self.george[1], self.tom[1]]) + self.assertEqual(self.founders['gpa'], + [self.john[2], self.george[2], self.tom[2]]) + + + def test_data_slicing(self): + """Verify slicing by data.""" + + # Slice individual rows + self.assertEqual(self.founders[0], self.john) + self.assertEqual(self.founders[:1], [self.john]) + self.assertEqual(self.founders[1:2], [self.george]) + self.assertEqual(self.founders[-1], self.tom) + self.assertEqual(self.founders[3:], []) + + # Slice multiple rows + self.assertEqual(self.founders[:], [self.john, self.george, self.tom]) + self.assertEqual(self.founders[0:2], [self.john, self.george]) + self.assertEqual(self.founders[1:3], [self.george, self.tom]) + self.assertEqual(self.founders[2:], [self.tom]) + + + def test_delete(self): + """Verify deleting from dataset works.""" + + # Delete from front of object + del self.founders[0] + self.assertEqual(self.founders[:], [self.george, self.tom]) + + # Verify dimensions, width should NOT change + self.assertEqual(self.founders.height, 2) + self.assertEqual(self.founders.width, 3) + + # Delete from back of object + del self.founders[1] + self.assertEqual(self.founders[:], [self.george]) + + # Verify dimensions, width should NOT change + self.assertEqual(self.founders.height, 1) + self.assertEqual(self.founders.width, 3) + + # Delete from invalid index + self.assertRaises(IndexError, self.founders.__delitem__, 3) + + + def test_csv_export(self): + """Verify exporting dataset object as CSV.""" + + # Build up the csv string with headers first, followed by each row + csv = '' + for col in self.headers: + csv += col + ',' + + csv = csv.strip(',') + '\r\n' + + for founder in self.founders: + for col in founder: + csv += str(col) + ',' + csv = 
csv.strip(',') + '\r\n' + + self.assertEqual(csv, self.founders.csv) + + + def test_unicode_append(self): + """Passes in a single unicode charecter and exports.""" + + new_row = ('å', 'é') + data.append(new_row) + + data.json + data.yaml + data.csv + data.xls + + + def test_book_export_no_exceptions(self): + """Test that varoius exports don't error out.""" + + book = tablib.Databook() + book.add_sheet(data) + + book.json + book.yaml + book.xls + + + def test_json_import_set(self): + """Generate and import JSON set serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _json = data.json + + data.json = _json + + self.assertEqual(_json, data.json) + + + def test_json_import_book(self): + """Generate and import JSON book serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + book.add_sheet(data) + _json = book.json + + book.json = _json + + self.assertEqual(_json, book.json) + + + def test_yaml_import_set(self): + """Generate and import YAML set serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _yaml = data.yaml + + data.yaml = _yaml + + self.assertEqual(_yaml, data.yaml) + + + def test_yaml_import_book(self): + """Generate and import YAML book serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + book.add_sheet(data) + _yaml = book.yaml + + book.yaml = _yaml + + self.assertEqual(_yaml, book.yaml) + + + def test_csv_import_set(self): + """Generate and import CSV set serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _csv = data.csv + + data.csv = _csv + + self.assertEqual(_csv, data.csv) + + + def test_wipe(self): + """Purge a dataset.""" + + new_row = (1, 2, 3) + data.append(new_row) + + # Verify width/data + self.assertTrue(data.width == len(new_row)) + self.assertTrue(data[0] == new_row) + + data.wipe() + new_row = 
(1, 2, 3, 4) + data.append(new_row) + self.assertTrue(data.width == len(new_row)) + self.assertTrue(data[0] == new_row) + if __name__ == '__main__': - unittest.main() \ No newline at end of file + unittest.main()