Merge branch 'feature/imports' into develop

This commit is contained in:
Kenneth Reitz
2010-09-25 15:57:30 -04:00
8 changed files with 328 additions and 80 deletions
+1 -1
View File
@@ -131,7 +131,7 @@ If you'd like to contribute, simply fork `the repository`_, commit your changes
Roadmap
-------
- Import datasets from CSV, JSON, YAML
- Import datasets from CSV.
- Release CLI Interface
- Auto-detect import format
- Add possible other exports (SQL?)
+65 -78
View File
@@ -1,9 +1,8 @@
# -*- coding: utf-8 -*-
""" Tablib - Core Library
""" Tablib - core.
"""
import csv
import cStringIO
@@ -11,6 +10,8 @@ import simplejson as json
import xlwt
import yaml
from tablib.formats import formats
__title__ = 'tablib'
__version__ = '0.7.1'
@@ -24,9 +25,6 @@ class Dataset(object):
"""Epic Tabular-Dataset object. """
def __init__(self, *args, **kwargs):
self._data = None
self._saved_file = None
self._saved_format = None
self._data = list(args)
self.__headers = None
@@ -40,6 +38,7 @@ class Dataset(object):
except KeyError:
self.title = None
self._register_formats()
def __len__(self):
return self.height
@@ -71,6 +70,20 @@ class Dataset(object):
except AttributeError:
return '<dataset object>'
@classmethod
def _register_formats(cls):
    """Attach one property per entry in ``formats`` to the class.

    Each property is named after ``fmt.title`` and reads through
    ``fmt.export_set``.  When the format also provides ``import_set`` the
    property is writable through it.  Formats lacking ``title`` or
    ``export_set`` are skipped.
    """
    for fmt in formats:
        if not (hasattr(fmt, 'export_set') and hasattr(fmt, 'title')):
            continue

        if hasattr(fmt, 'import_set'):
            accessor = property(fmt.export_set, fmt.import_set)
        else:
            accessor = property(fmt.export_set)

        setattr(cls, fmt.title, accessor)
def _validate(self, row=None, col=None, safety=False):
"""Assures size of every row in dataset is of proper proportions."""
@@ -105,6 +118,7 @@ class Dataset(object):
return data
@property
def height(self):
"""Returns the height of the Dataset."""
@@ -121,7 +135,8 @@ class Dataset(object):
return len(self.headers)
except TypeError:
return 0
@property
def headers(self):
"""Headers property."""
@@ -146,42 +161,21 @@ class Dataset(object):
"""Returns python dict of Dataset."""
return self._package()
@property
def json(self):
"""Returns JSON representation of Dataset."""
return json.dumps(self.dict)
@property
def yaml(self):
"""Returns YAML representation of Dataset."""
return yaml.dump(self.dict)
@property
def csv(self):
"""Returns CSV representation of Dataset."""
stream = cStringIO.StringIO()
_csv = csv.writer(stream)
for row in self._package(dicts=False):
_csv.writerow(row)
return stream.getvalue()
@property
def xls(self):
"""Returns XLS representation of Dataset."""
workb = xlwt.Workbook(encoding='utf8')
works = workb.add_sheet(self.title if self.title else 'Tabbed Dataset')
for i, row in enumerate(self._package(dicts=False)):
for j, col in enumerate(row):
works.write(i, j, col)
stream = cStringIO.StringIO()
workb.save(stream)
return stream.getvalue()
@dict.setter
def dict(self, pickle):
    """Load rows into the Dataset from a native Python structure.

    ``pickle`` is either a list of lists (each inner list is appended as
    a row) or a list of dicts (the keys of the first dict become the
    headers and every dict contributes one row).  An empty ``pickle`` is
    ignored.

    Raises UnsupportedFormat when the first element is neither a list
    nor a dict.
    """
    if not len(pickle):
        return

    first = pickle[0]

    if isinstance(first, list):
        for row in pickle:
            self.append(row)
    elif isinstance(first, dict):
        headers = list(first.keys())
        self.headers = headers
        for row in pickle:
            # Pull values by header key so each row lines up with the
            # header order; separate dicts need not iterate alike.
            self.append([row[key] for key in headers])
    else:
        raise UnsupportedFormat
def append(self, row=None, col=None):
@@ -214,6 +208,12 @@ class Dataset(object):
self._data.insert(i, tuple(row))
elif col:
pass
def wipe(self):
    """Drop every row and forget the headers."""
    self.__headers = None
    self._data = []
class Databook(object):
@@ -223,14 +223,35 @@ class Databook(object):
def __init__(self, sets=None):
    """Create a Databook.

    :param sets: optional list of datasets to start with.  The list is
        stored as given (not copied); when omitted, the Databook gets a
        new empty list of its own.
    """
    # A literal ``[]`` default is built once at definition time and would
    # be shared by every Databook created without arguments.
    self._datasets = [] if sets is None else sets

    self._register_formats()
def __repr__(self):
return '<databook object>'
try:
return '<%s databook>' % (self.title.lower())
except AttributeError:
return '<databook object>'
def wipe(self):
    """Remove every dataset from the book."""
    self._datasets = list()
@classmethod
def _register_formats(cls):
    """Attach one property per book-capable entry in ``formats``.

    Each property is named after ``fmt.title`` and reads through
    ``fmt.export_book``.  When the format also provides ``import_book``
    the property is writable through it.  Formats lacking ``title`` or
    ``export_book`` are skipped.
    """
    for fmt in formats:
        if not (hasattr(fmt, 'export_book') and hasattr(fmt, 'title')):
            continue

        if hasattr(fmt, 'import_book'):
            accessor = property(fmt.export_book, fmt.import_book)
        else:
            accessor = property(fmt.export_book)

        setattr(cls, fmt.title, accessor)
def add_sheet(self, dataset):
"""Add given dataset ."""
"""Adds given dataset."""
if type(dataset) is Dataset:
self._datasets.append(dataset)
else:
@@ -253,40 +274,6 @@ class Databook(object):
"""The number of the Datasets within DataBook."""
return len(self._datasets)
@property
def xls(self):
"""Returns XLS representation of DataBook."""
workb = xlwt.Workbook(encoding='utf8')
for i, dset in enumerate(self._datasets):
works = workb.add_sheet(dset.title if dset.title else 'Sheet%s' % (i))
#for row in self._package(dicts=False):
for i, row in enumerate(dset._package(dicts=False)):
for j, col in enumerate(row):
works.write(i, j, col)
stream = cStringIO.StringIO()
workb.save(stream)
return stream.getvalue()
@property
def json(self):
"""Returns JSON representation of Databook."""
return json.dumps(self._package())
@property
def yaml(self):
"""Returns YAML representation of Databook."""
return yaml.dump(self._package())
class InvalidDatasetType(Exception):
+11
View File
@@ -0,0 +1,11 @@
# -*- coding: utf-8 -*-
""" Tablib - formats
"""
import _csv as csv
import _json as json
import _xls as xls
import _yaml as yaml
formats = (csv, json, xls, yaml)
+39
View File
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
import cStringIO
import csv
import os
import simplejson as json
import tablib
title = 'csv'
extentions = ('csv',)
def export_set(dataset):
    """Serialize a Dataset to a CSV string."""
    buf = cStringIO.StringIO()
    csv.writer(buf).writerows(dataset._package(dicts=False))
    return buf.getvalue()
def import_set(dset, in_stream, headers=True):
    """Replace the contents of ``dset`` with rows parsed from CSV text.

    :param dset: dataset to fill; it is wiped first.
    :param in_stream: the CSV document as a single string.
    :param headers: when true, the first row becomes ``dset.headers``
        instead of a data row.
    """
    dset.wipe()

    # Split on line boundaries only.  A bare ``split()`` also breaks on
    # spaces and tabs, tearing apart any field that contains whitespace.
    # Blank lines are dropped, as they were before.
    lines = [line for line in in_stream.splitlines() if line.strip()]

    for i, row in enumerate(csv.reader(lines)):
        if (i == 0) and (headers):
            dset.headers = row
        else:
            dset.append(row)
+35
View File
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
import simplejson as json
import tablib.core
title = 'json'
extentions = ('json', 'jsn')
def export_set(dataset):
    """Serialize the Dataset's ``dict`` form to a JSON string."""
    payload = dataset.dict
    return json.dumps(payload)
def export_book(databook):
    """Serialize the Databook's packaged form to a JSON string."""
    packaged = databook._package()
    return json.dumps(packaged)
def import_set(dset, in_stream):
    """Wipe ``dset`` and refill it from a JSON string."""
    dset.wipe()
    parsed = json.loads(in_stream)
    dset.dict = parsed
def import_book(dbook, in_stream):
    """Wipe ``dbook`` and refill it from a JSON string.

    The decoded document is iterated as a sequence of sheets; each sheet's
    ``'title'`` and ``'data'`` entries populate a new Dataset that is then
    added to the book.
    """
    dbook.wipe()

    sheets = json.loads(in_stream)
    for sheet in sheets:
        dataset = tablib.core.Dataset()
        dataset.title = sheet['title']
        dataset.dict = sheet['data']
        dbook.add_sheet(dataset)
+42
View File
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
import xlwt
import cStringIO
title = 'xls'
extentions = ('xls',)
def export_set(dataset):
    """Render a Dataset as a single-sheet XLS workbook and return its bytes."""
    book = xlwt.Workbook(encoding='utf8')

    # Untitled datasets get a fixed fallback sheet name.
    sheet_name = dataset.title if dataset.title else 'Tabbed Dataset'
    sheet = book.add_sheet(sheet_name)

    for row_idx, row in enumerate(dataset._package(dicts=False)):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)

    out = cStringIO.StringIO()
    book.save(out)
    return out.getvalue()
def export_book(databook):
    """Render a Databook as an XLS workbook, one sheet per dataset."""
    book = xlwt.Workbook(encoding='utf8')

    for sheet_no, dset in enumerate(databook._datasets):
        # Untitled datasets are named after their position in the book.
        sheet_name = dset.title if dset.title else 'Sheet%s' % (sheet_no)
        sheet = book.add_sheet(sheet_name)

        for row_idx, row in enumerate(dset._package(dicts=False)):
            for col_idx, value in enumerate(row):
                sheet.write(row_idx, col_idx, value)

    out = cStringIO.StringIO()
    book.save(out)
    return out.getvalue()
+37
View File
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
import yaml
import tablib
title = 'yaml'
extentions = ('yaml', 'yml')
def export_set(dataset):
    """Serialize the Dataset's ``dict`` form to a YAML string."""
    payload = dataset.dict
    return yaml.dump(payload)
def export_book(databook):
    """Serialize the Databook's packaged form to a YAML string."""
    packaged = databook._package()
    return yaml.dump(packaged)
def import_set(dset, in_stream):
    """Wipe ``dset`` and refill it from a YAML string."""
    dset.wipe()

    # NOTE(review): yaml.load can construct arbitrary Python objects from
    # tagged input; consider yaml.safe_load if in_stream may be untrusted.
    parsed = yaml.load(in_stream)
    dset.dict = parsed
def import_book(dbook, in_stream):
    """Wipe ``dbook`` and refill it from a YAML string.

    The decoded document is iterated as a sequence of sheets; each sheet's
    ``'title'`` and ``'data'`` entries populate a new Dataset that is then
    added to the book.
    """
    dbook.wipe()

    # NOTE(review): yaml.load can construct arbitrary Python objects from
    # tagged input; consider yaml.safe_load if in_stream may be untrusted.
    sheets = yaml.load(in_stream)
    for sheet in sheets:
        dataset = tablib.core.Dataset()
        dataset.title = sheet['title']
        dataset.dict = sheet['data']
        dbook.add_sheet(dataset)
+98 -1
View File
@@ -13,8 +13,10 @@ class TablibTestCase(unittest.TestCase):
def setUp(self):
"""Create simple data set with headers."""
global data
global data, book
data = tablib.Dataset()
book = tablib.Databook()
self.headers = ('first_name', 'last_name', 'gpa')
self.john = ('John', 'Adams', 90)
@@ -181,6 +183,101 @@ class TablibTestCase(unittest.TestCase):
data.csv
data.xls
def test_book_export_no_exceptions(self):
    """Test that the various book exports don't error out."""
    workbook = tablib.Databook()
    workbook.add_sheet(data)

    # Reading each property is enough to trigger the export.
    workbook.json
    workbook.yaml
    workbook.xls
def test_json_import_set(self):
    """Round-trip a dataset through its JSON serialization."""
    for person in (self.john, self.george):
        data.append(person)
    data.headers = self.headers

    exported = data.json
    data.json = exported

    self.assertEqual(exported, data.json)
def test_json_import_book(self):
    """Round-trip a databook through its JSON serialization."""
    for person in (self.john, self.george):
        data.append(person)
    data.headers = self.headers

    book.add_sheet(data)
    exported = book.json
    book.json = exported

    self.assertEqual(exported, book.json)
def test_yaml_import_set(self):
    """Round-trip a dataset through its YAML serialization."""
    for person in (self.john, self.george):
        data.append(person)
    data.headers = self.headers

    exported = data.yaml
    data.yaml = exported

    self.assertEqual(exported, data.yaml)
def test_yaml_import_book(self):
    """Round-trip a databook through its YAML serialization."""
    for person in (self.john, self.george):
        data.append(person)
    data.headers = self.headers

    book.add_sheet(data)
    exported = book.yaml
    book.yaml = exported

    self.assertEqual(exported, book.yaml)
def test_csv_import_set(self):
    """Round-trip a dataset through its CSV serialization."""
    for person in (self.john, self.george):
        data.append(person)
    data.headers = self.headers

    exported = data.csv
    data.csv = exported

    self.assertEqual(exported, data.csv)
def test_wipe(self):
    """Wiping resets the dataset so a row of a different width fits."""
    narrow = (1, 2, 3)
    data.append(narrow)

    # Width and content reflect the first row.
    self.assertTrue(data.width == len(narrow))
    self.assertTrue(data[0] == narrow)

    data.wipe()

    wide = (1, 2, 3, 4)
    data.append(wide)

    self.assertTrue(data.width == len(wide))
    self.assertTrue(data[0] == wide)
if __name__ == '__main__':
unittest.main()