Files
tablib/tablib/core.py
T
Kenneth Reitz 9b3268f0ad Whoops.
2010-09-20 14:37:10 -04:00

318 lines
6.3 KiB
Python

# -*- coding: utf-8 -*-
# _____ ______ ______ _________
# __ /_______ ____ /_ ___ /_ _____ ______ /
# _ __/_ __ `/__ __ \__ __ \_ _ \_ __ /
# / /_ / /_/ / _ /_/ /_ /_/ // __// /_/ /
# \__/ \__,_/ /_.___/ /_.___/ \___/ \__,_/
import csv
import StringIO
import random
import simplejson as json
import xlwt
import yaml
from helpers import *
# __all__ = ['Dataset', 'DataBook']
__name__ = 'tablib'
__version__ = '0.7.0'
__build__ = 0x000700
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2010 Kenneth Reitz'
class Dataset(object):
"""Epic Tabular-Dataset object. """
def __init__(self, *args, **kwargs):
self._data = None
self._saved_file = None
self._saved_format = None
self._data = list(args)
self.__headers = None
try:
self.headers = kwargs['headers']
except KeyError, why:
self.headers = None
try:
self.title = kwargs['title']
except KeyError, why:
self.title = None
def __len__(self):
return self.height
def __getitem__(self, key):
if isinstance(key, basestring):
if key in self.headers:
pos = self.headers.index(key) # get 'key' index from each data
return [row[pos] for row in self._data]
else:
raise KeyError
else:
return self._data[key]
def __setitem__(self, key, value):
self._validate(value)
self._data[key] = tuple(value)
def __delitem__(self, key):
del self._data[key]
def __repr__(self):
try:
return '<%s dataset>' % (self.title.lower())
except AttributeError:
return '<dataset object>'
def _validate(self, row=None, col=None, safety=False):
"""Assures size of every row in dataset is of proper proportions."""
if row:
is_valid = (len(row) == self.width) if self.width else True
elif col:
if self.headers:
is_valid = (len(col) - 1) == self.height
else:
is_valid = (len(col) == self.height) if self.height else True
else:
is_valid = all((len(x)== self.width for x in self._data))
if is_valid:
return True
else:
if not safety:
raise InvalidDimensions
return False
def _package(self, dicts=True):
"""Packages Dataset into lists of dictionaries for transmission."""
if self.headers:
if dicts:
data = [dict(zip(self.headers, data_row)) for data_row in self ._data]
else:
data = [list(self.headers)] + list(self._data)
else:
data = [list(row) for row in self._data]
return data
@property
def height(self):
"""Returns the height of the Dataset."""
return len(self._data)
@property
def width(self):
"""Returns the width of the Dataset."""
try:
return len(self._data[0])
except IndexError, why:
try:
return len(self.headers)
except TypeError, e:
return 0
@property
def headers(self):
"""Headers property."""
return self.__headers
@headers.setter
def headers(self, collection):
"""Validating headers setter."""
self._validate(collection)
if collection:
try:
self.__headers = list(collection)
except TypeError, why:
raise TypeError
else:
self.__headers = None
@property
def dict(self):
"""Returns python dict of Dataset."""
return self._package()
def json(self):
"""Returns JSON representation of Dataset."""
return json.dumps(self.dict)
def yaml(self):
"""Returns YAML representation of Dataset."""
return yaml.dump(self.dict)
def csv(self):
"""Returns CSV representation of Dataset."""
stream = StringIO.StringIO()
_csv = csv.writer(stream)
for row in self._package(dicts=False):
_csv.writerow(row)
return stream.getvalue()
def xls(self, path=None):
"""Returns XLS representation of Dataset."""
wb = xlwt.Workbook(encoding='utf8')
ws = wb.add_sheet(self.title if self.title else 'Tabbed Dataset')
for i, row in enumerate(self._package(dicts=False)):
for j, col in enumerate(row):
ws.write(i, j, col)
if path:
wb.save(path)
return True
else:
stream = StringIO.StringIO()
wb.save(stream)
return stream.getvalue()
def append(self, row=None, col=None):
"""Adds a row to the end of Dataset"""
if row:
self._validate(row)
self._data.append(tuple(row))
elif col:
self._validate(col=col)
if self.headers:
# pop the first item off, add to headers
self.headers.append(col[0])
col = col[1:]
if self.height and self.width:
for i, row in enumerate(self._data):
_row = list(row)
_row.append(col[i])
self._data[i] = tuple(_row)
else:
self._data = [tuple([row]) for row in col]
def insert(self, i, row=None, col=None):
"""Inserts a row at given position in Dataset"""
if row:
self._validate(row)
self._data.insert(i, tuple(row))
elif col:
pass
class Databook(object):
"""A book of Dataset objects.
Currently, this exists only for XLS workbook support.
"""
def __init__(self, sets=[]):
self._datasets = sets
def __repr__(self):
try:
return '<%s databook>' % (self.title.lower())
except AttributeError:
return '<databook object>'
def add_sheet(self, dataset):
"""Add given dataset ."""
if type(dataset) is Dataset:
self._datasets.append(dataset)
else:
raise InvalidDatasetType
def _package(self):
collector = []
for dset in self._datasets:
collector.append(dict(
title = dset.title,
data = dset.dict
))
return collector
@property
def size(self):
"""The number of the Datasets within DataBook."""
return len(self._datasets)
def xls(self, path=None):
"""Returns XLS representation of DataBook."""
wb = xlwt.Workbook(encoding='utf8')
for i, dset in enumerate(self._datasets):
ws = wb.add_sheet(dset.title if dset.title else 'Sheet%s' % (i))
#for row in self._package(dicts=False):
for i, row in enumerate(dset._package(dicts=False)):
for j, col in enumerate(row):
ws.write(i, j, col)
if path:
wb.save(path)
return True
else:
stream = cStringIO.StringIO()
wb.save(stream)
return stream.getvalue()
def json(self):
"""Returns JSON representation of Databook."""
return json.dumps(self._package())
def yaml(self):
"""Returns YAML representation of Databook."""
return yaml.dump(self._package())
class InvalidDatasetType(Exception):
"Only Datasets can be added to a DataBook"
class InvalidDimensions(Exception):
"Invalid size"
class UnsupportedFormat(NotImplementedError):
"Format is not supported"