diff --git a/.gitignore b/.gitignore index b9b321d..897c9ee 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ profile # pycharm noise .idea .idea/* +hi diff --git a/HISTORY.rst b/HISTORY.rst index a119b99..996624f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,7 +1,11 @@ History ======= -0.1.0 (2010-09-??) +0.6.0 (2010-09-11) ------------------ -* Initial Release \ No newline at end of file +* Public Release. +* Export Support for XLS, JSON, YAML, and CSV. +* DataBook Export for XLS, JSON, and YAML. +* Python Dict Property Support. + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..b1643fd --- /dev/null +++ b/MANIFEST @@ -0,0 +1,8 @@ +HISTORY.rst +README.rst +setup.py +tabbed +tablib/__init__.py +tablib/cli.py +tablib/core.py +tablib/helpers.py diff --git a/NOTICE b/NOTICE new file mode 100644 index 0000000..e69de29 diff --git a/README.rst b/README.rst index 47b91cb..812a0a6 100644 --- a/README.rst +++ b/README.rst @@ -1,18 +1,17 @@ -Tabbed: format-agnostic tabular dataset library +Tablib: format-agnostic tabular dataset library =============================================== :: - _____ ______ ______ _________ - __ /_______ ____ /_ ___ /_ _____ ______ / - _ __/_ __ `/__ __ \__ __ \_ _ \_ __ / - / /_ / /_/ / _ /_/ /_ /_/ // __// /_/ / - \__/ \__,_/ /_.___/ /_.___/ \___/ \__,_/ - -.. *Tabbed is under active documentation-driven development.* + _____ ______ ___________ ______ + __ /_______ ____ /_ ___ /___(_)___ /_ + _ __/_ __ `/__ __ \__ / __ / __ __ \ + / /_ / /_/ / _ /_/ /_ / _ / _ /_/ / + \__/ \__,_/ /_.___/ /_/ /_/ /_.___/ -Tabbed is a format-agnostic tabular dataset library, written in Python. + +Tablib is a format-agnostic tabular dataset library, written in Python. It is a full python module which doubles as a CLI application for quick dataset conversions. @@ -22,29 +21,15 @@ Formats supported: - YAML - Excel - CSV -.. 
- HTML -At this time, Tabbed supports the **export** of it's powerful Dataset object instances into any of the above formats. Import is underway. +At this time, Tablib supports the **export** of its powerful Dataset object instances into any of the above formats. Import is underway. -Please note that tabbed *purposefully* excludes XML support. It always will. +Please note that tablib *purposefully* excludes XML support. It always will. Features -------- -.. Convert datafile formats via API: :: -.. -.. tablib.source(filename='data.csv').export('data.json') - - -.. Convert datafile formats via CLI: :: -.. -.. $ tabbed data.csv data.json - -.. Convert data formats via CLI pipe interface: :: -.. -.. $ curl http://domain.dev/dataset.json | tabbed --to excel | gist -p - Populate fresh data files: :: @@ -76,11 +61,11 @@ Slice rows: :: # >>> [('John', 'Adams', 4.0), ('George', 'Washington', 2.6)] -.. Slice columns by header: :: -.. -.. print data['first_name'] -.. # >>> ['John', 'George', 'Henry'] -.. +Slice columns by header: :: + + print data['first_name'] + # >>> ['John', 'George', 'Henry'] + Manipulate rows by index: :: @@ -88,14 +73,8 @@ Manipulate rows by index: :: print data[0:1] # >>> [('George', 'Washington', 2.6), ('Henry', 'Ford', 2.3)] - .. # Update saved file - .. data.save() -.. Export to various formats: :: -.. -.. # Save copy as CSV -.. 
data.export('backup.csv') Roadmap ------- diff --git a/setup.py b/setup.py index c01b835..3c895d1 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ import os import sys -import tablib from distutils.core import setup @@ -18,26 +17,27 @@ if sys.argv[-1] == "publish": setup( name='tablib', - version='0.0.4', - description='Python wrapper for Gist API', + version='0.6.1', + description='Format agnostic tabular data library (XLS, CSV, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', packages=['tablib'], + install_requires=['xlwt', 'simplejson', 'PyYAML'], license='MIT', classifiers=( 'Development Status :: 4 - Beta', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', - 'Programming Language :: Python :: 2.5', - 'Programming Language :: Python :: 2.6', + # 'Programming Language :: Python :: 2.5', + # 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', ), - entry_points={ - 'console_scripts': [ - 'tabbed = tablib.cli:start', - ], - } + # entry_points={ + # 'console_scripts': [ + # 'tabbed = tablib.cli:start', + # ], + # } ) diff --git a/tablib/cli.py b/tablib/cli.py index c23b427..94920ee 100644 --- a/tablib/cli.py +++ b/tablib/cli.py @@ -28,7 +28,7 @@ for format in FORMATS: def start(in_file=None, out_file=None, **opts): """Covertly convert dataset formats""" - opts = Object(**opts) + opts = Struct(**opts) if opts.version: print('Tabbed, Ver. 
%s' % tabbed.core.__version__) diff --git a/tablib/core.py b/tablib/core.py index 56322a6..f21bb88 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -9,24 +9,23 @@ import csv import cStringIO -import os +import random + from helpers import * -from packages import simplejson as json -from packages import xlwt +import simplejson as json -try: - import yaml -except ImportError, why: - from packages import yaml - +import xlwt +import yaml -__all__ = ['Dataset', 'source'] + + +__all__ = ['Dataset', 'DataBook', 'source'] __name__ = 'tablib' -__version__ = '0.0.5' -__build__ = '0x000005' +__version__ = '0.6.0' +__build__ = 0x000600 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' @@ -43,25 +42,23 @@ class Dataset(object): self._data = None self._saved_file = None self._saved_format = None - - self._data = list(args) try: - self.headers = kwargs['headers'] + self.headers = kwargs['headers'] except KeyError, why: self.headers = None try: - self.title = kwargs['title'] + self.title = kwargs['title'] except KeyError, why: self.title = None - + def __len__(self): return self.height - + def __getitem__(self, key): if is_string(key): if key in self.headers: @@ -81,11 +78,11 @@ class Dataset(object): def __delitem__(self, key): del self._data[key] - + def __repr__(self): - if self.title: + try: return '<%s dataset>' % (self.title.lower()) - else: + except AttributeError: return '' @@ -127,21 +124,25 @@ class Dataset(object): def width(self): """Returns the width of the Dataset.""" try: - return len(self._data[0]) + return len(self._data[0]) except KeyError, why: - return 0 + return 0 + + @property + def dict(self): + """Returns python dict of Dataset.""" + return self._package() - @property def json(self): """Returns JSON representation of Dataset.""" - return json.dumps(self._package()) + return json.dumps(self.dict) + - @property def yaml(self): """Returns YAML representation of Dataset.""" - return yaml.dump(self._package()) + 
return yaml.dump(self.dict) @property @@ -152,7 +153,7 @@ class Dataset(object): for row in self._package(dicts=False): _csv.writerow(row) - + return stream.getvalue() @@ -160,23 +161,24 @@ class Dataset(object): def xls(self): """Returns XLS representation of Dataset.""" stream = cStringIO.StringIO() - + wb = xlwt.Workbook() ws = wb.add_sheet(self.title if self.title else 'Tabbed Dataset') # for row in self._package(dicts=False): for i, row in enumerate(self._package(dicts=False)): for j, col in enumerate(row): - ws.write(i, j, col) + ws.write(i, j, str(col)) wb.save(stream) return stream.getvalue() - + def append(self, row): """Adds a row to the end of Dataset""" self._validate(row) self._data.append(tuple(row)) + def index(self, i, row): """Inserts a row at given position in Dataset""" self._validate(row) @@ -187,32 +189,100 @@ class Dataset(object): # todo: accpept string if headers, or index nubmer pass - def save(self, filename=None, format=None): """Saves dataset""" if not format: format = filename.split('.')[-1].lower() # set format from filename - + if format not in FILE_EXTENSIONS: raise UnsupportedFormat - + # note export format # open file, save the bitch - + def export(self): """Exports Dataset to given filename or file-object.""" + pass +class DataBook(object): + """A book of Dataset objects. + Currently, this exists only for XLS workbook support. 
+ """ + + def __init__(self, sets=[]): + self._datasets = sets + + def __repr__(self): + try: + return '<%s databook>' % (self.title.lower()) + except AttributeError: + return '' + + def add_sheet(self, dataset): + """Add given dataset .""" + if type(dataset) is Dataset: + self._datasets.append(dataset) + else: + raise InvalidDatasetType + + def _package(self): + collector = [] + for dset in self._datasets: + collector.append(dict( + title = dset.title, + data = dset.dict + )) + return collector + + @property + def size(self): + """The number of the Datasets within DataBook.""" + return len(self._datasets) + + + @property + def xls(self): + """Returns XLS representation of DataBook.""" + + stream = cStringIO.StringIO() + wb = xlwt.Workbook() + + for dset in self._datasets: + ws = wb.add_sheet(dset.title if dset.title else 'Tabbed Dataset %s' % (int(random.random() * 100000000))) + + #for row in self._package(dicts=False): + for i, row in enumerate(dset._package(dicts=False)): + for j, col in enumerate(row): + ws.write(i, j, str(col)) + + wb.save(stream) + return stream.getvalue() + + @property + def json(self): + """Returns JSON representation of Databook.""" + + return json.dumps(self._package()) + + @property + def yaml(self): + """Returns YAML representation of Databook.""" + + return yaml.dump(self._package()) + + +class InvalidDatasetType(Exception): + "Only Datasets can be added to a DataBook" + class InvalidDimensions(Exception): "Invalid size" - class UnsupportedFormat(NotImplementedError): "Format is not supported" - def source(src=None, file=None, filename=None): """docstring for import""" diff --git a/tablib/helpers.py b/tablib/helpers.py index 1afbc4c..a12c4dd 100644 --- a/tablib/helpers.py +++ b/tablib/helpers.py @@ -3,7 +3,7 @@ import sys -class Object(object): +class Struct(object): """Your attributes are belong to us.""" def __init__(self, **entries): diff --git a/tablib/packages/xlwt/BIFFRecords.py b/tablib/packages/xlwt/BIFFRecords.py deleted file 
mode 100644 index 632c64e..0000000 --- a/tablib/packages/xlwt/BIFFRecords.py +++ /dev/null @@ -1,2393 +0,0 @@ -# -*- coding: cp1252 -*- -from struct import pack -from UnicodeUtils import upack1, upack2 -import sys - -class SharedStringTable(object): - _SST_ID = 0x00FC - _CONTINUE_ID = 0x003C - - def __init__(self, encoding): - self.encoding = encoding - self._str_indexes = {} - self._tally = [] - self._add_calls = 0 - # Following 3 attrs are used for temporary storage in the - # get_biff_record() method and methods called by it. The pseudo- - # initialisation here is for documentation purposes only. - self._sst_record = None - self._continues = None - self._current_piece = None - - def add_str(self, s): - if self.encoding != 'ascii' and not isinstance(s, unicode): - s = unicode(s, self.encoding) - self._add_calls += 1 - if s not in self._str_indexes: - idx = len(self._str_indexes) - self._str_indexes[s] = idx - self._tally.append(1) - else: - idx = self._str_indexes[s] - self._tally[idx] += 1 - return idx - - def del_str(self, idx): - # This is called when we are replacing the contents of a string cell. 
- assert self._tally[idx] > 0 - self._tally[idx] -= 1 - self._add_calls -= 1 - - def str_index(self, s): - return self._str_indexes[s] - - def get_biff_record(self): - self._sst_record = '' - self._continues = [None, None] - self._current_piece = pack(' 0x2020: # limit for BIFF7/8 - chunks = [] - pos = 0 - while pos < len(data): - chunk_pos = pos + 0x2020 - chunk = data[pos:chunk_pos] - chunks.append(chunk) - pos = chunk_pos - continues = pack('<2H', self._REC_ID, len(chunks[0])) + chunks[0] - for chunk in chunks[1:]: - continues += pack('<2H%ds'%len(chunk), 0x003C, len(chunk), chunk) - # 0x003C -- CONTINUE record id - return continues - else: - return self.get_rec_header() + data - - -class Biff8BOFRecord(BiffRecord): - """ - Offset Size Contents - 0 2 Version, contains 0600H for BIFF8 and BIFF8X - 2 2 Type of the following data: - 0005H = Workbook globals - 0006H = Visual Basic module - 0010H = Worksheet - 0020H = Chart - 0040H = Macro sheet - 0100H = Workspace file - 4 2 Build identifier - 6 2 Build year - 8 4 File history flags - 12 4 Lowest Excel version that can read all records in this file - """ - _REC_ID = 0x0809 - # stream types - BOOK_GLOBAL = 0x0005 - VB_MODULE = 0x0006 - WORKSHEET = 0x0010 - CHART = 0x0020 - MACROSHEET = 0x0040 - WORKSPACE = 0x0100 - - def __init__(self, rec_type): - version = 0x0600 - build = 0x0DBB - year = 0x07CC - file_hist_flags = 0x00L - ver_can_read = 0x06L - - self._rec_data = pack('<4H2I', version, rec_type, build, year, file_hist_flags, ver_can_read) - - -class InteraceHdrRecord(BiffRecord): - _REC_ID = 0x00E1 - - def __init__(self): - self._rec_data = pack('BB', 0xB0, 0x04) - - -class InteraceEndRecord(BiffRecord): - _REC_ID = 0x00E2 - - def __init__(self): - self._rec_data = '' - - -class MMSRecord(BiffRecord): - _REC_ID = 0x00C1 - - def __init__(self): - self._rec_data = pack('> 15 - c = low_15 | high_15 - passwd_hash ^= c - passwd_hash ^= len(plaintext) - passwd_hash ^= 0xCE4B - return passwd_hash - - def __init__(self, 
passwd = ""): - self._rec_data = pack('