Compare commits

...

57 Commits

Author SHA1 Message Date
Kenneth Reitz 0409ff50af Merge branch 'release/0.9.0' 2010-11-04 05:47:25 -04:00
Kenneth Reitz dd24edcc24 Big history update. 2010-11-04 05:47:13 -04:00
Kenneth Reitz cf28f4baa8 Merge branch 'release/0.9.0' 2010-11-04 05:43:54 -04:00
Kenneth Reitz 52dcf79c41 No append documentation necessary. 2010-11-04 05:43:44 -04:00
Kenneth Reitz 49f098ee22 Verb-age update for documentation. 2010-11-04 05:43:23 -04:00
Kenneth Reitz 642b1d8def Exception documentation update. 2010-11-04 05:43:00 -04:00
Kenneth Reitz f6964bba8f Version bump. 2010-11-04 04:49:37 -04:00
Kenneth Reitz 8d6e75ad20 Fixes for 0.9.0. 2010-11-04 04:49:31 -04:00
Kenneth Reitz 30487999ba CI done. 2010-11-04 04:47:52 -04:00
Kenneth Reitz b74308e81e Append fixed. 2010-11-04 04:47:25 -04:00
Kenneth Reitz 577289cbc3 Callable Columns again :) 2010-11-04 04:46:54 -04:00
Kenneth Reitz cf10703e31 Updated Callable Columns support. 2010-11-04 04:46:38 -04:00
Kenneth Reitz 778ad0265e Added new required headers for adding columns. 2010-11-04 04:26:03 -04:00
Kenneth Reitz e3dedb8887 Cleanup todo. 2010-11-04 04:22:50 -04:00
Kenneth Reitz c6e240fa52 Cleanups. 2010-11-04 04:22:37 -04:00
Kenneth Reitz 5c747c9c2e Keepin' it DRY. 2010-11-04 04:20:45 -04:00
Kenneth Reitz 0bbd990ed8 whitespace fix. 2010-11-04 04:13:09 -04:00
Kenneth Reitz fcada243a2 Added new Row class and Dataset.filter(). 2010-11-04 04:13:02 -04:00
Kenneth Reitz fca8ad6182 Ugh.... 2010-11-04 03:55:42 -04:00
Kenneth Reitz 35d9e390fd New todo. 2010-11-04 01:33:12 -04:00
Kenneth Reitz 8ca180c461 Documentation configuration changes for colors. 2010-11-04 01:20:45 -04:00
Kenneth Reitz ff63558a67 Added TSV to Readme. 2010-11-04 01:07:04 -04:00
Kenneth Reitz f621b56178 TODO! 2010-11-04 01:06:17 -04:00
Kenneth Reitz 2b529bcb1c Quotation constancy. 2010-11-04 01:06:07 -04:00
Kenneth Reitz 90c3435600 TODO Update. 2010-11-04 01:02:33 -04:00
Kenneth Reitz 1fa28ee2ca Added test_suite.sh script. 2010-11-04 01:01:54 -04:00
Kenneth Reitz a5cae7c249 Adde Luca Beltrame to AUTHORS. 2010-11-04 00:59:06 -04:00
Kenneth Reitz 666991ca1e Merge branch 'master' of github.com:kennethreitz/tablib into develop 2010-11-04 00:57:55 -04:00
Kenneth Reitz 5f4162918f New site URL. 2010-11-04 00:57:25 -04:00
Kenneth Reitz b554ce36bb Official removal of cli interface. Bad idea. 2010-11-04 00:57:18 -04:00
Kenneth Reitz e5e22d3ca2 Documentation typo fix. 2010-11-04 00:57:12 -04:00
Kenneth Reitz 8626351618 Official removal of cli interface. Bad idea. 2010-11-04 00:56:31 -04:00
Kenneth Reitz cdfacb6d6e Whitespace. 2010-10-26 05:53:07 -07:00
Kenneth Reitz 108c9de130 Merge branch 'tsv' into develop 2010-10-19 11:12:00 -04:00
Luca Beltrame 271aeebf56 Merge branch 'tsv_origin' into tsv_format 2010-10-19 10:49:10 +02:00
Luca Beltrame e75a00541d Support for TSV-files. Unit-tested. 2010-10-19 10:45:54 +02:00
Kenneth Reitz 3b0e0c7991 Updates. 2010-10-10 10:01:51 -04:00
Kenneth Reitz 23440fb7a3 Documentation update. 2010-10-10 06:23:11 -04:00
Kenneth Reitz 459f310857 Trying a few things. 2010-10-10 06:22:59 -04:00
Kenneth Reitz f9021f53c2 Future release? 2010-10-10 04:37:16 -04:00
Kenneth Reitz 7fda829d27 Documentation update. 2010-10-10 04:37:09 -04:00
Kenneth Reitz ca08ac8a7b Documentation update. 2010-10-10 03:03:57 -04:00
Kenneth Reitz 08b51113d3 Added seamless deletion of columns. 2010-10-10 03:03:50 -04:00
Kenneth Reitz 3e391fc8e3 Auto version usage. 2010-10-10 02:33:03 -04:00
Kenneth Reitz a230844914 Docs update. 2010-10-10 02:32:52 -04:00
Kenneth Reitz bc82be09c5 Big Documentation Upgrade. 2010-10-10 02:32:41 -04:00
Kenneth Reitz ed9fe01604 Added column insertion.
Documentation update.
2010-10-08 15:47:10 -04:00
Kenneth Reitz e69546a0ff Major documentation update. 2010-10-08 15:46:50 -04:00
Kenneth Reitz d4b659ece9 documentation update 2010-10-08 11:50:43 -04:00
Kenneth Reitz 55eb3f93e3 documentation update 2010-10-08 11:49:53 -04:00
Kenneth Reitz be7182aea9 installation documentation update. 2010-10-08 09:41:19 -04:00
Kenneth Reitz 48def2cba6 Documentation Update. Site should be up soon. 2010-10-07 17:52:21 -04:00
Kenneth Reitz df8c0335d1 Fixed incorrect packaging. 2010-10-07 16:01:27 -04:00
Kenneth Reitz d0b09f0fce Doc upgrades. 2010-10-07 16:01:17 -04:00
Kenneth Reitz 9efd982bfa Documentation update. 2010-10-07 16:01:09 -04:00
Kenneth Reitz a3c82804cd Simple fix. 2010-10-06 20:01:52 -04:00
Kenneth Reitz 2e75e93f57 Merge branch 'release/0.8.5' into develop 2010-10-06 15:46:55 -04:00
21 changed files with 1317 additions and 301 deletions
+1 -1
View File
@@ -17,4 +17,4 @@ profile
# vi noise
*.swp
docs/_build/*
docs/_build/*
+2 -1
View File
@@ -11,4 +11,5 @@ Patches and Suggestions
```````````````````````
- Luke Lee
- Josh Ourisman
- Josh Ourisman
- Luca Beltrame
+29 -15
View File
@@ -1,54 +1,68 @@
History
=======
-------
0.9.0 (2010-11-04)
++++++++++++++++++
* Massive documentation update!
* Tablib.org!
* Row tagging and Dataset filtering!
* Column insert/delete support
* Column append API change (header required)
* Internal Changes (Row object and use thereof)
0.8.5 (2010-10-06)
------------------
++++++++++++++++++
* New import system. All dependencies attempt to load from site-packages,
then fallback on vendorized modules.
0.8.4 (2010-10-04)
------------------
++++++++++++++++++
* Updated XLS output: Only wrap if '\n' in cell.
* Updated XLS output: Only wrap if '\\n' in cell.
0.8.3 (2010-10-04)
------------------
++++++++++++++++++
* Ability to append new column passing a callable
as the value that will be applied to every row.
0.8.2 (2010-10-04)
------------------
++++++++++++++++++
* Added alignment wrapping to written cells.
* Added separator support to XLS.
0.8.1 (2010-09-28)
------------------
++++++++++++++++++
* Packaging Fix
0.8.0 (2010-09-25)
------------------
++++++++++++++++++
* New format plugin system!
* Imports! ELEGANT Imports!
* Tests. Lots of tests.
0.7.1 (2010-09-20)
------------------
++++++++++++++++++
* Reverting methods back to properties.
* Windows bug compensated in documentation.
0.7.0 (2010-09-20)
------------------
++++++++++++++++++
* Renamed DataBook to Databook for consistency.
* Export properties changed to methods (XLS filename / StringIO bug).
@@ -57,31 +71,31 @@ History
0.6.4 (2010-09-19)
------------------
++++++++++++++++++
* Updated unicode export for XLS.
* More exhaustive unit tests.
0.6.3 (2010-09-14)
------------------
++++++++++++++++++
* Added Dataset.append() support for columns.
0.6.2 (2010-09-13)
------------------
++++++++++++++++++
* Fixed Dataset.append() error on empty dataset.
* Updated Dataset.headers property w/ validation.
* Added Testing Fixtures.
0.6.1 (2010-09-12)
------------------
++++++++++++++++++
* Packaging hotfixes.
0.6.0 (2010-09-11)
------------------
++++++++++++++++++
* Public Release.
* Export Support for XLS, JSON, YAML, and CSV.
+2
View File
@@ -18,12 +18,14 @@ Output formats supported:
- Excel (Sets + Books)
- JSON (Sets + Books)
- YAML (Sets + Books)
- TSV (Sets)
- CSV (Sets)
Import formats supported:
- JSON (Sets + Books)
- YAML (Sets + Books)
- TSV (Sets)
- CSV (Sets)
Note that tablib *purposefully* excludes XML support. It always will.
+9
View File
@@ -0,0 +1,9 @@
* Polish *&* announce http://tablib.org.
* Write more exhaustive unit-tests.
* Write stress tests.
* Make CSV write customizable.
* HTML Table exports.
* ``Dataset.transpose()`` support?
+64
View File
@@ -0,0 +1,64 @@
.. _api:
===
API
===
.. module:: tablib
This part of the documentation covers all the interfaces of Tablib. For
parts where Tablib depends on external libraries, we document the most
important right here and provide links to the canonical documentation.
--------------
Dataset Object
--------------
.. autoclass:: Dataset
:inherited-members:
---------------
Databook Object
---------------
.. autoclass:: Databook
:inherited-members:
---------
Functions
---------
.. autofunction:: detect
.. autofunction:: import_set
----------
Exceptions
----------
.. class:: InvalidDatasetType
You're trying to add something that doesn't quite look right.
.. class:: InvalidDimensions
You're trying to add something that doesn't quite fit right.
.. class:: UnsupportedFormat
You're trying to add something that doesn't quite taste right.
Now, go start some :ref:`Tablib Development <development>`.
+12 -11
View File
@@ -12,11 +12,12 @@
# serve to show the default.
import sys, os
import tablib
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('..'))
# -- General configuration -----------------------------------------------------
@@ -41,16 +42,16 @@ master_doc = 'index'
# General information about the project.
project = u'Tablib'
copyright = u'2010, Kenneth Reitz'
copyright = u'2010, Kenneth Reitz. Styles (modified) &copy; Armin Ronacher'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.8.3'
version = tablib.core.__version__
# The full version, including alpha/beta/rc tags.
release = '0.8.3'
release = version
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -70,18 +71,18 @@ exclude_patterns = ['_build']
#default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True
add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True
# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
pygments_style = 'flask_theme_support.FlaskyStyle'
# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []
@@ -120,7 +121,7 @@ html_theme = 'default'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ['static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
@@ -128,7 +129,7 @@ html_static_path = ['_static']
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True
html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}
@@ -147,10 +148,10 @@ html_static_path = ['_static']
#html_split_index = False
# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True
html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True
html_show_sphinx = False
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True
+246
View File
@@ -0,0 +1,246 @@
.. _development:
Development
===========
Tablib is under active development, and contributors are welcome.
If you have a feature request, suggestion, or bug report, please open a new issue on GitHub_. To submit patches, please send a pull request on GitHub_.
If you'd like to contribute, there's plenty to do. Here's a short todo list.
.. include:: ../TODO.rst
.. _GitHub: http://github.com/kennethreitz/tablib/
.. _design:
---------------------
Design Considerations
---------------------
Tablib was developed with a few :pep:`20` idioms in mind.
#. Beautiful is better than ugly.
#. Explicit is better than implicit.
#. Simple is better than complex.
#. Complex is better than complicated.
#. Readability counts.
A few other things to keep in mind:
#. Keep your code DRY.
#. Strive to be as simple (to use) as possible.
.. _scm:
--------------
Source Control
--------------
Tablib source is controlled with Git_, the lean, mean, distributed source control machine.
The repository is publicly accessible.
``git clone git://github.com/kennethreitz/tablib.git``
The project is hosted both on **GitHub** and **git.kennethreitz.com**.
GitHub:
http://github.com/kennethreitz/tablib
"Mirror":
http://git.kennethreitz.com/projects/tablib
Git Branch Structure
++++++++++++++++++++
Feature / Hotfix / Release branches follow a `Successful Git Branching Model`_ . Git-flow_ is a great tool for managing the repository. I highly recommend it.
``develop``
The "next release" branch. Likely unstable.
``master``
Current production release (|version|) on PyPi.
``gh-pages``
Current release of http://tablib.org.
Each release is tagged.
When submitting patches, please place your feature/change in its own branch prior to opening a pull request on GitHub_.
.. _Git: http://git-scm.org
.. _`Successful Git Branching Model`: http://nvie.com/posts/a-successful-git-branching-model/
.. _git-flow: http://github.com/nvie/gitflow
.. _newformats:
------------------
Adding New Formats
------------------
Tablib welcomes new format additions! Format suggestions include:
* Tab Separated Values
* MySQL Dump
* HTML Table
Coding by Convention
++++++++++++++++++++
Tablib features a micro-framework for adding format support. The easiest way to understand it is to use it. So, let's define our own format, named *xxx*.
1. Write a new format interface.
:class:`tablib.core` follows a simple pattern for automatically utilizing your format throughout Tablib. Function names are crucial.
Example **tablib/formats/_xxx.py**: ::
title = 'xxx'
def export_set(dset):
....
# returns string representation of given dataset
def export_book(dbook):
....
# returns string representation of given databook
def import_set(dset, in_stream):
...
# populates given Dataset with given datastream
def import_book(dbook, in_stream):
...
# returns Databook instance
def detect(stream):
...
# returns True if given stream is parsable as xxx
.. admonition:: Excluding Support
If the format excludes support for an import/export mechanism (*eg.* :class:`csv <tablib.Dataset.csv>` excludes :class:`Databook <tablib.Databook>` support), simply don't define the respective functions. Appropriate errors will be raised.
2.
Add your new format module to the :class:`tablib.formats.available` tuple.
3.
Add a mock property to the :class:`Dataset <tablib.Dataset>` class with verbose `reStructured Text`_ docstring. This alleviates IDE confusion, and allows for pretty auto-generated Sphinx_ documentation.
4. Write respective :ref:`tests <testing>`.
.. _testing:
--------------
Testing Tablib
--------------
Testing is crucial to Tablib's stability. This stable project is used in production by many companies and developers, so it is important to be certain that every version released is fully operational. When developing a new feature for Tablib, be sure to write proper tests for it as well.
When developing a feature for Tablib, the easiest way to test your changes for potential issues is to simply run the test suite directly. ::
$ ./test_tablib.py
`Hudson CI`_, amongst other tools, supports Java's xUnit testing report format. Nose_ allows us to generate our own xUnit reports.
Installing nose is simple. ::
$ pip install nose
Once installed, we can generate our xUnit report with a single command. ::
$ nosetests test_tablib.py --with-xunit
This will generate a **nosetests.xml** file, which can then be analyzed.
.. _Nose: http://somethingaboutorange.com/mrl/projects/nose/
.. _hudson:
----------------------
Continuous Integration
----------------------
Every commit made to the **develop** branch is automatically tested and inspected upon receipt with `Hudson CI`_. If you have access to the main repository and broke the build, you will receive an email accordingly.
Anyone may view the build status and history at any time.
http://git.kennethreitz.com/ci/
If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server.
Additional reports will also be included here in the future, including :pep:`8` checks and stress reports for extremely large datasets.
.. _`Hudson CI`: http://hudson.dev.java.net
.. _`Kenneth Reitz`: http://kennethreitz.com/contact-me/
.. _docs:
-----------------
Building the Docs
-----------------
Documentation is written in the powerful, flexible, and standard Python documentation format, `reStructured Text`_.
Documentation builds are powered by the powerful Pocoo project, Sphinx_. The :ref:`API Documentation <api>` is mostly documented inline throughout the module.
The Docs live in ``tablib/docs``. In order to build them, you will first need to install Sphinx. ::
$ pip install sphinx
Then, to build an HTML version of the docs, simply run the following from the **docs** directory: ::
$ make html
Your ``docs/_build/html`` directory will then contain an HTML representation of the documentation, ready for publication on most web servers.
You can also generate the documentation in **epub**, **latex**, **json**, *&c* similarly.
.. admonition:: GitHub Pages
To push the documentation up to `GitHub Pages`_, you will first need to run `sphinx-to-github`_ against your ``docs/_build/html`` directory.
GitHub Pages are powered by an HTML generation system called Jeckyl_, which is configured to ignore files and folders that begin with "``_``" (*ie.* **_static**).
`sphinx-to-github`_ works around this by renaming those files and folders so that they get published.
Installing sphinx-to-github is simple. ::
$ pip install sphinx-to-github
Running it against the docs is even simpler. ::
$ sphinx-to-github _build/html
Move the resulting files to the **gh-pages** branch of your repository, and push it up to GitHub.
.. _`reStructured Text`: http://docutils.sourceforge.net/rst.html
.. _Sphinx: http://sphinx.pocoo.org
.. _`GitHub Pages`: http://pages.github.com
.. _Jeckyl: http://github.com/mojombo/jekyll
.. _`sphinx-to-github`: http://github.com/michaeljones/sphinx-to-github
----------
Make sure to check out the :ref:`API Documentation <api>`.
+47 -19
View File
@@ -3,32 +3,60 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to Tablib's documentation!
==================================
Tablib: Pythonic Tabular Data
=============================
Contents:
.. Contents:
..
.. .. toctree::
.. :maxdepth: 2
..
.. Indices and tables
.. ==================
..
.. * :ref:`genindex`
.. * :ref:`modindex`
.. * :ref:`search`
Tablib is an :ref:`MIT Licensed <mit>` format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags & filtering, and seamless format import & export.
I recommend you start with :ref:`Installation <install>`.
User's Guide
------------
This part of the documentation, which is mostly prose, begins with some background information about Tablib, then focuses on step-by-step instructions for getting the most out of your datasets.
.. toctree::
:maxdepth: 2
Indices and tables
==================
intro
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
.. toctree::
:maxdepth: 2
Dataset Object
--------------
.. module:: tablib
install
.. autoclass:: Databook
:members:
:inherited-members:
.. toctree::
:maxdepth: 2
Databook Object
---------------
tutorial
.. autoclass:: Dataset
:members:
:inherited-members:
.. toctree::
:maxdepth: 2
development
API Reference
-------------
If you are looking for information on a specific function, class or
method, this part of the documentation is for you.
.. toctree::
:maxdepth: 2
api
+78
View File
@@ -0,0 +1,78 @@
.. _install:
Installation
============
This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancements <speed-extentions>`.
.. _installing:
-----------------
Installing Tablib
-----------------
To install Tablib, it only takes one simple command. ::
$ pip install tablib
Or, if you must: ::
$ easy_install tablib
But, you really shouldn't do that.
-------------------
Download the Source
-------------------
You can also install tablib from source. The latest release (|version|) is available from GitHub.
* tarball_
* zipball_
.. _
Once you have a copy of the source, you can embed it in your Python package, or install it into your site-packages easily. ::
$ python setup.py install
To download the full source history from Git, see :ref:`Source Control <scm>`.
.. _tarball: http://github.com/kennethreitz/tablib/tarball/master
.. _zipball: http://github.com/kennethreitz/tablib/zipball/master
.. _speed-extentions:
Speed Extensions
----------------
.. versionadded:: 0.8.5
Tablib is partially dependent on the **pyyaml**, **simplejson**, and **xlwt** modules. To reduce installation issues, fully integrated versions of all required libraries are included in Tablib.
However, if performance is important to you (and it should be), you can install **pyyaml** with C extensions from PyPi. ::
$ pip install PyYAML
If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. If you're using Python 2.6+, the built-in **json** module is already optimized and in use. ::
$ pip install simplejson
.. _updates:
Staying Updated
---------------
The latest version of Tablib will always be available here:
* PyPi: http://pypi.python.org/pypi/tablib/
* GitHub: http://github.com/kennethreitz/tablib/
When a new version is available, upgrading is simple. ::
$ pip install tablib --upgrade
Now, go get a :ref:`Quick Start <quickstart>`.
+55
View File
@@ -0,0 +1,55 @@
.. _intro:
Introduction
============
This part of the documentation covers all the interfaces of Tablib.
Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to Pythonically import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags / filtering, and seamless format import/export.
Philosophy
----------
Tablib was developed with a few :pep:`20` idioms in mind.
#. Beautiful is better than ugly.
#. Explicit is better than implicit.
#. Simple is better than complex.
#. Complex is better than complicated.
#. Readability counts.
All contributions to Tablib should keep these important rules in mind.
.. _mit:
MIT License
-----------
A large number of open source projects you find today are `GPL Licensed`_. While the GPL certainly has essential applications, it should most certainly not be your go-to license for your next open source project.
A project that is released as GPL cannot be used in any commercial product without the product itself also being offered as open source. The MIT and BSD licenses are great alternatives to the GPL that allow your open-source software to be used in proprietary, closed-source software.
Tablib is released under terms of `The MIT License`_.
.. _`GPL Licensed`: http://www.opensource.org/licenses/gpl-license.php
.. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php
.. _pythonsupport:
Pythons Supported
-----------------
At this time, the following Python platforms are officially supported:
* Python 2.6
* Python 2.7
Support for other Pythons will be rolled out soon.
Now, go :ref:`Install Tablib <install>`.
+353
View File
@@ -0,0 +1,353 @@
.. _quickstart:
==========
Quickstart
==========
.. module:: tablib
Eager to get started? This page gives a good introduction in how to get started with Tablib. This assumes you already have Tablib installed. If you do not, head over to the :ref:`Installation <install>` section.
First, make sure that:
* Tablib is :ref:`installed <install>`
* Tablib is :ref:`up-to-date <updates>`
Let's get started with some simple use cases and examples.
------------------
Creating a Dataset
------------------
A :class:`Dataset <tablib.Dataset>` is nothing more than what its name implies—a set of data.
Creating your own instance of the :class:`tablib.Dataset` object is simple. ::
data = tablib.Dataset()
You can now start filling this :class:`Dataset <tablib.Dataset>` object with data.
.. admonition:: Example Context
From here on out, if you see ``data``, assume that it's a fresh :class:`Dataset <tablib.Dataset>` object.
-----------
Adding Rows
-----------
Let's say you want to collect a simple list of names. ::
# collection of names
names = ['Kenneth Reitz', 'Bessie Monke']
for name in names:
# split name appropriately
fname, lname = name.split()
# add names to Dataset
data.append([fname, lname])
You can get a nice, Pythonic view of the dataset at any time with :class:`Dataset.dict`.
>>> data.dict
[('Kenneth', 'Reitz'), ('Bessie', 'Monke')]
--------------
Adding Headers
--------------
It's time to enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. ::
data.headers = ['First Name', 'Last Name']
Now our data looks a little different. ::
>>> data.dict
[{'Last Name': 'Reitz', 'First Name': 'Kenneth'}, {'Last Name': 'Monke', 'First Name': 'Bessie'}]
--------------
Adding Columns
--------------
Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. ::
data.append(col=[22, 20], header='Age')
Let's view the data now. ::
>>> data.dict
[{'Last Name': 'Reitz', 'First Name': 'Kenneth', 'Age': 22}, {'Last Name': 'Monke', 'First Name': 'Bessie', 'Age': 20}]
It's that easy.
--------------
Exporting Data
--------------
Tablib's killer feature is the ability to export your :class:`Dataset` objects into a number of formats.
**Comma-Separated Values** ::
>>> data.csv
Last Name,First Name,Age
Reitz,Kenneth,22
Monke,Bessie,20
**JavaScript Object Notation** ::
>>> data.json
[{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 20}]
**YAML Ain't Markup Language** ::
>>> data.yaml
- {Age: 22, First Name: Kenneth, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Last Name: Monke}
**Microsoft Excel** ::
>>> data.xls
<censored binary data>
------------------------
Selecting Rows & Columns
------------------------
You can slice and dice your data, just like a standard Python list. ::
>>> data[0]
('Kenneth', 'Reitz', 22)
If we had a set of data consisting of thousands of rows, it could be useful to get a list of values in a column.
To do so, we access the :class:`Dataset` as if it were a standard Python dictionary. ::
>>> data['First Name']
['Kenneth', 'Bessie']
Let's find the average age. ::
>>> ages = data['Age']
>>> float(sum(ages)) / len(ages)
21.0
-----------------------
Removing Rows & Columns
-----------------------
It's easier than you could imagine. ::
>>> del data['Col Name']
::
>>> del data[0:12]
==============
Advanced Usage
==============
This part of the documentation serves to give you an idea of features that are otherwise hard to extract from the :ref:`API Documentation <api>`.
And now for something completely different.
.. _dyncols:
---------------
Dynamic Columns
---------------
.. versionadded:: 0.8.3
Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. A dynamic column is a single callable object (*ie.* a function).
Let's add a dynamic column to our :class:`Dataset` object. In this example, we have a function that generates a random grade for our students. ::
import random
def random_grade(row):
"""Returns a random integer for entry."""
return (random.randint(60,100)/100.0)
data.append(col=[random_grade], header='Grade')
Let's have a look at our data. ::
>>> data.yaml
- {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Grade: 0.75, Last Name: Monke}
Let's remove that column. ::
>>> del data['Grade']
When you add a dynamic column, the first argument that is passed in to the given callable is the current data row. You can use this to perform calculations against your data row.
For example, we can use the data available in the row to guess the gender of a student. ::
def guess_gender(row):
"""Calculates gender of given student data row."""
m_names = ('Kenneth', 'Mike', 'Yuri')
f_names = ('Bessie', 'Samantha', 'Heather')
name = row[0]
if name in m_names:
return 'Male'
elif name in f_names:
return 'Female'
else:
return 'Unknown'
Adding this function to our dataset as a dynamic column would result in: ::
>>> data.yaml
- {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke}
.. _tags:
----------------------------
Filtering Datasets with Tags
----------------------------
.. versionadded:: 0.9.0
When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter.
This allows you to filter your :class:`Dataset` later. This can be useful to separate rows of data based on
arbitrary criteria (*e.g.* origin) that you don't want to include in your :class:`Dataset`.
Let's tag some students. ::
students = tablib.Dataset()
students.headers = ['first', 'last']
students.append(['Kenneth', 'Reitz'], tags=['male', 'technical'])
students.append(['Bessie', 'Monke'], tags=['female', 'creative'])
Now that we have extra meta-data on our rows, we can easily filter our :class:`Dataset`. Let's just see Male students. ::
>>> students.filter(['male']).yaml
- {first: Kenneth, last: Reitz}
It's that simple. The original :class:`Dataset` is untouched.
Excel Workbook With Multiple Sheets
------------------------------------
When dealing with a large number of :class:`Datasets <Dataset>` in spreadsheet format, it's quite common to group multiple spreadsheets into a single Excel file, known as a Workbook. Tablib makes it extremely easy to build workbooks with the handy :class:`Databook` class.
Let's say we have 3 different :class:`Datasets <Dataset>`. All we have to do is add them to a :class:`Databook` object... ::
book = tablib.Databook([data1, data2, data3])
... and export to Excel just like :class:`Datasets <Dataset>`. ::
with open('students.xls', 'wb') as f:
f.write(book.xls)
The resulting **students.xls** file will contain a separate spreadsheet for each :class:`Dataset` object in the :class:`Databook`.
.. admonition:: Binary Warning
Make sure to open the output file in binary mode.
.. _seperators:
----------
Separators
----------
.. versionadded:: 0.8.2
When constructing a spreadsheet, it's often useful to create a blank row containing information on the upcoming data. So,
::
daniel_tests = [
('11/24/09', 'Math 101 Mid-term Exam', 56.),
('05/24/10', 'Math 101 Final Exam', 62.)
]
suzie_tests = [
('11/24/09', 'Math 101 Mid-term Exam', 56.),
('05/24/10', 'Math 101 Final Exam', 62.)
]
# Create new dataset
tests = tablib.Dataset()
tests.headers = ['Date', 'Test Name', 'Grade']
# Daniel's Tests
tests.append_seperator('Daniel\'s Scores')
for test_row in daniel_tests:
tests.append(test_row)
# Susie's Tests
tests.append_seperator('Susie\'s Scores')
for test_row in suzie_tests:
tests.append(test_row)
# Write spreadsheet to disk
with open('grades.xls', 'wb') as f:
f.write(tests.xls)
The resulting **grades.xls** will have the following layout:
Daniel's Scores:
* '11/24/09', 'Math 101 Mid-term Exam', 56.
* '05/24/10', 'Math 101 Final Exam', 62.
Suzie's Scores:
* '11/24/09', 'Math 101 Mid-term Exam', 56.
* '05/24/10', 'Math 101 Final Exam', 62.
.. admonition:: Format Support
At this time, only :class:`Excel <Dataset.xls>` output supports separators.
----
Now, go check out the :ref:`API Documentation <api>` or begin :ref:`Tablib Development <development>`.
Vendored
+11 -1
View File
@@ -1,7 +1,17 @@
import os
from fabric.api import *
def scrub():
""" Death to the bytecode! """
# Fabric task: remove packaging artifacts and compiled bytecode via shell commands.
# NOTE(review): the next two lines look like the old and new sides of a single
# diff hunk (only the quote style differs); presumably the real file keeps one.
local("rm -fr dist build")
local('rm -fr dist build')
# Delete every *.pyc file found beneath the current working directory.
local("find . -name \"*.pyc\" -exec rm '{}' ';'")
def docs():
    """Build the HTML docs, then commit and push them to ``gh-pages``."""
    os.system('make html')

    # Everything after the build runs from inside the generated site.
    os.chdir('_build/html')

    publish_steps = (
        'sphinxtogithub .',
        'git add -A',
        'git commit -m \'documentation update\'',
        'git push origin gh-pages',
    )
    for step in publish_steps:
        os.system(step)
+4 -11
View File
@@ -17,21 +17,19 @@ if sys.argv[-1] == "publish":
required = []
# if sys.version_info < (2, 6):
# required.append('simplejson')
setup(
name='tablib',
version='0.8.5',
version='0.9.0',
description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)',
long_description=open('README.rst').read() + '\n\n' +
open('HISTORY.rst').read(),
author='Kenneth Reitz',
author_email='me@kennethreitz.com',
url='http://github.com/kennethreitz/tablib',
url='http://tablib.org',
packages= [
'tablib', 'tablib.formats',
'tablib.packages.simplejson'
'tablib.packages',
'tablib.packages.simplejson',
'tablib.packages.xlwt',
'tablib.packages.yaml',
],
@@ -49,9 +47,4 @@ setup(
# 'Programming Language :: Python :: 3.0',
# 'Programming Language :: Python :: 3.1',
),
# entry_points={
# 'console_scripts': [
# 'tabbed = tablib.cli:start',
# ],
# }
)
-14
View File
@@ -1,14 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Tabbed -- CLI for Tablib
Copyright (c) 2010 Kenneth Reitz. MIT License.
"""
import tablib.cli
if __name__ == '__main__':
tablib.cli.start()
-84
View File
@@ -1,84 +0,0 @@
#!/usr/bin/env python
# encoding: utf-8
""" Tabbed CLI Inteface Application
"""
import io
import sys
import argue
import tablib
from helpers import Struct, piped
# Titles of the formats listed in ``tablib.formats.FORMATS``; each title
# becomes a long command-line option in the loop below.
FORMATS = [fmt.title for fmt in tablib.formats.FORMATS]

# Option tuples handed to ``argue.command`` -- presumably
# (short flag, long flag, default, help text); confirm against argue.
opts = []
opts.append(('v', 'version', False, 'Report tabbed version'))

# One boolean flag per output format.
for format in FORMATS:
    opts.append(('', format, False, 'Output to %s' % (format.upper())))
# Entry point of the ``tabbed`` command line tool.
#
# Loads a dataset from piped stdin or from ``in_file`` and then checks that
# exactly one output-format flag was given.  Exits with status 65 when no
# usable input is available and with status 64 when the format flags are
# missing or ambiguous.
#
# The docstring is left untouched on purpose: argue may surface it as help
# text -- TODO confirm.
@argue.command(options=opts, usage='[FILE] [--FORMAT | FILE]')
def start(in_file=None, out_file=None, **opts):
    """Covertly convert dataset formats"""

    # ``opts`` stays a plain dict so it can be subscripted safely; the
    # Struct wrapper is only used for attribute access.
    options = Struct(**opts)

    if options.version:
        print('Tabbed, Ver. %s' % tablib.core.__version__)
        sys.exit(0)

    stdin = piped()

    if stdin:
        data = tablib.import_set(stdin)

    elif in_file:
        try:
            # The context manager closes the file even if read() fails.
            with io.open(in_file, 'r') as in_handle:
                in_stream = in_handle.read()
        except Exception:
            print(' %s cannot be read.' % in_file)
            sys.exit(65)

        try:
            # Keep the imported dataset; it used to be thrown away.
            data = tablib.import_set(in_stream)
        except Exception:
            print('Import format not supported.')
            sys.exit(65)

    else:
        print('Please provide input.')
        sys.exit(65)

    # Number of output-format flags that were switched on.
    _formats_sum = sum(bool(opts.get(f)) for f in FORMATS)

    # Multiple output formats given
    if _formats_sum > 1:
        print('Please specify a single output format.')
        sys.exit(64)

    # No output formats given
    elif _formats_sum < 1:
        print('Please specify an output format.')
        sys.exit(64)

    # TODO: export ``data`` in the selected format to ``out_file`` (or
    # stdout when no file name is given); not implemented yet.
+295 -120
View File
@@ -9,19 +9,83 @@
:license: MIT, see LICENSE for more details.
"""
from tablib.formats import FORMATS as formats
from copy import copy
from tablib import formats
__title__ = 'tablib'
__version__ = '0.8.5'
__build__ = 0x000805
__version__ = '0.9.0'
__build__ = 0x000900
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2010 Kenneth Reitz'
class Row(object):
    """Internal Row object. Mainly used for filtering.

    Wraps a mutable list of column values (``_row``) together with a list
    of ``tags`` that :meth:`has_tag` matches against.
    """

    # ``tuple`` and ``list`` are properties, so they must not be declared as
    # slots as well: Python 3 refuses to create a class whose slot name
    # collides with a class attribute.
    __slots__ = ['_row', 'tags']

    def __init__(self, row=list(), tags=list()):
        # Both arguments are copied, so the shared default lists are never
        # mutated.
        self._row = list(row)
        self.tags = list(tags)

    def __iter__(self):
        return iter(self._row)

    def __len__(self):
        return len(self._row)

    def __repr__(self):
        return repr(self._row)

    def __getslice__(self, i, j):
        # Python 2 slice protocol; Python 3 routes slices to __getitem__.
        return self._row[i:j]

    def __getitem__(self, i):
        return self._row[i]

    def __setitem__(self, i, value):
        self._row[i] = value

    def __delitem__(self, i):
        del self._row[i]

    def append(self, value):
        """Add ``value`` as the last column of the row."""
        self._row.append(value)

    def insert(self, index, value):
        """Insert ``value`` as a new column before position ``index``."""
        self._row.insert(index, value)

    def __contains__(self, item):
        return item in self._row

    @property
    def tuple(self):
        """Tuple representation of :class:`Row`."""
        return tuple(self._row)

    @property
    def list(self):
        """List representation of :class:`Row`."""
        return list(self._row)

    def has_tag(self, tag):
        """Returns true if current row contains tag.

        ``tag`` may be a single string or an iterable of tags; for an
        iterable, one matching tag is enough.  ``None`` never matches.
        """
        if tag is None:
            return False

        # ``basestring`` only exists on Python 2.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if isinstance(tag, string_types):
            return tag in self.tags

        return any(t in self.tags for t in tag)
class Dataset(object):
"""The tablib Dataset object is the heart of tablib. It provides all core
"""The :class:`Dataset` object is the heart of Tablib. It provides all core
functionality.
Usually you create a :class:`Dataset` instance in your main module, and append
@@ -44,68 +108,18 @@ class Dataset(object):
:param \*args: (optional) list of rows to populate Dataset
:param headers: (optional) list strings for Dataset header row
.. admonition:: About the Format Attributes
If you look at the code, the various output/import formats are not
defined within the itself. To add support for a new format, see
:ref:`Adding New Formats`.
.. attribute:: csv
A CSV representation of the Dataset object. The top row will contain
headers, if they have been set. Otherwise, the top row will contain
the first row of the dataset.
A dataset object can also be imported by setting the `Dataset.csv` attribute: ::
data = tablib.Dataset()
data.csv = 'age, first_name, last_name\\n90, John, Adams'
Import assumes (for now) that headers exist.
.. attribute:: dict
.. admonition:: Format Attributes Definition
An native Python representation of the Dataset object. If headers have been
set, a list of Python dictionaries will be returned. If no headers have been
set, a list of tuples (rows) will be returned instead.
If you look at the code, the various output/import formats are not
defined within the :class:`Dataset` object. To add support for a new format, see
:ref:`Adding New Formats <newformats>`.
A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
data = tablib.Dataset()
data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
.. attribute:: xls
An Excel Spreadsheet representation of the Dataset object, including
:ref:`seperators`.
*Note:* `Dataset.xls` contains binary data, so make sure to write in binary
mode::
with open('output.xls', 'wb') as f:
f.write(data.xls)
.. attribute:: yaml
A YAML representation of the Dataset object. If headers have been
set, a YAML list of objects will be returned. If no headers have
been set, a YAML list of lists (rows) will be returned instead.
A dataset object can also be imported by setting the `Dataset.json` attribute: ::
data = tablib.Dataset()
data.yaml = '- {age: 90, first_name: John, last_name: Adams}'
Import assumes (for now) that headers exist.
"""
def __init__(self, *args, **kwargs):
self._data = list(args)
self._data = list(Row(arg) for arg in args)
self.__headers = None
# ('title', index) tuples
@@ -136,16 +150,34 @@ class Dataset(object):
else:
raise KeyError
else:
return self._data[key]
_results = self._data[key]
if isinstance(_results, Row):
return _results.tuple
else:
return [result.tuple for result in _results]
def __setitem__(self, key, value):
self._validate(value)
self._data[key] = tuple(value)
self._data[key] = Row(value)
def __delitem__(self, key):
del self._data[key]
if isinstance(key, basestring):
if key in self.headers:
pos = self.headers.index(key)
del self.headers[pos]
for i, row in enumerate(self._data):
del row[pos]
self._data[i] = row
else:
raise KeyError
else:
del self._data[key]
def __repr__(self):
@@ -158,7 +190,7 @@ class Dataset(object):
@classmethod
def _register_formats(cls):
"""Adds format properties."""
for fmt in formats:
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set))
@@ -174,12 +206,12 @@ class Dataset(object):
if row:
is_valid = (len(row) == self.width) if self.width else True
elif col:
if self.headers:
is_valid = (len(col) - 1) == self.height
if len(col) < 1:
is_valid = True
else:
is_valid = (len(col) == self.height) if self.height else True
else:
is_valid = all((len(x)== self.width for x in self._data))
is_valid = all((len(x) == self.width for x in self._data))
if is_valid:
return True
@@ -202,16 +234,38 @@ class Dataset(object):
return data
def _clean_col(self, col):
"""Prepares the given column for insert/append."""
col = list(col)
if self.headers:
header = [col.pop(0)]
else:
header = []
if len(col) == 1 and callable(col[0]):
col = map(col[0], self._data)
col = tuple(header + col)
return col
@property
def height(self):
"""Returns the height of the Dataset."""
"""The number of rows currently in the :class:`Dataset`.
Cannot be directly modified.
"""
return len(self._data)
@property
def width(self):
"""Returns the width of the Dataset."""
"""The number of columns currently in the :class:`Dataset`.
Cannot be directly modified.
"""
try:
return len(self._data[0])
except IndexError:
@@ -223,7 +277,11 @@ class Dataset(object):
@property
def headers(self):
"""Headers property."""
"""An *optional* list of strings to be used for header rows and attribute names.
This must be set manually. The given list length must equal :class:`Dataset.width`.
"""
return self.__headers
@@ -242,7 +300,7 @@ class Dataset(object):
@property
def dict(self):
"""A JSON representation of the Dataset object. If headers have been
"""A JSON representation of the :class:`Dataset` object. If headers have been
set, a JSON list of objects will be returned. If no headers have
been set, a JSON list of lists (rows) will be returned instead.
@@ -257,7 +315,16 @@ class Dataset(object):
@dict.setter
def dict(self, pickle):
"""A native Python representation of the Dataset object. If headers have been
set, a list of Python dictionaries will be returned. If no headers have been
set, a list of tuples (rows) will be returned instead.
A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. ::
data = tablib.Dataset()
data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
"""
if not len(pickle):
return
@@ -265,59 +332,110 @@ class Dataset(object):
if isinstance(pickle[0], list):
self.wipe()
for row in pickle:
self.append(row)
self.append(Row(row))
# if list of objects
elif isinstance(pickle[0], dict):
self.wipe()
self.headers = pickle[0].keys()
for row in pickle:
self.append(row.values())
self.append(Row(row.values()))
else:
raise UnsupportedFormat
# Documentation placeholder: the function takes no ``self``, so it cannot act
# as a real getter.  Presumably replaced by ``_register_formats()`` -- confirm.
@property
def xls():
    """An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`seperators`. Cannot be set.

    .. admonition:: Binary Warning

        :class:`Dataset.xls` contains binary data, so make sure to write in binary mode::

            with open('output.xls', 'wb') as f:
                f.write(data.xls)
    """
    pass
# Documentation placeholder: the function takes no ``self``, so it cannot act
# as a real getter.  Presumably replaced by ``_register_formats()`` -- confirm.
@property
def csv():
    """A CSV representation of the :class:`Dataset` object. The top row will contain
    headers, if they have been set. Otherwise, the top row will contain
    the first row of the dataset.

    A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. ::

        data = tablib.Dataset()
        data.csv = 'age, first_name, last_name\\n90, John, Adams'

    Import assumes (for now) that headers exist.
    """
    pass
# Documentation placeholder: the function takes no ``self``, so it cannot act
# as a real getter.  Presumably replaced by ``_register_formats()`` -- confirm.
@property
def tsv():
    """A TSV representation of the :class:`Dataset` object. The top row will contain
    headers, if they have been set. Otherwise, the top row will contain
    the first row of the dataset.

    A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. ::

        data = tablib.Dataset()
        data.tsv = 'age\\tfirst_name\\tlast_name\\n90\\tJohn\\tAdams'

    Import assumes (for now) that headers exist.
    """
# Documentation placeholder: the function takes no ``self``, so it cannot act
# as a real getter.  Presumably replaced by ``_register_formats()`` -- confirm.
@property
def yaml():
    """A YAML representation of the :class:`Dataset` object. If headers have been
    set, a YAML list of objects will be returned. If no headers have
    been set, a YAML list of lists (rows) will be returned instead.

    A dataset object can also be imported by setting the :class:`Dataset.yaml` attribute: ::

        data = tablib.Dataset()
        data.yaml = '- {age: 90, first_name: John, last_name: Adams}'

    Import assumes (for now) that headers exist.
    """
    pass
# Documentation placeholder: the function takes no ``self``, so it cannot act
# as a real getter.  Presumably replaced by ``_register_formats()`` -- confirm.
@property
def json():
    """A JSON representation of the :class:`Dataset` object. If headers have been
    set, a JSON list of objects will be returned. If no headers have
    been set, a JSON list of lists (rows) will be returned instead.

    A dataset object can also be imported by setting the :class:`Dataset.json` attribute: ::

        data = tablib.Dataset()
        data.json = '[{"age": 90, "first_name": "John", "last_name": "Adams"}]'

    Import assumes (for now) that headers exist.
    """
def append(self, row=None, col=None, header=None, tags=list()):
"""Adds a row or column to the :class:`Dataset`.
Usage is :class:`Dataset.insert` for documentation.
"""
def append(self, row=None, col=None):
"""Adds a row to the end of Dataset"""
if row is not None:
self._validate(row)
self._data.append(tuple(row))
self.insert(self.height, row=row, tags=tags)
elif col is not None:
col = list(col)
if self.headers:
header = [col.pop(0)]
else:
header = []
if len(col) == 1 and callable(col[0]):
col = map(col[0], self._data)
col = tuple(header + col)
self._validate(col=col)
if self.headers:
# pop the first item off, add to headers
self.headers.append(col[0])
col = col[1:]
if self.height and self.width:
for i, row in enumerate(self._data):
_row = list(row)
_row.append(col[i])
self._data[i] = tuple(_row)
else:
self._data = [tuple([row]) for row in col]
self.insert(self.width, col=col, header=header)
def insert_separator(self, index, text='-'):
"""Adds a separator to Dataset at given index."""
"""Adds a separator to :class:`Dataset` at given index."""
sep = (index, text)
self._separators.append(sep)
def append_separator(self, text='-'):
"""Adds a separator to Dataset."""
"""Adds a :ref:`seperator <seperators>` to the :class:`Dataset`."""
# change offsets if headers are or aren't defined
if not self.headers:
@@ -328,24 +446,79 @@ class Dataset(object):
self.insert_separator(index, text)
def insert(self, i, row=None):
"""Inserts a row at given position in Dataset"""
def insert(self, index, row=None, col=None, header=None, tags=list()):
"""Inserts a row or column to the :class:`Dataset` at the given index.
Rows and columns inserted must be the correct size (height or width).
The default behaviour is to insert the given row to the :class:`Dataset`
object at the given index. If the ``col`` parameter is given, however,
a new column will be insert to the :class:`Dataset` object instead.
You can also insert a column of a single callable object, which will
add a new column with the return values of the callable each as an
item in the column. ::
data.append(col=random.randint)
See :ref:`dyncols` for an in-depth example.
.. versionchanged:: 0.9.0
If inserting a column, and :class:`Dataset.headers` is set, the
header attribute must be set, and will be considered the header for
that row.
.. versionadded:: 0.9.0
If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
This gives you the ability to :class:`filter <Dataset.filter>` your
:class:`Dataset` later.
"""
if row:
self._validate(row)
self._data.insert(i, tuple(row))
self._data.insert(index, Row(row, tags=tags))
elif col:
pass
col = list(col)
# Callable Columns...
if len(col) == 1 and callable(col[0]):
col = map(col[0], self._data)
col = self._clean_col(col)
self._validate(col=col)
if self.headers:
# pop the first item off, add to headers
if not header:
raise HeadersNeeded()
self.headers.insert(index, header)
if self.height and self.width:
for i, row in enumerate(self._data):
row.insert(index, col[i])
self._data[i] = row
else:
self._data = [Row([row]) for row in col]
def filter(self, tag):
    """Returns a new instance of the :class:`Dataset`, excluding any rows
    that do not contain the given :ref:`tags <tags>`.

    :param tag: a tag, or an iterable of tags, passed to each row's
        ``has_tag``.
    :returns: a shallow copy of this dataset holding only matching rows.
    """
    _dset = copy(self)

    # Rebind ``_data`` on the copy.  The shallow copy shares the original
    # list object, so a slice assignment here would also strip the rows
    # from the dataset being filtered.
    _dset._data = [row for row in self._data if row.has_tag(tag)]

    return _dset
def wipe(self):
"""Erases all data from Dataset."""
"""Removes all content and headers from the :class:`Dataset` object."""
self._data = list()
self.__headers = None
class Databook(object):
"""A book of Dataset objects.
Currently, this exists only for XLS workbook support.
"""A book of :class:`Dataset` objects.
"""
def __init__(self, sets=[]):
@@ -361,14 +534,14 @@ class Databook(object):
def wipe(self):
"""Wipe book clean."""
"""Removes all :class:`Dataset` objects from the :class:`Databook`."""
self._datasets = []
@classmethod
def _register_formats(cls):
"""Adds format properties."""
for fmt in formats:
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book))
@@ -380,7 +553,7 @@ class Databook(object):
def add_sheet(self, dataset):
"""Adds given dataset."""
"""Adds given :class:`Dataset` to the :class:`Databook`."""
if type(dataset) is Dataset:
self._datasets.append(dataset)
else:
@@ -388,7 +561,7 @@ class Databook(object):
def _package(self):
"""Packages Databook for delivery."""
"""Packages :class:`Databook` for delivery."""
collector = []
for dset in self._datasets:
collector.append(dict(
@@ -400,13 +573,13 @@ class Databook(object):
@property
def size(self):
"""The number of the Datasets within DataBook."""
"""The number of the :class:`Dataset` objects within :class:`Databook`."""
return len(self._datasets)
def detect(stream):
"""Return (format, stream) of given stream."""
for fmt in formats:
for fmt in formats.available:
try:
if fmt.detect(stream):
return (fmt, stream)
@@ -435,6 +608,8 @@ class InvalidDatasetType(Exception):
class InvalidDimensions(Exception):
"Invalid size"
class HeadersNeeded(Exception):
    """Header parameter must be given when appending a column in this Dataset.

    Raised by ``Dataset.insert`` when a column is added to a dataset that
    has headers but no ``header`` argument was supplied.
    """
class UnsupportedFormat(NotImplementedError):
"Format is not supported"
+2 -1
View File
@@ -7,5 +7,6 @@ import _csv as csv
import _json as json
import _xls as xls
import _yaml as yaml
import _tsv as tsv
FORMATS = (json, xls, yaml, csv)
available = (json, xls, yaml, csv, tsv)
+51
View File
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
""" Tablib - TSV (Tab Separated Values) Support.
"""
import cStringIO
import csv
import os
import tablib
# Format name; ``_register_formats`` uses ``fmt.title`` as the name of the
# property it attaches to Dataset/Databook.
title = 'tsv'
# File extensions for this format.  NOTE(review): the identifier is spelled
# ``extentions``; presumably read under that name elsewhere -- confirm before
# renaming.
extentions = ('tsv',)
def export_set(dataset):
    """Serialize ``dataset`` to a tab-separated string and return it."""
    buf = cStringIO.StringIO()
    writer = csv.writer(buf, delimiter='\t')

    # ``_package(dicts=False)`` supplies the rows to write.
    writer.writerows(dataset._package(dicts=False))

    return buf.getvalue()
def import_set(dset, in_stream, headers=True):
    """Returns dataset from TSV stream.

    The dataset is wiped first and then filled in place; nothing is
    returned.

    :param dset: dataset object to populate (needs ``wipe``, ``append`` and
        a ``headers`` attribute).
    :param in_stream: string holding the TSV text.
    :param headers: when true, the first non-empty row becomes the headers.
    """

    dset.wipe()

    # splitlines() copes with CRLF, LF and CR endings alike; splitting on
    # '\r\n' alone left LF-terminated input as one unparseable line.
    rows = csv.reader(in_stream.splitlines(), delimiter='\t')

    # Track the header explicitly so leading blank lines cannot push the
    # header row into the data.
    need_header = bool(headers)

    for row in rows:
        # Skip empty rows
        if not row:
            continue

        if need_header:
            dset.headers = row
            need_header = False
        else:
            dset.append(row)
def detect(stream):
    """Returns True if given stream is valid TSV."""
    sniffer = csv.Sniffer()
    try:
        # Only a tab is accepted as the delimiter; the sniffed dialect
        # itself is not needed.
        sniffer.sniff(stream, delimiters='\t')
    except csv.Error:
        return False
    return True
Executable
+4
View File
@@ -0,0 +1,4 @@
# Run the unit tests with xunit output and coverage collection enabled.
nosetests test_tablib.py --with-xunit --with-coverage
# Write the collected coverage data out as an XML report.
coverage xml
# Remove any previous lint report before regenerating it.
rm -fr pylint.txt
# Lint the package with the listed checks disabled; "|| true" keeps a
# non-zero pylint exit status from failing the script.
pylint -d W0312 -d W0212 -d E1101 -d E0202 -d W0102 -d E0102 -f parseable ./tablib > pylint.txt || true
+52 -23
View File
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tests for tablib."""
"""Tests for Tablib."""
import unittest
@@ -69,10 +69,10 @@ class TablibTestCase(unittest.TestCase):
# With Headers
data.headers = ('fname', 'lname')
new_col = ['age', 21, 22]
data.append(col=new_col)
new_col = [21, 22]
data.append(col=new_col, header='age')
self.assertEquals(data[new_col[0]], new_col[1:])
self.assertEquals(data['age'], new_col)
def test_add_column_no_data_no_headers(self):
@@ -87,27 +87,12 @@ class TablibTestCase(unittest.TestCase):
self.assertEquals(data.height, len(new_col))
def test_add_column_no_data_with_headers(self):
"""Verify adding new column with headers."""
data.headers = ('first', 'last')
new_col = ('age',)
data.append(col=new_col)
self.assertEquals(len(data.headers), 3)
self.assertEquals(data.width, 3)
new_col = ('foo', 'bar')
self.assertRaises(tablib.InvalidDimensions, data.append, col=new_col)
def test_add_callable_column(self):
"""Verify adding column with values specified as callable."""
new_col = ['first_again', lambda x: x[0]]
self.founders.append(col=new_col)
self.assertTrue(map(lambda x: x[0] == x[-1], self.founders))
new_col = [lambda x: x[0]]
self.founders.append(col=new_col, header='first_again')
#
# self.assertTrue(map(lambda x: x[0] == x[-1], self.founders))
def test_header_slicing(self):
@@ -178,6 +163,22 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(csv, self.founders.csv)
def test_tsv_export(self):
    """Verify exporting dataset object as TSV."""

    # Build up the expected TSV string with headers first, followed by each row
    tsv = ''
    for col in self.headers:
        tsv += col + '\t'

    # Trim the tab left after the last column and end the line with CRLF.
    tsv = tsv.strip('\t') + '\r\n'

    for founder in self.founders:
        for col in founder:
            tsv += str(col) + '\t'
        tsv = tsv.strip('\t') + '\r\n'

    self.assertEqual(tsv, self.founders.tsv)
def test_unicode_append(self):
"""Passes in a single unicode charecter and exports."""
@@ -188,6 +189,7 @@ class TablibTestCase(unittest.TestCase):
data.json
data.yaml
data.csv
data.tsv
data.xls
@@ -268,6 +270,18 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(_csv, data.csv)
def test_tsv_import_set(self):
"""Generate and import TSV set serialization."""
data.append(self.john)
data.append(self.george)
data.headers = self.headers
_tsv = data.tsv
data.tsv = _tsv
self.assertEqual(_tsv, data.tsv)
def test_csv_format_detect(self):
"""Test CSV format detection."""
@@ -283,6 +297,21 @@ class TablibTestCase(unittest.TestCase):
self.assertTrue(tablib.formats.csv.detect(_csv))
self.assertFalse(tablib.formats.csv.detect(_bunk))
def test_tsv_format_detect(self):
"""Test TSV format detection."""
_tsv = (
'1\t2\t3\n'
'4\t5\t6\n'
'7\t8\t9\n'
)
_bunk = (
'¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
)
self.assertTrue(tablib.formats.tsv.detect(_tsv))
self.assertFalse(tablib.formats.tsv.detect(_bunk))
def test_json_format_detect(self):
"""Test JSON format detection."""