Merge branch 'tsv_origin' into tsv_format

This commit is contained in:
Luca Beltrame
2010-10-19 10:49:10 +02:00
15 changed files with 1201 additions and 129 deletions
+26 -15
View File
@@ -1,54 +1,65 @@
History
=======
-------
??
++
* Massive documentation update
* Added column insert/delete support
* Added
0.8.5 (2010-10-06)
------------------
++++++++++++++++++
* New import system. All dependencies attempt to load from site-packages,
then fallback on vendorized modules.
0.8.4 (2010-10-04)
------------------
++++++++++++++++++
* Upated XLS output: Only wrap if '\n' in cell.
* Upated XLS output: Only wrap if '\\n' in cell.
0.8.3 (2010-10-04)
------------------
++++++++++++++++++
* Ability to append new column passing a callable
as the value that will be applied to every row.
0.8.2 (2010-10-04)
------------------
++++++++++++++++++
* Added alignment wrapping to written cells.
* Added separator support to XLS.
0.8.1 (2010-09-28)
------------------
++++++++++++++++++
* Packaging Fix
0.8.0 (2010-09-25)
------------------
++++++++++++++++++
* New format plugin system!
* Imports! ELEGANT Imports!
* Tests. Lots of tests.
0.7.1 (2010-09-20)
------------------
++++++++++++++++++
* Reverting methods back to properties.
* Windows bug compensated in documentation.
0.7.0 (2010-09-20)
------------------
++++++++++++++++++
* Renamed DataBook Databook for consistency.
* Export properties changed to methods (XLS filename / StringIO bug).
@@ -57,31 +68,31 @@ History
0.6.4 (2010-09-19)
------------------
++++++++++++++++++
* Updated unicode export for XLS.
* More exhaustive unit tests.
0.6.3 (2010-09-14)
------------------
++++++++++++++++++
* Added Dataset.append() support for columns.
0.6.2 (2010-09-13)
------------------
++++++++++++++++++
* Fixed Dataset.append() error on empty dataset.
* Updated Dataset.headers property w/ validation.
* Added Testing Fixtures.
0.6.1 (2010-09-12)
------------------
++++++++++++++++++
* Packaging hotfixes.
0.6.0 (2010-09-11)
------------------
++++++++++++++++++
* Public Release.
* Export Support for XLS, JSON, YAML, and CSV.
+13
View File
@@ -0,0 +1,13 @@
* Roll documentation out.
* Release *&* announce http://tablib.org.
* Add required header parameter for columnar ``Dataset.append()``.
* Write exhaustive unit-tests.
* Write stress tests
* Make CSV write customizable.
* Continuous Integration scripts in repo.
* ``Dataset.transpose()`` support
+64
View File
@@ -0,0 +1,64 @@
.. _api:
===
API
===
.. module:: tablib
This part of the documentation covers all the interfaces of Tablib. For
parts where Tablib depends on external libraries, we document the most
important right here and provide links to the canonical documentation.
--------------
Dataset Object
--------------
.. autoclass:: Dataset
:inherited-members:
---------------
Databook Object
---------------
.. autoclass:: Databook
:inherited-members:
---------
Functions
---------
.. autofunction:: detect
.. autofunction:: import_set
----------
Exceptions
----------
.. class:: InvalidDatasetType
Raised when shit goes down.
.. class:: InvalidDimensions
Raised when shit goes down.
.. class:: UnsupportedFormat
Raised when data is supplied in a format that Tablib does not support.
Now, go start some :ref:`Tablib Development <development>`.
+5 -4
View File
@@ -12,11 +12,12 @@
# serve to show the default.
import sys, os
import tablib
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))
sys.path.insert(0, os.path.abspath('..'))
# -- General configuration -----------------------------------------------------
@@ -48,9 +49,9 @@ copyright = u'2010, Kenneth Reitz'
# built documents.
#
# The short X.Y version.
version = '0.8.3'
version = tablib.core.__version__
# The full version, including alpha/beta/rc tags.
release = '0.8.3'
release = version
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -120,7 +121,7 @@ html_theme = 'default'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ['static']
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
+246
View File
@@ -0,0 +1,246 @@
.. _development:
Development
===========
Tablib is under active development, and contributors are welcome.
If you have a feature request, suggestion, or bug report, please open a new issue on GitHub_. To submit patches, please send a pull request on GitHub_.
If you'd like to contribute, there's plenty to do. Here's a short todo list.
.. include:: ../TODO.rst
.. _GitHub: http://github.com/kennethreitz/tablib/
.. _design:
---------------------
Design Considerations
---------------------
Tablib was developed with a few :pep:`20` idioms in mind.
#. Beautiful is better than ugly.
#. Explicit is better than implicit.
#. Simple is better than complex.
#. Complex is better than complicated.
#. Readability counts.
A few other things to keep in mind:
#. Keep your code DRY.
#. Strive to be as simple (to use) as possible.
.. _scm:
--------------
Source Control
--------------
Tablib source is controlled with Git_, the lean, mean, distributed source control machine.
The repository is publicly accessible.
``git clone git://github.com/kennethreitz/tablib.git``
The project is hosted both on **GitHub** and **git.kennethreitz.com**.
GitHub:
http://github.com/kennethreitz/tablib
"Mirror":
http://git.kennethreitz.com/projects/tablib
Git Branch Structure
++++++++++++++++++++
Feature / Hotfix / Release branches follow a `Successful Git Branching Model`_ . Git-flow_ is a great tool for managing the repository. I highly recommend it.
``develop``
The "next release" branch. Likely unstable.
``master``
Current production release (|version|) on PyPi.
``gh-pages``
Current release of http://tablib.org.
Each release is tagged.
When submitting patches, please place your feature/change in its own branch prior to opening a pull request on GitHub_.
.. _Git: http://git-scm.org
.. _`Successful Git Branching Model`: http://nvie.com/posts/a-successful-git-branching-model/
.. _git-flow: http://github.com/nvie/gitflow
.. _newformats:
------------------
Adding New Formats
------------------
Tablib welcomes new format additions! Format suggestions include:
* Tab Separated Values
* MySQL Dump
* HTML Table
Coding by Convention
++++++++++++++++++++
Tablib features a micro-framework for adding format support. The easiest way to understand it is to use it. So, let's define our own format, named *xxx*.
1. Write a new format interface.
:class:`tablib.core` follows a simple pattern for automatically utilizing your format throughout Tablib. Function names are crucial.
Example **tablib/formats/_xxx.py**: ::
title = 'xxx'
def export_set(dset):
....
# returns string representation of given dataset
def export_book(dbook):
....
# returns string representation of given databook
def import_set(dset, in_stream):
...
# populates given Dataset with given datastream
def import_book(dbook, in_stream):
...
# returns Databook instance
def detect(stream):
...
# returns True if given stream is parsable as xxx
.. admonition:: Excluding Support
If the format excludes support for an import/export mechanism (*eg.* :class:`csv <tablib.Dataset.csv>` excludes :class:`Databook <tablib.Databook>` support), simply don't define the respective functions. Appropriate errors will be raised.
2.
Add your new format module to the :class:`tablib.formats.available` tuple.
3.
Add a mock property to the :class:`Dataset <tablib.Dataset>` class with verbose `reStructured Text`_ docstring. This alleviates IDE confusion, and allows for pretty auto-generated Sphinx_ documentation.
4. Write respective :ref:`tests <testing>`.
.. _testing:
--------------
Testing Tablib
--------------
Testing is crucial to Tablib's stability. This stable project is used in production by many companies and developers, so it is important to be certain that every version released is fully operational. When developing a new feature for Tablib, be sure to write proper tests for it as well.
When developing a feature for Tablib, the easiest way to test your changes for potential issues is to simply run the test suite directly. ::
$ ./test_tablib.py
`Hudson CI`_, amongst other tools, supports Java's xUnit testing report format. Nose_ allows us to generate our own xUnit reports.
Installing nose is simple. ::
$ pip install nose
Once installed, we can generate our xUnit report with a single command. ::
$ nosetests test_tablib.py --with-xunit
This will generate a **nosetests.xml** file, which can then be analyzed.
.. _Nose: http://somethingaboutorange.com/mrl/projects/nose/
.. _hudson:
----------------------
Continuous Integration
----------------------
Every commit made to the **develop** branch is automatically tested and inspected upon receipt with `Hudson CI`_. If you have access to the main repository and broke the build, you will receive an email accordingly.
Anyone may view the build status and history at any time.
http://git.kennethreitz.com/ci/
If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server.
Additional reports will also be included here in the future, including :pep:`8` checks and stress reports for extremely large datasets.
.. _`Hudson CI`: http://hudson.dev.java.net
.. _`Kenneth Reitz`: http://kennethreitz.com/contact-me/
.. _docs:
-----------------
Building the Docs
-----------------
Documentation is written in the powerful, flexible, and standard Python documentation format, `reStructured Text`_.
Documentation builds are powered by the powerful Pocoo project, Sphinx_. The :ref:`API Documentation <api>` is mostly documented inline throughout the module.
The Docs live in ``tablib/docs``. In order to build them, you will first need to install Sphinx. ::
$ pip install sphinx
Then, to build an HTML version of the docs, simply run the following from the **docs** directory: ::
$ make html
Your ``docs/_build/html`` directory will then contain an HTML representation of the documentation, ready for publication on most web servers.
You can also generate the documentation in **epub**, **latex**, **json**, *&c* similarly.
.. admonition:: GitHub Pages
To push the documentation up to `GitHub Pages`_, you will first need to run `sphinx-to-github`_ against your ``docs/_build/html`` directory.
GitHub Pages are powered by an HTML generation system called `Jekyll <http://github.com/mojombo/jekyll>`_, which is configured to ignore files and folders that begin with "``_``" (*ie.* **_static**).
and `sphinx-to-github`_. ::
Installing sphinx-to-github is simple. ::
$ pip install sphinx-to-github
Running it against the docs is even simpler. ::
$ sphinx-to-github _build/html
Move the resulting files to the **gh-pages** branch of your repository, and push it up to GitHub.
.. _`reStructured Text`: http://docutils.sourceforge.net/rst.html
.. _Sphinx: http://sphinx.pocoo.org
.. _`GitHub Pages`: http://pages.github.com
.. _Jeckyl: http://github.com/mojombo/jekyll
.. _`sphinx-to-github`: http://github.com/michaeljones/sphinx-to-github
----------
Make sure to check out the :ref:`API Documentation <api>`.
+49 -19
View File
@@ -3,32 +3,62 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to Tablib's documentation!
==================================
Tablib: Pythonic Tabular Data
=============================
Contents:
Welcome to Tablib's documentation.
.. Contents:
..
.. .. toctree::
.. :maxdepth: 2
..
.. Indices and tables
.. ==================
..
.. * :ref:`genindex`
.. * :ref:`modindex`
.. * :ref:`search`
Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Oh, and it's :ref:`MIT Licensed <mit>`.
I recommend you start with :ref:`Installation <install>`.
User's Guide
------------
This part of the documentation, which is mostly prose, begins with some background information about Tablib, then focuses on step-by-step instructions for getting the most out of your datasets.
.. toctree::
:maxdepth: 2
Indices and tables
==================
intro
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
.. toctree::
:maxdepth: 2
Dataset Object
--------------
.. module:: tablib
install
.. autoclass:: Databook
:members:
:inherited-members:
.. toctree::
:maxdepth: 2
Databook Object
---------------
tutorial
.. autoclass:: Dataset
:members:
:inherited-members:
.. toctree::
:maxdepth: 2
development
API Reference
-------------
If you are looking for information on a specific function, class or
method, this part of the documentation is for you.
.. toctree::
:maxdepth: 2
api
+78
View File
@@ -0,0 +1,78 @@
.. _install:
Installation
============
This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancements <speed-extentions>`.
.. _installing:
-----------------
Installing Tablib
-----------------
To install Tablib, it only takes one simple command. ::
$ pip install tablib
Or, if you must: ::
$ easy_install tablib
But, you really shouldn't do that.
-------------------
Download the Source
-------------------
You can also install tablib from source. The latest release (|version|) is available from GitHub.
* tarball_
* zipball_
.. _
Once you have a copy of the source, you can embed it in your Python package, or install it into your site-packages easily. ::
$ python setup.py install
To download the full source history from Git, see :ref:`Source Control <scm>`.
.. _tarball: http://github.com/kennethreitz/tablib/tarball/master
.. _zipball: http://github.com/kennethreitz/tablib/zipball/master
.. _speed-extentions:
Speed Extensions
----------------
.. versionadded:: 0.8.5
Tablib is partially dependent on the **pyyaml**, **simplejson**, and **xlwt** modules. To reduce installation issues, fully integrated versions of all required libraries are included in Tablib.
However, if performance is important to you (and it should be), you can install **pyyaml** with C extensions from PyPi. ::
$ pip install PyYAML
If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. If you're using Python 2.6+, the built-in **json** module is already optimized and in use. ::
$ pip install simplejson
.. _updates:
Staying Updated
---------------
The latest version of Tablib will always be available here:
* PyPi: http://pypi.python.org/pypi/tablib/
* GitHub: http://github.com/kennethreitz/tablib/
When a new version is available, upgrading is simple. ::
$ pip install tablib --upgrade
Now, go get a :ref:`Quick Start <quickstart>`.
+68
View File
@@ -0,0 +1,68 @@
.. _intro:
Introduction
============
This part of the documentation covers all the interfaces of Tablib.
Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to Pythonically import, export, and manipulate tabular data sets.
Inception
---------
Tablib was built by `Kenneth Reitz`_ to fulfill a specific need.
Tablib was born.
.. _`Kenneth Reitz`: http://kennethreitz.com
Philosophy
----------
Tablib was developed with a few :pep:`20` idioms in mind.
#. Beautiful is better than ugly.
#. Explicit is better than implicit.
#. Simple is better than complex.
#. Complex is better than complicated.
#. Readability counts.
Besides, Why not?
:ref:`seperators`
.. _mit:
MIT License
-----------
A large number of open source projects you find today are `GPL Licensed`_. While the GPL certainly has essential applications, it should most certainly not be your go-to license for your next open source project.
A project that is released as GPL cannot be used in any commercial product without the product itself also being offered as open source. The MIT and BSD licenses are great alternatives to the GPL that allow your open-source software to be used in proprietary, closed-source software.
Tablib is released under terms of `The MIT License`_.
.. _`GPL Licensed`: http://www.opensource.org/licenses/gpl-license.php
.. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php
.. _pythonsupport:
Pythons Supported
-----------------
At this time, the following Python platforms are officially supported:
* Python 2.6
* Python 2.7
Support for other Pythons will be rolled out soon.
Now, go :ref:`Install Tablib <install>`.
+337
View File
@@ -0,0 +1,337 @@
.. _quickstart:
==========
Quickstart
==========
.. module:: tablib
Eager to get started? This page gives a good introduction in how to get started with Tablib. This assumes you already have Tablib installed. If you do not, head over to the :ref:`Installation <install>` section.
First, make sure that:
* Tablib is :ref:`installed <install>`
* Tablib is :ref:`up-to-date <updates>`
Let's get started with some simple use cases and examples.
------------------
Creating a Dataset
------------------
A :class:`Dataset <tablib.Dataset>` is nothing more than what its name implies—a set of data.
Creating your own instance of the :class:`tablib.Dataset` object is simple. ::
data = tablib.Dataset()
You can now start filling this :class:`Dataset <tablib.Dataset>` object with data.
.. admonition:: Example Context
From here on out, if you see ``data``, assume that it's a fresh :class:`Dataset <tablib.Dataset>` object.
-----------
Adding Rows
-----------
Let's say you want to collect a simple list of names. ::
# collection of names
names = ['Kenneth Reitz', 'Bessie Monke']
for name in names:
# split name appropriately
fname, lname = name.split()
# add names to Dataset
data.append([fname, lname])
You can get a nice, Pythonic view of the dataset at any time with :class:`Dataset.dict`.
>>> data.dict
[('Kenneth', 'Reitz'), ('Bessie', 'Monke')]
--------------
Adding Headers
--------------
It's time to enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. ::
data.headers = ['First Name', 'Last Name']
Now our data looks a little different. ::
>>> data.dict
[{'Last Name': 'Reitz', 'First Name': 'Kenneth'}, {'Last Name': 'Monke', 'First Name': 'Bessie'}]
--------------
Adding Columns
--------------
Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. ::
data.append(col=['Age', 22, 20])
Let's view the data now. ::
>>> data.dict
[{'Last Name': 'Reitz', 'First Name': 'Kenneth', 'Age': 22}, {'Last Name': 'Monke', 'First Name': 'Bessie', 'Age': 20}]
It's that easy.
--------------
Exporting Data
--------------
Tablib's killer feature is the ability to export your :class:`Dataset` objects into a number of formats.
**Comma-Separated Values** ::
>>> data.csv
Last Name,First Name,Age
Reitz,Kenneth,22
Monke,Bessie,20
**JavaScript Object Notation** ::
>>> data.json
[{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 20}]
**YAML Ain't Markup Language** ::
>>> data.yaml
- {Age: 22, First Name: Kenneth, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Last Name: Monke}
**Microsoft Excel** ::
>>> data.xls
<censored binary data>
------------------------
Selecting Rows & Columns
------------------------
You can slice and dice your data, just like a standard Python list. ::
>>> data[0]
('Kenneth', 'Reitz', 22)
If we had a set of data consisting of thousands of rows, it could be useful to get a list of values in a column.
To do so, we access the :class:`Dataset` as if it were a standard Python dictionary. ::
>>> data['First Name']
['Kenneth', 'Bessie']
Let's find the average age. ::
>>> ages = data['Age']
>>> float(sum(ages)) / len(ages)
21.0
-----------------------
Removing Rows & Columns
-----------------------
::
>>> del data['Col Name']
::
>>> del data[0:12]
It's that easy.
==============
Advanced Usage
==============
This part of the documentation serves to give you an idea of features that are otherwise hard to extract from the :ref:`API Documentation <api>`.
And now for something completely different.
---------------
Dynamic Columns
---------------
.. versionadded:: 0.8.3
Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. A dynamic column is a single callable object (*ie.* a function).
For now, this is only supported on :class:`Dataset` objects that have no defined :class:`headers <Dataset.headers>`.
So, let's save our headers for later, then remove them. ::
_headers = list(data.headers)
data.headers = None
We can now add a dynamic column to our :class:`Dataset` object. In this example, we have a function that generates a random grade for our students. ::
import random
def random_grade(row):
"""Returns a random integer for entry."""
return (random.randint(60,100)/100.0)
data.append(col=[random_grade])
Now add the headers back, with our new column. ::
>>> data.headers = _headers + ['Grade']
Let's have a look at our data. ::
>>> data.yaml
- {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Grade: 0.75, Last Name: Monke}
Let's remove that column. ::
>>> del data['Grade']
When you add a dynamic column, the first argument that is passed in to the given callable is the current data row. You can use this to perform calculations against your data row.
For example, we can use the data available in the row to guess the gender of a student. ::
def guess_gender(row):
"""Calculates gender of given student data row."""
m_names = ('Kenneth', 'Mike', 'Yuri')
f_names = ('Bessie', 'Samantha', 'Heather')
name = row[0]
if name in m_names:
return 'Male'
elif name in f_names:
return 'Female'
else:
return 'Unknown'
Adding this function to our dataset as a dynamic column would result in: ::
>>> data.yaml
- {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke}
Excel Workbook With Multiple Sheets
------------------------------------
:class:`Databook`
::
book = tablib.Databook([data, data, data])
::
with open('students.xls', 'wb') as f:
f.write(book.xls)
The resulting **students.xls** file will contain a separate spreadsheet for each :class:`Dataset` object in the :class:`Databook`.
.. admonition:: Binary Warning
Make sure to open the output file in binary mode.
.. _seperators:
----------
Separators
----------
.. versionadded:: 0.8.2
It's often useful to create a blank row containing information on the upcoming data. For example:
::
daniel_tests = [
('11/24/09', 'Math 101 Mid-term Exam', 56.),
('05/24/10', 'Math 101 Final Exam', 62.)
]
suzie_tests = [
('11/24/09', 'Math 101 Mid-term Exam', 56.),
('05/24/10', 'Math 101 Final Exam', 62.)
]
# Create new dataset
tests = tablib.Dataset()
tests.headers = ['Date', 'Test Name', 'Grade']
# Daniel's Tests
tests.append_separator('Daniel\'s Scores')
for test_row in daniel_tests:
tests.append(test_row)
# Suzie's Tests
tests.append_separator('Suzie\'s Scores')
for test_row in suzie_tests:
tests.append(test_row)
# Write spreadsheet to disk
with open('grades.xls', 'wb') as f:
f.write(tests.xls)
The resulting **grades.xls** will have the following layout:
Daniel's Scores:
* '11/24/09', 'Math 101 Mid-term Exam', 56.
* '05/24/10', 'Math 101 Final Exam', 62.
Suzie's Scores:
* '11/24/09', 'Math 101 Mid-term Exam', 56.
* '05/24/10', 'Math 101 Final Exam', 62.
.. admonition:: Format Support
At this time, only :class:`Excel <Dataset.xls>` output supports separators.
----
Now, go check out the :ref:`API Documentation <api>` or begin :ref:`Tablib Development <development>`.
Vendored
+11 -1
View File
@@ -1,7 +1,17 @@
import os
from fabric.api import *
def scrub():
    """Death to the bytecode!

    Removes the ``dist`` and ``build`` directories, then deletes every
    ``*.pyc`` file found below the current directory.
    """
    # The build artifacts only need to be removed once.
    local('rm -fr dist build')
    local("find . -name \"*.pyc\" -exec rm '{}' ';'")
def docs():
    """Build the HTML docs and publish them to the ``gh-pages`` branch.

    The paths are relative, so this is expected to be run from the
    directory that holds the Sphinx ``Makefile``.

    Every step goes through fabric's ``local`` (as ``scrub`` does) rather
    than ``os.system``, so a failing step stops the task instead of letting
    a broken or stale build be committed and pushed.
    """
    local('make html')

    # The remaining commands operate on the generated HTML tree.
    os.chdir('_build/html')
    local('sphinxtogithub .')
    local('git add -A')
    local("git commit -m 'documentation update'")
    local('git push origin gh-pages')
+2 -1
View File
@@ -31,7 +31,8 @@ setup(
url='http://github.com/kennethreitz/tablib',
packages= [
'tablib', 'tablib.formats',
'tablib.packages.simplejson'
'tablib.packages',
'tablib.packages.simplejson',
'tablib.packages.xlwt',
'tablib.packages.yaml',
],
+202 -87
View File
@@ -9,7 +9,7 @@
:license: MIT, see LICENSE for more details.
"""
from tablib.formats import FORMATS as formats
from tablib import formats
__title__ = 'tablib'
@@ -21,7 +21,7 @@ __copyright__ = 'Copyright 2010 Kenneth Reitz'
class Dataset(object):
"""The tablib Dataset object is the heart of tablib. It provides all core
"""The :class:`Dataset` object is the heart of Tablib. It provides all core
functionality.
Usually you create a :class:`Dataset` instance in your main module, and append
@@ -44,64 +44,14 @@ class Dataset(object):
:param \*args: (optional) list of rows to populate Dataset
:param headers: (optional) list strings for Dataset header row
.. admonition:: About the Format Attributes
If you look at the code, the various output/import formats are not
defined within the itself. To add support for a new format, see
:ref:`Adding New Formats`.
.. attribute:: csv
A CSV representation of the Dataset object. The top row will contain
headers, if they have been set. Otherwise, the top row will contain
the first row of the dataset.
A dataset object can also be imported by setting the `Dataset.csv` attribute: ::
data = tablib.Dataset()
data.csv = 'age, first_name, last_name\\n90, John, Adams'
Import assumes (for now) that headers exist.
.. attribute:: dict
.. admonition:: Format Attributes Definition
An native Python representation of the Dataset object. If headers have been
set, a list of Python dictionaries will be returned. If no headers have been
set, a list of tuples (rows) will be returned instead.
If you look at the code, the various output/import formats are not
defined within the :class:`Dataset` object. To add support for a new format, see
:ref:`Adding New Formats <newformats>`.
A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
data = tablib.Dataset()
data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
.. attribute:: xls
An Excel Spreadsheet representation of the Dataset object, including
:ref:`seperators`.
*Note:* `Dataset.xls` contains binary data, so make sure to write in binary
mode::
with open('output.xls', 'wb') as f:
f.write(data.xls)
.. attribute:: yaml
A YAML representation of the Dataset object. If headers have been
set, a YAML list of objects will be returned. If no headers have
been set, a YAML list of lists (rows) will be returned instead.
A dataset object can also be imported by setting the `Dataset.json` attribute: ::
data = tablib.Dataset()
data.yaml = '- {age: 90, first_name: John, last_name: Adams}'
Import assumes (for now) that headers exist.
"""
def __init__(self, *args, **kwargs):
@@ -145,7 +95,21 @@ class Dataset(object):
def __delitem__(self, key):
del self._data[key]
if isinstance(key, basestring):
if key in self.headers:
pos = self.headers.index(key)
del self.headers[pos]
for i, row in enumerate(self._data):
_row = list(row)
del _row[pos]
self._data[i] = tuple(_row)
else:
raise KeyError
else:
del self._data[key]
def __repr__(self):
@@ -158,7 +122,7 @@ class Dataset(object):
@classmethod
def _register_formats(cls):
"""Adds format properties."""
for fmt in formats:
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set))
@@ -202,16 +166,38 @@ class Dataset(object):
return data
def _clean_col(self, col):
"""Prepares the given column for insert/append."""
col = list(col)
if self.headers:
header = [col.pop(0)]
else:
header = []
if len(col) == 1 and callable(col[0]):
col = map(col[0], self._data)
col = tuple(header + col)
return col
@property
def height(self):
"""Returns the height of the Dataset."""
"""The number of rows currently in the :class:`Dataset`.
Cannot be directly modified.
"""
return len(self._data)
@property
def width(self):
"""Returns the width of the Dataset."""
"""The number of columns currently in the :class:`Dataset`.
Cannot be directly modified.
"""
try:
return len(self._data[0])
except IndexError:
@@ -223,7 +209,11 @@ class Dataset(object):
@property
def headers(self):
"""Headers property."""
"""An *optional* list of strings to be used for header rows and attribute names.
This must be set manually. The given list length must equal :class:`Dataset.width`.
"""
return self.__headers
@@ -242,7 +232,7 @@ class Dataset(object):
@property
def dict(self):
"""A JSON representation of the Dataset object. If headers have been
"""A JSON representation of the :class:`Dataset` object. If headers have been
set, a JSON list of objects will be returned. If no headers have
been set, a JSON list of lists (rows) will be returned instead.
@@ -257,7 +247,16 @@ class Dataset(object):
@dict.setter
def dict(self, pickle):
"""A native Python representation of the Dataset object. If headers have been
set, a list of Python dictionaries will be returned. If no headers have been
set, a list of tuples (rows) will be returned instead.
A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. ::
data = tablib.Dataset()
data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
"""
if not len(pickle):
return
@@ -276,21 +275,110 @@ class Dataset(object):
else:
raise UnsupportedFormat
@property
def xls():
    """An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`seperators`. Cannot be set.

    .. admonition:: Binary Warning

        :class:`Dataset.xls` contains binary data, so make sure to write in binary mode::

            with open('output.xls', 'wb') as f:
                f.write(data.xls)
    """
    # NOTE(review): documentation stub only; presumably the working
    # property is attached by ``_register_formats`` -- confirm.
    pass
@property
def csv():
    """A CSV representation of the :class:`Dataset` object. The top row will contain
    headers, if they have been set. Otherwise, the top row will contain
    the first row of the dataset.

    A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. ::

        data = tablib.Dataset()
        data.csv = 'age, first_name, last_name\\n90, John, Adams'

    Import assumes (for now) that headers exist.
    """
    # NOTE(review): documentation stub only; presumably the working
    # property is attached by ``_register_formats`` -- confirm.
    pass
@property
def tsv():
    """A TSV representation of the :class:`Dataset` object. The top row will contain
    headers, if they have been set. Otherwise, the top row will contain
    the first row of the dataset.

    A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. ::

        data = tablib.Dataset()
        data.tsv = 'age\\tfirst_name\\tlast_name\\n90\\tJohn\\tAdams'

    Import assumes (for now) that headers exist.
    """
    # NOTE(review): documentation stub only; presumably the working
    # property is attached by ``_register_formats`` -- confirm.
@property
def yaml():
    """A YAML representation of the :class:`Dataset` object. If headers have been
    set, a YAML list of objects will be returned. If no headers have
    been set, a YAML list of lists (rows) will be returned instead.

    A dataset object can also be imported by setting the :class:`Dataset.yaml` attribute: ::

        data = tablib.Dataset()
        data.yaml = '- {age: 90, first_name: John, last_name: Adams}'

    Import assumes (for now) that headers exist.
    """
    # NOTE(review): documentation stub only; presumably the working
    # property is attached by ``_register_formats`` -- confirm.
    pass
@property
def json():
    """A JSON representation of the :class:`Dataset` object. If headers have been
    set, a JSON list of objects will be returned. If no headers have
    been set, a JSON list of lists (rows) will be returned instead.

    A dataset object can also be imported by setting the :class:`Dataset.json` attribute: ::

        data = tablib.Dataset()
        data.json = '[{"age": 90, "first_name": "John", "last_name": "Adams"}]'

    Import assumes (for now) that headers exist.
    """
    # NOTE(review): documentation stub only; presumably the working
    # property is attached by ``_register_formats`` -- confirm.
def append(self, row=None, col=None):
"""Adds a row to the end of Dataset"""
"""Adds a row or column to the :class:`Dataset`.
Rows and Columns appended must be the correct size (height or width).
The default behaviour is to append the given row to the :class:`Dataset` object. If the ``col`` parameter is given, however, a new column will be added to the :class:`Dataset` object. If appending a column, and :class:`Dataset.headers` is set, the first item in the list will be considered the header for that column.
Append a new row to the dataset: ::
data.append(('Kenneth', 'Reitz'))
Append a new column to the dataset: ::
data.append(col=('Age', 90, 67, 22))
You can also add a column of a single callable object, which will
add a new column with the return values of the callable each as an
item in the column. The callable can be written to perform calculations
on the current row. The callable receives a tuple representation of
the current data row as the first parameter. ::
data.append(col=[random.choice])
"""
if row is not None:
self._validate(row)
self._data.append(tuple(row))
elif col is not None:
col = list(col)
if self.headers:
header = [col.pop(0)]
else:
header = []
if len(col) == 1 and callable(col[0]):
col = map(col[0], self._data)
col = tuple(header + col)
col = self._clean_col(col)
self._validate(col=col)
@@ -310,14 +398,14 @@ class Dataset(object):
def insert_separator(self, index, text='-'):
"""Adds a separator to Dataset at given index."""
"""Adds a separator to :class:`Dataset` at given index."""
sep = (index, text)
self._separators.append(sep)
def append_separator(self, text='-'):
"""Adds a separator to Dataset."""
"""Adds a :ref:`seperator <seperators>` to the :class:`Dataset`."""
# change offsets if headers are or aren't defined
if not self.headers:
@@ -328,24 +416,51 @@ class Dataset(object):
self.insert_separator(index, text)
def insert(self, i, row=None):
"""Inserts a row at given position in Dataset"""
def insert(self, index, row=None, col=None):
"""Inserts a row or column to the :class:`Dataset` at the given index.
Rows and columns inserted must be the correct size (height or width).
The default behaviour is to insert the given row into the :class:`Dataset` object at the given index. If the ``col`` parameter is given, however, a new column will be inserted into the :class:`Dataset` object instead. If inserting a column, and :class:`Dataset.headers` is set, the first item in the list will be considered the header for the inserted column.
You can also insert a column of a single callable object, which will
add a new column with the return values of the callable each as an
item in the column. ::
data.append(col=random.randint)
"""
if row:
self._validate(row)
self._data.insert(i, tuple(row))
elif col:
pass
col = self._clean_col(col)
self._validate(col=col)
if self.headers:
# pop the first item off, add to headers
self.headers.insert(index, col[0])
col = col[1:]
if self.height and self.width:
for i, row in enumerate(self._data):
_row = list(row)
_row.insert(index, col[i])
self._data[i] = tuple(_row)
else:
self._data = [tuple([row]) for row in col]
def wipe(self):
"""Erases all data from Dataset."""
"""Removes all content and headers from the :class:`Dataset` object."""
self._data = list()
self.__headers = None
class Databook(object):
"""A book of Dataset objects.
Currently, this exists only for XLS workbook support.
"""A book of :class:`Dataset` objects.
"""
def __init__(self, sets=[]):
@@ -361,14 +476,14 @@ class Databook(object):
def wipe(self):
"""Wipe book clean."""
"""Removes all :class:`Dataset` objects from the :class:`Databook`."""
self._datasets = []
@classmethod
def _register_formats(cls):
"""Adds format properties."""
for fmt in formats:
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book))
@@ -380,7 +495,7 @@ class Databook(object):
def add_sheet(self, dataset):
"""Adds given dataset."""
"""Adds given :class:`Dataset` to the :class:`Databook`."""
if type(dataset) is Dataset:
self._datasets.append(dataset)
else:
@@ -388,7 +503,7 @@ class Databook(object):
def _package(self):
"""Packages Databook for delivery."""
"""Packages :class:`Databook` for delivery."""
collector = []
for dset in self._datasets:
collector.append(dict(
@@ -400,13 +515,13 @@ class Databook(object):
@property
def size(self):
"""The number of the Datasets within DataBook."""
"""The number of the :class:`Dataset` objects within :class:`Databook`."""
return len(self._datasets)
def detect(stream):
"""Return (format, stream) of given stream."""
for fmt in formats:
for fmt in formats.available:
try:
if fmt.detect(stream):
return (fmt, stream)
+2 -1
View File
@@ -7,5 +7,6 @@ import _csv as csv
import _json as json
import _xls as xls
import _yaml as yaml
import _tsv as tsv
FORMATS = (json, xls, yaml, csv)
available = (json, xls, yaml, csv, tsv)
+53
View File
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
""" Tablib - TSV (Tab Separated Values) Support.
"""
import cStringIO
import csv
import os
import tablib
title = 'tsv'
extentions = ('tsv',)
def export_set(dataset):
    """Serialize ``dataset`` to a tab-separated string.

    Every row produced by ``dataset._package(dicts=False)`` is written
    through a :mod:`csv` writer configured with a tab delimiter, and the
    accumulated text is returned.
    """
    buf = cStringIO.StringIO()
    csv.writer(buf, delimiter="\t").writerows(dataset._package(dicts=False))
    return buf.getvalue()
def import_set(dset, in_stream, headers=True):
    """Populate ``dset`` in place from a TSV string.

    ``dset`` is wiped first, then refilled from ``in_stream``. Blank lines
    are ignored. Nothing is returned; the result is the mutated ``dset``.

    :param dset: object exposing ``wipe()``, ``append(row)`` and a settable
        ``headers`` attribute.
    :param in_stream: the TSV text to parse (a string, not a file object).
    :param headers: when true, the first non-empty row becomes
        ``dset.headers`` instead of a data row.
    """
    dset.wipe()

    # splitlines(True) accepts LF, CRLF and CR terminated input alike
    # (splitting on "\r\n" alone handed LF-only text to the reader as one
    # giant line). The terminators are kept so the csv reader can preserve
    # line breaks that occur inside quoted fields.
    rows = csv.reader(in_stream.splitlines(True), delimiter="\t")

    # Track the header row explicitly rather than by position, so leading
    # blank lines do not cause the header row to be treated as data.
    awaiting_headers = bool(headers)
    for row in rows:
        # Skip empty rows
        if not row:
            continue

        if awaiting_headers:
            dset.headers = row
            awaiting_headers = False
        else:
            dset.append(row)
def detect(stream):
    """Return True if ``stream`` looks like tab-separated text.

    The check delegates to :class:`csv.Sniffer`, restricted to the tab
    delimiter; a :class:`csv.Error` from the sniffer means no consistent
    tab-delimited layout was found.

    :param stream: the text sample to inspect (a string).
    :returns: ``True`` when a tab-delimited dialect is detected, else ``False``.
    """
    try:
        # Only success or failure matters; the sniffed dialect is discarded.
        csv.Sniffer().sniff(stream, delimiters="\t")
    except csv.Error:
        return False
    return True
+45 -1
View File
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tests for tablib."""
"""Tests for Tablib."""
import unittest
@@ -178,6 +178,22 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(csv, self.founders.csv)
def test_tsv_export(self):
    """Verify exporting dataset object as TSV."""

    # Build the expected TSV text: the header line first, followed by one
    # line per founder. Joining each row's cells with tabs avoids stripping
    # the accumulated string, which would also eat a legitimate leading or
    # trailing tab produced by an empty first or last cell.
    rows = [self.headers] + list(self.founders)
    expected = ''.join(
        '\t'.join(str(col) for col in row) + '\r\n' for row in rows
    )

    self.assertEqual(expected, self.founders.tsv)
def test_unicode_append(self):
"""Passes in a single unicode charecter and exports."""
@@ -188,6 +204,7 @@ class TablibTestCase(unittest.TestCase):
data.json
data.yaml
data.csv
data.tsv
data.xls
@@ -268,6 +285,18 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(_csv, data.csv)
def test_tsv_import_set(self):
    """Round-trip a dataset through its TSV serialization."""

    for founder in (self.john, self.george):
        data.append(founder)
    data.headers = self.headers

    exported = data.tsv
    # Assigning to the ``tsv`` attribute imports the text back into the dataset.
    data.tsv = exported

    self.assertEqual(exported, data.tsv)
def test_csv_format_detect(self):
"""Test CSV format detection."""
@@ -283,6 +312,21 @@ class TablibTestCase(unittest.TestCase):
self.assertTrue(tablib.formats.csv.detect(_csv))
self.assertFalse(tablib.formats.csv.detect(_bunk))
def test_tsv_format_detect(self):
    """Check that TSV detection accepts tabular text and rejects garbage."""

    detect = tablib.formats.tsv.detect

    tabular = (
        '1\t2\t3\n'
        '4\t5\t6\n'
        '7\t8\t9\n'
    )
    garbage = (
        '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
    )

    self.assertTrue(detect(tabular))
    self.assertFalse(detect(garbage))
def test_json_format_detect(self):
"""Test JSON format detection."""