From a3c82804cdd7c9f2215b5abd074ca2d166aa1f7c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 6 Oct 2010 20:01:52 -0400 Subject: [PATCH 01/50] Simple fix. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e93f61b..bfe0b4b 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ setup( url='http://github.com/kennethreitz/tablib', packages= [ 'tablib', 'tablib.formats', - 'tablib.packages.simplejson' + 'tablib.packages.simplejson', 'tablib.packages.xlwt', 'tablib.packages.yaml', ], From 9efd982bfac2bc00877b0cf09cd8b28e1ddc07d5 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 7 Oct 2010 16:01:09 -0400 Subject: [PATCH 02/50] Documentation update. --- docs/api.rst | 26 ++++++++++++++++++++++++++ docs/index.rst | 38 +++++++++++++++++++++++++------------- docs/install.rst | 0 3 files changed, 51 insertions(+), 13 deletions(-) create mode 100644 docs/api.rst create mode 100644 docs/install.rst diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..28f3f70 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,26 @@ +.. _api: + +API +=== + +.. module:: flask + +This part of the documentation covers all the interfaces of Tablib. For +parts where Tablib depends on external libraries, we document the most +important right here and provide links to the canonical documentation. + + +Dataset Object +-------------- +.. module:: tablib + +.. autoclass:: Dataset + :members: + :inherited-members: + +Databook Object +--------------- + +.. autoclass:: Databook + :members: + :inherited-members: diff --git a/docs/index.rst b/docs/index.rst index bb0589c..97e520b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,14 +3,19 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to Tablib's documentation! -================================== +Welcome to Tablib +================= + +Welcome to Tablib's docuemtation. 
+ +I recommend you start off with `Installation`. Contents: .. toctree:: :maxdepth: 2 + Indices and tables ================== @@ -18,17 +23,24 @@ Indices and tables * :ref:`modindex` * :ref:`search` -Dataset Object --------------- -.. module:: tablib -.. autoclass:: Databook - :members: - :inherited-members: +User's Guide +------------ -Databook Object ---------------- +This part of the documentation, which is mostly prose, begins with some background information about Tablib, then focuses on step-by-step instructions for using Tablib. -.. autoclass:: Dataset - :members: - :inherited-members: +.. toctree:: + :maxdepth: 2 + + installation + +API Reference +------------- + +If you are looking for information on a specific function, class or +method, this part of the documentation is for you. + +.. toctree:: + :maxdepth: 2 + + api \ No newline at end of file diff --git a/docs/install.rst b/docs/install.rst new file mode 100644 index 0000000..e69de29 From d0b09f0fce9ed602c398a0381a0a171d1fe91d61 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 7 Oct 2010 16:01:17 -0400 Subject: [PATCH 03/50] Doc upgrades. --- tablib/core.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 544c208..9658d67 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -49,7 +49,7 @@ class Dataset(object): .. admonition:: About the Format Attributes If you look at the code, the various output/import formats are not - defined within the itself. To add support for a new format, see + defined within the :class:`Dataset` object. To add support for a new format, see :ref:`Adding New Formats`. .. attribute:: csv @@ -58,7 +58,7 @@ class Dataset(object): headers, if they have been set. Otherwise, the top row will contain the first row of the dataset.
- A dataset object can also be imported by setting the `Dataset.csv` attribute: :: + A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. :: data = tablib.Dataset() data.csv = 'age, first_name, last_name\\n90, John, Adams' @@ -72,7 +72,7 @@ class Dataset(object): set, a list of Python dictionaries will be returned. If no headers have been set, a list of tuples (rows) will be returned instead. - A dataset object can also be imported by setting the `Dataset.dict` attribute: :: + A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. :: data = tablib.Dataset() data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] @@ -83,11 +83,12 @@ class Dataset(object): An Excel Spreadsheet representation of the Dataset object, including :ref:`seperators`. - *Note:* `Dataset.xls` contains binary data, so make sure to write in binary - mode:: + .. admonition:: Binary Warning + + :class:`Dataset.xls` contains binary data, so make sure to write in binary mode:: - with open('output.xls', 'wb') as f: - f.write(data.xls) + with open('output.xls', 'wb') as f: + f.write(data.xls) .. attribute:: yaml @@ -96,7 +97,7 @@ class Dataset(object): set, a YAML list of objects will be returned. If no headers have been set, a YAML list of lists (rows) will be returned instead. - A dataset object can also be imported by setting the `Dataset.json` attribute: :: + A dataset object can also be imported by setting the :class:`Dataset.yaml` attribute: :: data = tablib.Dataset() data.yaml = '- {age: 90, first_name: John, last_name: Adams}' From df8c0335d19ce2f264ae16ba42d1eace7296a566 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 7 Oct 2010 16:01:27 -0400 Subject: [PATCH 04/50] Fixed incorrect packaging.
--- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index bfe0b4b..e658c34 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,7 @@ setup( url='http://github.com/kennethreitz/tablib', packages= [ 'tablib', 'tablib.formats', + 'tablib.packages', 'tablib.packages.simplejson', 'tablib.packages.xlwt', 'tablib.packages.yaml', From 48def2cba6a9125b32b4455374297b7160507f37 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 7 Oct 2010 17:52:21 -0400 Subject: [PATCH 05/50] Documentation Update. Site should be up soon. --- docs/development.rst | 18 +++++++++++++++ docs/index.rst | 52 +++++++++++++++++++++++++++++--------------- docs/install.rst | 41 ++++++++++++++++++++++++++++++++++ docs/intro.rst | 19 ++++++++++++++++ docs/quickstart.rst | 20 +++++++++++++++++ 5 files changed, 133 insertions(+), 17 deletions(-) create mode 100644 docs/development.rst create mode 100644 docs/intro.rst create mode 100644 docs/quickstart.rst diff --git a/docs/development.rst b/docs/development.rst new file mode 100644 index 0000000..7a574c6 --- /dev/null +++ b/docs/development.rst @@ -0,0 +1,18 @@ +.. _development: + +Development +=========== + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + + +Future of Tablib +---------------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + + +Design Considerations +--------------------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 97e520b..058335e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,27 +3,29 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to Tablib -================= +Tablib: Pythonic Tabular Data +============================= Welcome to Tablib's docuemtation. +.. Contents: +.. +.. .. toctree:: +.. :maxdepth: 2 +.. + +.. Indices and tables +.. ================== +.. +.. * :ref:`genindex` +.. * :ref:`modindex` +.. * :ref:`search` + + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + I recommend you start off with `Installation`. -Contents: - -.. toctree:: - :maxdepth: 2 - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - - User's Guide ------------ @@ -32,7 +34,23 @@ This part of the documentation, which is mostly prose, begins with some backgrou .. 
toctree:: :maxdepth: 2 - installation + intro + +.. toctree:: + :maxdepth: 2 + + install + +.. toctree:: + :maxdepth: 2 + + quickstart + +.. toctree:: + :maxdepth: 2 + + development + API Reference ------------- diff --git a/docs/install.rst b/docs/install.rst index e69de29..4801eee 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -0,0 +1,41 @@ +.. _install: + +Installation +============ + +This part of the documentation covers all the interfaces of Tablib. For +parts where Tablib depends on external libraries, we document the most +important right here and provide links to the canonical documentation. + + +Installing Tablib +----------------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +Pythons Supported +----------------- + +At this time, only the following Python platforms are officially supported: + +* Python 2.6 +* Python 2.7 + +Other Pythons will be supported soon. + + +C Extentions +------------ + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + + +Virtualenv +---------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + +Staying Updated +--------------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file diff --git a/docs/intro.rst b/docs/intro.rst new file mode 100644 index 0000000..bfa8a3e --- /dev/null +++ b/docs/intro.rst @@ -0,0 +1,19 @@ +.. _intro: + +Introduction +============ + +This part of the documentation covers all the interfaces of Tablib. For +parts where Tablib depends on external libraries, we document the most +important right here and provide links to the canonical documentation. + +Why Tablib? +----------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + + +Boiler Plate is Bad +------------------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. diff --git a/docs/quickstart.rst b/docs/quickstart.rst new file mode 100644 index 0000000..419ac82 --- /dev/null +++ b/docs/quickstart.rst @@ -0,0 +1,20 @@ +.. _quickstart: + +Quickstart +========== + +This part of the documentation covers all the interfaces of Tablib. For +parts where Tablib depends on external libraries, we document the most +important right here and provide links to the canonical documentation. + + +Basic Usage +----------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + + +Advanced Usage +-------------- + +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file From be7182aea958520a9a2c37ae05130c88119f4e8d Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Fri, 8 Oct 2010 09:41:19 -0400 Subject: [PATCH 06/50] installation documentation update. 
--- docs/install.rst | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/docs/install.rst b/docs/install.rst index 4801eee..ac6caf1 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -11,7 +11,31 @@ important right here and provide links to the canonical documentation. Installing Tablib ----------------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +To install Tablib, it only takes one simple command. :: + + $ pip install tablib + +Or, if you must: :: + + $ easy_install tablib + +But, you really shouldn't do that. + +Speed Extentions +---------------- + +Tablib is partially dependent on the **pyyaml**, **simplejson**, and **xlwt** modules. To reduce installation issues, fully integrated versions of all required libraries are included in Tablib. + +However, if performance is important to you (and it should be), you should install **simplejson** and **pyyaml** with C extentions from PyPi. :: + + $ pip install PyYAML + +If you're using Python 2.6+, the built-in **json** module is already optimized and in use. If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. :: + + $ pip install simplejson + +.. If you're using a Python < 2.6, you can speed up JSON + Pythons Supported ----------------- @@ -21,13 +45,9 @@ At this time, only the following Python platforms are officially supported: * Python 2.6 * Python 2.7 -Other Pythons will be supported soon. +Support for other Pythons will be rolled out soon. 
-C Extentions ------------- - -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Virtualenv @@ -35,6 +55,7 @@ Virtualenv Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + Staying Updated --------------- From 55eb3f93e37141ed33455bc4b7db4b6f721445fd Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Fri, 8 Oct 2010 11:49:53 -0400 Subject: [PATCH 07/50] documentation update --- docs/conf.py | 2 +- docs/index.rst | 2 +- docs/install.rst | 34 ++++++++++++++++++---------------- fabfile.py | 9 ++++++++- 4 files changed, 28 insertions(+), 19 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 4d80fe7..b340f43 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -120,7 +120,7 @@ html_theme = 'default' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ['static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. 
diff --git a/docs/index.rst b/docs/index.rst index 058335e..75cbbe2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,7 +22,7 @@ Welcome to Tablib's docuemtation. .. * :ref:`search` -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. I recommend you start off with `Installation`. diff --git a/docs/install.rst b/docs/install.rst index ac6caf1..d1cacbf 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -3,10 +3,7 @@ Installation ============ -This part of the documentation covers all the interfaces of Tablib. For -parts where Tablib depends on external libraries, we document the most -important right here and provide links to the canonical documentation. - +This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancments `.. Installing Tablib ----------------- @@ -21,26 +18,30 @@ Or, if you must: :: But, you really shouldn't do that. +.. _speed: + Speed Extentions ---------------- +.. versionadded:: 0.8.5 + Tablib is partially dependent on the **pyyaml**, **simplejson**, and **xlwt** modules. To reduce installation issues, fully integrated versions of all required libraries are included in Tablib. -However, if performance is important to you (and it should be), you should install **simplejson** and **pyyaml** with C extentions from PyPi. 
:: +However, if performance is important to you (and it should be), you can install **pyyaml** with C extentions from PyPi. :: $ pip install PyYAML -If you're using Python 2.6+, the built-in **json** module is already optimized and in use. If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. :: +If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. If you're using Python 2.6+, the built-in **json** module is already optimized and in use. :: $ pip install simplejson -.. If you're using a Python < 2.6, you can speed up JSON + Pythons Supported ----------------- -At this time, only the following Python platforms are officially supported: +At this time, the following Python platforms are officially supported: * Python 2.6 * Python 2.7 @@ -49,14 +50,15 @@ Support for other Pythons will be rolled out soon. - -Virtualenv ----------- - -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. - - Staying Updated --------------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
\ No newline at end of file +The latest version of Tablib will always be available here: + +* PyPi: http://pypi.python.org/pypi/tablib/ +* GitHub: http://github.com/kennethreitz/tablib/ + +When a new version is available, upgrading is simple. :: + + $ pip install tablib --upgrade + diff --git a/fabfile.py b/fabfile.py index 887bc25..e0375ba 100644 --- a/fabfile.py +++ b/fabfile.py @@ -1,7 +1,14 @@ +import os from fabric.api import * def scrub(): """ Death to the bytecode! """ - local("rm -fr dist build") + local('rm -fr dist build') local("find . -name \"*.pyc\" -exec rm '{}' ';'") + +def docs(): + """Build docs.""" + os.system('make html') + os.system('cd _build/html') + os.system('git commit -am \'documentation update\'') \ No newline at end of file From d4b659ece9dc1fa682d2d937752acf6cc0b82ae6 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Fri, 8 Oct 2010 11:50:43 -0400 Subject: [PATCH 08/50] documentation update --- fabfile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fabfile.py b/fabfile.py index e0375ba..98b5149 100644 --- a/fabfile.py +++ b/fabfile.py @@ -11,4 +11,5 @@ def docs(): """Build docs.""" os.system('make html') os.system('cd _build/html') - os.system('git commit -am \'documentation update\'') \ No newline at end of file + os.system('git commit -am \'documentation update\'') + os.system('git push origin gh-pages') \ No newline at end of file From e69546a0ffed82b937e91e34d5d1c1f597575fb5 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Fri, 8 Oct 2010 15:46:50 -0400 Subject: [PATCH 09/50] Major documentation update. --- docs/api.rst | 11 ++-- docs/development.rst | 127 ++++++++++++++++++++++++++++++++++++++++++- docs/index.rst | 4 +- docs/install.rst | 13 +++-- docs/intro.rst | 22 ++++++-- docs/quickstart.rst | 4 +- fabfile.py | 6 +- 7 files changed, 166 insertions(+), 21 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 28f3f70..ce169b7 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -3,7 +3,7 @@ API === -.. 
module:: flask +.. module:: tablib This part of the documentation covers all the interfaces of Tablib. For parts where Tablib depends on external libraries, we document the most @@ -12,15 +12,16 @@ important right here and provide links to the canonical documentation. Dataset Object -------------- -.. module:: tablib - .. autoclass:: Dataset - :members: :inherited-members: + Databook Object --------------- .. autoclass:: Databook - :members: :inherited-members: + + + +Now, go start some :ref:`Tablib Development <development>`. \ No newline at end of file diff --git a/docs/development.rst b/docs/development.rst index 7a574c6..e608ddc 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -5,14 +5,137 @@ Development Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. - +.. _future: Future of Tablib ---------------- Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +.. _design: Design Considerations --------------------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + + +.. _scm: +Source Control +-------------- + +Git. + + +.. _newformats: +Adding New Formats +------------------ + +Don't. + + +.. _testing: +Testing Tablib +-------------- +Testing is crucial to Tablib's stability. This stable project is used in production by many companies and developers, so it is important to be certian that every version released is fully operational. When developing a new feature for Tablib, be sure to write proper tests for it as well. + + +Running the Test Suite +++++++++++++++++++++++ + +When developing a feature for Tablib, the easiest way to test your changes for potential issues is to simply run the test suite directly. :: + + $ ./test_tablib.py + + +`Hudson CI`_, amongst other tools, supports Java's xUnit testing report format. Nose_ allows us to generate our own xUnit reports. + +Installing nose is simple. :: + + $ pip install nose + +Once installed, we can generate our xUnit report with a single command. :: + + $ nosetests test_tablib.py --with-xunit + +This will generate a **nosetests.xml** file, which can then be analyzed. + +.. _Nose: http://somethingaboutorange.com/mrl/projects/nose/ + +Extending the Test Suite +++++++++++++++++++++++++ + +Example: :: + + import tablib.formats.sql.test_sql + + +.. 
_hudson: +Continuous Integration +---------------------- + +Every commit made to the **develop** branch is automatically tested and inspected upon receipt with `Hudson CI`_. If you have access to the main respository and broke the build, you will receive an email accordingly. + +Anyone may view the build status and history at any time. + + http://git.kennethreitz.com/ci/ + + +If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server. + + +Additional reports will also be included here in the future, including PEP 8 checks and stress reports for extremely large datasets. + +.. _`Hudson CI`: http://hudson.dev.java.net +.. _`Kenneth Reitz`: http://kennethreitz.com/contact-me/ + + +.. _docs: +Building the Docs +----------------- + +Documentation is written in the powerful, flexible, and standard Python documentation format, `reStructured Text`_. +Documentation builds are powered by powerful Pocoo project, Sphinx_. The :ref:`API Documentation ` is mostly documented inline throught the module. + +The Docs live in ``tablib/docs``. In order to build them, you will first need to install Sphinx. :: + + $ pip install sphinx + + +Then, to build an HTML version of the docs, simply run the following from the **docs** directory: :: + + $ make html + +Your ``docs/_build/html`` directory will then contain an HTML representation of the documentation, ready for publication on most web servers. + +You can also generate the documentation in **ebpub**, **latex**, **json**, *&c* similarly. + +.. admonition:: GitHub Pages + + To push the documentation up to `GitHub Pages`_, you will first need to run `sphinx-to-github`_ against your ``docs/_build/html`` directory. + + GitHub Pages are powered by an HTML generation system called Jeckyl_, which is configured to ignore files and folders that begin with "``_``" (*ie.* **_static**). + + + + + + + and `sphinx-to-github`_. 
:: + + Installing sphinx-to-github is simple. :: + + $ pip install sphinx-to-github + + Running it against the docs is even simpler. :: + + $ sphinx-to-github _build/html + + Move the resulting files to the **gh-pages** branch of your repository, and push it up to GitHub. + +.. _`reStructured Text`: http://docutils.sourceforge.net/rst.html +.. _Sphinx: http://sphinx.pocoo.org +.. _`GitHub Pages`: http://pages.github.com +.. _Jeckyl: http://github.com/mojombo/jekyll +.. _`sphinx-to-github`: http://github.com/michaeljones/sphinx-to-github + +Make sure to check out the :ref:`API Documentation <api>`. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 75cbbe2..ff757a6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,9 +22,9 @@ Welcome to Tablib's docuemtation. .. * :ref:`search` -Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. +Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Oh, and it's :ref:`MIT Licensed <mit>`. -I recommend you start off with `Installation`. +I recommend you start off with :ref:`Installation <install>`. User's Guide ------------ diff --git a/docs/install.rst b/docs/install.rst index d1cacbf..c8cc48b 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -1,10 +1,11 @@ .. _install: - Installation ============ -This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancments `.. +This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancements <speed>`. + +..
_installing: Installing Tablib ----------------- @@ -18,8 +19,8 @@ Or, if you must: :: But, you really shouldn't do that. -.. _speed: +.. _speed: Speed Extentions ---------------- @@ -37,7 +38,7 @@ If you're using Python 2.5 (currently unsupported), you should also install the - +.. _pythonsupport: Pythons Supported ----------------- @@ -49,7 +50,7 @@ At this time, the following Python platforms are officially supported: Support for other Pythons will be rolled out soon. - +.. _updates: Staying Updated --------------- @@ -62,3 +63,5 @@ When a new version is available, upgrading is simple. :: $ pip install tablib --upgrade + +Now, go get a :ref:`Quick Start `. \ No newline at end of file diff --git a/docs/intro.rst b/docs/intro.rst index bfa8a3e..b9c5f78 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -7,13 +7,27 @@ This part of the documentation covers all the interfaces of Tablib. For parts where Tablib depends on external libraries, we document the most important right here and provide links to the canonical documentation. + Why Tablib? ----------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +Why not? + +It Should Be Easy +----------------- -Boiler Plate is Bad -------------------- +.. _mit: +MIT License +----------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +A large number of open source projects you find today are `GPL Licensed`_. While the GPL certianly has essential applications, it should most certianly not be your go-to license for you next open source project. + +A project that was released as GPL cannot be usd in any commercial product without the product itself also being offered as open source. The MIT and BSD licenses are fantastic alternatives to this *major* problem. + +Tablib is released under terms of `The MIT License`_. + +.. _`GPL Licensed`: http://www.opensource.org/licenses/gpl-license.php +.. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php + +Now, go :ref:`Install Tablib `. \ No newline at end of file diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 419ac82..1c613a3 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -17,4 +17,6 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor Advanced Usage -------------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. \ No newline at end of file +Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
+ +Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `. \ No newline at end of file diff --git a/fabfile.py b/fabfile.py index 98b5149..391bf92 100644 --- a/fabfile.py +++ b/fabfile.py @@ -10,6 +10,8 @@ def scrub(): def docs(): """Build docs.""" os.system('make html') - os.system('cd _build/html') - os.system('git commit -am \'documentation update\'') + os.chdir('_build/html') + os.system('sphinxtogithub .') + os.system('git add -A') + os.system('git commit -m \'documentation update\'') os.system('git push origin gh-pages') \ No newline at end of file From ed9fe01604b9ddc9c2156ae4f2d7e3813bb11f6a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Fri, 8 Oct 2010 15:47:10 -0400 Subject: [PATCH 10/50] Added column insertion. Documentation update. --- tablib/core.py | 248 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 166 insertions(+), 82 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 9658d67..ca95642 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -21,7 +21,7 @@ __copyright__ = 'Copyright 2010 Kenneth Reitz' class Dataset(object): - """The tablib Dataset object is the heart of tablib. It provides all core + """The :class:`Dataset` object is the heart of Tablib. It provides all core functionality. Usually you create a :class:`Dataset` instance in your main module, and append @@ -44,65 +44,14 @@ class Dataset(object): :param \*args: (optional) list of rows to populate Dataset :param headers: (optional) list strings for Dataset header row - - - .. admonition:: About the Format Attributes - - If you look at the code, the various output/import formats are not - defined within the :class:`Dataset` object. To add support for a new format, see - :ref:`Adding New Formats`. - - .. attribute:: csv - - A CSV representation of the Dataset object. The top row will contain - headers, if they have been set. Otherwise, the top row will contain - the first row of the dataset. 
- - A dataset object can also be imported by setting the:class:`Dataset.csv` attribute. :: - - data = tablib.Dataset() - data.csv = 'age, first_name, last_name\\n90, John, Adams' - - Import assumes (for now) that headers exist. - .. attribute:: dict + .. admonition:: Format Attributes Definition - An native Python representation of the Dataset object. If headers have been - set, a list of Python dictionaries will be returned. If no headers have been - set, a list of tuples (rows) will be returned instead. + If you look at the code, the various output/import formats are not + defined within the :class:`Dataset` object. To add support for a new format, see + :ref:`Adding New Formats `. - A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. :: - - data = tablib.Dataset() - data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] - - - .. attribute:: xls - - An Excel Spreadsheet representation of the Dataset object, including - :ref:`seperators`. - - .. admonition:: Binary Warning - - :class:`Dataset.xls` contains binary data, so make sure to write in binary mode:: - - with open('output.xls', 'wb') as f: - f.write(data.xls) - - - .. attribute:: yaml - - A YAML representation of the Dataset object. If headers have been - set, a YAML list of objects will be returned. If no headers have - been set, a YAML list of lists (rows) will be returned instead. - - A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: - - data = tablib.Dataset() - data.yaml = '- {age: 90, first_name: John, last_name: Adams}' - - Import assumes (for now) that headers exist. 
""" def __init__(self, *args, **kwargs): @@ -203,16 +152,38 @@ class Dataset(object): return data + + def _clean_col(self, col): + """Prepares the given column for insert/append.""" + + col = list(col) + + if self.headers: + header = [col.pop(0)] + else: + header = [] + + if len(col) == 1 and callable(col[0]): + col = map(col[0], self._data) + col = tuple(header + col) + + return col + @property def height(self): - """Returns the height of the Dataset.""" + """The number of rows currently in the :class:`Dataset`. + Cannot be directly modified. + """ return len(self._data) @property def width(self): - """Returns the width of the Dataset.""" + """The number of columns currently in the :class:`Dataset`. + Cannot be directly modified. + """ + try: return len(self._data[0]) except IndexError: @@ -224,7 +195,11 @@ class Dataset(object): @property def headers(self): - """Headers property.""" + """An *optional* list of strings to be used for header rows and attribute names. + + This must be set manually. The given list length must equal :class:`Dataset.width`. + + """ return self.__headers @@ -243,7 +218,7 @@ class Dataset(object): @property def dict(self): - """A JSON representation of the Dataset object. If headers have been + """A JSON representation of the :class:`Dataset` object. If headers have been set, a JSON list of objects will be returned. If no headers have been set, a JSON list of lists (rows) will be returned instead. @@ -258,7 +233,16 @@ class Dataset(object): @dict.setter def dict(self, pickle): - + """A native Python representation of the Dataset object. If headers have been + set, a list of Python dictionaries will be returned. If no headers have been + set, a list of tuples (rows) will be returned instead. + + A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. 
:: + + data = tablib.Dataset() + data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] + + """ if not len(pickle): return @@ -277,21 +261,94 @@ class Dataset(object): else: raise UnsupportedFormat + @property + def xls(): + """An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`seperators`. Cannot be set. + + .. admonition:: Binary Warning + + :class:`Dataset.xls` contains binary data, so make sure to write in binary mode:: + + with open('output.xls', 'wb') as f: + f.write(data.xls) + + + """ + pass + + + @property + def csv(): + """A CSV representation of the :class:`Dataset` object. The top row will contain + headers, if they have been set. Otherwise, the top row will contain + the first row of the dataset. + + A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. :: + + data = tablib.Dataset() + data.csv = 'age, first_name, last_name\\n90, John, Adams' + + Import assumes (for now) that headers exist. + """ + pass + + @property + def yaml(): + """A YAML representation of the :class:`Dataset` object. If headers have been + set, a YAML list of objects will be returned. If no headers have + been set, a YAML list of lists (rows) will be returned instead. + + A dataset object can also be imported by setting the :class:`Dataset.yaml` attribute: :: + + data = tablib.Dataset() + data.yaml = '- {age: 90, first_name: John, last_name: Adams}' + + Import assumes (for now) that headers exist. + """ + pass + + + @property + def json(): + """A JSON representation of the :class:`Dataset` object. If headers have been + set, a JSON list of objects will be returned. If no headers have + been set, a JSON list of lists (rows) will be returned instead. + + A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: + + data = tablib.Dataset() + data.json = '[{age: 90, first_name: "John", last_name: "Adams"}]' + + Import assumes (for now) that headers exist. 
+ """ + def append(self, row=None, col=None): - """Adds a row to the end of Dataset""" + """Adds a row or column to the :class:`Dataset`. + + Rows and Columns appended must be the correct size (height or width). + + The default behaviour is to append the given row to the :class:`Dataset` object. If the ``col`` parameter is given, however, a new column will be added to the :class:`Dataset` object. If appending a column, and :class:`Dataset.headers` is set, the first item in list will be considered the header for that row. :: + + Append a new row to the dataset: :: + + data.append(('Kenneth', 'Reitz')) + + Append a new column to the dataset: :: + + data.append(col=('Age', 90, 67, 22)) + + You can also add a column of a single callable object, which will + add a new column with the return values of the callable each as an + item in the column. :: + + data.append(col=random.randint) + """ if row is not None: self._validate(row) self._data.append(tuple(row)) elif col is not None: - col = list(col) - if self.headers: - header = [col.pop(0)] - else: - header = [] - if len(col) == 1 and callable(col[0]): - col = map(col[0], self._data) - col = tuple(header + col) + col = self._clean_col(col) self._validate(col=col) @@ -311,14 +368,14 @@ class Dataset(object): def insert_separator(self, index, text='-'): - """Adds a separator to Dataset at given index.""" + """Adds a separator to :class:`Dataset` at given index.""" sep = (index, text) self._separators.append(sep) def append_separator(self, text='-'): - """Adds a separator to Dataset.""" + """Adds a separator to the :class:`Dataset`.""" # change offsets if headers are or aren't defined if not self.headers: @@ -329,24 +386,51 @@ class Dataset(object): self.insert_separator(index, text) - def insert(self, i, row=None): - """Inserts a row at given position in Dataset""" + def insert(self, index, row=None, col=None): + """Inserts a row or column to the :class:`Dataset` at the given index. 
+ + Rows and columns inserted must be the correct size (height or width). + + The default behaviour is to insert the given row into the :class:`Dataset` object at the given index. If the ``col`` parameter is given, however, a new column will be inserted into the :class:`Dataset` object instead. If inserting a column, and :class:`Dataset.headers` is set, the first item in the list will be considered the header for the inserted column. + + You can also insert a column of a single callable object, which will + add a new column with the return values of the callable each as an + item in the column. :: + + data.append(col=random.randint) + """ if row: self._validate(row) self._data.insert(i, tuple(row)) elif col: - pass + col = self._clean_col(col) + + self._validate(col=col) + + if self.headers: + # pop the first item off, add to headers + self.headers.insert(index, col[0]) + col = col[1:] + + if self.height and self.width: + + for i, row in enumerate(self._data): + _row = list(row) + _row.insert(index, col[i]) + self._data[i] = tuple(_row) + else: + self._data = [tuple([row]) for row in col] def wipe(self): - """Erases all data from Dataset.""" + """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() self.__headers = None class Databook(object): - """A book of Dataset objects. - Currently, this exists only for XLS workbook support. + """A book of :class:`Dataset` objects. 
+ """ def __init__(self, sets=[]): @@ -362,7 +446,7 @@ class Databook(object): def wipe(self): - """Wipe book clean.""" + """Removes all :class:`Dataset` objects from the :class:`Databook`.""" self._datasets = [] @@ -381,7 +465,7 @@ class Databook(object): def add_sheet(self, dataset): - """Adds given dataset.""" + """Adds given :class:`Dataset` to the :class:`Databook`.""" if type(dataset) is Dataset: self._datasets.append(dataset) else: @@ -389,7 +473,7 @@ class Databook(object): def _package(self): - """Packages Databook for delivery.""" + """Packages :class:`Databook` for delivery.""" collector = [] for dset in self._datasets: collector.append(dict( @@ -401,7 +485,7 @@ class Databook(object): @property def size(self): - """The number of the Datasets within DataBook.""" + """The number of the :class:`Dataset` objects within :class:`Databook`.""" return len(self._datasets) From bc82be09c567871370eca21d5bf4aab73d87b350 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 02:32:41 -0400 Subject: [PATCH 11/50] Big Documentation Upgrade. --- docs/development.rst | 16 ++++- docs/install.rst | 36 ++++++----- docs/intro.rst | 53 +++++++++++++--- docs/quickstart.rst | 141 ++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 212 insertions(+), 34 deletions(-) diff --git a/docs/development.rst b/docs/development.rst index e608ddc..99cb982 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -16,15 +16,25 @@ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor Design Considerations --------------------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +Tablib was developed with a few `The Zen of Python`_ idioms in mind. +#. Beautiful is better than ugly. +#. Explicit is better than implicit. +#. Simple is better than complex. +#. Complex is better than complicated. +#. Readability counts. -.. _scm: +It strives to be as simple to use as possible. + +.. _git: Source Control -------------- -Git. +Git. +GitHub. +git.kennethreitz.com +Git Flow .. _newformats: Adding New Formats diff --git a/docs/install.rst b/docs/install.rst index c8cc48b..c3420b9 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -2,7 +2,7 @@ Installation ============ -This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancments `. +This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. Please read this section carefully, or you may miss out on some nice :ref:`speed enhancments `. .. _installing: @@ -20,7 +20,27 @@ Or, if you must: :: But, you really shouldn't do that. -.. _speed: +Download the Source +------------------- + +You can also install tablib from source. The latest release (|version|) is available from GitHub. + +* tarball_ +* zipball_ + +.. _ +Once you have a copy of the source, you can embed it in your Python package, or install it into your site-packages easily. :: + + $ python setup.py install + + +To download the full source history from Git, see :ref:`Source Control `. + +.. _tarball: http://github.com/kennethreitz/tablib/tarball/master +.. _zipball: http://github.com/kennethreitz/tablib/zipball/master + + +.. 
_speed-extentions: Speed Extentions ---------------- @@ -38,18 +58,6 @@ If you're using Python 2.5 (currently unsupported), you should also install the -.. _pythonsupport: -Pythons Supported ------------------ - -At this time, the following Python platforms are officially supported: - -* Python 2.6 -* Python 2.7 - -Support for other Pythons will be rolled out soon. - - .. _updates: Staying Updated --------------- diff --git a/docs/intro.rst b/docs/intro.rst index b9c5f78..eab26cc 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -3,25 +3,43 @@ Introduction ============ -This part of the documentation covers all the interfaces of Tablib. For -parts where Tablib depends on external libraries, we document the most -important right here and provide links to the canonical documentation. +This part of the documentation covers all the interfaces of Tablib. +Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to Pythonically import, export, and manipulate tabular data sets. -Why Tablib? ------------ +Inception +--------- -Why not? +Tablib was built by `Kenneth Reitz`_ to fulfill a specific need. -It Should Be Easy ------------------ +Tablib was born. + +.. _`Kenneth Reitz`: http://kennethreitz.com + + +Philosophy +---------- + +Tablib was developed with a few :pep:`20` idioms in mind. + + +#. Beautiful is better than ugly. +#. Explicit is better than implicit. +#. Simple is better than complex. +#. Complex is better than complicated. +#. Readability counts. + +Besides, Why not? + +:ref:`seperators` .. _mit: + MIT License ----------- -A large number of open source projects you find today are `GPL Licensed`_. 
While the GPL certianly has essential applications, it should most certianly not be your go-to license for your next open source project. A project that was released as GPL cannot be usd in any commercial product without the product itself also being offered as open source. The MIT and BSD licenses are fantastic alternatives to this *major* problem. @@ -30,4 +48,21 @@ Tablib is released under terms of `The MIT License`_. .. _`GPL Licensed`: http://www.opensource.org/licenses/gpl-license.php .. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php + + +.. _pythonsupport: + +Pythons Supported +----------------- + +At this time, the following Python platforms are officially supported: + +* Python 2.6 +* Python 2.7 + +Support for other Pythons will be rolled out soon. + + + + Now, go :ref:`Install Tablib `. \ No newline at end of file diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 1c613a3..53c6a4c 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -1,22 +1,147 @@ .. _quickstart: - Quickstart ========== -This part of the documentation covers all the interfaces of Tablib. For -parts where Tablib depends on external libraries, we document the most -important right here and provide links to the canonical documentation. +.. module:: tablib -Basic Usage +Eager to get started? This page gives a good introduction in how to get started with Tablib. This assumes you already have Tablib installed. If you do not, head over to the :ref:`Installation ` section. + +First, make sure that: + +* Tablib is :ref:`installed ` +* Tablib is :ref:`up-to-date ` + + +Lets gets started with some simple use cases and examples. + +Creating a Dataset +------------------ + +A :class:`Dataset ` is nothing more than what its name implies—a set of data. + +Creating your own instance of the :class:`tablib.Dataset` object is simple. :: + + data = tablib.Dataset() + +You can now start filling this :class:`Dataset ` object with data. + +.. 
admonition:: Example Context + + From here on out, if you see ``data``, assume that it's a fresh :class:`Dataset ` object. + + +Adding Rows ----------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +Let's say you want to collect a simple list of names. :: + + # collection of names + names = ['Kenneth Reitz', 'Bessie Monke'] + + for name in names: + # split name appropriately + fname, lname = name.split() + + # add names to Dataset + data.append([fname, lname]) + +You can get a nice, Pythonic view of the dataset at any time with :class:`Dataset.dict`. + + >>> data.dict + [('Kenneth', 'Reitz'), ('Bessie', 'Monke')] -Advanced Usage +Adding Headers -------------- -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +It's time enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. :: + + data.headers = ['First Name', 'Last Name'] + +Let's view the data in YAML this time. :: + + >>> data.yaml + - {First Name: Kenneth, Last Name: Reitz} + - {First Name: Bessie, Last Name: Monke} + + +Adding Columns +-------------- + +Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. 
:: + + data.append(col=['Age', 22, 20]) + +Let's view the data in CSV this time. :: + + >>> data.csv + Last Name,First Name,Age + Reitz,Kenneth,22 + Monke,Bessie,20 + +It's that easy. + +Selecting Rows & Columns +------------------------ + +You can slice and dice your data, just like a standard Python list. :: + + >>> data[0] + ('Kenneth', 'Reitz', 22) + + +If we had a set of data consisting of thousands of rows, it could be useful to get a list of values in a column. +To do so, we access the :class:`Dataset` as if it were a standard Python dictionary. :: + + >>> data['First Name'] + ['Kenneth', 'Bessie'] + +Let's find the average age. :: + + >>> ages = data['Age'] + >>> float(sum(ages)) / len(ages) + 21.0 + + + +Dynamic Columns +--------------- + +.. newversion: 0.8.0 + +Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. + + + +:: + + import random + + data.append(col=random.randint) + +Let's delete that column. + +.. _seperators: + +Seperators +---------- + + + +Transposition +------------- + +Thanks to Luca Beltrame, :class:`Dataset` objects +:: + + data.transpose() + + +Shortcuts +--------- + +Population upon instantiation. + Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `. \ No newline at end of file From a2308449140055dc44dc4ff575aff63446db7186 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 02:32:52 -0400 Subject: [PATCH 12/50] Docs update. 
--- tablib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index ca95642..1ab2e00 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -372,10 +372,10 @@ class Dataset(object): sep = (index, text) self._separators.append(sep) - + def append_separator(self, text='-'): - """Adds a separator to the :class:`Dataset`.""" + """Adds a :ref:`seperator ` to the :class:`Dataset`.""" # change offsets if headers are or aren't defined if not self.headers: From 3e391fc8e39bc8a646a329d84b2c428f0997faab Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 02:33:03 -0400 Subject: [PATCH 13/50] Auto version usage. --- docs/conf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index b340f43..f51c9dc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,11 +12,12 @@ # serve to show the default. import sys, os +import tablib # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- @@ -48,9 +49,9 @@ copyright = u'2010, Kenneth Reitz' # built documents. # # The short X.Y version. -version = '0.8.3' +version = tablib.core.__version__ # The full version, including alpha/beta/rc tags. -release = '0.8.3' +release = version # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 08b51113d314aa9abe063e7ec8e5c1da41c6fb9a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 03:03:50 -0400 Subject: [PATCH 14/50] Added seamless deletion of columns. 
--- tablib/core.py | 22 +++++++++++++++++----- tablib/formats/__init__.py | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 1ab2e00..3bfc312 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -9,7 +9,7 @@ :license: MIT, see LICENSE for more details. """ -from tablib.formats import FORMATS as formats +from tablib import formats __title__ = 'tablib' @@ -95,7 +95,19 @@ class Dataset(object): def __delitem__(self, key): - del self._data[key] + if isinstance(key, basestring): + if key in self.headers: + pos = self.headers.index(key) + del self.headers[pos] + + for i, row in enumerate(self._data): + _row = list(row) + del _row[pos] + self._data[i] = tuple(_row) + else: + raise KeyError + else: + del self._data[key] def __repr__(self): @@ -108,7 +120,7 @@ class Dataset(object): @classmethod def _register_formats(cls): """Adds format properties.""" - for fmt in formats: + for fmt in formats.available: try: try: setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set)) @@ -453,7 +465,7 @@ class Databook(object): @classmethod def _register_formats(cls): """Adds format properties.""" - for fmt in formats: + for fmt in formats.available: try: try: setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book)) @@ -491,7 +503,7 @@ class Databook(object): def detect(stream): """Return (format, stream) of given stream.""" - for fmt in formats: + for fmt in formats.available: try: if fmt.detect(stream): return (fmt, stream) diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 69eada7..0ce9b71 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -8,4 +8,4 @@ import _json as json import _xls as xls import _yaml as yaml -FORMATS = (json, xls, yaml, csv) +available = (json, xls, yaml, csv) From ca08ac8a7bd8eed94e1aa186a9530be728c1dca7 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 03:03:57 -0400 Subject: [PATCH 15/50] Documentation update. 
--- docs/quickstart.rst | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 53c6a4c..ae113a4 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -108,20 +108,41 @@ Let's find the average age. :: Dynamic Columns --------------- -.. newversion: 0.8.0 +.. versionadded:: 0.8.3 -Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. +Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. For now, this is only supported on :class:`Dataset` objects that have no defined :class:`headers `. +Let's save our headers for later. :: + _headers = list(data.headers) + data.headers = None -:: +test :: import random - data.append(col=random.randint) + def random_grade(*args): + """Returns a random integer for entry.""" + return (random.randint(60,100)/100.0) + data.append(col=[random_grade]) + + +:: + >>> data.yaml + - [Reitz, Kenneth, 22, 0.83] + - [Monke, Bessie, 21, 0.73] + +Now we can add our headers back. +:: + >>> data.headers = _headers + ['Random'] + Let's delete that column. +:: + >>> del data['Grade'] + + .. _seperators: Seperators From 7fda829d275c1375d7399f2c9234e0bf0093dbc3 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 04:37:09 -0400 Subject: [PATCH 16/50] Documentation update. --- docs/api.rst | 37 +++++++++ docs/index.rst | 2 +- docs/{quickstart.rst => tutorial.rst} | 111 +++++++++++++++++++------- tablib/core.py | 8 +- 4 files changed, 127 insertions(+), 31 deletions(-) rename docs/{quickstart.rst => tutorial.rst} (62%) diff --git a/docs/api.rst b/docs/api.rst index ce169b7..4a0ff52 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,8 +1,10 @@ .. _api: +=== API === + .. module:: tablib This part of the documentation covers all the interfaces of Tablib. For @@ -10,18 +12,53 @@ parts where Tablib depends on external libraries, we document the most important right here and provide links to the canonical documentation. 
+-------------- Dataset Object -------------- + + .. autoclass:: Dataset :inherited-members: +--------------- Databook Object --------------- + .. autoclass:: Databook :inherited-members: +--------- +Functions +--------- + + +.. autofunction:: detect + +.. autofunction:: import_set + + +---------- +Exceptions +---------- + + +.. class:: InvalidDatasetType + + Raised when shit goes down. + + +.. class:: InvalidDimensions + + Raised when shit goes down. + + +.. class:: UnsupportedFormat + + Raised when shit goes down. + + Now, go start some :ref:`Tablib Development `. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index ff757a6..5dcc564 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -44,7 +44,7 @@ This part of the documentation, which is mostly prose, begins with some backgrou .. toctree:: :maxdepth: 2 - quickstart + tutorial .. toctree:: :maxdepth: 2 diff --git a/docs/quickstart.rst b/docs/tutorial.rst similarity index 62% rename from docs/quickstart.rst rename to docs/tutorial.rst index ae113a4..cfede22 100644 --- a/docs/quickstart.rst +++ b/docs/tutorial.rst @@ -1,7 +1,10 @@ .. _quickstart: + +========== Quickstart ========== + .. module:: tablib @@ -15,9 +18,13 @@ First, make sure that: Lets gets started with some simple use cases and examples. + + +------------------ Creating a Dataset ------------------ + A :class:`Dataset ` is nothing more than what its name implies—a set of data. Creating your own instance of the :class:`tablib.Dataset` object is simple. :: @@ -31,9 +38,12 @@ You can now start filling this :class:`Dataset ` object with dat From here on out, if you see ``data``, assume that it's a fresh :class:`Dataset ` object. + +----------- Adding Rows ----------- + Let's say you want to collect a simple list of names. 
:: # collection of names @@ -52,9 +62,12 @@ You can get a nice, Pythonic view of the dataset at any time with :class:`Datase [('Kenneth', 'Reitz'), ('Bessie', 'Monke')] + +-------------- Adding Headers -------------- + It's time enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. :: data.headers = ['First Name', 'Last Name'] @@ -66,9 +79,14 @@ Let's view the data in YAML this time. :: - {First Name: Bessie, Last Name: Monke} + + + +-------------- Adding Columns -------------- + Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. :: data.append(col=['Age', 22, 20]) @@ -82,9 +100,13 @@ Let's view the data in CSV this time. :: It's that easy. + + +------------------------ Selecting Rows & Columns ------------------------ + You can slice and dice your data, just like a standard Python list. :: >>> data[0] @@ -105,64 +127,97 @@ Let's find the average age. :: +----------------------- +Removing Rows & Columns +----------------------- + +data.insert('MI', ) + +>>> del data['Row Name'] +Fucking easy. + + + +============== +Advanced Usage +============== + +And now for something completely different. + +--------------- Dynamic Columns --------------- .. versionadded:: 0.8.3 -Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. For now, this is only supported on :class:`Dataset` objects that have no defined :class:`headers `. +Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. A dynamic column is a single callable object (*ie.* a function). +For now, this is only supported on :class:`Dataset` objects that have no defined :class:`headers `. -Let's save our headers for later. :: +So, let's save our headers for later, then remove them. :: _headers = list(data.headers) data.headers = None -test :: + +We can now add a dynamic column to our :class:`Dataset` object. In this example, we have a function that generates a random grade for our students. 
:: import random - def random_grade(*args): + def random_grade(row): """Returns a random integer for entry.""" return (random.randint(60,100)/100.0) data.append(col=[random_grade]) -:: - >>> data.yaml - - [Reitz, Kenneth, 22, 0.83] - - [Monke, Bessie, 21, 0.73] +Now add the headers back, with our new column. :: -Now we can add our headers back. -:: >>> data.headers = _headers + ['Random'] -Let's delete that column. +Let's have a look at our data. :: + + >>> data.yaml + - {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz} + - {Age: 21, First Name: Bessie, Grade: 0.75, Last Name: Monke} + + +Let's remove that column. :: -:: >>> del data['Grade'] +When you add a dynamic column, the first argument that is passed in to the given callable is the current data row. You can use this to perform calculations against your data row. + +For example, we can use the data available in the row to guess the gender of a student. :: + + def guess_gender(row): + """Calculates gender of given student data row.""" + m_names = ('Kenneth', 'Mike', 'Yuri') + f_names = ('Bessie', 'Samantha', 'Heather') + + name = row[0] + + if name in m_names: + return 'Male' + elif name in f_names: + return 'Female' + else: + return 'Unknown' + +Adding this function to our dataset as a dynamic column would result in: :: + + >>> data.yaml + - {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz} + - {Age: 21, First Name: Bessie, Gender: Female, Last Name: Monke} + + + .. _seperators: +---------- Seperators ---------- - - - -Transposition -------------- - -Thanks to Luca Beltrame, :class:`Dataset` objects -:: - - data.transpose() - - -Shortcuts ---------- - -Population upon instantiation. +.. versionadded:: 0.8.2 Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `. 
\ No newline at end of file diff --git a/tablib/core.py b/tablib/core.py index 3bfc312..6e5e60b 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -96,7 +96,9 @@ class Dataset(object): def __delitem__(self, key): if isinstance(key, basestring): + if key in self.headers: + pos = self.headers.index(key) del self.headers[pos] @@ -352,9 +354,11 @@ class Dataset(object): You can also add a column of a single callable object, which will add a new column with the return values of the callable each as an - item in the column. :: + item in the column. The callable can be written to perform calculations + on the current row. The callable receives a tuple representation of + the current data row as the first parameter. :: - data.append(col=random.randint) + data.append(col=[random.choice]) """ if row is not None: self._validate(row) From f9021f53c2b95ab33bf67e0e07ef6022f387acbc Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 04:37:16 -0400 Subject: [PATCH 17/50] Future release? --- HISTORY.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 4bbf180..4339bc7 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,12 @@ History ======= +?? +-- +* Massive documentation update +* Added column insert/delete support +* Added + 0.8.5 (2010-10-06) ------------------ From 459f310857cf64e32843a014e9aa99b71a6c9f75 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 06:22:59 -0400 Subject: [PATCH 18/50] Trying a few things. --- HISTORY.rst | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 4339bc7..0e895db 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,60 +1,65 @@ History -======= +------- + ?? --- +++ + * Massive documentation update * Added column insert/delete support * Added + 0.8.5 (2010-10-06) ------------------- +++++++++++++++++++ * New import system. 
All dependencies attempt to load from site-packages, then fallback on vendorized modules. 0.8.4 (2010-10-04) ------------------- +++++++++++++++++++ -* Upated XLS output: Only wrap if '\n' in cell. +* Upated XLS output: Only wrap if '\\n' in cell. 0.8.3 (2010-10-04) ------------------- +++++++++++++++++++ * Ability to append new column passing a callable as the value that will be applied to every row. 0.8.2 (2010-10-04) ------------------- +++++++++++++++++++ * Added alignment wrapping to written cells. * Added separator support to XLS. 0.8.1 (2010-09-28) ------------------- +++++++++++++++++++ + * Packaging Fix 0.8.0 (2010-09-25) ------------------- +++++++++++++++++++ + * New format plugin system! * Imports! ELEGANT Imports! * Tests. Lots of tests. 0.7.1 (2010-09-20) ------------------- +++++++++++++++++++ * Reverting methods back to properties. * Windows bug compenated in documentation. 0.7.0 (2010-09-20) ------------------- +++++++++++++++++++ * Renamed DataBook Databook for consistiency. * Export properties changed to methods (XLS filename / StringIO bug). @@ -63,31 +68,31 @@ History 0.6.4 (2010-09-19) ------------------- +++++++++++++++++++ * Updated unicode export for XLS. * More exhaustive unit tests. 0.6.3 (2010-09-14) ------------------- +++++++++++++++++++ * Added Dataset.append() support for columns. 0.6.2 (2010-09-13) ------------------- +++++++++++++++++++ * Fixed Dataset.append() error on empty dataset. * Updated Dataset.headers property w/ validation. * Added Testing Fixtures. 0.6.1 (2010-09-12) ------------------- +++++++++++++++++++ * Packaging hotfixes. 0.6.0 (2010-09-11) ------------------- +++++++++++++++++++ * Public Release. * Export Support for XLS, JSON, YAML, and CSV. From 23440fb7a3849217c2549023fd46b5156bec3416 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 06:23:11 -0400 Subject: [PATCH 19/50] Documentation update. 
--- TODO.rst | 10 ++++ docs/development.rst | 46 ++++++++++++--- docs/index.rst | 2 +- docs/install.rst | 6 +- docs/intro.rst | 2 +- docs/tutorial.rst | 134 +++++++++++++++++++++++++++++++++++++++---- 6 files changed, 178 insertions(+), 22 deletions(-) create mode 100644 TODO.rst diff --git a/TODO.rst b/TODO.rst new file mode 100644 index 0000000..f0fba1b --- /dev/null +++ b/TODO.rst @@ -0,0 +1,10 @@ +* Roll documetation out. +* http://tablib.org. +* add required header parameter for col append. +* Write exhausive unit-tests. +* Write stress tests. + +* ``Dataset.traspose()`` support + + + diff --git a/docs/development.rst b/docs/development.rst index 99cb982..e665265 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -5,18 +5,18 @@ Development Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -.. _future: -Future of Tablib ----------------- +There's a todo list. -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +.. include:: ../TODO.rst .. _design: + +--------------------- Design Considerations --------------------- -Tablib was developed with a few `The Zen of Python`_ idioms in mind. +Tablib was developed with a few :pep:`20` idioms in mind. #. 
Beautiful is better than ugly. #. Explicit is better than implicit. @@ -26,17 +26,35 @@ Tablib was developed with a few `The Zen of Python`_ idioms in mind. It strives to be as simple to use as possible. -.. _git: + +.. _scm: + +-------------- Source Control -------------- + +Tablib source is controlled with Git_, the lean, mean, distributed source control machine. + +**Branch Structure** + + ``develop`` + The "next release" branch. Likely unstable. + ``master`` + Current production release (|version|) on PyPi. + Git. GitHub. git.kennethreitz.com Git Flow +.. _Git: http://git-scm.org + .. _newformats: + + +------------------ Adding New Formats ------------------ @@ -44,14 +62,19 @@ Don't. .. _testing: + +-------------- Testing Tablib -------------- + Testing is crucial to Tablib's stability. This stable project is used in production by many companies and developers, so it is important to be certian that every version released is fully operational. When developing a new feature for Tablib, be sure to write proper tests for it as well. +++++++++++++++++++++++ Running the Test Suite ++++++++++++++++++++++ + When developing a feature for Tablib, the easiest way to test your changes for potential issues is to simply run the test suite directly. :: $ ./test_tablib.py @@ -71,9 +94,12 @@ This will generate a **nosetests.xml** file, which can then be analyzed. .. _Nose: http://somethingaboutorange.com/mrl/projects/nose/ + +++++++++++++++++++++++++ Extending the Test Suite ++++++++++++++++++++++++ + Example: :: import tablib.formats.sql.test_sql @@ -93,18 +119,20 @@ Anyone may view the build status and history at any time. If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server. -Additional reports will also be included here in the future, including PEP 8 checks and stress reports for extremely large datasets. 
+Additional reports will also be included here in the future, including :pep:`8` checks and stress reports for extremely large datasets. .. _`Hudson CI`: http://hudson.dev.java.net .. _`Kenneth Reitz`: http://kennethreitz.com/contact-me/ .. _docs: + +----------------- Building the Docs ----------------- Documentation is written in the powerful, flexible, and standard Python documentation format, `reStructured Text`_. -Documentation builds are powered by powerful Pocoo project, Sphinx_. The :ref:`API Documentation ` is mostly documented inline throught the module. +Documentation builds are powered by the powerful Pocoo project, Sphinx_. The :ref:`API Documentation ` is mostly documented inline throught the module. The Docs live in ``tablib/docs``. In order to build them, you will first need to install Sphinx. :: @@ -148,4 +176,6 @@ You can also generate the documentation in **ebpub**, **latex**, **json**, *&c* .. _Jeckyl: http://github.com/mojombo/jekyll .. _`sphinx-to-github`: http://github.com/michaeljones/sphinx-to-github +---------- + Make sure to check out the :ref:`API Documentation `. \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 5dcc564..a9b2726 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,7 +6,7 @@ Tablib: Pythonic Tabular Data ============================= -Welcome to Tablib's docuemtation. +Welcome to Tablib's documentation. .. Contents: .. diff --git a/docs/install.rst b/docs/install.rst index c3420b9..bcf0fc3 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -6,6 +6,8 @@ This part of the documentation covers the installation of Tablib. The first step .. _installing: + +----------------- Installing Tablib ----------------- @@ -19,7 +21,7 @@ Or, if you must: :: But, you really shouldn't do that. 
- +------------------- Download the Source ------------------- @@ -34,7 +36,7 @@ Once you have a copy of the source, you can embed it in your Python package, or $ python setup.py install -To download the full source history from Git, see :ref:`Source Control `. +To download the full source history from Git, see :ref:`Source Control `. .. _tarball: http://github.com/kennethreitz/tablib/tarball/master .. _zipball: http://github.com/kennethreitz/tablib/zipball/master diff --git a/docs/intro.rst b/docs/intro.rst index eab26cc..616b3e1 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -41,7 +41,7 @@ MIT License A large number of open source projects you find today are `GPL Licensed`_. While the GPL certianly has essential applications, it should most certianly not be your go-to license for your next open source project. -A project that was released as GPL cannot be usd in any commercial product without the product itself also being offered as open source. The MIT and BSD licenses are fantastic alternatives to this *major* problem. +A project that is released as GPL cannot be usd in any commercial product without the product itself also being offered as open source. The MIT and BSD licenses are great alternatives to the GPL that allow your open-source software to be used in proprietary, closed-source software. Tablib is released under terms of `The MIT License`_. diff --git a/docs/tutorial.rst b/docs/tutorial.rst index cfede22..dfe066a 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -72,16 +72,14 @@ It's time enhance our :class:`Dataset` by giving our columns some titles. To do data.headers = ['First Name', 'Last Name'] -Let's view the data in YAML this time. :: +Now our data looks a little different. 
:: - >>> data.yaml - - {First Name: Kenneth, Last Name: Reitz} - - {First Name: Bessie, Last Name: Monke} + >>> data.dict + [{'Last Name': 'Reitz', 'First Name': 'Kenneth'}, {'Last Name': 'Monke', 'First Name': 'Bessie'}] - -------------- Adding Columns -------------- @@ -91,15 +89,44 @@ Now that we have a basic :class:`Dataset` in place, let's add a column of **ages data.append(col=['Age', 22, 20]) -Let's view the data in CSV this time. :: +Let's view the data now. :: + + >>> data.dict + [{'Last Name': 'Reitz', 'First Name': 'Kenneth', 'Age': 22}, {'Last Name': 'Monke', 'First Name': 'Bessie', 'Age': 21}] + +It's that easy. + + +-------------- +Exporting Data +-------------- + +Tablib's killer feature is the ability to export your :class:`Dataset` objects into a number of formats. + +**Comma-Seperated Values** :: >>> data.csv Last Name,First Name,Age Reitz,Kenneth,22 - Monke,Bessie,20 + Monke,Bessie,21 -It's that easy. +**JavaScript Object Notation** :: + >>> data.json + [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 21}] + + +**YAML Ain't Markup Language** :: + + >>> data.yaml + - {Age: 22, First Name: Kenneth, Last Name: Reitz} + - {Age: 21, First Name: Bessie, Last Name: Monke} + + +**Microsoft Excel** :: + + >>> data.xls + ------------------------ @@ -131,9 +158,14 @@ Let's find the average age. :: Removing Rows & Columns ----------------------- -data.insert('MI', ) +:: + + >>> del data['Col Name'] + +:: + + >>> del data[0:12] ->>> del data['Row Name'] Fucking easy. @@ -142,8 +174,12 @@ Fucking easy. Advanced Usage ============== + +This part of the documentation services to give you an idea that are otherwise hard to extract from the :ref:`API Documentation ` + And now for something completely different. 
+ --------------- Dynamic Columns --------------- @@ -212,12 +248,90 @@ Adding this function to our dataset as a dynamic column would result in: :: + + +Excel Workbook With Multiple Sheets +------------------------------------ + +:class:`Databook` + +:: + + book = tablib.Databook([data, data, data]) + +:: + + with open('students.xls', 'wb') as f: + f.write(book.xls) + +The resulting **students.xls** file will contain a seperate spreadsheet for each :class:`Dataset` object in the :class:`Databook`. + +.. admonition:: Binary Warning + + Make sure to open the output file in binary mode. + + .. _seperators: ---------- Seperators ---------- + .. versionadded:: 0.8.2 +When, it's often useful to create a blank row containing information on the upcomming data. So, + + + +:: + + daniel_tests = [ + ('11/24/09', 'Math 101 Mid-term Exam', 56.), + ('05/24/10', 'Math 101 Final Exam', 62.) + ] + + suzie_tests = [ + ('11/24/09', 'Math 101 Mid-term Exam', 56.), + ('05/24/10', 'Math 101 Final Exam', 62.) + ] + + # Create new dataset + tests = tablib.Dataset() + tests.headers = ['Date', 'Test Name', 'Grade'] + + # Daniel's Tests + tests.append_seperator('Daniel\'s Scores') + + for test_row in daniel_tests: + tests.append(test_row) + + # Susie's Tests + tests.append_seperator('Susie\'s Scores') + + for test_row in suzie_tests: + tests.append(test_row) + + # Write spreadsheet to disk + with open('grades.xls', 'wb') as f: + f.write(tests.xls) + +The resulting **tests.xls** will have the following layout: + + + Daniel's Scores: + * '11/24/09', 'Math 101 Mid-term Exam', 56. + * '05/24/10', 'Math 101 Final Exam', 62. + + Suzie's Scores: + * '11/24/09', 'Math 101 Mid-term Exam', 56. + * '05/24/10', 'Math 101 Final Exam', 62. + + + +.. admonition:: Format Support + + At this time, only :class:`Excel ` output supports seperators. + +---- Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `. 
\ No newline at end of file From 3b0e0c7991da92e8a65c637afd19ac52e93deddd Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 10 Oct 2010 10:01:51 -0400 Subject: [PATCH 20/50] Updates. --- TODO.rst | 11 ++-- docs/development.rst | 125 ++++++++++++++++++++++++++++++++----------- docs/index.rst | 4 +- docs/install.rst | 1 + docs/tutorial.rst | 12 ++--- test_tablib.py | 2 +- 6 files changed, 112 insertions(+), 43 deletions(-) diff --git a/TODO.rst b/TODO.rst index f0fba1b..69b127b 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,8 +1,11 @@ -* Roll documetation out. -* http://tablib.org. -* add required header parameter for col append. +* Roll documentation out. +* Release *&* announce http://tablib.org. +* Add required header parameter for columular ``Dataset.append()``. * Write exhausive unit-tests. -* Write stress tests. +* Write stress tests +* Make CSV write customizable. + +* Continuous Integration scripts in repo. * ``Dataset.traspose()`` support diff --git a/docs/development.rst b/docs/development.rst index e665265..5fb9c2b 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -3,11 +3,17 @@ Development =========== -Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +Tablib is under active development, and contributors are welcome. -There's a todo list. +If you have a feature request, suggestion, or bug report, please open a new issue on GitHub_. To submit patches, please send a pull request on GitHub_. + +If you'd like to contribute, there's plenty to do. Here's a short todo list. + + .. include:: ../TODO.rst + + +.. 
_GitHub: http://github.com/kennethreitz/tablib/ -.. include:: ../TODO.rst .. _design: @@ -24,8 +30,10 @@ Tablib was developed with a few :pep:`20` idioms in mind. #. Complex is better than complicated. #. Readability counts. -It strives to be as simple to use as possible. +A few other things to keep in mind: +#. Keep your code DRY. +#. Strive to be as simple (to use) as possible. .. _scm: @@ -36,30 +44,100 @@ Source Control Tablib source is controlled with Git_, the lean, mean, distributed source control machine. -**Branch Structure** +The repository is publicly accessable. - ``develop`` - The "next release" branch. Likely unstable. - ``master`` - Current production release (|version|) on PyPi. + ``git clone git://github.com/kennethreitz/tablib.git`` + +The project is hosted both on **GitHub** and **git.kennethreitz.com**. + + + GitHub: + http://github.com/kennethreitz/tablib + "Mirror": + http://git.kennethreitz.com/projects/tablib -Git. -GitHub. -git.kennethreitz.com -Git Flow +Git Branch Structure +++++++++++++++++++++ + +Feature / Hotfix / Release branches follow a `Successful Git Branching Model`_ . Git-flow_ is a great tool for managing the repository. I highly recommend it. + +``develop`` + The "next release" branch. Likely unstable. +``master`` + Current production release (|version|) on PyPi. +``gh-pages`` + Current release of http://tablib.org. + +Each release is tagged. + +When submitting patches, please place your feature/change in its own branch prior to opening a pull reqeust on GitHub_. + .. _Git: http://git-scm.org +.. _`Successful Git Branching Model`: http://nvie.com/posts/a-successful-git-branching-model/ +.. _git-flow: http://github.com/nvie/gitflow + .. _newformats: - ------------------ Adding New Formats ------------------ -Don't. +Tablib welcomes new format additions! 
Format suggestions include: +* Tab Seperated Values +* MySQL Dump +* HTML Table + + +Coding by Convention +++++++++++++++++++++ + +Tablib features a micro-framework for adding format support. The easiest way to understand it is to use it. So, let's define our own format, named *xxx*. + +1. Write a new format interface. + + :class:`tablib.core` follows a simple pattern for automatically utilizing your format throughout Tablib. Function names are crucial. + + Example **tablib/formats/_xxx.py**: :: + + title = 'xxx' + + def export_set(dset): + .... + # returns string representation of given dataset + + def export_book(dbook): + .... + # returns string representation of given databook + + def import_set(dset, in_stream): + ... + # populates given Dataset with given datastream + + def import_book(dbook, in_stream): + ... + # returns Databook instance + + def detect(stream): + ... + # returns True if given stream is parsable as xxx + +.. admonition:: Excluding Support + + + If the format excludes support for an import/export mechanism (*eg.* :class:`csv ` excludes :class:`Databook ` support), simply don't define the respecive functions. Appropriate errors will be raised. + +2. + + Add your new format module to the :class:`tablib.formats.avalable` tuple. + +3. + Add a mock property to the :class:`Dataset ` class with verbose `reStructured Text`_ docstring. This alleviates IDE confusion, and allows for pretty auto-generated Sphinx_ documentation. + +4. Write respective :ref:`tests `. .. _testing: @@ -69,12 +147,6 @@ Testing Tablib Testing is crucial to Tablib's stability. This stable project is used in production by many companies and developers, so it is important to be certian that every version released is fully operational. When developing a new feature for Tablib, be sure to write proper tests for it as well. 
- -++++++++++++++++++++++ -Running the Test Suite -++++++++++++++++++++++ - - When developing a feature for Tablib, the easiest way to test your changes for potential issues is to simply run the test suite directly. :: $ ./test_tablib.py @@ -95,17 +167,10 @@ This will generate a **nosetests.xml** file, which can then be analyzed. .. _Nose: http://somethingaboutorange.com/mrl/projects/nose/ -++++++++++++++++++++++++ -Extending the Test Suite -++++++++++++++++++++++++ - - -Example: :: - - import tablib.formats.sql.test_sql - .. _hudson: + +---------------------- Continuous Integration ---------------------- diff --git a/docs/index.rst b/docs/index.rst index a9b2726..87a8d3b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -24,12 +24,12 @@ Welcome to Tablib's documentation. Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Oh, and it's :ref:`MIT Lisenced `. -I recommend you start off with :ref:`Installation `. +I recommend you start with :ref:`Installation `. User's Guide ------------ -This part of the documentation, which is mostly prose, begins with some background information about Tablib, then focuses on step-by-step instructions for web development with Flask. +This part of the documentation, which is mostly prose, begins with some background information about Tablib, then focuses on step-by-step instructions for getting the most out of your datasets. .. toctree:: :maxdepth: 2 diff --git a/docs/install.rst b/docs/install.rst index bcf0fc3..0b5d212 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -21,6 +21,7 @@ Or, if you must: :: But, you really shouldn't do that. 
+ ------------------- Download the Source ------------------- diff --git a/docs/tutorial.rst b/docs/tutorial.rst index dfe066a..a403d4a 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -92,7 +92,7 @@ Now that we have a basic :class:`Dataset` in place, let's add a column of **ages Let's view the data now. :: >>> data.dict - [{'Last Name': 'Reitz', 'First Name': 'Kenneth', 'Age': 22}, {'Last Name': 'Monke', 'First Name': 'Bessie', 'Age': 21}] + [{'Last Name': 'Reitz', 'First Name': 'Kenneth', 'Age': 22}, {'Last Name': 'Monke', 'First Name': 'Bessie', 'Age': 20}] It's that easy. @@ -108,19 +108,19 @@ Tablib's killer feature is the ability to export your :class:`Dataset` objects i >>> data.csv Last Name,First Name,Age Reitz,Kenneth,22 - Monke,Bessie,21 + Monke,Bessie,20 **JavaScript Object Notation** :: >>> data.json - [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 21}] + [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 20}] **YAML Ain't Markup Language** :: >>> data.yaml - {Age: 22, First Name: Kenneth, Last Name: Reitz} - - {Age: 21, First Name: Bessie, Last Name: Monke} + - {Age: 20, First Name: Bessie, Last Name: Monke} **Microsoft Excel** :: @@ -214,7 +214,7 @@ Let's have a look at our data. :: >>> data.yaml - {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz} - - {Age: 21, First Name: Bessie, Grade: 0.75, Last Name: Monke} + - {Age: 20, First Name: Bessie, Grade: 0.75, Last Name: Monke} Let's remove that column. 
:: @@ -244,7 +244,7 @@ Adding this function to our dataset as a dynamic column would result in: :: >>> data.yaml - {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz} - - {Age: 21, First Name: Bessie, Gender: Female, Last Name: Monke} + - {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke} diff --git a/test_tablib.py b/test_tablib.py index ebad061..f875cb7 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""Tests for tablib.""" +"""Tests for Tablib.""" import unittest From e75a00541d83c1df6ede20fec3a5611f960b6821 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Tue, 19 Oct 2010 10:45:54 +0200 Subject: [PATCH 21/50] Support for TSV-files. Unit-tested. --- tablib/core.py | 14 ++++++++++ tablib/formats/__init__.py | 3 ++- tablib/formats/_tsv.py | 53 ++++++++++++++++++++++++++++++++++++++ test_tablib.py | 44 +++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 tablib/formats/_tsv.py diff --git a/tablib/core.py b/tablib/core.py index 6e5e60b..c4071c8 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -306,6 +306,20 @@ class Dataset(object): """ pass + @property + def tsv(): + """A TSV representation of the :class:`Dataset` object. The top row will contain + headers, if they have been set. Otherwise, the top row will contain + the first row of the dataset. + + A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. :: + + data = tablib.Dataset() + data.tsv = 'age\tfirst_name\tlast_name\\n90\tJohn\tAdams' + + Import assumes (for now) that headers exist. + """ + @property def yaml(): """A YAML representation of the :class:`Dataset` object. 
If headers have been diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 0ce9b71..f5960b8 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -7,5 +7,6 @@ import _csv as csv import _json as json import _xls as xls import _yaml as yaml +import _tsv as tsv -available = (json, xls, yaml, csv) +available = (json, xls, yaml, csv, tsv) diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py new file mode 100644 index 0000000..8603c45 --- /dev/null +++ b/tablib/formats/_tsv.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- + +""" Tablib - TSV (Tab Separated Values) Support. +""" + +import cStringIO +import csv +import os + +import tablib + + +title = 'tsv' +extentions = ('tsv',) + + + +def export_set(dataset): + """Returns a TSV representation of Dataset.""" + stream = cStringIO.StringIO() + _tsv = csv.writer(stream, delimiter="\t") + + for row in dataset._package(dicts=False): + _tsv.writerow(row) + + return stream.getvalue() + + +def import_set(dset, in_stream, headers=True): + """Returns dataset from TSV stream.""" + + dset.wipe() + + rows = csv.reader(in_stream.split("\r\n"), delimiter="\t") + for i, row in enumerate(rows): + + # Skip empty rows + if not row: + continue + + if (i == 0) and (headers): + dset.headers = row + else: + dset.append(row) + + +def detect(stream): + """Returns True if given stream is valid TSV.""" + try: + rows = dialect = csv.Sniffer().sniff(stream, delimiters="\t") + return True + except csv.Error: + return False diff --git a/test_tablib.py b/test_tablib.py index f875cb7..a576ce8 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -178,6 +178,22 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(csv, self.founders.csv) + def test_tsv_export(self): + """Verify exporting dataset object as CSV.""" + + # Build up the csv string with headers first, followed by each row + tsv = '' + for col in self.headers: + tsv += col + '\t' + + tsv = tsv.strip('\t') + '\r\n' + + for founder in 
self.founders: + for col in founder: + tsv += str(col) + '\t' + tsv = tsv.strip('\t') + '\r\n' + + self.assertEqual(tsv, self.founders.tsv) def test_unicode_append(self): """Passes in a single unicode charecter and exports.""" @@ -188,6 +204,7 @@ class TablibTestCase(unittest.TestCase): data.json data.yaml data.csv + data.tsv data.xls @@ -268,6 +285,18 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_tsv_import_set(self): + """Generate and import TSV set serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _tsv = data.tsv + + data.tsv = _tsv + + self.assertEqual(_tsv, data.tsv) + def test_csv_format_detect(self): """Test CSV format detection.""" @@ -283,6 +312,21 @@ class TablibTestCase(unittest.TestCase): self.assertTrue(tablib.formats.csv.detect(_csv)) self.assertFalse(tablib.formats.csv.detect(_bunk)) + def test_tsv_format_detect(self): + """Test TSV format detection.""" + + _tsv = ( + '1\t2\t3\n' + '4\t5\t6\n' + '7\t8\t9\n' + ) + _bunk = ( + '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.tsv.detect(_tsv)) + self.assertFalse(tablib.formats.tsv.detect(_bunk)) + def test_json_format_detect(self): """Test JSON format detection.""" From cdfacb6d6e2a968540cdfab4d54cb69ce4487407 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 26 Oct 2010 05:53:07 -0700 Subject: [PATCH 22/50] Whitespace. --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b99b85a..b9eae9e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,4 @@ profile # vi noise *.swp -docs/_build/* \ No newline at end of file +docs/_build/* From 8626351618bff4735deef06990270bb02318e8a3 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 00:56:31 -0400 Subject: [PATCH 23/50] Official removal of cli interface. Bad idea. 
--- tabbed | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100755 tabbed diff --git a/tabbed b/tabbed deleted file mode 100755 index c30ec36..0000000 --- a/tabbed +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Tabbed -- CLI for Tablib -Copyright (c) 2010 Kenneth Reitz. MIT License. -""" - -import tablib.cli - - -if __name__ == '__main__': - - tablib.cli.start() \ No newline at end of file From e5e22d3ca21d3f6ef3906cd6d321f7e8064a7c29 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 00:57:12 -0400 Subject: [PATCH 24/50] Documentation typo fix. --- docs/tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index a403d4a..0aa0f80 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -208,7 +208,7 @@ We can now add a dynamic column to our :class:`Dataset` object. In this example, Now add the headers back, with our new column. :: - >>> data.headers = _headers + ['Random'] + >>> data.headers = _headers + ['Grade'] Let's have a look at our data. :: From b554ce36bb5383a1dfd0b06372043ca482f96b3f Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 00:57:18 -0400 Subject: [PATCH 25/50] Official removal of cli interface. Bad idea. 
--- tablib/cli.py | 84 --------------------------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 tablib/cli.py diff --git a/tablib/cli.py b/tablib/cli.py deleted file mode 100644 index 6d773c9..0000000 --- a/tablib/cli.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - -""" Tabbed CLI Inteface Application -""" - -import io -import sys - -import argue - -import tablib -from helpers import Struct, piped - - - -FORMATS = [fmt.title for fmt in tablib.formats.FORMATS] - -opts = [] - -opts.append(('v', 'version', False, 'Report tabbed version')) - -for format in FORMATS: - opts.append(('', format, False, 'Output to %s' % (format.upper()))) - - - -@argue.command(options=opts, usage='[FILE] [--FORMAT | FILE]') -def start(in_file=None, out_file=None, **opts): - """Covertly convert dataset formats""" - - opts = Struct(**opts) - - if opts.version: - print('Tabbed, Ver. %s' % tablib.core.__version__) - sys.exit(0) - - stdin = piped() - - if stdin: - data = tablib.import_set(stdin) - - elif in_file: - - try: - in_stream =- io.open(in_file, 'r').read() - except Exception, e: - print(' %s cannot be read.' 
% in_file) - sys.exit(65) - - try: - tablib.import_set(in_stream) - except Exception, e: - raise e - print('Import format not supported.') - sys.exit(65) - else: - print('Please provide input.') - sys.exit(65) - - - _formats_sum = sum(opts[f] for f in FORMATS) - - # Multiple output formats given - if _formats_sum > 1: - print('Please specify a single output format.') - sys.exit(64) - - # No output formats given - elif _formats_sum < 1: - print('Please specify an output format.') - sys.exit(64) - - - # fetch options.formats list - # if sum(()) > 1 - # log only one data format please - # if sum of formats == 0, specity format - - # look for filename - - # print opts.__dict__ - # print in_file - # print out_file \ No newline at end of file From 5f4162918f4e234fab3e4be39d3670617e19c54b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 00:57:25 -0400 Subject: [PATCH 26/50] New site URL. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e658c34..50159a5 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ setup( open('HISTORY.rst').read(), author='Kenneth Reitz', author_email='me@kennethreitz.com', - url='http://github.com/kennethreitz/tablib', + url='http://tablib.org', packages= [ 'tablib', 'tablib.formats', 'tablib.packages', From a5cae7c24919b495b080d9de4dc621f8de8e8828 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 00:59:06 -0400 Subject: [PATCH 27/50] Adde Luca Beltrame to AUTHORS. 
--- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index fa467c3..e755348 100644 --- a/AUTHORS +++ b/AUTHORS @@ -11,4 +11,5 @@ Patches and Suggestions ``````````````````````` - Luke Lee -- Josh Ourisman \ No newline at end of file +- Josh Ourisman +- Luca Beltrame \ No newline at end of file From 1fa28ee2caae7429dc873c4f6b4ac0f09d88dfd6 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 01:01:54 -0400 Subject: [PATCH 28/50] Added test_suite.sh script. --- test_suite.sh | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 test_suite.sh diff --git a/test_suite.sh b/test_suite.sh new file mode 100755 index 0000000..157f457 --- /dev/null +++ b/test_suite.sh @@ -0,0 +1,4 @@ +nosetests test_tablib.py --with-xunit --with-coverage +coverage xml +rm -fr pylint.txt +pylint -d W0312 -d W0212 -d E1101 -d E0202 -d W0102 -d E0102 -f parseable ./tablib > pylint.txt || true \ No newline at end of file From 90c3435600cd26ac64ae5bfcd9ba3cfada492b7a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 01:02:33 -0400 Subject: [PATCH 29/50] TODO Update. --- TODO.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/TODO.rst b/TODO.rst index 69b127b..8878f79 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,13 +1,12 @@ -* Roll documentation out. -* Release *&* announce http://tablib.org. +* Polish *&* announce http://tablib.org. * Add required header parameter for columular ``Dataset.append()``. -* Write exhausive unit-tests. +* Write more exhausive unit-tests. * Write stress tests * Make CSV write customizable. * Continuous Integration scripts in repo. -* ``Dataset.traspose()`` support +* ``Dataset.traspose()`` support? From 2b529bcb1c630c173a03a2d2bc22c05cb7ce81f7 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 01:06:07 -0400 Subject: [PATCH 30/50] Quotation constancy. 
--- tablib/formats/_tsv.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 8603c45..808f202 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -18,7 +18,7 @@ extentions = ('tsv',) def export_set(dataset): """Returns a TSV representation of Dataset.""" stream = cStringIO.StringIO() - _tsv = csv.writer(stream, delimiter="\t") + _tsv = csv.writer(stream, delimiter='\t') for row in dataset._package(dicts=False): _tsv.writerow(row) @@ -28,12 +28,10 @@ def export_set(dataset): def import_set(dset, in_stream, headers=True): """Returns dataset from TSV stream.""" - dset.wipe() - rows = csv.reader(in_stream.split("\r\n"), delimiter="\t") + rows = csv.reader(in_stream.split('\r\n'), delimiter='\t') for i, row in enumerate(rows): - # Skip empty rows if not row: continue @@ -47,7 +45,7 @@ def import_set(dset, in_stream, headers=True): def detect(stream): """Returns True if given stream is valid TSV.""" try: - rows = dialect = csv.Sniffer().sniff(stream, delimiters="\t") + rows = dialect = csv.Sniffer().sniff(stream, delimiters='\t') return True except csv.Error: return False From f621b56178f099e32fa6cf012a5206740d139bc2 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 01:06:17 -0400 Subject: [PATCH 31/50] TODO! --- TODO.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO.rst b/TODO.rst index 8878f79..cf51cfe 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,3 +1,4 @@ +* Add Dataset Tags. * Polish *&* announce http://tablib.org. * Add required header parameter for columular ``Dataset.append()``. * Write more exhausive unit-tests. From ff63558a67063ba2792ec69953510dad4c168bd2 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 01:07:04 -0400 Subject: [PATCH 32/50] Added TSV to Readme. 
--- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index e626d41..3d01ed8 100644 --- a/README.rst +++ b/README.rst @@ -18,12 +18,14 @@ Output formats supported: - Excel (Sets + Books) - JSON (Sets + Books) - YAML (Sets + Books) +- TSV (Sets) - CSV (Sets) Import formats supported: - JSON (Sets + Books) - YAML (Sets + Books) +- TSV (Sets) - CSV (Sets) Note that tablib *purposefully* excludes XML support. It always will. From 8ca180c461cc350ea64ebeb94abc70e2eb5a140b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 01:20:45 -0400 Subject: [PATCH 33/50] Documentation configuration changes for colors. --- docs/conf.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index f51c9dc..325002c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,7 @@ master_doc = 'index' # General information about the project. project = u'Tablib' -copyright = u'2010, Kenneth Reitz' +copyright = u'2010, Kenneth Reitz. Styles (modified) © Armin Ronacher' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -71,18 +71,18 @@ exclude_patterns = ['_build'] #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = 'flask_theme_support.FlaskyStyle' # A list of ignored prefixes for module index sorting. 
#modindex_common_prefix = [] @@ -129,7 +129,7 @@ html_static_path = ['static'] # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} @@ -148,10 +148,10 @@ html_static_path = ['static'] #html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +html_show_sphinx = False # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True From 35d9e390fd2695b3e64957d6345333b87dfb7aae Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 01:33:12 -0400 Subject: [PATCH 34/50] New todo. --- TODO.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TODO.rst b/TODO.rst index cf51cfe..a181699 100644 --- a/TODO.rst +++ b/TODO.rst @@ -2,8 +2,9 @@ * Polish *&* announce http://tablib.org. * Add required header parameter for columular ``Dataset.append()``. * Write more exhausive unit-tests. -* Write stress tests +* Write stress tests. * Make CSV write customizable. +* HTML Table exports. * Continuous Integration scripts in repo. From fca8ad6182ea8381bfce5984ff47629090fafc02 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 03:55:42 -0400 Subject: [PATCH 35/50] Ugh.... --- tablib/core.py | 107 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 91 insertions(+), 16 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index c4071c8..3346cb9 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -9,6 +9,8 @@ :license: MIT, see LICENSE for more details. 
""" +from copy import copy + from tablib import formats @@ -19,6 +21,68 @@ __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' +class Row(object): + """Internal Row object. Mainly used for filtering.""" + + __slots__ = ['tuple', '_row', 'tags'] + + def __init__(self, row=list(), tags=list()): + self._row = list(row) + self.tags = list(tags) + + def __iter__(self): + return (col for col in self._row) + + def __len__(self): + return len(self._row) + + def __repr__(self): + return repr(self._row) + + def __getslice__(self, i, j): + return self._row[i,j] + + def __getitem__(self, i): + return self._row[i] + + def __setitem__(self, i, value): + self._row[i] = value + + def __delitem__(self, i): + del self._row[i] + + def append(self, value): + self._row.append(value) + + def insert(self, index, value): + self._row.insert(index, value) + + def __contains__(self, item): + return (item in self._row) + + @property + def tuple(self): + '''Tuple representation of :class:`Row`.''' + return tuple(self._row) + + @property + def list(self): + '''List representation of :class:`Row`.''' + return list(self._row) + + def has_tag(self, tag): + """Returns true if current row contains tag.""" + + if tag == None: + return False + elif isinstance(tag, basestring): + return tag in self.tags + else: + for t in tag: + if t in self.tags: + return True + return False + class Dataset(object): """The :class:`Dataset` object is the heart of Tablib. 
It provides all core @@ -55,7 +119,7 @@ class Dataset(object): """ def __init__(self, *args, **kwargs): - self._data = list(args) + self._data = list(Row(arg) for arg in args) self.__headers = None # ('title', index) tuples @@ -86,12 +150,12 @@ class Dataset(object): else: raise KeyError else: - return self._data[key] + return tuple(self._data[key]) def __setitem__(self, key, value): self._validate(value) - self._data[key] = tuple(value) + self._data[key] = Row(value) def __delitem__(self, key): @@ -103,9 +167,9 @@ class Dataset(object): del self.headers[pos] for i, row in enumerate(self._data): - _row = list(row) - del _row[pos] - self._data[i] = tuple(_row) + + del row[pos] + self._data[i] = row else: raise KeyError else: @@ -264,14 +328,14 @@ class Dataset(object): if isinstance(pickle[0], list): self.wipe() for row in pickle: - self.append(row) + self.append(Row(row)) # if list of objects elif isinstance(pickle[0], dict): self.wipe() self.headers = pickle[0].keys() for row in pickle: - self.append(row.values()) + self.append(Row(row.values())) else: raise UnsupportedFormat @@ -285,8 +349,6 @@ class Dataset(object): with open('output.xls', 'wb') as f: f.write(data.xls)' - - """ pass @@ -351,7 +413,7 @@ class Dataset(object): """ - def append(self, row=None, col=None): + def append(self, row=None, col=None, tags=list()): """Adds a row or column to the :class:`Dataset`. Rows and Columns appended must be the correct size (height or width). @@ -374,9 +436,10 @@ class Dataset(object): data.append(col=[random.choice]) """ + if row is not None: self._validate(row) - self._data.append(tuple(row)) + self._data.append(Row(row, tags=tags)) elif col is not None: col = self._clean_col(col) @@ -421,7 +484,11 @@ class Dataset(object): Rows and columns inserted must be the correct size (height or width). - The default behaviour is to insert the given row to the :class:`Dataset` object at the given index. 
If the ``col`` parameter is given, however, a new column will be insert to the :class:`Dataset` object instead. If inserting a column, and :class:`Dataset.headers` is set, the first item in list will be considered the header for the inserted row. :: + The default behaviour is to insert the given row to the :class:`Dataset` + object at the given index. If the ``col`` parameter is given, however, + a new column will be insert to the :class:`Dataset` object instead. If + inserting a column, and :class:`Dataset.headers` is set, the first item + in list will be considered the header for the inserted row. :: You can also insert a column of a single callable object, which will add a new column with the return values of the callable each as an @@ -431,7 +498,7 @@ class Dataset(object): """ if row: self._validate(row) - self._data.insert(i, tuple(row)) + self._data.insert(i, Row(row)) elif col: col = self._clean_col(col) @@ -450,7 +517,16 @@ class Dataset(object): self._data[i] = tuple(_row) else: self._data = [tuple([row]) for row in col] - + + def filter(self, tag): + """Returns a new instance of the :class:`Dataset` containing only rows + with given tags. + """ + _dset = copy(self) + _dset._data[:] = [row for row in self._data if row.has_tag(tag)] + + + return _dset def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" @@ -460,7 +536,6 @@ class Dataset(object): class Databook(object): """A book of :class:`Dataset` objects. - """ def __init__(self, sets=[]): From fcada243a234532cc8e9282eb96f4b6049e915f3 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:13:02 -0400 Subject: [PATCH 36/50] Added new Row class and Dataset.filter(). 
--- tablib/core.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 3346cb9..ede3b13 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -150,7 +150,12 @@ class Dataset(object): else: raise KeyError else: - return tuple(self._data[key]) + _results = self._data[key] + if isinstance(_results, Row): + return _results.tuple + else: + return [result.tuple for result in _results] + def __setitem__(self, key, value): @@ -453,11 +458,10 @@ class Dataset(object): if self.height and self.width: for i, row in enumerate(self._data): - _row = list(row) - _row.append(col[i]) - self._data[i] = tuple(_row) + row.append(col[i]) + self._data[i] = row else: - self._data = [tuple([row]) for row in col] + self._data = [Row([row]) for row in col] def insert_separator(self, index, text='-'): @@ -479,7 +483,7 @@ class Dataset(object): self.insert_separator(index, text) - def insert(self, index, row=None, col=None): + def insert(self, index, row=None, col=None, tags=list()): """Inserts a row or column to the :class:`Dataset` at the given index. Rows and columns inserted must be the correct size (height or width). @@ -498,7 +502,7 @@ class Dataset(object): """ if row: self._validate(row) - self._data.insert(i, Row(row)) + self._data.insert(i, Row(row, tags=tags)) elif col: col = self._clean_col(col) @@ -512,11 +516,11 @@ class Dataset(object): if self.height and self.width: for i, row in enumerate(self._data): - _row = list(row) - _row.insert(index, col[i]) - self._data[i] = tuple(_row) + + row.insert(index, col[i]) + self._data[i] = row else: - self._data = [tuple([row]) for row in col] + self._data = [Row([row]) for row in col] def filter(self, tag): """Returns a new instance of the :class:`Dataset` containing only rows From 0bbd990ed8aae827be2a04100def9e8401f8ac99 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:13:09 -0400 Subject: [PATCH 37/50] whitespace fix. 
--- tablib/core.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index ede3b13..d1a574d 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -156,8 +156,7 @@ class Dataset(object): else: return [result.tuple for result in _results] - - + def __setitem__(self, key, value): self._validate(value) self._data[key] = Row(value) From 5c747c9c2e7c9920ea495e64bad4009e9ee99b3e Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:20:45 -0400 Subject: [PATCH 38/50] Keepin' it DRY. --- tablib/core.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index d1a574d..9f5ffc4 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -442,26 +442,9 @@ class Dataset(object): """ if row is not None: - self._validate(row) - self._data.append(Row(row, tags=tags)) + self.insert(self.height, row=row, tags=tags) elif col is not None: - col = self._clean_col(col) - - self._validate(col=col) - - if self.headers: - # pop the first item off, add to headers - self.headers.append(col[0]) - col = col[1:] - - if self.height and self.width: - - for i, row in enumerate(self._data): - row.append(col[i]) - self._data[i] = row - else: - self._data = [Row([row]) for row in col] - + self.insert(self.width, col=col) def insert_separator(self, index, text='-'): """Adds a separator to :class:`Dataset` at given index.""" @@ -501,7 +484,7 @@ class Dataset(object): """ if row: self._validate(row) - self._data.insert(i, Row(row, tags=tags)) + self._data.insert(index, Row(row, tags=tags)) elif col: col = self._clean_col(col) From c6e240fa52373bd70304341eeb9721881eb61120 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:22:37 -0400 Subject: [PATCH 39/50] Cleanups. 
--- tablib/core.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 9f5ffc4..df5aade 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -505,13 +505,12 @@ class Dataset(object): self._data = [Row([row]) for row in col] def filter(self, tag): - """Returns a new instance of the :class:`Dataset` containing only rows - with given tags. + """Returns a new instance of the :class:`Dataset`, excluding any rows + that do not contain the given tags. """ _dset = copy(self) _dset._data[:] = [row for row in self._data if row.has_tag(tag)] - - + return _dset def wipe(self): From e3dedb8887944919f7383d6ddac4ea79f05e8c58 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:22:50 -0400 Subject: [PATCH 40/50] Cleanup todo. --- TODO.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/TODO.rst b/TODO.rst index a181699..e606f36 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,4 +1,3 @@ -* Add Dataset Tags. * Polish *&* announce http://tablib.org. * Add required header parameter for columular ``Dataset.append()``. * Write more exhausive unit-tests. From 778ad0265e8421afc2df849dfd1449131cfa8487 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:26:03 -0400 Subject: [PATCH 41/50] Added new required headers for adding columns. --- tablib/core.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index df5aade..be04a0e 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -417,7 +417,7 @@ class Dataset(object): """ - def append(self, row=None, col=None, tags=list()): + def append(self, row=None, col=None, header=None, tags=list()): """Adds a row or column to the :class:`Dataset`. Rows and Columns appended must be the correct size (height or width). 
@@ -444,7 +444,7 @@ class Dataset(object): if row is not None: self.insert(self.height, row=row, tags=tags) elif col is not None: - self.insert(self.width, col=col) + self.insert(self.width, col=col, header=header) def insert_separator(self, index, text='-'): """Adds a separator to :class:`Dataset` at given index.""" @@ -465,7 +465,7 @@ class Dataset(object): self.insert_separator(index, text) - def insert(self, index, row=None, col=None, tags=list()): + def insert(self, index, row=None, col=None, header=None, tags=list()): """Inserts a row or column to the :class:`Dataset` at the given index. Rows and columns inserted must be the correct size (height or width). @@ -492,8 +492,9 @@ class Dataset(object): if self.headers: # pop the first item off, add to headers - self.headers.insert(index, col[0]) - col = col[1:] + if not header: + raise HeadersNeeded() + self.headers.insert(header) if self.height and self.width: @@ -610,6 +611,8 @@ class InvalidDatasetType(Exception): class InvalidDimensions(Exception): "Invalid size" - +class HeadersNeeded(Exception): + "Header parameter must be given when appending a column in this Dataset." + class UnsupportedFormat(NotImplementedError): "Format is not supported" From cf10703e31391c89c4f5e1dbc105a26c2f82a1a6 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:46:38 -0400 Subject: [PATCH 42/50] Updated Callable Columns support. 
--- test_tablib.py | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/test_tablib.py b/test_tablib.py index a576ce8..342fd45 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -69,10 +69,10 @@ class TablibTestCase(unittest.TestCase): # With Headers data.headers = ('fname', 'lname') - new_col = ['age', 21, 22] - data.append(col=new_col) + new_col = [21, 22] + data.append(col=new_col, header='age') - self.assertEquals(data[new_col[0]], new_col[1:]) + self.assertEquals(data['age'], new_col) def test_add_column_no_data_no_headers(self): @@ -87,27 +87,12 @@ class TablibTestCase(unittest.TestCase): self.assertEquals(data.height, len(new_col)) - def test_add_column_no_data_with_headers(self): - """Verify adding new column with headers.""" - - data.headers = ('first', 'last') - - new_col = ('age',) - data.append(col=new_col) - - self.assertEquals(len(data.headers), 3) - self.assertEquals(data.width, 3) - - new_col = ('foo', 'bar') - - self.assertRaises(tablib.InvalidDimensions, data.append, col=new_col) - def test_add_callable_column(self): """Verify adding column with values specified as callable.""" - new_col = ['first_again', lambda x: x[0]] - self.founders.append(col=new_col) - - self.assertTrue(map(lambda x: x[0] == x[-1], self.founders)) + new_col = [lambda x: x[0]] + self.founders.append(col=new_col, header='first_again') +# +# self.assertTrue(map(lambda x: x[0] == x[-1], self.founders)) def test_header_slicing(self): From 577289cbc376219e3b2d910e453d5d52b15a3a0b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:46:54 -0400 Subject: [PATCH 43/50] Callable Columns again :) --- tablib/core.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index be04a0e..7388370 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -206,12 +206,12 @@ class Dataset(object): if row: is_valid = (len(row) == self.width) if self.width else True elif col: - if 
self.headers: - is_valid = (len(col) - 1) == self.height + if len(col) < 1: + is_valid = True else: is_valid = (len(col) == self.height) if self.height else True else: - is_valid = all((len(x)== self.width for x in self._data)) + is_valid = all((len(x) == self.width for x in self._data)) if is_valid: return True @@ -486,15 +486,20 @@ class Dataset(object): self._validate(row) self._data.insert(index, Row(row, tags=tags)) elif col: + col = list(col) + + # Callable Columns... + if len(col) == 1 and callable(col[0]): + col = map(col[0], self._data) + col = self._clean_col(col) - self._validate(col=col) if self.headers: # pop the first item off, add to headers if not header: raise HeadersNeeded() - self.headers.insert(header) + self.headers.insert(index, header) if self.height and self.width: From b74308e81ee50d9fb31c6e1c6bcd214d43861a79 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:47:25 -0400 Subject: [PATCH 44/50] Append fixed. --- TODO.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/TODO.rst b/TODO.rst index e606f36..c31e414 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,5 +1,4 @@ * Polish *&* announce http://tablib.org. -* Add required header parameter for columular ``Dataset.append()``. * Write more exhausive unit-tests. * Write stress tests. * Make CSV write customizable. From 30487999ba7f245fc385036c89effcc3e23807cd Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:47:52 -0400 Subject: [PATCH 45/50] CI done. --- TODO.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/TODO.rst b/TODO.rst index c31e414..c28c8b7 100644 --- a/TODO.rst +++ b/TODO.rst @@ -3,9 +3,6 @@ * Write stress tests. * Make CSV write customizable. * HTML Table exports. - -* Continuous Integration scripts in repo. - * ``Dataset.traspose()`` support? From 8d6e75ad20a2ae56ae7270ff7f4f8943af69e8db Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:49:31 -0400 Subject: [PATCH 46/50] Fixes for 0.9.0. 
--- setup.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/setup.py b/setup.py index 50159a5..a5c5588 100644 --- a/setup.py +++ b/setup.py @@ -17,12 +17,9 @@ if sys.argv[-1] == "publish": required = [] -# if sys.version_info < (2, 6): -# required.append('simplejson') - setup( name='tablib', - version='0.8.5', + version='0.9.0', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), @@ -50,9 +47,4 @@ setup( # 'Programming Language :: Python :: 3.0', # 'Programming Language :: Python :: 3.1', ), - # entry_points={ - # 'console_scripts': [ - # 'tabbed = tablib.cli:start', - # ], - # } ) From f6964bba8f16d588836b6491e61ba7278c06f514 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 04:49:37 -0400 Subject: [PATCH 47/50] Version bump. --- tablib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 7388370..07397c1 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -15,8 +15,8 @@ from tablib import formats __title__ = 'tablib' -__version__ = '0.8.5' -__build__ = 0x000805 +__version__ = '0.9.0' +__build__ = 0x000900 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' From 642b1d8deffbc619f749a7d2685bc1dfb81f2c5f Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 05:43:00 -0400 Subject: [PATCH 48/50] Exception documentation update. --- docs/api.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 4a0ff52..942b69d 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -48,17 +48,17 @@ Exceptions .. class:: InvalidDatasetType - Raised when shit goes down. + You're trying to add something that doesn't quite look right. .. class:: InvalidDimensions - Raised when shit goes down. + You're trying to add something that doesn't quite fit right. .. 
class:: UnsupportedFormat - Raised when shit goes down. + You're trying to add something that doesn't quite taste right. Now, go start some :ref:`Tablib Development `. \ No newline at end of file From 49f098ee22f707050783ca0e991de9c563f4aae9 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 05:43:23 -0400 Subject: [PATCH 49/50] Verb-age update for documentation. --- docs/index.rst | 4 +--- docs/intro.rst | 17 ++------------ docs/tutorial.rst | 60 ++++++++++++++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 40 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 87a8d3b..4a4de40 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,8 +6,6 @@ Tablib: Pythonic Tabular Data ============================= -Welcome to Tablib's documentation. - .. Contents: .. .. .. toctree:: @@ -22,7 +20,7 @@ Welcome to Tablib's documentation. .. * :ref:`search` -Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Oh, and it's :ref:`MIT Lisenced `. +Tablib is an :ref:`MIT Lisenced ` format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include, segregation, dynamic columns, tags & filtering, and seamless format import & exmport. I recommend you start with :ref:`Installation `. diff --git a/docs/intro.rst b/docs/intro.rst index 616b3e1..a327d28 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -4,17 +4,7 @@ Introduction ============ This part of the documentation covers all the interfaces of Tablib. -Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to Pythonically import, export, and manipulate tabular data sets. - - -Inception ---------- - -Tablib was build by `Kenneth Reitz`_ to fufill a specfic need. - -Tablib was born. - -.. 
_`Kenneth Reitz`: http://kennethreitz.com +Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to Pythonically import, export, and manipulate tabular data sets. Advanced features include, segregation, dynamic columns, tags / filtering, and seamless format import/exmport. Philosphy @@ -29,10 +19,7 @@ Tablib was developed with a few :pep:`20` idioms in mind. #. Complex is better than complicated. #. Readability counts. -Besides, Why not? - -:ref:`seperators` - +All contributions to Tablib should keep these important rules in mind. .. _mit: diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 0aa0f80..774158d 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -158,7 +158,7 @@ Let's find the average age. :: Removing Rows & Columns ----------------------- -:: +It's easier than you could imagine. :: >>> del data['Col Name'] @@ -166,9 +166,6 @@ Removing Rows & Columns >>> del data[0:12] -Fucking easy. - - ============== Advanced Usage @@ -180,6 +177,8 @@ This part of the documentation services to give you an idea that are otherwise h And now for something completely different. +.. _dyncols: + --------------- Dynamic Columns --------------- @@ -187,15 +186,8 @@ Dynamic Columns .. versionadded:: 0.8.3 Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. A dynamic column is a single callable object (*ie.* a function). -For now, this is only supported on :class:`Dataset` objects that have no defined :class:`headers `. -So, let's save our headers for later, then remove them. :: - - _headers = list(data.headers) - data.headers = None - - -We can now add a dynamic column to our :class:`Dataset` object. In this example, we have a function that generates a random grade for our students. :: +Let's add a dynamic column to our :class:`Dataset` object. In this example, we have a function that generates a random grade for our students. 
:: import random @@ -203,12 +195,7 @@ We can now add a dynamic column to our :class:`Dataset` object. In this example, """Returns a random integer for entry.""" return (random.randint(60,100)/100.0) - data.append(col=[random_grade]) - - -Now add the headers back, with our new column. :: - - >>> data.headers = _headers + ['Grade'] + data.append(col=[random_grade], header='Grade') Let's have a look at our data. :: @@ -247,19 +234,48 @@ Adding this function to our dataset as a dynamic column would result in: :: - {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke} +.. _tags: +---------------------------- +Filtering Datasets with Tags +---------------------------- + +.. versionadded:: 0.9.0 + + +When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter. +This allows you to filter your :class:`Dataset` later. This can be useful to separate rows of data based on +arbitrary criteria (*e.g.* origin) that you don't want to include in your :class:`Dataset`. + +Let's tag some students. :: + + students = tablib.Dataset() + + students.headers = ['first', 'last'] + + students.append(['Kenneth', 'Reitz'], tags=['male', 'technical']) + students.append(['Bessie', 'Monke'], tags=['female', 'creative']) + +Now that we have extra meta-data on our rows, we can easily filter our :class:`Dataset`. Let's just see male students. :: + + + >>> students.filter(['male']).yaml + - {first: Kenneth, last: Reitz} + +It's that simple. The original :class:`Dataset` is untouched. Excel Workbook With Multiple Sheets ------------------------------------ -:class:`Databook` +When dealing with a large number of :class:`Datasets <Dataset>` in spreadsheet format, it's quite common to group multiple spreadsheets into a single Excel file, known as a Workbook. Tablib makes it extremely easy to build workbooks with the handy :class:`Databook` class. + -:: +Let's say we have 3 different :class:`Datasets <Dataset>`.
All we have to do is add them to a :class:`Databook` object... :: - book = tablib.Databook([data, data, data]) + book = tablib.Databook([data1, data2, data3]) -:: +... and export to Excel just like :class:`Datasets <Dataset>`. :: with open('students.xls', 'wb') as f: f.write(book.xls) From 52dcf79c41d34a4c8b7019e077400fff102e65cf Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 4 Nov 2010 05:43:44 -0400 Subject: [PATCH 50/50] No append documentation necessary. --- tablib/core.py | 48 ++++++++++++++++++++---------------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 07397c1..43bab93 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -415,30 +415,11 @@ class Dataset(object): Import assumes (for now) that headers exist. """ - + def append(self, row=None, col=None, header=None, tags=list()): - """Adds a row or column to the :class:`Dataset`. - - Rows and Columns appended must be the correct size (height or width). - - The default behaviour is to append the given row to the :class:`Dataset` object. If the ``col`` parameter is given, however, a new column will be added to the :class:`Dataset` object. If appending a column, and :class:`Dataset.headers` is set, the first item in list will be considered the header for that row. :: - - Append a new row to the dataset: :: - - data.append(('Kenneth', 'Reitz')) - - Append a new column to the dataset: :: - - data.append(col=('Age', 90, 67, 22)) - - You can also add a column of a single callable object, which will - add a new column with the return values of the callable each as an - item in the column. The callable can be written to perform calculations - on the current row. The callable receives a tuple representation of - the current data row as the first parameter. :: - - data.append(col=[random.choice]) + """Adds a row or column to the :class:`Dataset`. + See :class:`Dataset.insert` for usage documentation.
""" if row is not None: @@ -451,7 +432,7 @@ class Dataset(object): sep = (index, text) self._separators.append(sep) - + def append_separator(self, text='-'): """Adds a :ref:`seperator ` to the :class:`Dataset`.""" @@ -472,15 +453,26 @@ class Dataset(object): The default behaviour is to insert the given row to the :class:`Dataset` object at the given index. If the ``col`` parameter is given, however, - a new column will be insert to the :class:`Dataset` object instead. If - inserting a column, and :class:`Dataset.headers` is set, the first item - in list will be considered the header for the inserted row. :: - + a new column will be inserted into the :class:`Dataset` object instead. + You can also insert a column of a single callable object, which will add a new column with the return values of the callable each as an item in the column. :: data.append(col=random.randint) + + See :ref:`dyncols` for an in-depth example. + + .. versionchanged:: 0.9.0 + If inserting a column, and :class:`Dataset.headers` is set, the + ``header`` parameter must be set, and will be considered the header for + that column. + + .. versionadded:: 0.9.0 + If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting. + This gives you the ability to :class:`filter <Dataset.filter>` your + :class:`Dataset` later. + """ if row: self._validate(row) @@ -512,7 +504,7 @@ class Dataset(object): def filter(self, tag): """Returns a new instance of the :class:`Dataset`, excluding any rows - that do not contain the given tags. + that do not contain the given :ref:`tags <tags>`. """ _dset = copy(self) _dset._data[:] = [row for row in self._data if row.has_tag(tag)]