diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..018f9b7 --- /dev/null +++ b/HACKING @@ -0,0 +1,14 @@ +Where possible, please follow PEP8 with regard to coding style. Sometimes the line +length restriction is too hard to follow, so don't bend over backwards there. + +Triple-quotes should always be """, single quotes are ' unless using " +would result in less escaping within the string. + +All modules, functions, and methods should be well documented in reStructuredText for +Sphinx AutoDoc. + +All functionality should be available in pure Python. Optional C (via Cython) +implementations may be written for performance reasons, but should never +replace the Python implementation. + +Lastly, don't take yourself too seriously :) \ No newline at end of file diff --git a/docs/development.rst b/docs/development.rst index 77af330..6255d5e 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -87,7 +87,7 @@ Adding New Formats Tablib welcomes new format additions! Format suggestions include: -* Tab Seperated Values +* Tab Separated Values * MySQL Dump * HTML Table @@ -178,7 +178,7 @@ Every commit made to the **develop** branch is automatically tested and inspecte Anyone may view the build status and history at any time. - http://git.kennethreitz.com/ci/ + http://ci.kennethreitz.com/ If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server. diff --git a/docs/index.rst b/docs/index.rst index 4726d7b..77ce7cf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,8 +3,10 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Tablib: Pythonic Tabular Data -============================= +Tablib: Pythonic Tabular Datasets +================================= + +Release |version|. .. Contents: .. 
diff --git a/docs/install.rst b/docs/install.rst index 9b9a519..b6c3f31 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -55,7 +55,7 @@ However, if performance is important to you (and it should be), you can install $ pip install PyYAML -If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. If you're using Python 2.6+, the built-in **json** module is already optimized and in use. :: +If you're using Python 2.5, you should also install the **simplejson** module (pip will do this for you). If you're using Python 2.6+, the built-in **json** module is already optimized and in use. :: $ pip install simplejson diff --git a/docs/intro.rst b/docs/intro.rst index c2d75b2..bc4256a 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -36,6 +36,31 @@ Tablib is released under terms of `The MIT License`_. .. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php +.. _license: + +Tablib License +-------------- + +Copyright (c) 2011 Kenneth Reitz. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + .. _pythonsupport: @@ -44,8 +69,10 @@ Pythons Supported At this time, the following Python platforms are officially supported: -* Python 2.6 -* Python 2.7 +* cPython 2.5 +* cPython 2.6 +* cPython 2.7 +* PyPy-c 1.4 Support for other Pythons will be rolled out soon. diff --git a/tablib/core.py b/tablib/core.py index dfbcb6a..c4efce3 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -26,6 +26,7 @@ __build__ = 0x000904 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2011 Kenneth Reitz' +__docformat__ = 'restructuredtext' class Row(object): @@ -136,6 +137,9 @@ class Dataset(object): # ('title', index) tuples self._separators = [] + + # (column, callback) tuples + self._formatters = [] try: self.headers = kwargs['headers'] @@ -236,13 +240,29 @@ class Dataset(object): def _package(self, dicts=True): """Packages Dataset into lists of dictionaries for transmission.""" + _data = list(self._data) + + # Execute formatters + if self._formatters: + for row_i, row in enumerate(_data): + for col, callback in self._formatters: + try: + if col is None: + for j, c in enumerate(row): + _data[row_i][j] = callback(c) + else: + _data[row_i][col] = callback(row[col]) + except IndexError: + raise InvalidDatasetIndex + + if self.headers: if dicts: - data = [OrderedDict(zip(self.headers, data_row)) for data_row in self ._data] + data = [OrderedDict(zip(self.headers, data_row)) for data_row in _data] else: - data = [list(self.headers)] + list(self._data) + data = [list(self.headers)] + list(_data) else: - data = [list(row) for row in self._data] + data = [list(row) for row in _data] return data @@ -385,6 +405,7 @@ class Dataset(object): """ pass + @property def tsv(): """A TSV representation of the 
def add_formatter(self, col, handler):
    """Adds a :ref:`formatter` to the :class:`Dataset`.

    .. versionadded:: 0.9.5

    :param col: column the formatter applies to. Accepts an index int or a
        header str; ``None`` registers the formatter for every column.
    :param handler: reference to callback function to execute
        against each cell value.
    :returns: ``True`` once the formatter has been registered.
    :raises KeyError: if ``col`` is a string that is not a known header.
    :raises InvalidDatasetIndex: if ``col`` is greater than the Dataset width.
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(col, string_types):
        # A Dataset without headers cannot resolve any header name.
        headers = self.headers or []
        if col not in headers:
            raise KeyError(col)
        # Translate the header name into its positional column index.
        col = headers.index(col)

    # NOTE(review): ``col == self.width`` is still accepted here, exactly as
    # before; it looks like an off-by-one, but tightening it would also
    # reject column 0 on a zero-width Dataset -- confirm the intended
    # semantics of ``width`` before changing the bound.
    if col is not None and col > self.width:
        raise InvalidDatasetIndex

    self._formatters.append((col, handler))
    return True
def test_csv_import_set_with_spaces(self):
    """Round-trip a CSV export/import whose cell values contain spaces."""
    founders = (
        ('Bill Gates', 'Microsoft'),
        ('Steve Jobs', 'Apple'),
    )
    for founder in founders:
        data.append(founder)
    data.headers = ('Name', 'Company')

    # Export once, feed the export back in, and make sure nothing changed.
    exported = data.csv
    data.csv = exported

    self.assertEqual(exported, data.csv)