Merge branch 'feature/formatters'

2026-06-05 23:10:17 +00:00 · 2011-03-23 00:39:16 -04:00
parent 52e9d44739 65c73dfc42
commit 1efcb7a63d
11 changed files with 120 additions and 55 deletions
@@ -0,0 +1,14 @@
+Where possible, please follow PEP8 with regard to coding style. Sometimes the line 
+length restriction is too hard to follow, so don't bend over backwards there.
+
+Triple-quotes should always be """; all other strings should use ' unless using "
+would result in less escaping within the string.
+
+All modules, functions, and methods should be well documented in reStructuredText for 
+Sphinx AutoDoc.
+
+All functionality should be available in pure Python. Optional C (via Cython)
+implementations may be written for performance reasons, but should never
+replace the Python implementation.
+
+Lastly, don't take yourself too seriously :)
@@ -87,7 +87,7 @@ Adding New Formats

 Tablib welcomes new format additions! Format suggestions include:

-* Tab Seperated Values
+* Tab Separated Values
 * MySQL Dump
 * HTML Table

@@ -178,7 +178,7 @@ Every commit made to the **develop** branch is automatically tested and inspecte

 Anyone may view the build status and history at any time.

-    http://git.kennethreitz.com/ci/
+    http://ci.kennethreitz.com/


 If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server. 
@@ -3,8 +3,10 @@
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

-Tablib: Pythonic Tabular Data 
-=============================
+Tablib: Pythonic Tabular Datasets 
+=================================
+
+Release |version|.

 .. Contents:
 .. 
@@ -55,7 +55,7 @@ However, if performance is important to you (and it should be), you can install

 	$ pip install PyYAML

-If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. If you're using Python 2.6+, the built-in **json** module is already optimized and in use. ::
+If you're using Python 2.5, you should also install the **simplejson** module (pip will do this for you). If you're using Python 2.6+, the built-in **json** module is already optimized and in use. ::

 	$ pip install simplejson

@@ -36,6 +36,31 @@ Tablib is released under terms of `The MIT License`_.
 .. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php


+.. _license:
+
+Tablib License
+--------------
+
+Copyright (c) 2011 Kenneth Reitz.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+

 .. _pythonsupport:

@@ -44,8 +69,10 @@ Pythons Supported

 At this time, the following Python platforms are officially supported: 

-* Python 2.6
-* Python 2.7
+* CPython 2.5
+* CPython 2.6
+* CPython 2.7
+* PyPy-c 1.4

 Support for other Pythons will be rolled out soon.

@@ -26,6 +26,7 @@ __build__ = 0x000904
 __author__ = 'Kenneth Reitz'
 __license__ = 'MIT'
 __copyright__ = 'Copyright 2011 Kenneth Reitz'
+__docformat__ = 'restructuredtext'


 class Row(object):
@@ -136,6 +137,9 @@ class Dataset(object):

        # ('title', index) tuples
        self._separators = []
+        
+        # (column, callback) tuples
+        self._formatters = []

        try:
            self.headers = kwargs['headers']
@@ -236,13 +240,29 @@ class Dataset(object):
    def _package(self, dicts=True):
        """Packages Dataset into lists of dictionaries for transmission."""

+        _data = list(self._data)
+        # Execute formatters; a col of None applies the callback to every cell in the row.
+        # NOTE(review): list() above is a shallow copy, so the assignments below write into rows shared with self._data -- confirm intended.
+        if self._formatters:
+            for row_i, row in enumerate(_data):
+                for col, callback in self._formatters:
+                    try:                        
+                        if col is None:
+                            for j, c in enumerate(row):
+                                _data[row_i][j] = callback(c)
+                        else:
+                            _data[row_i][col] = callback(row[col])
+                    except IndexError:
+                        raise InvalidDatasetIndex
+                        
+
        if self.headers:
            if dicts:
-                data = [OrderedDict(zip(self.headers, data_row)) for data_row in self ._data]
+                data = [OrderedDict(zip(self.headers, data_row)) for data_row in _data]
            else:
-                data = [list(self.headers)] + list(self._data)
+                data = [list(self.headers)] + list(_data)
        else:
-            data = [list(row) for row in self._data]
+            data = [list(row) for row in _data]

        return data

@@ -385,6 +405,7 @@ class Dataset(object):
        """
        pass

+
    @property
    def tsv():
        """A TSV representation of the :class:`Dataset` object. The top row will contain
@@ -469,6 +490,29 @@ class Dataset(object):
        self.insert_separator(index, text)


+    def add_formatter(self, col, handler):
+        """Adds a :ref:`formatter` to the :class:`Dataset`.
+
+        .. versionadded:: 0.9.5
+
+        :param col: column to format. Accepts index int or header str.
+        :param handler: reference to callback function to execute
+            against each cell value.
+        :raises KeyError: if ``col`` is a header name not found in the headers.
+        :raises InvalidDatasetIndex: if ``col`` is greater than the Dataset width.
+        """
+        if isinstance(col, basestring):
+            if col in self.headers:
+                col = self.headers.index(col)  # resolve header name to column index
+            else:
+                raise KeyError(col)
+
+        if col > self.width:
+            raise InvalidDatasetIndex
+        self._formatters.append((col, handler))
+        return True
+        
+
    def insert(self, index, row=None, col=None, header=None, tags=list()):
        """Inserts a row or column to the :class:`Dataset` at the given index.

@@ -658,12 +702,14 @@ class Dataset(object):

        return _dset

+
    def wipe(self):
        """Removes all content and headers from the :class:`Dataset` object."""
        self._data = list()
        self.__headers = None


+
 class Databook(object):
    """A book of :class:`Dataset` objects.
    """
@@ -758,6 +804,9 @@ class InvalidDatasetType(Exception):

 class InvalidDimensions(Exception):
    "Invalid size"
+    
+class InvalidDatasetIndex(Exception):
+    "Requested index is outside of the Dataset size"

 class HeadersNeeded(Exception):
    "Header parameter must be given when appending a column in this Dataset."
@@ -31,7 +31,7 @@ def import_set(dset, in_stream, headers=True):

    dset.wipe()

-    rows = csv.reader(in_stream.split())
+    rows = csv.reader(in_stream.splitlines())
    for i, row in enumerate(rows):

        if (i == 0) and (headers):
@@ -1,37 +0,0 @@
-# -*- coding: utf-8 -*-
-
-""" Tablib - General Helpers.
-"""
-
-import sys
-
-
-class Struct(object):
-    """Your attributes are belong to us."""
-    
-    def __init__(self, **entries): 
-        self.__dict__.update(entries)
-        
-    def __getitem__(self, key):
-        return getattr(self, key, None)
-
-    def dictionary(self):
-        """Returns dictionary representation of object."""
-        return self.__dict__
-
-    def items(self):
-        """Returns items within object."""
-        return self.__dict__.items()
-
-    def keys(self):
-        """Returns keys within object."""
-        return self.__dict__.keys()
-
-
-
-def piped():
-    """Returns piped input via stdin, else False."""
-    with sys.stdin as stdin:
-        # TTY is only way to detect if stdin contains data
-        return stdin.read() if not stdin.isatty() else None
-
@@ -1,5 +0,0 @@
-rm -fr nosetests.xml
-tox
-# coverage xml
-rm -fr pylint.txt
-# pylint -d W0312 -d W0212 -d E1101 -d E0202 -d W0102 -d E0102 -f parseable ./tablib > pylint.txt || true
@@ -295,6 +295,19 @@ class TablibTestCase(unittest.TestCase):

        self.assertEqual(_csv, data.csv)

+    def test_csv_import_set_with_spaces(self):
+        """Round-trip a CSV export/import when cell values contain spaces
+        (the importer must split the stream on line breaks, not whitespace)."""
+        data.append(('Bill Gates', 'Microsoft'))
+        data.append(('Steve Jobs', 'Apple'))
+        data.headers = ('Name', 'Company')
+
+        _csv = data.csv
+
+        data.csv = _csv
+
+        self.assertEqual(_csv, data.csv)
+
    def test_tsv_import_set(self):
        """Generate and import TSV set serialization."""
        data.append(self.john)
@@ -4,6 +4,8 @@ envlist = py24,py25,py26,py27
 [testenv]
 commands=py.test --junitxml=junit-{envname}.xml
 deps = 
-    nose
    simplejson
-    pytest
+    pytest
+    
+[testenv:pypy]
+basepython=/usr/bin/pypy-c