From 7ae7d3ff46f323d0d85678cb3415865357ed0e25 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 5 Apr 2011 08:56:20 -0400 Subject: [PATCH 01/12] Update TODOs --- README.rst | 48 ++++++++++++++++++++++++++---------------------- TODO.rst | 14 +++++--------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/README.rst b/README.rst index d90f160..a412ec9 100644 --- a/README.rst +++ b/README.rst @@ -3,15 +3,15 @@ Tablib: format-agnostic tabular dataset library :: - _____ ______ ___________ ______ - __ /_______ ____ /_ ___ /___(_)___ /_ + _____ ______ ___________ ______ + __ /_______ ____ /_ ___ /___(_)___ /_ _ __/_ __ `/__ __ \__ / __ / __ __ \ / /_ / /_/ / _ /_/ /_ / _ / _ /_/ / \__/ \__,_/ /_.___/ /_/ /_/ /_.___/ -Tablib is a format-agnostic tabular dataset library, written in Python. +Tablib is a format-agnostic tabular dataset library, written in Python. Output formats supported: @@ -29,23 +29,23 @@ Overview `tablib.Dataset()` A Dataset is a table of tabular data. It may or may not have a header row. They can be build and manipulated as raw Python datatypes (Lists of tuples|dictionaries). Datasets can be imported from JSON, YAML, and CSV; they can be exported to Excel (XLS), JSON, YAML, and CSV. - + `tablib.Databook()` A Databook is a set of Datasets. The most common form of a Databook is an Excel file with multiple spreadsheets. Databooks can be imported from JSON and YAML; they can be exported to Excel (XLS), JSON, and YAML. Usage ----- - + Populate fresh data files: :: - + headers = ('first_name', 'last_name') data = [ ('John', 'Adams'), ('George', 'Washington') ] - + data = tablib.Dataset(*data, headers=headers) @@ -56,12 +56,12 @@ Intelligently add new rows: :: Intelligently add new columns: :: >>> data.append(col=(90, 67, 83), header='age') - + Slice rows: :: >>> print data[:2] [('John', 'Adams', 90), ('George', 'Washington', 67)] - + Slice columns by header: :: @@ -77,7 +77,7 @@ Exports Drumroll please........... -JSON! +JSON! +++++ :: @@ -94,26 +94,26 @@ JSON! "first_name": "Henry" } ] - -YAML! + +YAML! +++++ :: >>> print data.yaml - {age: 90, first_name: John, last_name: Adams} - {age: 83, first_name: Henry, last_name: Ford} - -CSV... + +CSV... ++++++ :: >>> print data.csv - first_name,last_name,age - John,Adams,90 - Henry,Ford,83 - -EXCEL! + first_name,last_name,age + John,Adams,90 + Henry,Ford,83 + +EXCEL! ++++++ :: @@ -128,15 +128,17 @@ Installation To install tablib, simply: :: $ pip install tablib - + Or, if you absolutely must: :: $ easy_install tablib - + Contribute ---------- -If you'd like to contribute, simply fork `the repository`_, commit your changes to the **develop** branch (or branch off of it), and send a pull request. Make sure you add yourself to AUTHORS_. +If you'd like to contribute, simply fork `the repository`_, commit your +changes to the **develop** branch (or branch off of it), and send a pull +request. Make sure you add yourself to AUTHORS_. Roadmap @@ -145,6 +147,8 @@ Roadmap v1.0.0: - Add hooks system - Tablib.ext namespace + - Better 2.x/3.x handling (currently internal codebase fork) + - Width detection on XLS out .. _`the repository`: http://github.com/kennethreitz/tablib diff --git a/TODO.rst b/TODO.rst index 9f8c99f..fc1265a 100644 --- a/TODO.rst +++ b/TODO.rst @@ -1,13 +1,9 @@ -* Add seperator support to HTML out * Hooks System - pre/post-append - pre/post-import - pre/post-export -* Big Data -* Backwards-compatible OrderedDict support -* Write more exhausive unit-tests. -* Write stress tests. -* Make CSV write customizable. -* Integrate django-tablib -* Mention django-tablib in Documention -* Dataset title usage in documentation (#17) +* Add Tablib.ext namespace +* Fix 2.x/3.x handling (currently internal codebase fork) +* Make CSV write more customizable. +* Width detection for XLS output +* Documentation Improvements \ No newline at end of file From a2b62669b7fef42e835da0010c63ef1cb9ef0f92 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 11 May 2011 17:58:31 -0400 Subject: [PATCH 02/12] seperator => separator --- docs/tutorial.rst | 54 +++++++++++++++++++++++------------------------ tablib/core.py | 36 +++++++++++++++---------------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 4349952..07939c2 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -30,11 +30,11 @@ A :class:`Dataset ` is nothing more than what its name implies Creating your own instance of the :class:`tablib.Dataset` object is simple. :: data = tablib.Dataset() - + You can now start filling this :class:`Dataset ` object with data. .. admonition:: Example Context - + From here on out, if you see ``data``, assume that it's a fresh :class:`Dataset ` object. @@ -52,7 +52,7 @@ Let's say you want to collect a simple list of names. :: for name in names: # split name appropriately fname, lname = name.split() - + # add names to Dataset data.append([fname, lname]) @@ -76,19 +76,19 @@ Now our data looks a little different. :: >>> data.dict [{'Last Name': 'Reitz', 'First Name': 'Kenneth'}, {'Last Name': 'Monke', 'First Name': 'Bessie'}] - + -------------- -Adding Columns +Adding Columns -------------- Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. :: data.append(col=[22, 20], header='Age') - + Let's view the data now. :: >>> data.dict @@ -106,8 +106,8 @@ Tablib's killer feature is the ability to export your :class:`Dataset` objects i **Comma-Separated Values** :: >>> data.csv - Last Name,First Name,Age - Reitz,Kenneth,22 + Last Name,First Name,Age + Reitz,Kenneth,22 Monke,Bessie,20 **JavaScript Object Notation** :: @@ -121,7 +121,7 @@ Tablib's killer feature is the ability to export your :class:`Dataset` objects i >>> data.yaml - {Age: 22, First Name: Kenneth, Last Name: Reitz} - {Age: 20, First Name: Bessie, Last Name: Monke} - + **Microsoft Excel** :: @@ -190,11 +190,11 @@ Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. A dynamic c Let's add a dynamic column to our :class:`Dataset` object. In this example, we have a function that generates a random grade for our students. :: import random - + def random_grade(row): """Returns a random integer for entry.""" return (random.randint(60,100)/100.0) - + data.append(col=[random_grade], header='Grade') Let's have a look at our data. :: @@ -209,7 +209,7 @@ Let's remove that column. :: >>> del data['Grade'] -When you add a dynamic column, the first argument that is passed in to the given callable is the current data row. You can use this to perform calculations against your data row. +When you add a dynamic column, the first argument that is passed in to the given callable is the current data row. You can use this to perform calculations against your data row. For example, we can use the data available in the row to guess the gender of a student. :: @@ -217,9 +217,9 @@ For example, we can use the data available in the row to guess the gender of a s """Calculates gender of given student data row.""" m_names = ('Kenneth', 'Mike', 'Yuri') f_names = ('Bessie', 'Samantha', 'Heather') - + name = row[0] - + if name in m_names: return 'Male' elif name in f_names: @@ -243,8 +243,8 @@ Filtering Datasets with Tags .. versionadded:: 0.9.0 -When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter. -This allows you to filter your :class:`Dataset` later. This can be useful so separate rows of data based on +When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter. +This allows you to filter your :class:`Dataset` later. This can be useful so separate rows of data based on arbitrary criteria (*e.g.* origin) that you don't want to include in your :class:`Dataset`. Let's tag some students. :: @@ -266,10 +266,10 @@ It's that simple. The original :class:`Dataset` is untouched. Excel Workbook With Multiple Sheets ------------------------------------- +------------------------------------ When dealing with a large number of :class:`Datasets ` in spreadsheet format, it's quite common to group multiple spreadsheets into a single Excel file, known as a Workbook. Tablib makes it extremely easy to build workbooks with the handy, :class:`Databook` class. - + Let's say we have 3 different :class:`Datasets `. All we have to do is add then to a :class:`Databook` object... :: @@ -287,15 +287,15 @@ The resulting **students.xls** file will contain a separate spreadsheet for each Make sure to open the output file in binary mode. -.. _seperators: +.. _separators: ---------- -Seperators +Separators ---------- .. versionadded:: 0.8.2 -When, it's often useful to create a blank row containing information on the upcoming data. So, +When, it's often useful to create a blank row containing information on the upcoming data. So, @@ -305,24 +305,24 @@ When, it's often useful to create a blank row containing information on the upco ('11/24/09', 'Math 101 Mid-term Exam', 56.), ('05/24/10', 'Math 101 Final Exam', 62.) ] - + suzie_tests = [ ('11/24/09', 'Math 101 Mid-term Exam', 56.), ('05/24/10', 'Math 101 Final Exam', 62.) ] - + # Create new dataset tests = tablib.Dataset() tests.headers = ['Date', 'Test Name', 'Grade'] # Daniel's Tests - tests.append_seperator('Daniel\'s Scores') + tests.append_separator('Daniel\'s Scores') for test_row in daniel_tests: tests.append(test_row) # Susie's Tests - tests.append_seperator('Susie\'s Scores') + tests.append_separator('Susie\'s Scores') for test_row in suzie_tests: tests.append(test_row) @@ -331,7 +331,7 @@ When, it's often useful to create a blank row containing information on the upco with open('grades.xls', 'wb') as f: f.write(tests.xls) -The resulting **tests.xls** will have the following layout: +The resulting **tests.xls** will have the following layout: Daniel's Scores: @@ -347,7 +347,7 @@ The resulting **tests.xls** will have the following layout: .. admonition:: Format Support At this time, only :class:`Excel ` output supports separators. - + ---- Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `. diff --git a/tablib/core.py b/tablib/core.py index 8ef6312..896dfcc 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -94,7 +94,7 @@ class Row(object): return (tag in self.tags) else: return bool(len(set(tag) & set(self.tags))) - + @@ -138,7 +138,7 @@ class Dataset(object): # ('title', index) tuples self._separators = [] - + # (column, callback) tuples self._formatters = [] @@ -242,12 +242,12 @@ class Dataset(object): """Packages Dataset into lists of dictionaries for transmission.""" _data = list(self._data) - + # Execute formatters if self._formatters: for row_i, row in enumerate(_data): for col, callback in self._formatters: - try: + try: if col is None: for j, c in enumerate(row): _data[row_i][j] = callback(c) @@ -255,7 +255,7 @@ class Dataset(object): _data[row_i][col] = callback(row[col]) except IndexError: raise InvalidDatasetIndex - + if self.headers: if dicts: @@ -331,8 +331,8 @@ class Dataset(object): headers = property(_get_headers, _set_headers) def _get_dict(self): - """A native Python representation of the :class:`Dataset` object. If headers have - been set, a list of Python dictionaries will be returned. If no headers have been set, + """A native Python representation of the :class:`Dataset` object. If headers have + been set, a list of Python dictionaries will be returned. If no headers have been set, a list of tuples (rows) will be returned instead. A dataset object can also be imported by setting the `Dataset.dict` attribute: :: @@ -379,7 +379,7 @@ class Dataset(object): @property def xls(): - """An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`seperators`. Cannot be set. + """An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set. .. admonition:: Binary Warning @@ -480,7 +480,7 @@ class Dataset(object): def append_separator(self, text='-'): - """Adds a :ref:`seperator ` to the :class:`Dataset`.""" + """Adds a :ref:`separator ` to the :class:`Dataset`.""" # change offsets if headers are or aren't defined if not self.headers: @@ -493,26 +493,26 @@ class Dataset(object): def add_formatter(self, col, handler): """Adds a :ref:`formatter` to the :class:`Dataset`. - + .. versionadded:: 0.9.5 :param col: column to. Accepts index int or header str. - :param handler: reference to callback function to execute + :param handler: reference to callback function to execute against each cell value. """ - + if isinstance(col, str): if col in self.headers: col = self.headers.index(col) # get 'key' index from each data else: raise KeyError - + if not col > self.width: self._formatters.append((col, handler)) else: raise InvalidDatasetIndex - + return True - + def insert(self, index, row=None, col=None, header=None, tags=list()): """Inserts a row or column to the :class:`Dataset` at the given index. @@ -584,10 +584,10 @@ class Dataset(object): def sort(self, col, reverse=False): """Sort a :class:`Dataset` by a specific column, given string (for header) or integer (for column index). The order can be reversed by - setting ``reverse`` to ``True``. + setting ``reverse`` to ``True``. Returns a new :class:`Dataset` instance where columns have been sorted.""" - + if isinstance(col, str): if not self.headers: @@ -805,7 +805,7 @@ class InvalidDatasetType(Exception): class InvalidDimensions(Exception): "Invalid size" - + class InvalidDatasetIndex(Exception): "Outside of Dataset size" From 59c996f9df7e7a6766e990597136e89fb9fc638b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 11 May 2011 18:21:44 -0400 Subject: [PATCH 03/12] history update --- HISTORY.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 78b8581..3bde64a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,12 @@ History ------- +0.9.6 ++++++ + +* `seperators` renamed to `separators` + + 0.9.5 (2011-03-24) ++++++++++++++++++ @@ -70,7 +76,7 @@ History 0.8.3 (2010-10-04) ++++++++++++++++++ -* Ability to append new column passing a callable +* Ability to append new column passing a callable as the value that will be applied to every row. @@ -98,7 +104,7 @@ History 0.7.1 (2010-09-20) ++++++++++++++++++ -* Reverting methods back to properties. +* Reverting methods back to properties. * Windows bug compensated in documentation. From c3052cc02cd21bcb4a4d221650e196abaa3e739b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 11 May 2011 22:57:12 -0400 Subject: [PATCH 04/12] kill fabfile --- fabfile.py | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 fabfile.py diff --git a/fabfile.py b/fabfile.py deleted file mode 100644 index 1e25b13..0000000 --- a/fabfile.py +++ /dev/null @@ -1,17 +0,0 @@ -import os -from fabric.api import * - - -def scrub(): - """ Death to the bytecode! """ - local('rm -fr dist build') - local("find . -name \"*.pyc\" -exec rm '{}' ';'") - -def docs(): - """Build docs.""" - os.system('make dirhtml') - os.chdir('_build/dirhtml') - os.system('sphinxtogithub .') - os.system('git add -A') - os.system('git commit -m \'documentation update\'') - os.system('git push origin gh-pages') \ No newline at end of file From 7270ce49e193b066dd3ece764d4f5f0057584679 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 11 May 2011 23:37:38 -0400 Subject: [PATCH 05/12] testing webhook --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index e95c7bb..f377dcd 100644 --- a/AUTHORS +++ b/AUTHORS @@ -4,7 +4,7 @@ various contributors: Development Lead ```````````````` -- Kenneth Reitz +- Kenneth Reitz <_@kennethreitz.com> Patches and Suggestions From 84e4bd9a47c1a26c17f03335fa47b623df1db1ed Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 12 May 2011 01:47:49 -0400 Subject: [PATCH 06/12] added csv/unicode test --- test_tablib.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test_tablib.py b/test_tablib.py index 320c759..211c6a7 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -499,6 +499,13 @@ class TablibTestCase(unittest.TestCase): for name in [r['last_name'] for r in self.founders.dict]: self.assertTrue(name.isupper()) + def test_unicode_csv(self): + """Check if unicode in csv export doesn't raise.""" + + data = tablib.Dataset() + data.append([u'\xfc', u'\xfd']) + data.csv + if __name__ == '__main__': unittest.main() From 2e5577ee91dbfb77a4698b7503a71964a1dc2def Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 12 May 2011 01:52:48 -0400 Subject: [PATCH 07/12] move csv-unicode branch to bug/csv-unicode refs #7 --- test_tablib.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test_tablib.py b/test_tablib.py index 211c6a7..320c759 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -499,13 +499,6 @@ class TablibTestCase(unittest.TestCase): for name in [r['last_name'] for r in self.founders.dict]: self.assertTrue(name.isupper()) - def test_unicode_csv(self): - """Check if unicode in csv export doesn't raise.""" - - data = tablib.Dataset() - data.append([u'\xfc', u'\xfd']) - data.csv - if __name__ == '__main__': unittest.main() From 5c50c1822e0d9f0fa493c064902d48cb092fd831 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 12 May 2011 02:00:46 -0400 Subject: [PATCH 08/12] integration of unicodecsv module refs #7 --- NOTICE | 51 +++++++++--- tablib/formats/_csv.py | 9 ++- tablib/packages/unicodecsv/__init__.py | 105 +++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 13 deletions(-) create mode 100644 tablib/packages/unicodecsv/__init__.py diff --git a/NOTICE b/NOTICE index ca8ed11..15c0691 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Tablib includes some vendorized python libraries: ordereddict, pyyaml, -simplejson, and xlwt. +simplejson, unicodecsv, and xlwt. Markup License ============== @@ -94,6 +94,37 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +UnicodeCSV License +================== + +Copyright 2010 Jeremy Dunck. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are +permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list + of conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY JEREMY DUNCK ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JEREMY DUNCK OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those of the +authors and should not be interpreted as representing official policies, either expressed +or implied, of Jeremy Dunck. + + + XLWT License ============ @@ -105,15 +136,15 @@ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. +this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. +and/or other materials provided with the distribution. 3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any contributors may be used to endorse or promote products derived from this -software without specific prior written permission. +software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, @@ -131,29 +162,29 @@ THE POSSIBILITY OF SUCH DAMAGE. """ Copyright (C) 2005 Roman V. Kiseliov All rights reserved. - + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - + 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - + 3. All advertising materials mentioning features or use of this software must display the following acknowledgment: "This product includes software developed by Roman V. Kiseliov ." - + 4. Redistributions of any form whatsoever must retain the following acknowledgment: "This product includes software developed by Roman V. Kiseliov ." - + THIS SOFTWARE IS PROVIDED BY Roman V. Kiseliov ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 4b1dc02..ddf477a 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -10,7 +10,7 @@ else: from cStringIO import StringIO -import csv +import tablib.packages.unicodecsv as csv import os import tablib @@ -20,11 +20,14 @@ title = 'csv' extentions = ('csv',) +DEFAULT_ENCODING = 'utf-8' + + def export_set(dataset): """Returns CSV representation of Dataset.""" stream = StringIO() - _csv = csv.writer(stream) + _csv = csv.writer(stream, encoding=DEFAULT_ENCODING) for row in dataset._package(dicts=False): _csv.writerow(row) @@ -37,7 +40,7 @@ def import_set(dset, in_stream, headers=True): dset.wipe() - rows = csv.reader(in_stream.splitlines()) + rows = csv.reader(in_stream.splitlines(), encoding=DEFAULT_ENCODING) for i, row in enumerate(rows): if (i == 0) and (headers): diff --git a/tablib/packages/unicodecsv/__init__.py b/tablib/packages/unicodecsv/__init__.py new file mode 100644 index 0000000..e640987 --- /dev/null +++ b/tablib/packages/unicodecsv/__init__.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +import csv +from csv import * + +#http://semver.org/ +VERSION = (0, 8, 0) +__version__ = ".".join(map(str,VERSION)) + +def _stringify(s, encoding): + if type(s)==unicode: + return s.encode(encoding) + elif isinstance(s, (int , float)): + pass #let csv.QUOTE_NONNUMERIC do its thing. + elif type(s) != str: + s=str(s) + return s + +def _stringify_list(l, encoding): + return [_stringify(s, encoding) for s in l] + +class UnicodeWriter(object): + """ + >>> import unicodecsv + >>> from cStringIO import StringIO + >>> f = StringIO() + >>> w = unicodecsv.writer(f, encoding='utf-8') + >>> w.writerow((u'é', u'ñ')) + >>> f.seek(0) + >>> r = unicodecsv.reader(f, encoding='utf-8') + >>> row = r.next() + >>> print row[0], row[1] + é ñ + """ + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + self.writer = csv.writer(f) + self.dialect = dialect + self.encoding = encoding + self.writer = csv.writer(f, dialect=dialect, **kwds) + + def writerow(self, row): + self.writer.writerow(_stringify_list(row, self.encoding)) + + def writerows(self, rows): + for row in rows: + self.writerow(row) +writer = UnicodeWriter + +class UnicodeReader(object): + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + self.reader = csv.reader(f, dialect=dialect, **kwds) + self.encoding = encoding + + def next(self): + row = self.reader.next() + return [unicode(s, self.encoding) for s in row] + + def __iter__(self): + return self +reader = UnicodeReader + +class DictWriter(csv.DictWriter): + """ + >>> from cStringIO import StringIO + >>> f = StringIO() + >>> w = DictWriter(f, ['a', 'b'], restval=u'î') + >>> w.writerow({'a':'1'}) + >>> w.writerow({'a':'1', 'b':u'ø'}) + >>> w.writerow({'a':u'é'}) + >>> f.seek(0) + >>> r = DictReader(f, fieldnames=['a'], restkey='r') + >>> r.next() == {'a':u'1', 'r':[u"î"]} + True + >>> r.next() == {'a':u'1', 'r':[u"ø"]} + True + >>> r.next() == {'a':u'é', 'r':[u"î"]} + """ + def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', *args, **kwds): + self.fieldnames = fieldnames + self.encoding = encoding + self.restval = restval + self.writer = csv.DictWriter(csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds) + def writerow(self, d): + for fieldname in self.fieldnames: + if fieldname in d: + d[fieldname] = _stringify(d[fieldname], self.encoding) + else: + d[fieldname] = _stringify(self.restval, self.encoding) + self.writer.writerow(d) + +class DictReader(csv.DictReader): + def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, dialect='excel', encoding='utf-8', *args, **kwds): + self.restkey = restkey + self.encoding = encoding + self.reader = csv.DictReader(csvfile, fieldnames, restkey, restval, dialect, *args, **kwds) + def next(self): + d = self.reader.next() + for k, v in d.items(): + if k == self.restkey: + rest = v + if rest: + d[self.restkey] = [unicode(v, self.encoding) for v in rest] + else: + if v is not None: + d[k] = unicode(v, self.encoding) + return d From bfcfa37ebbf5850b8ba4368ccaac0aa3f4c55f20 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 12 May 2011 02:24:14 -0400 Subject: [PATCH 09/12] Python3 support for csv module. Refs #7 --- tablib/formats/_csv.py | 18 +++++++++++++++--- test_tablib.py | 8 +++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index ddf477a..bfe8b0f 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -5,12 +5,17 @@ import sys if sys.version_info[0] > 2: + is_py3 = True + from io import StringIO + import csv else: + is_py3 = False from cStringIO import StringIO + import tablib.packages.unicodecsv as csv + -import tablib.packages.unicodecsv as csv import os import tablib @@ -27,7 +32,11 @@ DEFAULT_ENCODING = 'utf-8' def export_set(dataset): """Returns CSV representation of Dataset.""" stream = StringIO() - _csv = csv.writer(stream, encoding=DEFAULT_ENCODING) + + if is_py3: + _csv = csv.writer(stream) + else: + _csv = csv.writer(stream, encoding=DEFAULT_ENCODING) for row in dataset._package(dicts=False): _csv.writerow(row) @@ -40,7 +49,10 @@ def import_set(dset, in_stream, headers=True): dset.wipe() - rows = csv.reader(in_stream.splitlines(), encoding=DEFAULT_ENCODING) + if is_py3: + rows = csv.reader(in_stream.splitlines()) + else: + rows = csv.reader(in_stream.splitlines(), encoding=DEFAULT_ENCODING) for i, row in enumerate(rows): if (i == 0) and (headers): diff --git a/test_tablib.py b/test_tablib.py index 211c6a7..c7f4754 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -503,7 +503,13 @@ class TablibTestCase(unittest.TestCase): """Check if unicode in csv export doesn't raise.""" data = tablib.Dataset() - data.append([u'\xfc', u'\xfd']) + + if sys.version_info[0] > 2: + data.append(['\xfc', '\xfd']) + else: + exec("data.append([u'\xfc', u'\xfd'])") + + data.csv From 0e4128c73ebbfdca28c3f8647617aa74c69ccaae Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 12 May 2011 02:30:39 -0400 Subject: [PATCH 10/12] Erik Youngren to authors --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index f377dcd..1ca2018 100644 --- a/AUTHORS +++ b/AUTHORS @@ -13,4 +13,5 @@ Patches and Suggestions - Luke Lee - Josh Ourisman - Luca Beltrame -- Benjamin Wohlwend \ No newline at end of file +- Benjamin Wohlwend +- Erik Youngren \ No newline at end of file From 4be341be4fb889a09391a9fd6657e86e779acdce Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 12 May 2011 02:35:07 -0400 Subject: [PATCH 11/12] history: unicode+csv support refs #7 --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index 3bde64a..1724339 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -5,6 +5,7 @@ History +++++ * `seperators` renamed to `separators` +* Full unicode CSV support 0.9.5 (2011-03-24) From 3b961c59e7d3e65ce0dc14df1ae24545cbe66655 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 12 May 2011 02:52:35 -0400 Subject: [PATCH 12/12] version bump --- HISTORY.rst | 6 +++--- docs/conf.py | 2 +- setup.py | 5 +++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 1724339..f545a4a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,10 +1,10 @@ History ------- -0.9.6 -+++++ +0.9.6 (2011-05-12) +++++++++++++++++++ -* `seperators` renamed to `separators` +* ``seperators`` renamed to ``separators`` * Full unicode CSV support diff --git a/docs/conf.py b/docs/conf.py index a1bdbd6..01f4933 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,7 +48,7 @@ copyright = u'2011, Kenneth Reitz. Styles (modified) © Armin Ronacher' # built documents. # # The short X.Y version. -version = '0.9.5' +version = '0.9.6' # The full version, including alpha/beta/rc tags. release = version diff --git a/setup.py b/setup.py index 2aa532a..bb7ef54 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ if sys.version_info[:2] < (2,6): setup( name='tablib', - version='0.9.5', + version='0.9.6', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), @@ -33,7 +33,8 @@ setup( 'tablib', 'tablib.formats', 'tablib.packages', 'tablib.packages.xlwt', - 'tablib.packages.yaml', + 'tablib.packages.yaml', + 'tablib.packages.unicodecsv' ], install_requires=required, license='MIT',