Compare commits

...

20 Commits

Author SHA1 Message Date
Kenneth Reitz 865ce62782 Merge branch 'release/0.9.6' 2011-05-12 02:53:12 -04:00
Kenneth Reitz 3b961c59e7 version bump 2011-05-12 02:52:35 -04:00
Kenneth Reitz 4be341be4f history: unicode+csv support
refs #7
2011-05-12 02:35:07 -04:00
Kenneth Reitz 0e4128c73e Erik Youngren to authors 2011-05-12 02:30:39 -04:00
Kenneth Reitz 4ebd66cb09 Merge branch 'bug/csv-unicode' into develop
Closes #7

Conflicts:
	test_tablib.py
2011-05-12 02:28:03 -04:00
Kenneth Reitz bfcfa37ebb Python3 support for csv module.
Refs #7
2011-05-12 02:24:14 -04:00
Kenneth Reitz 5c50c1822e integration of unicodecsv module
refs #7
2011-05-12 02:01:07 -04:00
Kenneth Reitz 2e5577ee91 move csv-unicode branch to bug/csv-unicode
refs #7
2011-05-12 01:52:48 -04:00
Kenneth Reitz 84e4bd9a47 added csv/unicode test 2011-05-12 01:47:49 -04:00
Kenneth Reitz 7270ce49e1 testing webhook 2011-05-11 23:37:38 -04:00
Kenneth Reitz c3052cc02c kill fabfile 2011-05-11 22:57:12 -04:00
Kenneth Reitz 59c996f9df history update 2011-05-11 18:21:44 -04:00
Kenneth Reitz a2b62669b7 seperator => separator 2011-05-11 17:58:31 -04:00
Kenneth Reitz 15e25ef735 no more reqs 2011-05-10 21:24:56 -04:00
Kenneth Reitz 7ae7d3ff46 Update TODOs 2011-04-05 08:56:20 -04:00
Kenneth Reitz 69ed718191 make it mobile! 2011-03-24 16:46:00 -04:00
Kenneth Reitz 328d3880d5 added google analytics to generated docs 2011-03-24 13:53:03 -04:00
Kenneth Reitz ea3cc847a0 updated roadmap 2011-03-24 06:16:34 -04:00
Kenneth Reitz 8efab51355 Merge branch 'develop' 2011-03-24 06:11:02 -04:00
Kenneth Reitz 10bc5549c9 Merge branch 'release/0.9.5' 2011-03-24 05:58:50 -04:00
16 changed files with 331 additions and 118 deletions
+3 -2
View File
@@ -4,7 +4,7 @@ various contributors:
Development Lead
````````````````
- Kenneth Reitz <me@kennethreitz.com>
- Kenneth Reitz <_@kennethreitz.com>
Patches and Suggestions
@@ -13,4 +13,5 @@ Patches and Suggestions
- Luke Lee
- Josh Ourisman
- Luca Beltrame
- Benjamin Wohlwend
- Benjamin Wohlwend
- Erik Youngren
+9 -2
View File
@@ -1,6 +1,13 @@
History
-------
0.9.6 (2011-05-12)
++++++++++++++++++
* ``seperators`` renamed to ``separators``
* Full unicode CSV support
0.9.5 (2011-03-24)
++++++++++++++++++
@@ -70,7 +77,7 @@ History
0.8.3 (2010-10-04)
++++++++++++++++++
* Ability to append new column passing a callable
* Ability to append new column passing a callable
as the value that will be applied to every row.
@@ -98,7 +105,7 @@ History
0.7.1 (2010-09-20)
++++++++++++++++++
* Reverting methods back to properties.
* Reverting methods back to properties.
* Windows bug compensated in documentation.
+41 -10
View File
@@ -1,5 +1,5 @@
Tablib includes some vendorized python libraries: ordereddict, pyyaml,
simplejson, and xlwt.
simplejson, unicodecsv, and xlwt.
Markup License
==============
@@ -94,6 +94,37 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
UnicodeCSV License
==================
Copyright 2010 Jeremy Dunck. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are
permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list
of conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
THIS SOFTWARE IS PROVIDED BY JEREMY DUNCK ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JEREMY DUNCK OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those of the
authors and should not be interpreted as representing official policies, either expressed
or implied, of Jeremy Dunck.
XLWT License
============
@@ -105,15 +136,15 @@ Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
and/or other materials provided with the distribution.
3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
@@ -131,29 +162,29 @@ THE POSSIBILITY OF SUCH DAMAGE.
"""
Copyright (C) 2005 Roman V. Kiseliov
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. All advertising materials mentioning features or use of this
software must display the following acknowledgment:
"This product includes software developed by
Roman V. Kiseliov <roman@kiseliov.ru>."
4. Redistributions of any form whatsoever must retain the following
acknowledgment:
"This product includes software developed by
Roman V. Kiseliov <roman@kiseliov.ru>."
THIS SOFTWARE IS PROVIDED BY Roman V. Kiseliov ``AS IS'' AND ANY
EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+31 -24
View File
@@ -3,15 +3,15 @@ Tablib: format-agnostic tabular dataset library
::
_____ ______ ___________ ______
__ /_______ ____ /_ ___ /___(_)___ /_
_____ ______ ___________ ______
__ /_______ ____ /_ ___ /___(_)___ /_
_ __/_ __ `/__ __ \__ / __ / __ __ \
/ /_ / /_/ / _ /_/ /_ / _ / _ /_/ /
\__/ \__,_/ /_.___/ /_/ /_/ /_.___/
Tablib is a format-agnostic tabular dataset library, written in Python.
Tablib is a format-agnostic tabular dataset library, written in Python.
Output formats supported:
@@ -29,23 +29,23 @@ Overview
`tablib.Dataset()`
A Dataset is a table of tabular data. It may or may not have a header row. They can be build and manipulated as raw Python datatypes (Lists of tuples|dictionaries). Datasets can be imported from JSON, YAML, and CSV; they can be exported to Excel (XLS), JSON, YAML, and CSV.
`tablib.Databook()`
A Databook is a set of Datasets. The most common form of a Databook is an Excel file with multiple spreadsheets. Databooks can be imported from JSON and YAML; they can be exported to Excel (XLS), JSON, and YAML.
Usage
-----
Populate fresh data files: ::
headers = ('first_name', 'last_name')
data = [
('John', 'Adams'),
('George', 'Washington')
]
data = tablib.Dataset(*data, headers=headers)
@@ -56,12 +56,12 @@ Intelligently add new rows: ::
Intelligently add new columns: ::
>>> data.append(col=(90, 67, 83), header='age')
Slice rows: ::
>>> print data[:2]
[('John', 'Adams', 90), ('George', 'Washington', 67)]
Slice columns by header: ::
@@ -77,7 +77,7 @@ Exports
Drumroll please...........
JSON!
JSON!
+++++
::
@@ -94,26 +94,26 @@ JSON!
"first_name": "Henry"
}
]
YAML!
YAML!
+++++
::
>>> print data.yaml
- {age: 90, first_name: John, last_name: Adams}
- {age: 83, first_name: Henry, last_name: Ford}
CSV...
CSV...
++++++
::
>>> print data.csv
first_name,last_name,age
John,Adams,90
Henry,Ford,83
EXCEL!
first_name,last_name,age
John,Adams,90
Henry,Ford,83
EXCEL!
++++++
::
@@ -128,21 +128,28 @@ Installation
To install tablib, simply: ::
$ pip install tablib
Or, if you absolutely must: ::
$ easy_install tablib
Contribute
----------
If you'd like to contribute, simply fork `the repository`_, commit your changes to the **develop** branch (or branch off of it), and send a pull request. Make sure you add yourself to AUTHORS_.
If you'd like to contribute, simply fork `the repository`_, commit your
changes to the **develop** branch (or branch off of it), and send a pull
request. Make sure you add yourself to AUTHORS_.
Roadmap
-------
- Python 2.4, 3.0, 3.1, 3.2 Support
- Tablib.ext namespace
v1.0.0:
- Add hooks system
- Tablib.ext namespace
- Better 2.x/3.x handling (currently internal codebase fork)
- Width detection on XLS out
.. _`the repository`: http://github.com/kennethreitz/tablib
.. _AUTHORS: http://github.com/kennethreitz/tablib/blob/master/AUTHORS
+5 -9
View File
@@ -1,13 +1,9 @@
* Add seperator support to HTML out
* Hooks System
- pre/post-append
- pre/post-import
- pre/post-export
* Big Data
* Backwards-compatible OrderedDict support
* Write more exhausive unit-tests.
* Write stress tests.
* Make CSV write customizable.
* Integrate django-tablib
* Mention django-tablib in Documention
* Dataset title usage in documentation (#17)
* Add Tablib.ext namespace
* Fix 2.x/3.x handling (currently internal codebase fork)
* Make CSV write more customizable.
* Width detection for XLS output
* Documentation Improvements
+15
View File
@@ -13,4 +13,19 @@
&copy; Copyright {{ copyright }}.
Created using <a href="http://sphinx.pocoo.org/">Sphinx</a>.
</div>
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-8742933-9']);
_gaq.push(['_setDomainName', 'none']);
_gaq.push(['_setAllowLinker', true]);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
{%- endblock %}
+43
View File
@@ -385,3 +385,46 @@ a.footnote-reference:hover {
a:hover tt {
background: #EEE;
}
@media screen and (max-width: 600px) {
div.sphinxsidebar {
display: none;
}
div.documentwrapper {
margin-left: 0;
margin-top: 0;
margin-right: 0;
margin-bottom: 0;
}
div.bodywrapper {
margin-top: 0;
margin-right: 0;
margin-bottom: 0;
margin-left: 0;
}
ul {
margin-left: 0;
}
.document {
width: auto;
}
.bodywrapper {
margin: 0;
}
.footer {
width: auto;
}
}
+1 -1
View File
@@ -48,7 +48,7 @@ copyright = u'2011, Kenneth Reitz. Styles (modified) &copy; Armin Ronacher'
# built documents.
#
# The short X.Y version.
version = '0.9.5'
version = '0.9.6'
# The full version, including alpha/beta/rc tags.
release = version
+27 -27
View File
@@ -30,11 +30,11 @@ A :class:`Dataset <tablib.Dataset>` is nothing more than what its name implies
Creating your own instance of the :class:`tablib.Dataset` object is simple. ::
data = tablib.Dataset()
You can now start filling this :class:`Dataset <tablib.Dataset>` object with data.
.. admonition:: Example Context
From here on out, if you see ``data``, assume that it's a fresh :class:`Dataset <tablib.Dataset>` object.
@@ -52,7 +52,7 @@ Let's say you want to collect a simple list of names. ::
for name in names:
# split name appropriately
fname, lname = name.split()
# add names to Dataset
data.append([fname, lname])
@@ -76,19 +76,19 @@ Now our data looks a little different. ::
>>> data.dict
[{'Last Name': 'Reitz', 'First Name': 'Kenneth'}, {'Last Name': 'Monke', 'First Name': 'Bessie'}]
--------------
Adding Columns
Adding Columns
--------------
Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. ::
data.append(col=[22, 20], header='Age')
Let's view the data now. ::
>>> data.dict
@@ -106,8 +106,8 @@ Tablib's killer feature is the ability to export your :class:`Dataset` objects i
**Comma-Separated Values** ::
>>> data.csv
Last Name,First Name,Age
Reitz,Kenneth,22
Last Name,First Name,Age
Reitz,Kenneth,22
Monke,Bessie,20
**JavaScript Object Notation** ::
@@ -121,7 +121,7 @@ Tablib's killer feature is the ability to export your :class:`Dataset` objects i
>>> data.yaml
- {Age: 22, First Name: Kenneth, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Last Name: Monke}
**Microsoft Excel** ::
@@ -190,11 +190,11 @@ Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. A dynamic c
Let's add a dynamic column to our :class:`Dataset` object. In this example, we have a function that generates a random grade for our students. ::
import random
def random_grade(row):
"""Returns a random integer for entry."""
return (random.randint(60,100)/100.0)
data.append(col=[random_grade], header='Grade')
Let's have a look at our data. ::
@@ -209,7 +209,7 @@ Let's remove that column. ::
>>> del data['Grade']
When you add a dynamic column, the first argument that is passed in to the given callable is the current data row. You can use this to perform calculations against your data row.
When you add a dynamic column, the first argument that is passed in to the given callable is the current data row. You can use this to perform calculations against your data row.
For example, we can use the data available in the row to guess the gender of a student. ::
@@ -217,9 +217,9 @@ For example, we can use the data available in the row to guess the gender of a s
"""Calculates gender of given student data row."""
m_names = ('Kenneth', 'Mike', 'Yuri')
f_names = ('Bessie', 'Samantha', 'Heather')
name = row[0]
if name in m_names:
return 'Male'
elif name in f_names:
@@ -243,8 +243,8 @@ Filtering Datasets with Tags
.. versionadded:: 0.9.0
When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter.
This allows you to filter your :class:`Dataset` later. This can be useful so separate rows of data based on
When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter.
This allows you to filter your :class:`Dataset` later. This can be useful so separate rows of data based on
arbitrary criteria (*e.g.* origin) that you don't want to include in your :class:`Dataset`.
Let's tag some students. ::
@@ -266,10 +266,10 @@ It's that simple. The original :class:`Dataset` is untouched.
Excel Workbook With Multiple Sheets
------------------------------------
------------------------------------
When dealing with a large number of :class:`Datasets <Dataset>` in spreadsheet format, it's quite common to group multiple spreadsheets into a single Excel file, known as a Workbook. Tablib makes it extremely easy to build workbooks with the handy, :class:`Databook` class.
Let's say we have 3 different :class:`Datasets <Dataset>`. All we have to do is add then to a :class:`Databook` object... ::
@@ -287,15 +287,15 @@ The resulting **students.xls** file will contain a separate spreadsheet for each
Make sure to open the output file in binary mode.
.. _seperators:
.. _separators:
----------
Seperators
Separators
----------
.. versionadded:: 0.8.2
When, it's often useful to create a blank row containing information on the upcoming data. So,
When, it's often useful to create a blank row containing information on the upcoming data. So,
@@ -305,24 +305,24 @@ When, it's often useful to create a blank row containing information on the upco
('11/24/09', 'Math 101 Mid-term Exam', 56.),
('05/24/10', 'Math 101 Final Exam', 62.)
]
suzie_tests = [
('11/24/09', 'Math 101 Mid-term Exam', 56.),
('05/24/10', 'Math 101 Final Exam', 62.)
]
# Create new dataset
tests = tablib.Dataset()
tests.headers = ['Date', 'Test Name', 'Grade']
# Daniel's Tests
tests.append_seperator('Daniel\'s Scores')
tests.append_separator('Daniel\'s Scores')
for test_row in daniel_tests:
tests.append(test_row)
# Susie's Tests
tests.append_seperator('Susie\'s Scores')
tests.append_separator('Susie\'s Scores')
for test_row in suzie_tests:
tests.append(test_row)
@@ -331,7 +331,7 @@ When, it's often useful to create a blank row containing information on the upco
with open('grades.xls', 'wb') as f:
f.write(tests.xls)
The resulting **tests.xls** will have the following layout:
The resulting **tests.xls** will have the following layout:
Daniel's Scores:
@@ -347,7 +347,7 @@ The resulting **tests.xls** will have the following layout:
.. admonition:: Format Support
At this time, only :class:`Excel <Dataset.xls>` output supports separators.
----
Now, go check out the :ref:`API Documentation <api>` or begin :ref:`Tablib Development <development>`.
Vendored
-17
View File
@@ -1,17 +0,0 @@
import os
from fabric.api import *
def scrub():
""" Death to the bytecode! """
local('rm -fr dist build')
local("find . -name \"*.pyc\" -exec rm '{}' ';'")
def docs():
"""Build docs."""
os.system('make dirhtml')
os.chdir('_build/dirhtml')
os.system('sphinxtogithub .')
os.system('git add -A')
os.system('git commit -m \'documentation update\'')
os.system('git push origin gh-pages')
-3
View File
@@ -1,3 +0,0 @@
xlwt
simplejson
PyYAML
+3 -2
View File
@@ -22,7 +22,7 @@ if sys.version_info[:2] < (2,6):
setup(
name='tablib',
version='0.9.5',
version='0.9.6',
description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)',
long_description=open('README.rst').read() + '\n\n' +
open('HISTORY.rst').read(),
@@ -33,7 +33,8 @@ setup(
'tablib', 'tablib.formats',
'tablib.packages',
'tablib.packages.xlwt',
'tablib.packages.yaml',
'tablib.packages.yaml',
'tablib.packages.unicodecsv'
],
install_requires=required,
license='MIT',
+18 -18
View File
@@ -94,7 +94,7 @@ class Row(object):
return (tag in self.tags)
else:
return bool(len(set(tag) & set(self.tags)))
@@ -138,7 +138,7 @@ class Dataset(object):
# ('title', index) tuples
self._separators = []
# (column, callback) tuples
self._formatters = []
@@ -242,12 +242,12 @@ class Dataset(object):
"""Packages Dataset into lists of dictionaries for transmission."""
_data = list(self._data)
# Execute formatters
if self._formatters:
for row_i, row in enumerate(_data):
for col, callback in self._formatters:
try:
try:
if col is None:
for j, c in enumerate(row):
_data[row_i][j] = callback(c)
@@ -255,7 +255,7 @@ class Dataset(object):
_data[row_i][col] = callback(row[col])
except IndexError:
raise InvalidDatasetIndex
if self.headers:
if dicts:
@@ -331,8 +331,8 @@ class Dataset(object):
headers = property(_get_headers, _set_headers)
def _get_dict(self):
"""A native Python representation of the :class:`Dataset` object. If headers have
been set, a list of Python dictionaries will be returned. If no headers have been set,
"""A native Python representation of the :class:`Dataset` object. If headers have
been set, a list of Python dictionaries will be returned. If no headers have been set,
a list of tuples (rows) will be returned instead.
A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
@@ -379,7 +379,7 @@ class Dataset(object):
@property
def xls():
"""An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`seperators`. Cannot be set.
"""An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.
.. admonition:: Binary Warning
@@ -480,7 +480,7 @@ class Dataset(object):
def append_separator(self, text='-'):
"""Adds a :ref:`seperator <seperators>` to the :class:`Dataset`."""
"""Adds a :ref:`separator <separators>` to the :class:`Dataset`."""
# change offsets if headers are or aren't defined
if not self.headers:
@@ -493,26 +493,26 @@ class Dataset(object):
def add_formatter(self, col, handler):
"""Adds a :ref:`formatter` to the :class:`Dataset`.
.. versionadded:: 0.9.5
:param col: column to. Accepts index int or header str.
:param handler: reference to callback function to execute
:param handler: reference to callback function to execute
against each cell value.
"""
if isinstance(col, str):
if col in self.headers:
col = self.headers.index(col) # get 'key' index from each data
else:
raise KeyError
if not col > self.width:
self._formatters.append((col, handler))
else:
raise InvalidDatasetIndex
return True
def insert(self, index, row=None, col=None, header=None, tags=list()):
"""Inserts a row or column to the :class:`Dataset` at the given index.
@@ -584,10 +584,10 @@ class Dataset(object):
def sort(self, col, reverse=False):
"""Sort a :class:`Dataset` by a specific column, given string (for
header) or integer (for column index). The order can be reversed by
setting ``reverse`` to ``True``.
setting ``reverse`` to ``True``.
Returns a new :class:`Dataset` instance where columns have been
sorted."""
if isinstance(col, str):
if not self.headers:
@@ -805,7 +805,7 @@ class InvalidDatasetType(Exception):
class InvalidDimensions(Exception):
"Invalid size"
class InvalidDatasetIndex(Exception):
"Outside of Dataset size"
+18 -3
View File
@@ -5,12 +5,17 @@
import sys
if sys.version_info[0] > 2:
is_py3 = True
from io import StringIO
import csv
else:
is_py3 = False
from cStringIO import StringIO
import tablib.packages.unicodecsv as csv
import csv
import os
import tablib
@@ -20,11 +25,18 @@ title = 'csv'
extentions = ('csv',)
DEFAULT_ENCODING = 'utf-8'
def export_set(dataset):
"""Returns CSV representation of Dataset."""
stream = StringIO()
_csv = csv.writer(stream)
if is_py3:
_csv = csv.writer(stream)
else:
_csv = csv.writer(stream, encoding=DEFAULT_ENCODING)
for row in dataset._package(dicts=False):
_csv.writerow(row)
@@ -37,7 +49,10 @@ def import_set(dset, in_stream, headers=True):
dset.wipe()
rows = csv.reader(in_stream.splitlines())
if is_py3:
rows = csv.reader(in_stream.splitlines())
else:
rows = csv.reader(in_stream.splitlines(), encoding=DEFAULT_ENCODING)
for i, row in enumerate(rows):
if (i == 0) and (headers):
+105
View File
@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-
import csv
from csv import *
#http://semver.org/
VERSION = (0, 8, 0)
__version__ = ".".join(map(str,VERSION))
def _stringify(s, encoding):
if type(s)==unicode:
return s.encode(encoding)
elif isinstance(s, (int , float)):
pass #let csv.QUOTE_NONNUMERIC do its thing.
elif type(s) != str:
s=str(s)
return s
def _stringify_list(l, encoding):
return [_stringify(s, encoding) for s in l]
class UnicodeWriter(object):
"""
>>> import unicodecsv
>>> from cStringIO import StringIO
>>> f = StringIO()
>>> w = unicodecsv.writer(f, encoding='utf-8')
>>> w.writerow((u'é', u'ñ'))
>>> f.seek(0)
>>> r = unicodecsv.reader(f, encoding='utf-8')
>>> row = r.next()
>>> print row[0], row[1]
é ñ
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
self.writer = csv.writer(f)
self.dialect = dialect
self.encoding = encoding
self.writer = csv.writer(f, dialect=dialect, **kwds)
def writerow(self, row):
self.writer.writerow(_stringify_list(row, self.encoding))
def writerows(self, rows):
for row in rows:
self.writerow(row)
writer = UnicodeWriter
class UnicodeReader(object):
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
self.reader = csv.reader(f, dialect=dialect, **kwds)
self.encoding = encoding
def next(self):
row = self.reader.next()
return [unicode(s, self.encoding) for s in row]
def __iter__(self):
return self
reader = UnicodeReader
class DictWriter(csv.DictWriter):
"""
>>> from cStringIO import StringIO
>>> f = StringIO()
>>> w = DictWriter(f, ['a', 'b'], restval=u'î')
>>> w.writerow({'a':'1'})
>>> w.writerow({'a':'1', 'b':u'ø'})
>>> w.writerow({'a':u'é'})
>>> f.seek(0)
>>> r = DictReader(f, fieldnames=['a'], restkey='r')
>>> r.next() == {'a':u'1', 'r':[u"î"]}
True
>>> r.next() == {'a':u'1', 'r':[u"ø"]}
True
>>> r.next() == {'a':u'é', 'r':[u"î"]}
"""
def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', *args, **kwds):
self.fieldnames = fieldnames
self.encoding = encoding
self.restval = restval
self.writer = csv.DictWriter(csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds)
def writerow(self, d):
for fieldname in self.fieldnames:
if fieldname in d:
d[fieldname] = _stringify(d[fieldname], self.encoding)
else:
d[fieldname] = _stringify(self.restval, self.encoding)
self.writer.writerow(d)
class DictReader(csv.DictReader):
def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, dialect='excel', encoding='utf-8', *args, **kwds):
self.restkey = restkey
self.encoding = encoding
self.reader = csv.DictReader(csvfile, fieldnames, restkey, restval, dialect, *args, **kwds)
def next(self):
d = self.reader.next()
for k, v in d.items():
if k == self.restkey:
rest = v
if rest:
d[self.restkey] = [unicode(v, self.encoding) for v in rest]
else:
if v is not None:
d[k] = unicode(v, self.encoding)
return d
+12
View File
@@ -499,6 +499,18 @@ class TablibTestCase(unittest.TestCase):
for name in [r['last_name'] for r in self.founders.dict]:
self.assertTrue(name.isupper())
def test_unicode_csv(self):
"""Check if unicode in csv export doesn't raise."""
data = tablib.Dataset()
if sys.version_info[0] > 2:
data.append(['\xfc', '\xfd'])
else:
exec("data.append([u'\xfc', u'\xfd'])")
data.csv
if __name__ == '__main__':
unittest.main()