diff --git a/requirements.txt b/requirements.txt index 2fab040..05db063 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +backports.csv==1.0.6 certifi==2017.7.27.1 chardet==3.0.4 et-xmlfile==1.0.1 diff --git a/setup.py b/setup.py index 2d1b0fa..7b93754 100755 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ packages = [ install = [ 'odfpy', 'openpyxl>=2.4.0', - 'unicodecsv', + 'backports.csv', 'xlrd', 'xlwt', 'pyyaml', diff --git a/tablib/compat.py b/tablib/compat.py index f054ebb..660697d 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -27,11 +27,11 @@ if is_py3: else: from cStringIO import StringIO as BytesIO - from cStringIO import StringIO + from StringIO import StringIO from tablib.packages import markup from tablib.packages.statistics import median from itertools import izip_longest - import unicodecsv as csv + from backports import csv import tablib.packages.dbfpy as dbfpy unicode = unicode diff --git a/tablib/core.py b/tablib/core.py index 809bb6e..78c4dce 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -862,7 +862,7 @@ class Dataset(object): against each cell value. """ - if isinstance(col, str): + if isinstance(col, unicode): if col in self.headers: col = self.headers.index(col) # get 'key' index from each data else: diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 06e7830..8b536a7 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -3,15 +3,14 @@ """ Tablib - *SV Support. """ -from tablib.compat import is_py3, csv, StringIO +from tablib.compat import csv, StringIO, unicode title = 'csv' extensions = ('csv',) -DEFAULT_ENCODING = 'utf-8' -DEFAULT_DELIMITER = ',' +DEFAULT_DELIMITER = unicode(',') def export_set(dataset, **kwargs): @@ -19,8 +18,6 @@ def export_set(dataset, **kwargs): stream = StringIO() kwargs.setdefault('delimiter', DEFAULT_DELIMITER) - if not is_py3: - kwargs.setdefault('encoding', DEFAULT_ENCODING) _csv = csv.writer(stream, **kwargs) @@ -36,8 +33,6 @@ def import_set(dset, in_stream, headers=True, **kwargs): dset.wipe() kwargs.setdefault('delimiter', DEFAULT_DELIMITER) - if not is_py3: - kwargs.setdefault('encoding', DEFAULT_ENCODING) rows = csv.reader(StringIO(in_stream), **kwargs) for i, row in enumerate(rows): diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 9380b3b..1c6d6a1 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -3,6 +3,7 @@ """ Tablib - TSV (Tab Separated Values) Support. """ +from tablib.compat import unicode from tablib.formats._csv import ( export_set as export_set_wrapper, import_set as import_set_wrapper, @@ -12,8 +13,7 @@ from tablib.formats._csv import ( title = 'tsv' extensions = ('tsv',) -DEFAULT_ENCODING = 'utf-8' -DELIMITER = '\t' +DELIMITER = unicode('\t') def export_set(dataset): """Returns TSV representation of Dataset.""" diff --git a/test_tablib.py b/test_tablib.py index 57b1b39..e7b7233 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -1,18 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """Tests for Tablib.""" - -import doctest -import json -import unittest -import sys -from uuid import uuid4 +from __future__ import unicode_literals import datetime +import doctest +import json +import sys +import unittest +from uuid import uuid4 import tablib from tablib.compat import markup, unicode, is_py3 from tablib.core import Row +from tablib.formats import csv as csv_format class TablibTestCase(unittest.TestCase): @@ -227,21 +228,21 @@ class TablibTestCase(unittest.TestCase): # Delete from invalid index self.assertRaises(IndexError, self.founders.__delitem__, 3) - + def test_json_export(self): """Verify exporting dataset object as JSON""" - + address_id = uuid4() headers = self.headers + ('address_id',) founders = tablib.Dataset(headers=headers, title='Founders') founders.append(('John', 'Adams', 90, address_id)) founders_json = founders.export('json') - + expected_json = ( '[{"first_name": "John", "last_name": "Adams", "gpa": 90, ' '"address_id": "%s"}]' % str(address_id) ) - + self.assertEqual(founders_json, expected_json) def test_csv_export(self): @@ -571,6 +572,15 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_csv_import_set_with_unicode_str(self): + """Import CSV set with non-ascii characters in unicode literal""" + csv_text = ( + "id,givenname,surname,loginname,email,pref_firstname,pref_lastname\n" + "13765,Ævar,Arnfjörð,testing,test@example.com,Ævar,Arnfjörð" + ) + data.csv = csv_text + self.assertEqual(data.width, 7) + def test_tsv_import_set(self): """Generate and import TSV set serialization.""" data.append(self.john)