From 80e72cfa27264efb9f525bd92ce6476c5eadb3e9 Mon Sep 17 00:00:00 2001 From: Bruno Alla Date: Thu, 12 Jan 2017 09:49:45 +0000 Subject: [PATCH] Fix unicode encode errors on Python 2 -- Fixes #215 Switch csv library to backports.csv as the implementation is closer to the python 3 one. Add a test case covering the problem. Run tests with unicode_literals from future Fix unicode encode errors with unicode characters - Use `backports.csv` instead of `unicodecsv` - Use StringIO instead of cStringIO - Clean-up some Python 2 specific code --- setup.py | 2 +- tablib/compat.py | 4 ++-- tablib/core.py | 2 +- tablib/formats/_csv.py | 9 ++------- tablib/formats/_tsv.py | 4 ++-- test_tablib.py | 19 ++++++++++++++----- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/setup.py b/setup.py index e46eb88..6fc94ae 100755 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ packages = [ install = [ 'odfpy', 'openpyxl', - 'unicodecsv', + 'backports.csv', 'xlrd', 'xlwt', 'pyyaml', diff --git a/tablib/compat.py b/tablib/compat.py index 43e0bbc..d18a781 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -37,11 +37,11 @@ if is_py3: else: from cStringIO import StringIO as BytesIO - from cStringIO import StringIO + from StringIO import StringIO from tablib.packages import markup from itertools import ifilter - import unicodecsv as csv + from backports import csv import tablib.packages.dbfpy as dbfpy unicode = unicode diff --git a/tablib/core.py b/tablib/core.py index c44c6ac..b97da54 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -831,7 +831,7 @@ class Dataset(object): against each cell value. """ - if isinstance(col, str): + if isinstance(col, unicode): if col in self.headers: col = self.headers.index(col) # get 'key' index from each data else: diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 994b23b..b74afd7 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -3,15 +3,14 @@ """ Tablib - *SV Support. """ -from tablib.compat import is_py3, csv, StringIO +from tablib.compat import csv, StringIO, unicode title = 'csv' extensions = ('csv',) -DEFAULT_ENCODING = 'utf-8' -DEFAULT_DELIMITER = ',' +DEFAULT_DELIMITER = unicode(',') def export_set(dataset, **kwargs): @@ -19,8 +18,6 @@ def export_set(dataset, **kwargs): stream = StringIO() kwargs.setdefault('delimiter', DEFAULT_DELIMITER) - if not is_py3: - kwargs.setdefault('encoding', DEFAULT_ENCODING) _csv = csv.writer(stream, **kwargs) @@ -36,8 +33,6 @@ def import_set(dset, in_stream, headers=True, **kwargs): dset.wipe() kwargs.setdefault('delimiter', DEFAULT_DELIMITER) - if not is_py3: - kwargs.setdefault('encoding', DEFAULT_ENCODING) rows = csv.reader(StringIO(in_stream), **kwargs) for i, row in enumerate(rows): diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 9380b3b..1c6d6a1 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -3,6 +3,7 @@ """ Tablib - TSV (Tab Separated Values) Support. """ +from tablib.compat import unicode from tablib.formats._csv import ( export_set as export_set_wrapper, import_set as import_set_wrapper, @@ -12,8 +13,7 @@ from tablib.formats._csv import ( title = 'tsv' extensions = ('tsv',) -DEFAULT_ENCODING = 'utf-8' -DELIMITER = '\t' +DELIMITER = unicode('\t') def export_set(dataset): """Returns TSV representation of Dataset.""" diff --git a/test_tablib.py b/test_tablib.py index 03a46df..96cd884 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -1,17 +1,17 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """Tests for Tablib.""" - -import json -import unittest -import sys -import os +from __future__ import unicode_literals import datetime +import json +import sys +import unittest import tablib from tablib.compat import markup, unicode, is_py3 from tablib.core import Row +from tablib.formats import csv as csv_format class TablibTestCase(unittest.TestCase): @@ -531,6 +531,15 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_csv_import_set_with_unicode_str(self): + """Import CSV set with non-ascii characters in unicode literal""" + csv_text = ( + "id,givenname,surname,loginname,email,pref_firstname,pref_lastname\n" + "13765,Ævar,Arnfjörð,testing,test@example.com,Ævar,Arnfjörð" + ) + data.csv = csv_text + self.assertEqual(data.width, 7) + def test_tsv_import_set(self): """Generate and import TSV set serialization.""" data.append(self.john)