Fix unicode encode errors on Python 2 -- Fixes #215

Switch the csv library to backports.csv, as its implementation
is closer to the Python 3 one. Add a test case covering the
problem.

Run tests with unicode_literals from future

Fix unicode encode errors with unicode characters

- Use `backports.csv` instead of `unicodecsv`
- Use StringIO instead of cStringIO
- Clean up some Python 2-specific code
This commit is contained in:
Bruno Alla
2017-01-12 09:49:45 +00:00
parent 05bd0d1d42
commit 80e72cfa27
6 changed files with 22 additions and 18 deletions
+1 -1
View File
@@ -44,7 +44,7 @@ packages = [
install = [
'odfpy',
'openpyxl',
'unicodecsv',
'backports.csv',
'xlrd',
'xlwt',
'pyyaml',
+2 -2
View File
@@ -37,11 +37,11 @@ if is_py3:
else:
from cStringIO import StringIO as BytesIO
from cStringIO import StringIO
from StringIO import StringIO
from tablib.packages import markup
from itertools import ifilter
import unicodecsv as csv
from backports import csv
import tablib.packages.dbfpy as dbfpy
unicode = unicode
+1 -1
View File
@@ -831,7 +831,7 @@ class Dataset(object):
against each cell value.
"""
if isinstance(col, str):
if isinstance(col, unicode):
if col in self.headers:
col = self.headers.index(col) # get 'key' index from each data
else:
+2 -7
View File
@@ -3,15 +3,14 @@
""" Tablib - *SV Support.
"""
from tablib.compat import is_py3, csv, StringIO
from tablib.compat import csv, StringIO, unicode
title = 'csv'
extensions = ('csv',)
DEFAULT_ENCODING = 'utf-8'
DEFAULT_DELIMITER = ','
DEFAULT_DELIMITER = unicode(',')
def export_set(dataset, **kwargs):
@@ -19,8 +18,6 @@ def export_set(dataset, **kwargs):
stream = StringIO()
kwargs.setdefault('delimiter', DEFAULT_DELIMITER)
if not is_py3:
kwargs.setdefault('encoding', DEFAULT_ENCODING)
_csv = csv.writer(stream, **kwargs)
@@ -36,8 +33,6 @@ def import_set(dset, in_stream, headers=True, **kwargs):
dset.wipe()
kwargs.setdefault('delimiter', DEFAULT_DELIMITER)
if not is_py3:
kwargs.setdefault('encoding', DEFAULT_ENCODING)
rows = csv.reader(StringIO(in_stream), **kwargs)
for i, row in enumerate(rows):
+2 -2
View File
@@ -3,6 +3,7 @@
""" Tablib - TSV (Tab Separated Values) Support.
"""
from tablib.compat import unicode
from tablib.formats._csv import (
export_set as export_set_wrapper,
import_set as import_set_wrapper,
@@ -12,8 +13,7 @@ from tablib.formats._csv import (
title = 'tsv'
extensions = ('tsv',)
DEFAULT_ENCODING = 'utf-8'
DELIMITER = '\t'
DELIMITER = unicode('\t')
def export_set(dataset):
"""Returns TSV representation of Dataset."""
+14 -5
View File
@@ -1,17 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tests for Tablib."""
import json
import unittest
import sys
import os
from __future__ import unicode_literals
import datetime
import json
import sys
import unittest
import tablib
from tablib.compat import markup, unicode, is_py3
from tablib.core import Row
from tablib.formats import csv as csv_format
class TablibTestCase(unittest.TestCase):
@@ -531,6 +531,15 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(_csv, data.csv)
def test_csv_import_set_with_unicode_str(self):
"""Import CSV set with non-ascii characters in unicode literal"""
csv_text = (
"id,givenname,surname,loginname,email,pref_firstname,pref_lastname\n"
"13765,Ævar,Arnfjörð,testing,test@example.com,Ævar,Arnfjörð"
)
data.csv = csv_text
self.assertEqual(data.width, 7)
def test_tsv_import_set(self):
"""Generate and import TSV set serialization."""
data.append(self.john)