Merge pull request #350 from browniebroke/bugfix/invalid-ascii-csv

Import ascii characters not valid with unicode literals - updated
This commit is contained in:
Timo Furrer
2019-03-02 15:06:21 +01:00
committed by GitHub
7 changed files with 29 additions and 23 deletions
+1
View File
@@ -1,3 +1,4 @@
backports.csv==1.0.6
certifi==2017.7.27.1
chardet==3.0.4
et-xmlfile==1.0.1
+1 -1
View File
@@ -35,7 +35,7 @@ packages = [
install = [
'odfpy',
'openpyxl>=2.4.0',
'unicodecsv',
'backports.csv',
'xlrd',
'xlwt',
'pyyaml',
+2 -2
View File
@@ -27,11 +27,11 @@ if is_py3:
else:
from cStringIO import StringIO as BytesIO
from cStringIO import StringIO
from StringIO import StringIO
from tablib.packages import markup
from tablib.packages.statistics import median
from itertools import izip_longest
import unicodecsv as csv
from backports import csv
import tablib.packages.dbfpy as dbfpy
unicode = unicode
+1 -1
View File
@@ -862,7 +862,7 @@ class Dataset(object):
against each cell value.
"""
if isinstance(col, str):
if isinstance(col, unicode):
if col in self.headers:
col = self.headers.index(col) # get 'key' index from each data
else:
+2 -7
View File
@@ -3,15 +3,14 @@
""" Tablib - *SV Support.
"""
from tablib.compat import is_py3, csv, StringIO
from tablib.compat import csv, StringIO, unicode
title = 'csv'
extensions = ('csv',)
DEFAULT_ENCODING = 'utf-8'
DEFAULT_DELIMITER = ','
DEFAULT_DELIMITER = unicode(',')
def export_set(dataset, **kwargs):
@@ -19,8 +18,6 @@ def export_set(dataset, **kwargs):
stream = StringIO()
kwargs.setdefault('delimiter', DEFAULT_DELIMITER)
if not is_py3:
kwargs.setdefault('encoding', DEFAULT_ENCODING)
_csv = csv.writer(stream, **kwargs)
@@ -36,8 +33,6 @@ def import_set(dset, in_stream, headers=True, **kwargs):
dset.wipe()
kwargs.setdefault('delimiter', DEFAULT_DELIMITER)
if not is_py3:
kwargs.setdefault('encoding', DEFAULT_ENCODING)
rows = csv.reader(StringIO(in_stream), **kwargs)
for i, row in enumerate(rows):
+2 -2
View File
@@ -3,6 +3,7 @@
""" Tablib - TSV (Tab Separated Values) Support.
"""
from tablib.compat import unicode
from tablib.formats._csv import (
export_set as export_set_wrapper,
import_set as import_set_wrapper,
@@ -12,8 +13,7 @@ from tablib.formats._csv import (
title = 'tsv'
extensions = ('tsv',)
DEFAULT_ENCODING = 'utf-8'
DELIMITER = '\t'
DELIMITER = unicode('\t')
def export_set(dataset):
"""Returns TSV representation of Dataset."""
+20 -10
View File
@@ -1,18 +1,19 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Tests for Tablib."""
import doctest
import json
import unittest
import sys
from uuid import uuid4
from __future__ import unicode_literals
import datetime
import doctest
import json
import sys
import unittest
from uuid import uuid4
import tablib
from tablib.compat import markup, unicode, is_py3
from tablib.core import Row
from tablib.formats import csv as csv_format
class TablibTestCase(unittest.TestCase):
@@ -227,21 +228,21 @@ class TablibTestCase(unittest.TestCase):
# Delete from invalid index
self.assertRaises(IndexError, self.founders.__delitem__, 3)
def test_json_export(self):
    """Check the JSON export of a one-row dataset that includes a UUID cell."""
    uuid_value = uuid4()

    founders = tablib.Dataset(
        headers=self.headers + ('address_id',),
        title='Founders',
    )
    founders.append(('John', 'Adams', 90, uuid_value))

    # The expected payload carries the UUID in its str() form.
    expected = (
        '[{"first_name": "John", "last_name": "Adams", "gpa": 90, '
        '"address_id": "%s"}]' % str(uuid_value)
    )

    self.assertEqual(founders.export('json'), expected)
def test_csv_export(self):
@@ -571,6 +572,15 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(_csv, data.csv)
def test_csv_import_set_with_unicode_str(self):
    """CSV text containing non-ASCII characters imports with all seven columns."""
    header_line = "id,givenname,surname,loginname,email,pref_firstname,pref_lastname\n"
    record_line = "13765,Ævar,Arnfjörð,testing,test@example.com,Ævar,Arnfjörð"

    # `data` is not created in this method; it is resolved from an enclosing scope.
    data.csv = header_line + record_line

    self.assertEqual(data.width, 7)
def test_tsv_import_set(self):
"""Generate and import TSV set serialization."""
data.append(self.john)