From 03086052eddeaf4d1b7521d87c3ac850dcc8e821 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 14 May 2011 10:01:48 -0400 Subject: [PATCH] Merge pull request #11 from cswegger/tablib --- This change applies the same Unicode CSV fix to TSV files, since all the TSV exporter does is change a few parameters of the CSV module. All unit tests still pass after this change. --- HISTORY.rst | 6 ++++++ tablib/formats/_tsv.py | 23 ++++++++++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 2f04060..7212cdb 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,12 @@ History ------- +0.9.8 ++++++ + +* Full Unicode TSV support + + 0.9.7 (2011-05-12) ++++++++++++++++++ diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index acf28da..e2ca25e 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -5,11 +5,15 @@ import sys if sys.version_info[0] > 2: + is_py3 = True from io import StringIO + import csv else: + is_py3 = False from cStringIO import StringIO - -import csv + import tablib.packages.unicodecsv as csv + + import os import tablib @@ -18,12 +22,16 @@ import tablib title = 'tsv' extentions = ('tsv',) - +DEFAULT_ENCODING = 'utf-8' def export_set(dataset): """Returns a TSV representation of Dataset.""" stream = StringIO() - _tsv = csv.writer(stream, delimiter='\t') + + if is_py3: + _tsv = csv.writer(stream, delimiter="\t") + else: + _tsv = csv.writer(stream, encoding=DEFAULT_ENCODING, delimiter="\t") for row in dataset._package(dicts=False): _tsv.writerow(row) @@ -35,7 +43,12 @@ def import_set(dset, in_stream, headers=True): """Returns dataset from TSV stream.""" dset.wipe() - rows = csv.reader(in_stream.split('\r\n'), delimiter='\t') + if is_py3: + rows = csv.reader(in_stream.split('\r\n'), delimiter='\t') + else: + rows = csv.reader(in_stream.split('\r\n'), delimiter='\t', + encoding=DEFAULT_ENCODING) + for i, row in enumerate(rows): # Skip empty rows if not row: