From e75a00541d83c1df6ede20fec3a5611f960b6821 Mon Sep 17 00:00:00 2001
From: Luca Beltrame
Date: Tue, 19 Oct 2010 10:45:54 +0200
Subject: [PATCH] Support for TSV-files. Unit-tested.

---
Review notes (ignored by git-am):
 * import_set() splits the input with splitlines(), so LF-only TSV text
   (such as the example in the Dataset.tsv docstring) parses correctly.
 * detect() no longer binds the unused names `rows` and `dialect`.
 * Docstrings/comments that still said CSV now say TSV; the Dataset.tsv
   example escapes \t the same way it escapes \n.
 * All edits are in-place line changes, so hunk sizes and the diffstat are
   unchanged; the blob ids on the index lines are those of the original
   commit.

 tablib/core.py             | 14 ++++++++++
 tablib/formats/__init__.py |  3 ++-
 tablib/formats/_tsv.py     | 53 ++++++++++++++++++++++++++++++++++++++
 test_tablib.py             | 44 +++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 tablib/formats/_tsv.py

diff --git a/tablib/core.py b/tablib/core.py
index 6e5e60b..c4071c8 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -306,6 +306,20 @@ class Dataset(object):
         """
         pass
 
+    @property
+    def tsv():
+        """A TSV representation of the :class:`Dataset` object. The top row will contain
+        headers, if they have been set. Otherwise, the top row will contain
+        the first row of the dataset.
+
+        A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. ::
+
+            data = tablib.Dataset()
+            data.tsv = 'age\\tfirst_name\\tlast_name\\n90\\tJohn\\tAdams'
+
+        Import assumes (for now) that headers exist.
+        """
+
     @property
     def yaml():
         """A YAML representation of the :class:`Dataset` object. If headers have been
diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py
index 0ce9b71..f5960b8 100644
--- a/tablib/formats/__init__.py
+++ b/tablib/formats/__init__.py
@@ -7,5 +7,6 @@ import _csv as csv
 import _json as json
 import _xls as xls
 import _yaml as yaml
+import _tsv as tsv
 
-available = (json, xls, yaml, csv)
+available = (json, xls, yaml, csv, tsv)
diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py
new file mode 100644
index 0000000..8603c45
--- /dev/null
+++ b/tablib/formats/_tsv.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+
+""" Tablib - TSV (Tab Separated Values) Support.
+"""
+
+import cStringIO
+import csv
+import os
+
+import tablib
+
+
+title = 'tsv'
+extentions = ('tsv',)
+
+
+
+def export_set(dataset):
+    """Returns a TSV representation of Dataset."""
+    stream = cStringIO.StringIO()
+    _tsv = csv.writer(stream, delimiter="\t")
+
+    for row in dataset._package(dicts=False):
+        _tsv.writerow(row)
+
+    return stream.getvalue()
+
+
+def import_set(dset, in_stream, headers=True):
+    """Wipes dset and refills it from a TSV string (row 0 = headers if enabled)."""
+
+    dset.wipe()
+
+    rows = csv.reader(in_stream.splitlines(), delimiter="\t")
+    for i, row in enumerate(rows):
+
+        # Skip empty rows
+        if not row:
+            continue
+
+        if (i == 0) and (headers):
+            dset.headers = row
+        else:
+            dset.append(row)
+
+
+def detect(stream):
+    """Returns True if given stream is valid TSV."""
+    try:
+        csv.Sniffer().sniff(stream, delimiters="\t")
+        return True
+    except csv.Error:
+        return False
diff --git a/test_tablib.py b/test_tablib.py
index f875cb7..a576ce8 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -178,6 +178,22 @@ class TablibTestCase(unittest.TestCase):
 
         self.assertEqual(csv, self.founders.csv)
 
+    def test_tsv_export(self):
+        """Verify exporting dataset object as TSV."""
+
+        # Build up the tsv string with headers first, followed by each row
+        tsv = ''
+        for col in self.headers:
+            tsv += col + '\t'
+
+        tsv = tsv.strip('\t') + '\r\n'
+
+        for founder in self.founders:
+            for col in founder:
+                tsv += str(col) + '\t'
+            tsv = tsv.strip('\t') + '\r\n'
+
+        self.assertEqual(tsv, self.founders.tsv)
 
     def test_unicode_append(self):
         """Passes in a single unicode charecter and exports."""
@@ -188,6 +204,7 @@ class TablibTestCase(unittest.TestCase):
         data.json
         data.yaml
         data.csv
+        data.tsv
         data.xls
 
 
@@ -268,6 +285,18 @@ class TablibTestCase(unittest.TestCase):
 
         self.assertEqual(_csv, data.csv)
 
+    def test_tsv_import_set(self):
+        """Generate and import TSV set serialization."""
+        data.append(self.john)
+        data.append(self.george)
+        data.headers = self.headers
+
+        _tsv = data.tsv
+
+        data.tsv = _tsv
+
+        self.assertEqual(_tsv, data.tsv)
+
     def test_csv_format_detect(self):
         """Test CSV format detection."""
 
@@ -283,6 +312,21 @@ class TablibTestCase(unittest.TestCase):
         self.assertTrue(tablib.formats.csv.detect(_csv))
         self.assertFalse(tablib.formats.csv.detect(_bunk))
 
+    def test_tsv_format_detect(self):
+        """Test TSV format detection."""
+
+        _tsv = (
+            '1\t2\t3\n'
+            '4\t5\t6\n'
+            '7\t8\t9\n'
+        )
+        _bunk = (
+            '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
+        )
+
+        self.assertTrue(tablib.formats.tsv.detect(_tsv))
+        self.assertFalse(tablib.formats.tsv.detect(_bunk))
+
     def test_json_format_detect(self):
         """Test JSON format detection."""
 