Support for TSV-files. Unit-tested.

This commit is contained in:
Luca Beltrame
2010-10-19 10:45:54 +02:00
parent 3b0e0c7991
commit e75a00541d
4 changed files with 113 additions and 1 deletions
+14
View File
@@ -306,6 +306,20 @@ class Dataset(object):
"""
pass
@property
def tsv():
"""A TSV representation of the :class:`Dataset` object. The top row will contain
headers, if they have been set. Otherwise, the top row will contain
the first row of the dataset.
A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. ::
data = tablib.Dataset()
data.tsv = 'age\tfirst_name\tlast_name\\n90\tJohn\tAdams'
Import assumes (for now) that headers exist.
"""
@property
def yaml():
"""A YAML representation of the :class:`Dataset` object. If headers have been
+2 -1
View File
@@ -7,5 +7,6 @@ import _csv as csv
import _json as json
import _xls as xls
import _yaml as yaml
import _tsv as tsv
available = (json, xls, yaml, csv)
available = (json, xls, yaml, csv, tsv)
+53
View File
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
""" Tablib - TSV (Tab Separated Values) Support.
"""
import cStringIO
import csv
import os
import tablib
title = 'tsv'
extentions = ('tsv',)
def export_set(dataset):
"""Returns a TSV representation of Dataset."""
stream = cStringIO.StringIO()
_tsv = csv.writer(stream, delimiter="\t")
for row in dataset._package(dicts=False):
_tsv.writerow(row)
return stream.getvalue()
def import_set(dset, in_stream, headers=True):
"""Returns dataset from TSV stream."""
dset.wipe()
rows = csv.reader(in_stream.split("\r\n"), delimiter="\t")
for i, row in enumerate(rows):
# Skip empty rows
if not row:
continue
if (i == 0) and (headers):
dset.headers = row
else:
dset.append(row)
def detect(stream):
"""Returns True if given stream is valid TSV."""
try:
rows = dialect = csv.Sniffer().sniff(stream, delimiters="\t")
return True
except csv.Error:
return False
+44
View File
@@ -178,6 +178,22 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(csv, self.founders.csv)
def test_tsv_export(self):
"""Verify exporting dataset object as CSV."""
# Build up the csv string with headers first, followed by each row
tsv = ''
for col in self.headers:
tsv += col + '\t'
tsv = tsv.strip('\t') + '\r\n'
for founder in self.founders:
for col in founder:
tsv += str(col) + '\t'
tsv = tsv.strip('\t') + '\r\n'
self.assertEqual(tsv, self.founders.tsv)
def test_unicode_append(self):
"""Passes in a single unicode charecter and exports."""
@@ -188,6 +204,7 @@ class TablibTestCase(unittest.TestCase):
data.json
data.yaml
data.csv
data.tsv
data.xls
@@ -268,6 +285,18 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(_csv, data.csv)
def test_tsv_import_set(self):
"""Generate and import TSV set serialization."""
data.append(self.john)
data.append(self.george)
data.headers = self.headers
_tsv = data.tsv
data.tsv = _tsv
self.assertEqual(_tsv, data.tsv)
def test_csv_format_detect(self):
"""Test CSV format detection."""
@@ -283,6 +312,21 @@ class TablibTestCase(unittest.TestCase):
self.assertTrue(tablib.formats.csv.detect(_csv))
self.assertFalse(tablib.formats.csv.detect(_bunk))
def test_tsv_format_detect(self):
"""Test TSV format detection."""
_tsv = (
'1\t2\t3\n'
'4\t5\t6\n'
'7\t8\t9\n'
)
_bunk = (
'¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
)
self.assertTrue(tablib.formats.tsv.detect(_tsv))
self.assertFalse(tablib.formats.tsv.detect(_bunk))
def test_json_format_detect(self):
"""Test JSON format detection."""