mirror of
https://github.com/kennethreitz/tablib.git
synced 2026-06-05 23:10:17 +00:00
Support for TSV-files. Unit-tested.
This commit is contained in:
@@ -306,6 +306,20 @@ class Dataset(object):
|
||||
"""
|
||||
pass
|
||||
|
||||
@property
def tsv():
    """A TSV representation of the :class:`Dataset` object. The top row will contain
    headers, if they have been set. Otherwise, the top row will contain
    the first row of the dataset.

    A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. ::

        data = tablib.Dataset()
        data.tsv = 'age\\tfirst_name\\tlast_name\\n90\\tJohn\\tAdams'

    Import assumes (for now) that headers exist.
    """
|
||||
|
||||
@property
|
||||
def yaml():
|
||||
"""A YAML representation of the :class:`Dataset` object. If headers have been
|
||||
|
||||
@@ -7,5 +7,6 @@ import _csv as csv
|
||||
import _json as json
|
||||
import _xls as xls
|
||||
import _yaml as yaml
|
||||
import _tsv as tsv
|
||||
|
||||
available = (json, xls, yaml, csv)
|
||||
available = (json, xls, yaml, csv, tsv)
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
""" Tablib - TSV (Tab Separated Values) Support.
|
||||
"""
|
||||
|
||||
import cStringIO
|
||||
import csv
|
||||
import os
|
||||
|
||||
import tablib
|
||||
|
||||
|
||||
# Short name of this format module.
# NOTE(review): presumably the name under which the format is exposed on
# Dataset (e.g. ``Dataset.tsv``) -- confirm against the registration code.
title = 'tsv'
# File extensions associated with this format.
# NOTE(review): the identifier is misspelled ("extentions"); other code may
# read this exact name, so check callers before renaming it.
extentions = ('tsv',)
|
||||
|
||||
|
||||
|
||||
def export_set(dataset):
    """Serialize ``dataset`` to a tab-separated string.

    Every row yielded by ``dataset._package(dicts=False)`` is written with
    the ``csv`` module's writer using a tab delimiter, and the accumulated
    buffer contents are returned.
    """
    buffer = cStringIO.StringIO()
    writer = csv.writer(buffer, delimiter="\t")
    writer.writerows(dataset._package(dicts=False))
    return buffer.getvalue()
|
||||
|
||||
|
||||
def import_set(dset, in_stream, headers=True):
    """Populate ``dset`` from TSV text.

    :param dset: target object; it is wiped first, then filled through its
        ``headers`` attribute and ``append(row)`` method.
    :param in_stream: the TSV document as a single string (not a file
        object).
    :param headers: when true, the first non-empty row becomes
        ``dset.headers`` instead of a data row.

    Nothing is returned; ``dset`` is modified in place.
    """

    dset.wipe()

    # splitlines() accepts "\n", "\r\n" and "\r" line endings. Splitting on
    # "\r\n" alone handed LF-terminated input to csv.reader as one giant
    # line, which it rejects (or misparses).
    rows = csv.reader(in_stream.splitlines(), delimiter="\t")

    # Track the header explicitly instead of relying on the row index, so a
    # blank line at the top of the input does not turn the header row into
    # data.
    header_pending = bool(headers)

    for row in rows:
        # Skip empty rows
        if not row:
            continue

        if header_pending:
            dset.headers = row
            header_pending = False
        else:
            dset.append(row)
|
||||
|
||||
|
||||
def detect(stream):
    """Report whether ``stream`` looks like tab-separated text.

    ``stream`` is handed to ``csv.Sniffer().sniff`` with the tab character
    as the only candidate delimiter. Returns ``True`` when sniffing
    succeeds and ``False`` when it raises ``csv.Error``.
    """
    try:
        csv.Sniffer().sniff(stream, delimiters="\t")
    except csv.Error:
        return False
    return True
|
||||
@@ -178,6 +178,22 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
self.assertEqual(csv, self.founders.csv)
|
||||
|
||||
def test_tsv_export(self):
    """Verify exporting dataset object as TSV."""

    # Expected output: the header line first, then one line per row. Cells
    # are tab-separated and every line ends with CRLF.
    expected = ''.join(col + '\t' for col in self.headers)
    expected = expected.strip('\t') + '\r\n'

    for founder in self.founders:
        expected += ''.join(str(col) + '\t' for col in founder)
        # strip() runs on the whole accumulated string; the earlier lines
        # already end in CRLF, so only the newest trailing tab is removed.
        expected = expected.strip('\t') + '\r\n'

    self.assertEqual(expected, self.founders.tsv)
|
||||
|
||||
def test_unicode_append(self):
|
||||
"""Passes in a single unicode charecter and exports."""
|
||||
@@ -188,6 +204,7 @@ class TablibTestCase(unittest.TestCase):
|
||||
data.json
|
||||
data.yaml
|
||||
data.csv
|
||||
data.tsv
|
||||
data.xls
|
||||
|
||||
|
||||
@@ -268,6 +285,18 @@ class TablibTestCase(unittest.TestCase):
|
||||
|
||||
self.assertEqual(_csv, data.csv)
|
||||
|
||||
def test_tsv_import_set(self):
    """Round-trip a dataset through its TSV serialization."""
    for founder in (self.john, self.george):
        data.append(founder)
    data.headers = self.headers

    # Export, feed the export straight back in, and expect the same text.
    exported = data.tsv
    data.tsv = exported

    self.assertEqual(exported, data.tsv)
|
||||
|
||||
def test_csv_format_detect(self):
|
||||
"""Test CSV format detection."""
|
||||
|
||||
@@ -283,6 +312,21 @@ class TablibTestCase(unittest.TestCase):
|
||||
self.assertTrue(tablib.formats.csv.detect(_csv))
|
||||
self.assertFalse(tablib.formats.csv.detect(_bunk))
|
||||
|
||||
def test_tsv_format_detect(self):
    """Check that TSV detection accepts tabular text and rejects junk."""

    tabular = (
        '1\t2\t3\n'
        '4\t5\t6\n'
        '7\t8\t9\n'
    )
    junk = (
        '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
    )

    tsv_detect = tablib.formats.tsv.detect
    self.assertTrue(tsv_detect(tabular))
    self.assertFalse(tsv_detect(junk))
|
||||
|
||||
def test_json_format_detect(self):
|
||||
"""Test JSON format detection."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user