From 6407afba3eedaf27c2614a362f0842470a8f4fae Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 28 Sep 2010 08:46:31 -0400 Subject: [PATCH 01/14] typo fix. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 13ad1e8..920c148 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', - packages=['tablib'm 'tablib.formats'], + packages=['tablib', 'tablib.formats'], install_requires=['xlwt', 'simplejson', 'PyYAML'], license='MIT', classifiers=( From 2a7aa959b35cdc535727a27147cae7721db47014 Mon Sep 17 00:00:00 2001 From: Josh Ourisman Date: Fri, 1 Oct 2010 14:51:36 -0400 Subject: [PATCH 02/14] modified .gitignore to actually ignore .pyc files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 91c9479..68f4c75 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ dist/* MANIFEST # python skin -.pyc +*.pyc .pyo # osx noise From 9f7fec23793b25c7aeffcee4186d6009dc816cca Mon Sep 17 00:00:00 2001 From: Josh Ourisman Date: Fri, 1 Oct 2010 15:27:28 -0400 Subject: [PATCH 03/14] changing syntax of checking for row and col values in append(); slightly more robust this way --- tablib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index ddb9769..8f74975 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -173,10 +173,10 @@ class Dataset(object): def append(self, row=None, col=None): """Adds a row to the end of Dataset""" - if row: + if row is not None: self._validate(row) self._data.append(tuple(row)) - elif col: + elif col is not None: self._validate(col=col) if self.headers: From 149bafa97b3c93b5116090d2834f0c19e6face95 Mon Sep 17 00:00:00 2001 From: Josh Ourisman Date: Fri, 1 Oct 2010 16:17:04 -0400 Subject: [PATCH 04/14] added ability to append new column passing a callable as the value that will be applied to every row; w/ test --- tablib/core.py | 9 +++++++++ test_tablib.py | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/tablib/core.py b/tablib/core.py index 8f74975..dfb8027 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -177,6 +177,15 @@ class Dataset(object): self._validate(row) self._data.append(tuple(row)) elif col is not None: + col = list(col) + if self.headers: + header = [col.pop(0)] + else: + header = [] + if len(col) == 1 and callable(col[0]): + col = map(col[0], self._data) + col = tuple(header + col) + self._validate(col=col) if self.headers: diff --git a/test_tablib.py b/test_tablib.py index 67b693d..a315642 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -102,6 +102,13 @@ class TablibTestCase(unittest.TestCase): self.assertRaises(tablib.InvalidDimensions, data.append, col=new_col) + def test_add_callable_column(self): + """Verify adding column with values specified as callable.""" + new_col = ['first_again', lambda x: x[0]] + self.founders.append(col=new_col) + + self.assertTrue(map(lambda x: x[0] == x[-1], self.founders)) + def test_header_slicing(self): """Verify slicing by headers.""" From 22fe18239fcf6d0886ed95979c6d7352951539b4 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:26:53 -0400 Subject: [PATCH 05/14] Added legacy cli interface. --- tablib/cli.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 tablib/cli.py diff --git a/tablib/cli.py b/tablib/cli.py new file mode 100644 index 0000000..4e46eae --- /dev/null +++ b/tablib/cli.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# encoding: utf-8 + +""" Tabbed CLI Inteface Application +""" + +import io +import sys + +import argue + +import tablib +from helpers import Struct, piped + + + +FORMATS = ('json', 'yaml', 'xls', 'csv') + +opts = [] + +opts.append(('v', 'version', False, 'Report tabbed version')) + +for format in FORMATS: + opts.append(('', format, False, 'Output to %s' % (format.upper()))) + + + +@argue.command(options=opts, usage='[FILE] [--FORMAT | FILE]') +def start(in_file=None, out_file=None, **opts): + """Covertly convert dataset formats""" + + opts = Struct(**opts) + + if opts.version: + print('Tabbed, Ver. %s' % tabbed.core.__version__) + sys.sys.exit(0) + + stdin = piped() + + if stdin: + print stdin + + elif in_file: + + try: + in_file = io.open(in_file, 'r') + except Exception, e: + print(' %s cannot be read.' % in_file) + sys.exit(65) + + file_ext = in_file.name.split('.')[-1] + + if file_ext.lower() in FORMATS: + setattr(opts, file_ext, True) + else: + print('Import format not supported.') + sys.exit(65) + else: + print('Please provide input.') + sys.exit(65) + + + + _formats_sum = sum(opts[f] for f in FORMATS) + + # Multiple output formats given + if _formats_sum > 1: + print('Please specify a single output format.') + sys.exit(64) + + # No output formats given + elif _formats_sum < 1: + print('Please specify an output format.') + sys.exit(64) + + + # fetch options.formats list + # if sum(()) > 1 + # log only one data format please + # if sum of formats == 0, specity format + + # look for filename + + print opts.__dict__ + print in_file + print out_file \ No newline at end of file From 25f846a78a2ba313632839530b1823e94d30320c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:27:04 -0400 Subject: [PATCH 06/14] Added entrance point, setup.py updates. --- setup.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 920c148..0cdfb39 100644 --- a/setup.py +++ b/setup.py @@ -24,8 +24,8 @@ setup( author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', - packages=['tablib', 'tablib.formats'], - install_requires=['xlwt', 'simplejson', 'PyYAML'], + packages=['tablib'], + install_requires=['xlwt', 'simplejson', 'PyYAML', 'argue'], license='MIT', classifiers=( 'Development Status :: 4 - Beta', @@ -35,9 +35,9 @@ setup( 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', ), - # entry_points={ - # 'console_scripts': [ - # 'tabbed = tablib.cli:start', - # ], - # } + entry_points={ + 'console_scripts': [ + 'tabbed = tablib.cli:start', + ], + } ) From b369baba409cfbd924c24e6afa3d0f17c5bdc8e5 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:27:53 -0400 Subject: [PATCH 07/14] Added runner (for testing). --- tabbed | 1 + 1 file changed, 1 insertion(+) create mode 160000 tabbed diff --git a/tabbed b/tabbed new file mode 160000 index 0000000..28a7222 --- /dev/null +++ b/tabbed @@ -0,0 +1 @@ +Subproject commit 28a722239b883a11d0067488e6cd765a945d1dd2 From 96668bb393f1dbab54e7a48a8093f7f744c92de8 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:28:46 -0400 Subject: [PATCH 08/14] tabbed runner --- tabbed | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) mode change 160000 => 100755 tabbed diff --git a/tabbed b/tabbed deleted file mode 160000 index 28a7222..0000000 --- a/tabbed +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 28a722239b883a11d0067488e6cd765a945d1dd2 diff --git a/tabbed b/tabbed new file mode 100755 index 0000000..c30ec36 --- /dev/null +++ b/tabbed @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Tabbed -- CLI for Tablib +Copyright (c) 2010 Kenneth Reitz. MIT License. +""" + +import tablib.cli + + +if __name__ == '__main__': + + tablib.cli.start() \ No newline at end of file From d479c5735ad5875fc2a953abd1c20654ddc83aef Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:36:20 -0400 Subject: [PATCH 09/14] Hmmm.... --- test_tablib.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_tablib.py b/test_tablib.py index a315642..9c1941e 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -268,6 +268,11 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_csv_format_detect(self): + """Test format detection.""" + + pass + def test_wipe(self): """Purge a dataset.""" From eaa4de779391d9cbfc970cd418ccb36efdde122e Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:03:03 -0400 Subject: [PATCH 10/14] Auto-detectors operational. --- tablib/core.py | 10 ++++++++++ tablib/formats/__init__.py | 2 +- tablib/formats/_csv.py | 9 +++++++++ tablib/formats/_json.py | 9 +++++++++ tablib/formats/_yaml.py | 13 ++++++++++++- test_tablib.py | 37 +++++++++++++++++++++++++++++++++++-- 6 files changed, 76 insertions(+), 4 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index dfb8027..5b6b8ca 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -277,6 +277,16 @@ class Databook(object): return len(self._datasets) +def detect(stream): + """Return (format, stream) of given stream.""" + for fmt in formats: + try: + if fmt.detect(stream): + return (fmt, stream) + except AttributeError: + pass + return (None, stream) + class InvalidDatasetType(Exception): "Only Datasets can be added to a DataBook" diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index b22a959..69eada7 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -8,4 +8,4 @@ import _json as json import _xls as xls import _yaml as yaml -FORMATS = (csv, json, xls, yaml) +FORMATS = (json, xls, yaml, csv) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 8b19da7..27d2e0d 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -40,3 +40,12 @@ def import_set(dset, in_stream, headers=True): dset.headers = row else: dset.append(row) + + +def detect(stream): + """Returns True if given stream is valid CSV.""" + try: + rows = dialect = csv.Sniffer().sniff(stream) + return True + except csv.Error: + return False \ No newline at end of file diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py index 1f92b58..f7c88ee 100644 --- a/tablib/formats/_json.py +++ b/tablib/formats/_json.py @@ -36,3 +36,12 @@ def import_book(dbook, in_stream): data.title = sheet['title'] data.dict = sheet['data'] dbook.add_sheet(data) + + +def detect(stream): + """Returns True if given stream is valid JSON.""" + try: + json.loads(stream) + return True + except json.decoder.JSONDecodeError: + return False \ No newline at end of file diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index 4cac8aa..57d63d7 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -39,4 +39,15 @@ def import_book(dbook, in_stream): data = tablib.core.Dataset() data.title = sheet['title'] data.dict = sheet['data'] - dbook.add_sheet(data) \ No newline at end of file + dbook.add_sheet(data) + +def detect(stream): + """Returns True if given stream is valid YAML.""" + try: + _yaml = yaml.load(stream) + if isinstance(_yaml, (list, tuple, dict)): + return True + else: + return False + except yaml.parser.ParserError: + return False \ No newline at end of file diff --git a/test_tablib.py b/test_tablib.py index 9c1941e..3f8ee37 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -269,9 +269,42 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) def test_csv_format_detect(self): - """Test format detection.""" + """Test CSV format detection.""" - pass + _csv = ( + '1,2,3\n' + '4,5,6\n' + '7,8,9\n' + ) + _bunk = ( + '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.csv.detect(_csv)) + self.assertFalse(tablib.formats.csv.detect(_bunk)) + + def test_json_format_detect(self): + """Test JSON format detection.""" + + _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + _bunk = ( + '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.json.detect(_json)) + self.assertFalse(tablib.formats.json.detect(_bunk)) + + + def test_yaml_format_detect(self): + """Test YAML format detection.""" + + _yaml = '- {age: 90, first_name: John, last_name: Adams}' + _bunk = ( + '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.yaml.detect(_yaml)) + self.assertFalse(tablib.formats.yaml.detect(_bunk)) def test_wipe(self): From 187d12cffc5cb3b46f0ce6333cf0f5f407d3a949 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:09:44 -0400 Subject: [PATCH 11/14] Format Auto-detection in place. Test suite updated. --- tablib/__init__.py | 2 +- test_tablib.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tablib/__init__.py b/tablib/__init__.py index fadd8dd..e9bdf69 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -2,7 +2,7 @@ """ from tablib.core import ( - Databook, Dataset, InvalidDatasetType, + Databook, Dataset, detect, InvalidDatasetType, InvalidDimensions, UnsupportedFormat ) diff --git a/test_tablib.py b/test_tablib.py index 3f8ee37..4843117 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -307,6 +307,21 @@ class TablibTestCase(unittest.TestCase): self.assertFalse(tablib.formats.yaml.detect(_bunk)) + def test_auto_format_detect(self): + """Test auto format detection.""" + + _yaml = '- {age: 90, first_name: John, last_name: Adams}' + _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + _csv = '1,2,3\n4,5,6\n7,8,9\n' + _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + + self.assertEqual(tablib.detect(_yaml)[0], tablib.formats.yaml) + self.assertEqual(tablib.detect(_csv)[0], tablib.formats.csv) + self.assertEqual(tablib.detect(_json)[0], tablib.formats.json) + self.assertEqual(tablib.detect(_bunk)[0], None) + + + def test_wipe(self): """Purge a dataset.""" From fb59035f8d2718417a24ab7e141397d85cc13b12 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:35:10 -0400 Subject: [PATCH 12/14] Added tablib.import_set() and tested accordingly. --- tablib/__init__.py | 4 ++-- tablib/cli.py | 17 ++++++++++------- tablib/core.py | 13 +++++++++++++ test_tablib.py | 1 - 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/tablib/__init__.py b/tablib/__init__.py index e9bdf69..3f23850 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -2,7 +2,7 @@ """ from tablib.core import ( - Databook, Dataset, detect, InvalidDatasetType, - InvalidDimensions, UnsupportedFormat + Databook, Dataset, detect, import_set, + InvalidDatasetType, InvalidDimensions, UnsupportedFormat ) diff --git a/tablib/cli.py b/tablib/cli.py index 4e46eae..f182049 100644 --- a/tablib/cli.py +++ b/tablib/cli.py @@ -14,7 +14,7 @@ from helpers import Struct, piped -FORMATS = ('json', 'yaml', 'xls', 'csv') +FORMATS = [fmt.title for fmt in tablib.formats.FORMATS] opts = [] @@ -32,13 +32,16 @@ def start(in_file=None, out_file=None, **opts): opts = Struct(**opts) if opts.version: - print('Tabbed, Ver. %s' % tabbed.core.__version__) - sys.sys.exit(0) + print('Tabbed, Ver. %s' % tablib.core.__version__) + sys.exit(0) stdin = piped() if stdin: - print stdin + data = tablib.import_set(stdin) + print data.json + # test = tablib.Dataset() + # print test.yaml elif in_file: @@ -81,6 +84,6 @@ def start(in_file=None, out_file=None, **opts): # look for filename - print opts.__dict__ - print in_file - print out_file \ No newline at end of file + # print opts.__dict__ + # print in_file + # print out_file \ No newline at end of file diff --git a/tablib/core.py b/tablib/core.py index 5b6b8ca..a7a2963 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -286,7 +286,20 @@ def detect(stream): except AttributeError: pass return (None, stream) + + +def import_set(stream): + """Return dataset of given stream.""" + (format, stream) = detect(stream) + + try: + data = Dataset() + format.import_set(data, stream) + return data + except AttributeError, e: + return None + class InvalidDatasetType(Exception): "Only Datasets can be added to a DataBook" diff --git a/test_tablib.py b/test_tablib.py index 4843117..ebad061 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -321,7 +321,6 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(tablib.detect(_bunk)[0], None) - def test_wipe(self): """Purge a dataset.""" From 9427decdb035067dbc367c436e2706127242dbaa Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 28 Sep 2010 08:33:57 -0400 Subject: [PATCH 13/14] Changes. --- tablib/cli.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tablib/cli.py b/tablib/cli.py index f182049..6d773c9 100644 --- a/tablib/cli.py +++ b/tablib/cli.py @@ -39,23 +39,19 @@ def start(in_file=None, out_file=None, **opts): if stdin: data = tablib.import_set(stdin) - print data.json - # test = tablib.Dataset() - # print test.yaml elif in_file: try: - in_file = io.open(in_file, 'r') + in_stream =- io.open(in_file, 'r').read() except Exception, e: print(' %s cannot be read.' % in_file) sys.exit(65) - file_ext = in_file.name.split('.')[-1] - - if file_ext.lower() in FORMATS: - setattr(opts, file_ext, True) - else: + try: + tablib.import_set(in_stream) + except Exception, e: + raise e print('Import format not supported.') sys.exit(65) else: @@ -63,7 +59,6 @@ def start(in_file=None, out_file=None, **opts): sys.exit(65) - _formats_sum = sum(opts[f] for f in FORMATS) # Multiple output formats given From 06a394ea5ccf57600a12b0deca45a2f7e89b018b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 28 Sep 2010 09:01:34 -0400 Subject: [PATCH 14/14] typo in setup.py. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 0cdfb39..a5d2f33 100644 --- a/setup.py +++ b/setup.py @@ -24,8 +24,8 @@ setup( author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', - packages=['tablib'], - install_requires=['xlwt', 'simplejson', 'PyYAML', 'argue'], + packages=['tablib', 'tablib.formats'], + install_requires=['xlwt', 'simplejson', 'PyYAML'], license='MIT', classifiers=( 'Development Status :: 4 - Beta',