From 8feb6e8ddf7cecc26958654411b6bfc0aec08499 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:26:53 -0400 Subject: [PATCH 01/27] Added legacy cli interface. --- tablib/cli.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 tablib/cli.py diff --git a/tablib/cli.py b/tablib/cli.py new file mode 100644 index 0000000..4e46eae --- /dev/null +++ b/tablib/cli.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# encoding: utf-8 + +""" Tabbed CLI Inteface Application +""" + +import io +import sys + +import argue + +import tablib +from helpers import Struct, piped + + + +FORMATS = ('json', 'yaml', 'xls', 'csv') + +opts = [] + +opts.append(('v', 'version', False, 'Report tabbed version')) + +for format in FORMATS: + opts.append(('', format, False, 'Output to %s' % (format.upper()))) + + + +@argue.command(options=opts, usage='[FILE] [--FORMAT | FILE]') +def start(in_file=None, out_file=None, **opts): + """Covertly convert dataset formats""" + + opts = Struct(**opts) + + if opts.version: + print('Tabbed, Ver. %s' % tabbed.core.__version__) + sys.sys.exit(0) + + stdin = piped() + + if stdin: + print stdin + + elif in_file: + + try: + in_file = io.open(in_file, 'r') + except Exception, e: + print(' %s cannot be read.' % in_file) + sys.exit(65) + + file_ext = in_file.name.split('.')[-1] + + if file_ext.lower() in FORMATS: + setattr(opts, file_ext, True) + else: + print('Import format not supported.') + sys.exit(65) + else: + print('Please provide input.') + sys.exit(65) + + + + _formats_sum = sum(opts[f] for f in FORMATS) + + # Multiple output formats given + if _formats_sum > 1: + print('Please specify a single output format.') + sys.exit(64) + + # No output formats given + elif _formats_sum < 1: + print('Please specify an output format.') + sys.exit(64) + + + # fetch options.formats list + # if sum(()) > 1 + # log only one data format please + # if sum of formats == 0, specity format + + # look for filename + + print opts.__dict__ + print in_file + print out_file \ No newline at end of file From 8c402da729598979322f13cab9fb4dd63a1000a5 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:27:04 -0400 Subject: [PATCH 02/27] Added entrance point, setup.py updates. --- setup.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 0c95cb4..8b5e5a9 100644 --- a/setup.py +++ b/setup.py @@ -25,7 +25,7 @@ setup( author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', packages=['tablib'], - install_requires=['xlwt', 'simplejson', 'PyYAML'], + install_requires=['xlwt', 'simplejson', 'PyYAML', 'argue'], license='MIT', classifiers=( 'Development Status :: 4 - Beta', @@ -35,9 +35,9 @@ setup( 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', ), - # entry_points={ - # 'console_scripts': [ - # 'tabbed = tablib.cli:start', - # ], - # } + entry_points={ + 'console_scripts': [ + 'tabbed = tablib.cli:start', + ], + } ) From 3a9c3944cfbd9aeaf8fd81acd33f89768bc864f4 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:27:53 -0400 Subject: [PATCH 03/27] Added runner (for testing). --- tabbed | 1 + 1 file changed, 1 insertion(+) create mode 160000 tabbed diff --git a/tabbed b/tabbed new file mode 160000 index 0000000..28a7222 --- /dev/null +++ b/tabbed @@ -0,0 +1 @@ +Subproject commit 28a722239b883a11d0067488e6cd765a945d1dd2 From 392eaac299e84b7f4dc0104765080933e89b1f5b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:28:46 -0400 Subject: [PATCH 04/27] tabbed runner --- tabbed | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) mode change 160000 => 100755 tabbed diff --git a/tabbed b/tabbed deleted file mode 160000 index 28a7222..0000000 --- a/tabbed +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 28a722239b883a11d0067488e6cd765a945d1dd2 diff --git a/tabbed b/tabbed new file mode 100755 index 0000000..c30ec36 --- /dev/null +++ b/tabbed @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Tabbed -- CLI for Tablib +Copyright (c) 2010 Kenneth Reitz. MIT License. +""" + +import tablib.cli + + +if __name__ == '__main__': + + tablib.cli.start() \ No newline at end of file From de46f45e2e13d0f689d895236c6bdcb0656593b2 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:36:20 -0400 Subject: [PATCH 05/27] Hmmm.... --- test_tablib.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_tablib.py b/test_tablib.py index 67b693d..e50b767 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -261,6 +261,11 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_csv_format_detect(self): + """Test format detection.""" + + pass + def test_wipe(self): """Purge a dataset.""" From 3fc898e222c6215c46711d36677f70b8df33eca3 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:03:03 -0400 Subject: [PATCH 06/27] Auto-detectors operational. --- tablib/core.py | 10 ++++++++++ tablib/formats/__init__.py | 2 +- tablib/formats/_csv.py | 9 +++++++++ tablib/formats/_json.py | 9 +++++++++ tablib/formats/_yaml.py | 13 ++++++++++++- test_tablib.py | 37 +++++++++++++++++++++++++++++++++++-- 6 files changed, 76 insertions(+), 4 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index cdaf0d5..93ca725 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -268,6 +268,16 @@ class Databook(object): return len(self._datasets) +def detect(stream): + """Return (format, stream) of given stream.""" + for fmt in formats: + try: + if fmt.detect(stream): + return (fmt, stream) + except AttributeError: + pass + return (None, stream) + class InvalidDatasetType(Exception): "Only Datasets can be added to a DataBook" diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index b22a959..69eada7 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -8,4 +8,4 @@ import _json as json import _xls as xls import _yaml as yaml -FORMATS = (csv, json, xls, yaml) +FORMATS = (json, xls, yaml, csv) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 8b19da7..27d2e0d 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -40,3 +40,12 @@ def import_set(dset, in_stream, headers=True): dset.headers = row else: dset.append(row) + + +def detect(stream): + """Returns True if given stream is valid CSV.""" + try: + rows = dialect = csv.Sniffer().sniff(stream) + return True + except csv.Error: + return False \ No newline at end of file diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py index 1f92b58..f7c88ee 100644 --- a/tablib/formats/_json.py +++ b/tablib/formats/_json.py @@ -36,3 +36,12 @@ def import_book(dbook, in_stream): data.title = sheet['title'] data.dict = sheet['data'] dbook.add_sheet(data) + + +def detect(stream): + """Returns True if given stream is valid JSON.""" + try: + json.loads(stream) + return True + except json.decoder.JSONDecodeError: + return False \ No newline at end of file diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index 4cac8aa..57d63d7 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -39,4 +39,15 @@ def import_book(dbook, in_stream): data = tablib.core.Dataset() data.title = sheet['title'] data.dict = sheet['data'] - dbook.add_sheet(data) \ No newline at end of file + dbook.add_sheet(data) + +def detect(stream): + """Returns True if given stream is valid YAML.""" + try: + _yaml = yaml.load(stream) + if isinstance(_yaml, (list, tuple, dict)): + return True + else: + return False + except yaml.parser.ParserError: + return False \ No newline at end of file diff --git a/test_tablib.py b/test_tablib.py index e50b767..1091390 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -262,9 +262,42 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) def test_csv_format_detect(self): - """Test format detection.""" + """Test CSV format detection.""" - pass + _csv = ( + '1,2,3\n' + '4,5,6\n' + '7,8,9\n' + ) + _bunk = ( + '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.csv.detect(_csv)) + self.assertFalse(tablib.formats.csv.detect(_bunk)) + + def test_json_format_detect(self): + """Test JSON format detection.""" + + _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + _bunk = ( + '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.json.detect(_json)) + self.assertFalse(tablib.formats.json.detect(_bunk)) + + + def test_yaml_format_detect(self): + """Test YAML format detection.""" + + _yaml = '- {age: 90, first_name: John, last_name: Adams}' + _bunk = ( + '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.yaml.detect(_yaml)) + self.assertFalse(tablib.formats.yaml.detect(_bunk)) def test_wipe(self): From 7f2f925ddb7d47192b79204ccf80e8a9c42f1601 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:09:44 -0400 Subject: [PATCH 07/27] Format Auto-detection in place. Test suite updated. --- tablib/__init__.py | 2 +- test_tablib.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tablib/__init__.py b/tablib/__init__.py index fadd8dd..e9bdf69 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -2,7 +2,7 @@ """ from tablib.core import ( - Databook, Dataset, InvalidDatasetType, + Databook, Dataset, detect, InvalidDatasetType, InvalidDimensions, UnsupportedFormat ) diff --git a/test_tablib.py b/test_tablib.py index 1091390..df9f7bd 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -300,6 +300,21 @@ class TablibTestCase(unittest.TestCase): self.assertFalse(tablib.formats.yaml.detect(_bunk)) + def test_auto_format_detect(self): + """Test auto format detection.""" + + _yaml = '- {age: 90, first_name: John, last_name: Adams}' + _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + _csv = '1,2,3\n4,5,6\n7,8,9\n' + _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + + self.assertEqual(tablib.detect(_yaml)[0], tablib.formats.yaml) + self.assertEqual(tablib.detect(_csv)[0], tablib.formats.csv) + self.assertEqual(tablib.detect(_json)[0], tablib.formats.json) + self.assertEqual(tablib.detect(_bunk)[0], None) + + + def test_wipe(self): """Purge a dataset.""" From a310ab7a09e2444d146877d27e0bba6872866b8b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:35:10 -0400 Subject: [PATCH 08/27] Added tablib.import_set() and tested accordingly. --- tablib/__init__.py | 4 ++-- tablib/cli.py | 17 ++++++++++------- tablib/core.py | 13 +++++++++++++ test_tablib.py | 1 - 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/tablib/__init__.py b/tablib/__init__.py index e9bdf69..3f23850 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -2,7 +2,7 @@ """ from tablib.core import ( - Databook, Dataset, detect, InvalidDatasetType, - InvalidDimensions, UnsupportedFormat + Databook, Dataset, detect, import_set, + InvalidDatasetType, InvalidDimensions, UnsupportedFormat ) diff --git a/tablib/cli.py b/tablib/cli.py index 4e46eae..f182049 100644 --- a/tablib/cli.py +++ b/tablib/cli.py @@ -14,7 +14,7 @@ from helpers import Struct, piped -FORMATS = ('json', 'yaml', 'xls', 'csv') +FORMATS = [fmt.title for fmt in tablib.formats.FORMATS] opts = [] @@ -32,13 +32,16 @@ def start(in_file=None, out_file=None, **opts): opts = Struct(**opts) if opts.version: - print('Tabbed, Ver. %s' % tabbed.core.__version__) - sys.sys.exit(0) + print('Tabbed, Ver. %s' % tablib.core.__version__) + sys.exit(0) stdin = piped() if stdin: - print stdin + data = tablib.import_set(stdin) + print data.json + # test = tablib.Dataset() + # print test.yaml elif in_file: @@ -81,6 +84,6 @@ def start(in_file=None, out_file=None, **opts): # look for filename - print opts.__dict__ - print in_file - print out_file \ No newline at end of file + # print opts.__dict__ + # print in_file + # print out_file \ No newline at end of file diff --git a/tablib/core.py b/tablib/core.py index 93ca725..5632e69 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -277,7 +277,20 @@ def detect(stream): except AttributeError: pass return (None, stream) + + +def import_set(stream): + """Return dataset of given stream.""" + (format, stream) = detect(stream) + + try: + data = Dataset() + format.import_set(data, stream) + return data + except AttributeError, e: + return None + class InvalidDatasetType(Exception): "Only Datasets can be added to a DataBook" diff --git a/test_tablib.py b/test_tablib.py index df9f7bd..9e50e6f 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -314,7 +314,6 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(tablib.detect(_bunk)[0], None) - def test_wipe(self): """Purge a dataset.""" From f58d4b67dc22570fe69338ddf841b76569872ad8 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 28 Sep 2010 08:33:57 -0400 Subject: [PATCH 09/27] Changes. --- tablib/cli.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tablib/cli.py b/tablib/cli.py index f182049..6d773c9 100644 --- a/tablib/cli.py +++ b/tablib/cli.py @@ -39,23 +39,19 @@ def start(in_file=None, out_file=None, **opts): if stdin: data = tablib.import_set(stdin) - print data.json - # test = tablib.Dataset() - # print test.yaml elif in_file: try: - in_file = io.open(in_file, 'r') + in_stream =- io.open(in_file, 'r').read() except Exception, e: print(' %s cannot be read.' % in_file) sys.exit(65) - file_ext = in_file.name.split('.')[-1] - - if file_ext.lower() in FORMATS: - setattr(opts, file_ext, True) - else: + try: + tablib.import_set(in_stream) + except Exception, e: + raise e print('Import format not supported.') sys.exit(65) else: @@ -63,7 +59,6 @@ def start(in_file=None, out_file=None, **opts): sys.exit(65) - _formats_sum = sum(opts[f] for f in FORMATS) # Multiple output formats given From d85523b6a6dc5452105a887f3d5fe60534b57332 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 28 Sep 2010 09:01:34 -0400 Subject: [PATCH 10/27] typo in setup.py. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index f0179bb..a5d2f33 100644 --- a/setup.py +++ b/setup.py @@ -17,14 +17,14 @@ if sys.argv[-1] == "publish": setup( name='tablib', - version='0.8.0', + version='0.8.1', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', - packages=['tablib'm 'tablib.formats'], + packages=['tablib', 'tablib.formats'], install_requires=['xlwt', 'simplejson', 'PyYAML'], license='MIT', classifiers=( From 2a7aa959b35cdc535727a27147cae7721db47014 Mon Sep 17 00:00:00 2001 From: Josh Ourisman Date: Fri, 1 Oct 2010 14:51:36 -0400 Subject: [PATCH 11/27] modified .gitignore to actually ignore .pyc files --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 91c9479..68f4c75 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ dist/* MANIFEST # python skin -.pyc +*.pyc .pyo # osx noise From 9f7fec23793b25c7aeffcee4186d6009dc816cca Mon Sep 17 00:00:00 2001 From: Josh Ourisman Date: Fri, 1 Oct 2010 15:27:28 -0400 Subject: [PATCH 12/27] changing syntax of checking for row and col values in append(); slightly more robust this way --- tablib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index ddb9769..8f74975 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -173,10 +173,10 @@ class Dataset(object): def append(self, row=None, col=None): """Adds a row to the end of Dataset""" - if row: + if row is not None: self._validate(row) self._data.append(tuple(row)) - elif col: + elif col is not None: self._validate(col=col) if self.headers: From 149bafa97b3c93b5116090d2834f0c19e6face95 Mon Sep 17 00:00:00 2001 From: Josh Ourisman Date: Fri, 1 Oct 2010 16:17:04 -0400 Subject: [PATCH 13/27] added ability to append new column passing a callable as the value that will be applied to every row; w/ test --- tablib/core.py | 9 +++++++++ test_tablib.py | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/tablib/core.py b/tablib/core.py index 8f74975..dfb8027 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -177,6 +177,15 @@ class Dataset(object): self._validate(row) self._data.append(tuple(row)) elif col is not None: + col = list(col) + if self.headers: + header = [col.pop(0)] + else: + header = [] + if len(col) == 1 and callable(col[0]): + col = map(col[0], self._data) + col = tuple(header + col) + self._validate(col=col) if self.headers: diff --git a/test_tablib.py b/test_tablib.py index 67b693d..a315642 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -102,6 +102,13 @@ class TablibTestCase(unittest.TestCase): self.assertRaises(tablib.InvalidDimensions, data.append, col=new_col) + def test_add_callable_column(self): + """Verify adding column with values specified as callable.""" + new_col = ['first_again', lambda x: x[0]] + self.founders.append(col=new_col) + + self.assertTrue(map(lambda x: x[0] == x[-1], self.founders)) + def test_header_slicing(self): """Verify slicing by headers.""" From 22fe18239fcf6d0886ed95979c6d7352951539b4 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:26:53 -0400 Subject: [PATCH 14/27] Added legacy cli interface. --- tablib/cli.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 tablib/cli.py diff --git a/tablib/cli.py b/tablib/cli.py new file mode 100644 index 0000000..4e46eae --- /dev/null +++ b/tablib/cli.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# encoding: utf-8 + +""" Tabbed CLI Inteface Application +""" + +import io +import sys + +import argue + +import tablib +from helpers import Struct, piped + + + +FORMATS = ('json', 'yaml', 'xls', 'csv') + +opts = [] + +opts.append(('v', 'version', False, 'Report tabbed version')) + +for format in FORMATS: + opts.append(('', format, False, 'Output to %s' % (format.upper()))) + + + +@argue.command(options=opts, usage='[FILE] [--FORMAT | FILE]') +def start(in_file=None, out_file=None, **opts): + """Covertly convert dataset formats""" + + opts = Struct(**opts) + + if opts.version: + print('Tabbed, Ver. %s' % tabbed.core.__version__) + sys.sys.exit(0) + + stdin = piped() + + if stdin: + print stdin + + elif in_file: + + try: + in_file = io.open(in_file, 'r') + except Exception, e: + print(' %s cannot be read.' % in_file) + sys.exit(65) + + file_ext = in_file.name.split('.')[-1] + + if file_ext.lower() in FORMATS: + setattr(opts, file_ext, True) + else: + print('Import format not supported.') + sys.exit(65) + else: + print('Please provide input.') + sys.exit(65) + + + + _formats_sum = sum(opts[f] for f in FORMATS) + + # Multiple output formats given + if _formats_sum > 1: + print('Please specify a single output format.') + sys.exit(64) + + # No output formats given + elif _formats_sum < 1: + print('Please specify an output format.') + sys.exit(64) + + + # fetch options.formats list + # if sum(()) > 1 + # log only one data format please + # if sum of formats == 0, specity format + + # look for filename + + print opts.__dict__ + print in_file + print out_file \ No newline at end of file From 25f846a78a2ba313632839530b1823e94d30320c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:27:04 -0400 Subject: [PATCH 15/27] Added entrance point, setup.py updates. --- setup.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 920c148..0cdfb39 100644 --- a/setup.py +++ b/setup.py @@ -24,8 +24,8 @@ setup( author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', - packages=['tablib', 'tablib.formats'], - install_requires=['xlwt', 'simplejson', 'PyYAML'], + packages=['tablib'], + install_requires=['xlwt', 'simplejson', 'PyYAML', 'argue'], license='MIT', classifiers=( 'Development Status :: 4 - Beta', @@ -35,9 +35,9 @@ setup( 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', ), - # entry_points={ - # 'console_scripts': [ - # 'tabbed = tablib.cli:start', - # ], - # } + entry_points={ + 'console_scripts': [ + 'tabbed = tablib.cli:start', + ], + } ) From b369baba409cfbd924c24e6afa3d0f17c5bdc8e5 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:27:53 -0400 Subject: [PATCH 16/27] Added runner (for testing). --- tabbed | 1 + 1 file changed, 1 insertion(+) create mode 160000 tabbed diff --git a/tabbed b/tabbed new file mode 160000 index 0000000..28a7222 --- /dev/null +++ b/tabbed @@ -0,0 +1 @@ +Subproject commit 28a722239b883a11d0067488e6cd765a945d1dd2 From 96668bb393f1dbab54e7a48a8093f7f744c92de8 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:28:46 -0400 Subject: [PATCH 17/27] tabbed runner --- tabbed | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) mode change 160000 => 100755 tabbed diff --git a/tabbed b/tabbed deleted file mode 160000 index 28a7222..0000000 --- a/tabbed +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 28a722239b883a11d0067488e6cd765a945d1dd2 diff --git a/tabbed b/tabbed new file mode 100755 index 0000000..c30ec36 --- /dev/null +++ b/tabbed @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +""" +Tabbed -- CLI for Tablib +Copyright (c) 2010 Kenneth Reitz. MIT License. +""" + +import tablib.cli + + +if __name__ == '__main__': + + tablib.cli.start() \ No newline at end of file From d479c5735ad5875fc2a953abd1c20654ddc83aef Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 17:36:20 -0400 Subject: [PATCH 18/27] Hmmm.... --- test_tablib.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_tablib.py b/test_tablib.py index a315642..9c1941e 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -268,6 +268,11 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_csv_format_detect(self): + """Test format detection.""" + + pass + def test_wipe(self): """Purge a dataset.""" From eaa4de779391d9cbfc970cd418ccb36efdde122e Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:03:03 -0400 Subject: [PATCH 19/27] Auto-detectors operational. --- tablib/core.py | 10 ++++++++++ tablib/formats/__init__.py | 2 +- tablib/formats/_csv.py | 9 +++++++++ tablib/formats/_json.py | 9 +++++++++ tablib/formats/_yaml.py | 13 ++++++++++++- test_tablib.py | 37 +++++++++++++++++++++++++++++++++++-- 6 files changed, 76 insertions(+), 4 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index dfb8027..5b6b8ca 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -277,6 +277,16 @@ class Databook(object): return len(self._datasets) +def detect(stream): + """Return (format, stream) of given stream.""" + for fmt in formats: + try: + if fmt.detect(stream): + return (fmt, stream) + except AttributeError: + pass + return (None, stream) + class InvalidDatasetType(Exception): "Only Datasets can be added to a DataBook" diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index b22a959..69eada7 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -8,4 +8,4 @@ import _json as json import _xls as xls import _yaml as yaml -FORMATS = (csv, json, xls, yaml) +FORMATS = (json, xls, yaml, csv) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 8b19da7..27d2e0d 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -40,3 +40,12 @@ def import_set(dset, in_stream, headers=True): dset.headers = row else: dset.append(row) + + +def detect(stream): + """Returns True if given stream is valid CSV.""" + try: + rows = dialect = csv.Sniffer().sniff(stream) + return True + except csv.Error: + return False \ No newline at end of file diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py index 1f92b58..f7c88ee 100644 --- a/tablib/formats/_json.py +++ b/tablib/formats/_json.py @@ -36,3 +36,12 @@ def import_book(dbook, in_stream): data.title = sheet['title'] data.dict = sheet['data'] dbook.add_sheet(data) + + +def detect(stream): + """Returns True if given stream is valid JSON.""" + try: + json.loads(stream) + return True + except json.decoder.JSONDecodeError: + return False \ No newline at end of file diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index 4cac8aa..57d63d7 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -39,4 +39,15 @@ def import_book(dbook, in_stream): data = tablib.core.Dataset() data.title = sheet['title'] data.dict = sheet['data'] - dbook.add_sheet(data) \ No newline at end of file + dbook.add_sheet(data) + +def detect(stream): + """Returns True if given stream is valid YAML.""" + try: + _yaml = yaml.load(stream) + if isinstance(_yaml, (list, tuple, dict)): + return True + else: + return False + except yaml.parser.ParserError: + return False \ No newline at end of file diff --git a/test_tablib.py b/test_tablib.py index 9c1941e..3f8ee37 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -269,9 +269,42 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) def test_csv_format_detect(self): - """Test format detection.""" + """Test CSV format detection.""" - pass + _csv = ( + '1,2,3\n' + '4,5,6\n' + '7,8,9\n' + ) + _bunk = ( + '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.csv.detect(_csv)) + self.assertFalse(tablib.formats.csv.detect(_bunk)) + + def test_json_format_detect(self): + """Test JSON format detection.""" + + _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + _bunk = ( + '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.json.detect(_json)) + self.assertFalse(tablib.formats.json.detect(_bunk)) + + + def test_yaml_format_detect(self): + """Test YAML format detection.""" + + _yaml = '- {age: 90, first_name: John, last_name: Adams}' + _bunk = ( + '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + + self.assertTrue(tablib.formats.yaml.detect(_yaml)) + self.assertFalse(tablib.formats.yaml.detect(_bunk)) def test_wipe(self): From 187d12cffc5cb3b46f0ce6333cf0f5f407d3a949 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:09:44 -0400 Subject: [PATCH 20/27] Format Auto-detection in place. Test suite updated. --- tablib/__init__.py | 2 +- test_tablib.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tablib/__init__.py b/tablib/__init__.py index fadd8dd..e9bdf69 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -2,7 +2,7 @@ """ from tablib.core import ( - Databook, Dataset, InvalidDatasetType, + Databook, Dataset, detect, InvalidDatasetType, InvalidDimensions, UnsupportedFormat ) diff --git a/test_tablib.py b/test_tablib.py index 3f8ee37..4843117 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -307,6 +307,21 @@ class TablibTestCase(unittest.TestCase): self.assertFalse(tablib.formats.yaml.detect(_bunk)) + def test_auto_format_detect(self): + """Test auto format detection.""" + + _yaml = '- {age: 90, first_name: John, last_name: Adams}' + _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + _csv = '1,2,3\n4,5,6\n7,8,9\n' + _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + + self.assertEqual(tablib.detect(_yaml)[0], tablib.formats.yaml) + self.assertEqual(tablib.detect(_csv)[0], tablib.formats.csv) + self.assertEqual(tablib.detect(_json)[0], tablib.formats.json) + self.assertEqual(tablib.detect(_bunk)[0], None) + + + def test_wipe(self): """Purge a dataset.""" From fb59035f8d2718417a24ab7e141397d85cc13b12 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sat, 25 Sep 2010 18:35:10 -0400 Subject: [PATCH 21/27] Added tablib.import_set() and tested accordingly. --- tablib/__init__.py | 4 ++-- tablib/cli.py | 17 ++++++++++------- tablib/core.py | 13 +++++++++++++ test_tablib.py | 1 - 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/tablib/__init__.py b/tablib/__init__.py index e9bdf69..3f23850 100644 --- a/tablib/__init__.py +++ b/tablib/__init__.py @@ -2,7 +2,7 @@ """ from tablib.core import ( - Databook, Dataset, detect, InvalidDatasetType, - InvalidDimensions, UnsupportedFormat + Databook, Dataset, detect, import_set, + InvalidDatasetType, InvalidDimensions, UnsupportedFormat ) diff --git a/tablib/cli.py b/tablib/cli.py index 4e46eae..f182049 100644 --- a/tablib/cli.py +++ b/tablib/cli.py @@ -14,7 +14,7 @@ from helpers import Struct, piped -FORMATS = ('json', 'yaml', 'xls', 'csv') +FORMATS = [fmt.title for fmt in tablib.formats.FORMATS] opts = [] @@ -32,13 +32,16 @@ def start(in_file=None, out_file=None, **opts): opts = Struct(**opts) if opts.version: - print('Tabbed, Ver. %s' % tabbed.core.__version__) - sys.sys.exit(0) + print('Tabbed, Ver. %s' % tablib.core.__version__) + sys.exit(0) stdin = piped() if stdin: - print stdin + data = tablib.import_set(stdin) + print data.json + # test = tablib.Dataset() + # print test.yaml elif in_file: @@ -81,6 +84,6 @@ def start(in_file=None, out_file=None, **opts): # look for filename - print opts.__dict__ - print in_file - print out_file \ No newline at end of file + # print opts.__dict__ + # print in_file + # print out_file \ No newline at end of file diff --git a/tablib/core.py b/tablib/core.py index 5b6b8ca..a7a2963 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -286,7 +286,20 @@ def detect(stream): except AttributeError: pass return (None, stream) + + +def import_set(stream): + """Return dataset of given stream.""" + (format, stream) = detect(stream) + + try: + data = Dataset() + format.import_set(data, stream) + return data + except AttributeError, e: + return None + class InvalidDatasetType(Exception): "Only Datasets can be added to a DataBook" diff --git a/test_tablib.py b/test_tablib.py index 4843117..ebad061 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -321,7 +321,6 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(tablib.detect(_bunk)[0], None) - def test_wipe(self): """Purge a dataset.""" From 9427decdb035067dbc367c436e2706127242dbaa Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 28 Sep 2010 08:33:57 -0400 Subject: [PATCH 22/27] Changes. --- tablib/cli.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tablib/cli.py b/tablib/cli.py index f182049..6d773c9 100644 --- a/tablib/cli.py +++ b/tablib/cli.py @@ -39,23 +39,19 @@ def start(in_file=None, out_file=None, **opts): if stdin: data = tablib.import_set(stdin) - print data.json - # test = tablib.Dataset() - # print test.yaml elif in_file: try: - in_file = io.open(in_file, 'r') + in_stream =- io.open(in_file, 'r').read() except Exception, e: print(' %s cannot be read.' % in_file) sys.exit(65) - file_ext = in_file.name.split('.')[-1] - - if file_ext.lower() in FORMATS: - setattr(opts, file_ext, True) - else: + try: + tablib.import_set(in_stream) + except Exception, e: + raise e print('Import format not supported.') sys.exit(65) else: @@ -63,7 +59,6 @@ def start(in_file=None, out_file=None, **opts): sys.exit(65) - _formats_sum = sum(opts[f] for f in FORMATS) # Multiple output formats given From 06a394ea5ccf57600a12b0deca45a2f7e89b018b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Tue, 28 Sep 2010 09:01:34 -0400 Subject: [PATCH 23/27] typo in setup.py. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 0cdfb39..a5d2f33 100644 --- a/setup.py +++ b/setup.py @@ -24,8 +24,8 @@ setup( author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://github.com/kennethreitz/tablib', - packages=['tablib'], - install_requires=['xlwt', 'simplejson', 'PyYAML', 'argue'], + packages=['tablib', 'tablib.formats'], + install_requires=['xlwt', 'simplejson', 'PyYAML'], license='MIT', classifiers=( 'Development Status :: 4 - Beta', From bfe70066b8a4a635de3c4ef9cdbcb51cace8152a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Fri, 1 Oct 2010 18:44:50 -0400 Subject: [PATCH 24/27] Added Josh Ourisman to authors --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 3d48743..fa467c3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,4 +10,5 @@ Development Lead Patches and Suggestions ``````````````````````` -- Luke Lee \ No newline at end of file +- Luke Lee +- Josh Ourisman \ No newline at end of file From c4edaa2ca856c99f56a5a12ae00ca7f2c432ef10 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 4 Oct 2010 11:55:17 -0400 Subject: [PATCH 25/27] Appended history. --- HISTORY.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index d380034..350cb6f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,7 +1,14 @@ History ======= -0.8.2 (2010-09-28) +0.8.3 (2010-10-04) +------------------ + +* Ability to append new column passing a callable + as the value that will be applied to every row. + + +0.8.2 (2010-10-04) ------------------ * Added alignment wrapping to written cells. * Added separator support to XLS. From 41a7a5d32991c80899a3a900cbb63ffd4c577126 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 4 Oct 2010 11:55:26 -0400 Subject: [PATCH 26/27] No cli app at this time. --- setup.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 1f55202..a297169 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ if sys.argv[-1] == "publish": setup( name='tablib', - version='0.8.2', + version='0.8.3', description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)', long_description=open('README.rst').read() + '\n\n' + open('HISTORY.rst').read(), @@ -35,9 +35,9 @@ setup( 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', ), - entry_points={ - 'console_scripts': [ - 'tabbed = tablib.cli:start', - ], - } + # entry_points={ + # 'console_scripts': [ + # 'tabbed = tablib.cli:start', + # ], + # } ) From 1ea793112cdbc7328e37138e12aa0bfa8cedd9ea Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 4 Oct 2010 11:55:35 -0400 Subject: [PATCH 27/27] Version Bump (v0.8.3) --- tablib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 90ef2b0..69e7549 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -7,8 +7,8 @@ from tablib.formats import FORMATS as formats __title__ = 'tablib' -__version__ = '0.8.1' -__build__ = 0x000801 +__version__ = '0.8.3' +__build__ = 0x000803 __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz'