From 70716fdd216755dc4b542df74e95b0e5ac74f0ee Mon Sep 17 00:00:00 2001 From: papisz Date: Wed, 9 Apr 2014 22:35:56 +0200 Subject: [PATCH 01/34] CSV custom delimiter support --- tablib/core.py | 2 ++ tablib/formats/_csv.py | 20 ++++++++--------- tablib/formats/_tsv.py | 49 +++++++++--------------------------------- test_tablib.py | 24 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 49 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 86f75b2..9953ffa 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -256,6 +256,8 @@ class Dataset(object): setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set)) except AttributeError: setattr(cls, fmt.title, property(fmt.export_set)) + setattr(cls, 'set_%s' % fmt.title, fmt.import_set) + setattr(cls, 'get_%s' % fmt.title, fmt.export_set) except AttributeError: pass diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index c5d3202..7deec23 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -""" Tablib - CSV Support. +""" Tablib - *SV Support. 
""" from tablib.compat import is_py3, csv, StringIO @@ -11,17 +11,17 @@ extensions = ('csv',) DEFAULT_ENCODING = 'utf-8' +DEFAULT_DELIMITER = ',' - -def export_set(dataset): +def export_set(dataset, delimiter=DEFAULT_DELIMITER): """Returns CSV representation of Dataset.""" stream = StringIO() if is_py3: - _csv = csv.writer(stream) + _csv = csv.writer(stream, delimiter=delimiter) else: - _csv = csv.writer(stream, encoding=DEFAULT_ENCODING) + _csv = csv.writer(stream, delimiter=delimiter, encoding=DEFAULT_ENCODING) for row in dataset._package(dicts=False): _csv.writerow(row) @@ -29,15 +29,15 @@ def export_set(dataset): return stream.getvalue() -def import_set(dset, in_stream, headers=True): +def import_set(dset, in_stream, headers=True, delimiter=DEFAULT_DELIMITER): """Returns dataset from CSV stream.""" dset.wipe() if is_py3: - rows = csv.reader(in_stream.splitlines()) + rows = csv.reader(in_stream.splitlines(), delimiter=delimiter) else: - rows = csv.reader(in_stream.splitlines(), encoding=DEFAULT_ENCODING) + rows = csv.reader(in_stream.splitlines(), delimiter=delimiter, encoding=DEFAULT_ENCODING) for i, row in enumerate(rows): if (i == 0) and (headers): @@ -46,10 +46,10 @@ def import_set(dset, in_stream, headers=True): dset.append(row) -def detect(stream): +def detect(stream, delimiter=DEFAULT_DELIMITER): """Returns True if given stream is valid CSV.""" try: - csv.Sniffer().sniff(stream, delimiters=',') + csv.Sniffer().sniff(stream, delimiters=delimiter) return True except (csv.Error, TypeError): return False diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 8ef2b67..9380b3b 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -3,57 +3,28 @@ """ Tablib - TSV (Tab Separated Values) Support. 
""" -from tablib.compat import is_py3, csv, StringIO - - +from tablib.formats._csv import ( + export_set as export_set_wrapper, + import_set as import_set_wrapper, + detect as detect_wrapper, +) title = 'tsv' extensions = ('tsv',) DEFAULT_ENCODING = 'utf-8' +DELIMITER = '\t' def export_set(dataset): - """Returns a TSV representation of Dataset.""" - - stream = StringIO() - - if is_py3: - _tsv = csv.writer(stream, delimiter='\t') - else: - _tsv = csv.writer(stream, encoding=DEFAULT_ENCODING, delimiter='\t') - - for row in dataset._package(dicts=False): - _tsv.writerow(row) - - return stream.getvalue() + """Returns TSV representation of Dataset.""" + return export_set_wrapper(dataset, delimiter=DELIMITER) def import_set(dset, in_stream, headers=True): """Returns dataset from TSV stream.""" - - dset.wipe() - - if is_py3: - rows = csv.reader(in_stream.splitlines(), delimiter='\t') - else: - rows = csv.reader(in_stream.splitlines(), delimiter='\t', - encoding=DEFAULT_ENCODING) - - for i, row in enumerate(rows): - # Skip empty rows - if not row: - continue - - if (i == 0) and (headers): - dset.headers = row - else: - dset.append(row) + return import_set_wrapper(dset, in_stream, headers=headers, delimiter=DELIMITER) def detect(stream): """Returns True if given stream is valid TSV.""" - try: - csv.Sniffer().sniff(stream, delimiters='\t') - return True - except (csv.Error, TypeError): - return False + return detect_wrapper(stream, delimiter=DELIMITER) diff --git a/test_tablib.py b/test_tablib.py index 6125408..1c160d2 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -401,6 +401,17 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_csv_import_set_semicolons(self): + """Test for proper output with semicolon separated CSV.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _csv = data.get_csv(delimiter=';') + + data.set_csv(_csv, delimiter=';') + + self.assertEqual(_csv, data.get_csv(delimiter=';')) 
def test_csv_import_set_with_spaces(self): """Generate and import CSV set serialization when row values have @@ -415,6 +426,19 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_csv_import_set_semicolon_with_spaces(self): + """Generate and import semicolon separated CSV set serialization when row values have + spaces.""" + data.append(('Bill Gates', 'Microsoft')) + data.append(('Steve Jobs', 'Apple')) + data.headers = ('Name', 'Company') + + _csv = data.get_csv(delimiter=';') + + data.set_csv(_csv, delimiter=';') + + self.assertEqual(_csv, data.get_csv(delimiter=';')) + def test_tsv_import_set(self): """Generate and import TSV set serialization.""" From 84e7e251aec1d0ef8b3d45376ed91c49bf2c2f06 Mon Sep 17 00:00:00 2001 From: Iuri de Silvio Date: Tue, 27 May 2014 19:25:15 -0300 Subject: [PATCH 02/34] Separate py2 and py3 packages to avoid installation errors. Fix #151 --- setup.py | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index a6eeb1e..648f7eb 100755 --- a/setup.py +++ b/setup.py @@ -36,6 +36,34 @@ if sys.argv[-1] == 'test': errors = os.system('py.test test_tablib.py') sys.exit(bool(errors)) +packages = [ + 'tablib.packages.omnijson', + 'tablib.packages.unicodecsv' +] +if sys.version_info[0] == 2: + packages.extend([ + 'tablib.packages.xlwt', + 'tablib.packages.xlrd', + 'tablib.packages.odf', + 'tablib.packages.openpyxl', + 'tablib.packages.openpyxl.shared', + 'tablib.packages.openpyxl.reader', + 'tablib.packages.openpyxl.writer', + 'tablib.packages.yaml', + ]) +else: + packages.extend([ + 'tablib.packages.xlwt3', + 'tablib.packages.xlrd3', + 'tablib.packages.odf3', + 'tablib.packages.openpyxl3', + 'tablib.packages.openpyxl3.shared', + 'tablib.packages.openpyxl3.reader', + 'tablib.packages.openpyxl3.writer', + 'tablib.packages.yaml3', + ]) + + setup( name='tablib', version=tablib.__version__, @@ -45,28 +73,7 @@ setup( author='Kenneth 
Reitz', author_email='me@kennethreitz.org', url='http://python-tablib.org', - packages=[ - 'tablib', 'tablib.formats', - 'tablib.packages', - 'tablib.packages.xlwt', - 'tablib.packages.xlwt3', - 'tablib.packages.xlrd', - 'tablib.packages.xlrd3', - 'tablib.packages.omnijson', - 'tablib.packages.odf', - 'tablib.packages.odf3', - 'tablib.packages.openpyxl', - 'tablib.packages.openpyxl.shared', - 'tablib.packages.openpyxl.reader', - 'tablib.packages.openpyxl.writer', - 'tablib.packages.openpyxl3', - 'tablib.packages.openpyxl3.shared', - 'tablib.packages.openpyxl3.reader', - 'tablib.packages.openpyxl3.writer', - 'tablib.packages.yaml', - 'tablib.packages.yaml3', - 'tablib.packages.unicodecsv' - ], + packages=packages, license='MIT', classifiers=( 'Development Status :: 5 - Production/Stable', From 7acaa8460dc69f172dee4db6e0492af1db86492e Mon Sep 17 00:00:00 2001 From: Iuri de Silvio Date: Tue, 27 May 2014 19:46:32 -0300 Subject: [PATCH 03/34] Running travis and tox with python 3.4. --- .travis.yml | 1 + tox.ini | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index d677d37..5c4c8cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: - 2.7 - 3.2 - 3.3 + - 3.4 install: - python setup.py install script: python test_tablib.py diff --git a/tox.ini b/tox.ini index 08f72aa..52cd321 100644 --- a/tox.ini +++ b/tox.ini @@ -4,12 +4,8 @@ # and then run "tox" from this directory. 
[tox] -envlist = py26, py27, py32, py33, pypy +envlist = py26, py27, py32, py33, py34, pypy [testenv] commands = python setup.py test -deps = - pytest - PyYAML - xlrd - omnijson +deps = pytest From 954bbdccf32b42bb2871a6dc851eec476fee803a Mon Sep 17 00:00:00 2001 From: Gavin Wahl Date: Mon, 16 Jun 2014 15:31:00 -0600 Subject: [PATCH 04/34] Only freeze the headers row, not the headers columns Fixes #53 --- tablib/formats/_xlsx.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py index d697d9c..57058c8 100644 --- a/tablib/formats/_xlsx.py +++ b/tablib/formats/_xlsx.py @@ -115,8 +115,6 @@ def dset_sheet(dataset, ws): row_number = i + 1 for j, col in enumerate(row): col_idx = get_column_letter(j + 1) - # We want to freeze the column after the last column - frzn_col_idx = get_column_letter(j + 2) # bold headers if (row_number == 1) and dataset.headers: @@ -125,7 +123,7 @@ def dset_sheet(dataset, ws): ws.cell('%s%s'%(col_idx, row_number)).value = unicode(col) style = ws.get_style('%s%s' % (col_idx, row_number)) style.font.bold = True - ws.freeze_panes = '%s%s' % (frzn_col_idx, row_number) + ws.freeze_panes = 'A2' # bold separators From 7a2842a8af28eeb1be9fa25da008b2070a99c8ed Mon Sep 17 00:00:00 2001 From: Gavin Wahl Date: Tue, 24 Jun 2014 15:22:12 -0600 Subject: [PATCH 05/34] Update the vendored unicodecsv to fix None handling The old version of unicodecsv incorrectly (according https://docs.python.org/2/library/csv.html#csv.writer) encoding None values as the string 'None', instead of the string '' as the python documentation specifies. The newest version of unicodecsv has fixed this. 
Fixes #121 --- tablib/packages/unicodecsv/__init__.py | 194 ++++++++++++++++++------- 1 file changed, 143 insertions(+), 51 deletions(-) diff --git a/tablib/packages/unicodecsv/__init__.py b/tablib/packages/unicodecsv/__init__.py index e640987..6a20118 100644 --- a/tablib/packages/unicodecsv/__init__.py +++ b/tablib/packages/unicodecsv/__init__.py @@ -1,22 +1,65 @@ # -*- coding: utf-8 -*- import csv -from csv import * +try: + from itertools import izip +except ImportError: + izip = zip #http://semver.org/ -VERSION = (0, 8, 0) +VERSION = (0, 10, 1) __version__ = ".".join(map(str,VERSION)) -def _stringify(s, encoding): - if type(s)==unicode: - return s.encode(encoding) +pass_throughs = [ + 'register_dialect', + 'unregister_dialect', + 'get_dialect', + 'list_dialects', + 'field_size_limit', + 'Dialect', + 'excel', + 'excel_tab', + 'Sniffer', + 'QUOTE_ALL', + 'QUOTE_MINIMAL', + 'QUOTE_NONNUMERIC', + 'QUOTE_NONE', + 'Error' +] +__all__ = [ + 'reader', + 'writer', + 'DictReader', + 'DictWriter', +] + pass_throughs + +for prop in pass_throughs: + globals()[prop]=getattr(csv, prop) + +def _stringify(s, encoding, errors): + if s is None: + return '' + if isinstance(s, unicode): + return s.encode(encoding, errors) elif isinstance(s, (int , float)): pass #let csv.QUOTE_NONNUMERIC do its thing. 
- elif type(s) != str: + elif not isinstance(s, str): s=str(s) return s -def _stringify_list(l, encoding): - return [_stringify(s, encoding) for s in l] +def _stringify_list(l, encoding, errors='strict'): + try: + return [_stringify(s, encoding, errors) for s in iter(l)] + except TypeError as e: + raise csv.Error(str(e)) + +def _unicodify(s, encoding): + if s is None: + return None + if isinstance(s, (unicode, int, float)): + return s + elif isinstance(s, str): + return s.decode(encoding) + return s class UnicodeWriter(object): """ @@ -28,78 +71,127 @@ class UnicodeWriter(object): >>> f.seek(0) >>> r = unicodecsv.reader(f, encoding='utf-8') >>> row = r.next() - >>> print row[0], row[1] - é ñ + >>> row[0] == u'é' + True + >>> row[1] == u'ñ' + True """ - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - self.writer = csv.writer(f) - self.dialect = dialect + def __init__(self, f, dialect=csv.excel, encoding='utf-8', errors='strict', + *args, **kwds): self.encoding = encoding - self.writer = csv.writer(f, dialect=dialect, **kwds) + self.writer = csv.writer(f, dialect, *args, **kwds) + self.encoding_errors = errors def writerow(self, row): - self.writer.writerow(_stringify_list(row, self.encoding)) + self.writer.writerow(_stringify_list(row, self.encoding, self.encoding_errors)) def writerows(self, rows): for row in rows: self.writerow(row) + + @property + def dialect(self): + return self.writer.dialect writer = UnicodeWriter class UnicodeReader(object): - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - self.reader = csv.reader(f, dialect=dialect, **kwds) + def __init__(self, f, dialect=None, encoding='utf-8', errors='strict', + **kwds): + format_params = ['delimiter', 'doublequote', 'escapechar', 'lineterminator', 'quotechar', 'quoting', 'skipinitialspace'] + if dialect is None: + if not any([kwd_name in format_params for kwd_name in kwds.keys()]): + dialect = csv.excel + self.reader = csv.reader(f, dialect, **kwds) 
self.encoding = encoding + self.encoding_errors = errors def next(self): row = self.reader.next() - return [unicode(s, self.encoding) for s in row] + encoding = self.encoding + encoding_errors = self.encoding_errors + float_ = float + unicode_ = unicode + return [(value if isinstance(value, float_) else + unicode_(value, encoding, encoding_errors)) for value in row] def __iter__(self): return self + + @property + def dialect(self): + return self.reader.dialect + + @property + def line_num(self): + return self.reader.line_num reader = UnicodeReader class DictWriter(csv.DictWriter): """ >>> from cStringIO import StringIO >>> f = StringIO() - >>> w = DictWriter(f, ['a', 'b'], restval=u'î') - >>> w.writerow({'a':'1'}) - >>> w.writerow({'a':'1', 'b':u'ø'}) - >>> w.writerow({'a':u'é'}) + >>> w = DictWriter(f, ['a', u'ñ', 'b'], restval=u'î') + >>> w.writerow({'a':'1', u'ñ':'2'}) + >>> w.writerow({'a':'1', u'ñ':'2', 'b':u'ø'}) + >>> w.writerow({'a':u'é', u'ñ':'2'}) >>> f.seek(0) - >>> r = DictReader(f, fieldnames=['a'], restkey='r') - >>> r.next() == {'a':u'1', 'r':[u"î"]} + >>> r = DictReader(f, fieldnames=['a', u'ñ'], restkey='r') + >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'î']} True - >>> r.next() == {'a':u'1', 'r':[u"ø"]} + >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'\xc3\xb8']} + True + >>> r.next() == {'a': u'\xc3\xa9', u'ñ':'2', 'r': [u'\xc3\xae']} True - >>> r.next() == {'a':u'é', 'r':[u"î"]} """ - def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', *args, **kwds): - self.fieldnames = fieldnames + def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', errors='strict', *args, **kwds): self.encoding = encoding - self.restval = restval - self.writer = csv.DictWriter(csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds) - def writerow(self, d): - for fieldname in self.fieldnames: - if fieldname in d: - d[fieldname] = 
_stringify(d[fieldname], self.encoding) - else: - d[fieldname] = _stringify(self.restval, self.encoding) - self.writer.writerow(d) + csv.DictWriter.__init__(self, csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds) + self.writer = UnicodeWriter(csvfile, dialect, encoding=encoding, errors=errors, *args, **kwds) + self.encoding_errors = errors + + def writeheader(self): + fieldnames = _stringify_list(self.fieldnames, self.encoding, self.encoding_errors) + header = dict(zip(self.fieldnames, self.fieldnames)) + self.writerow(header) class DictReader(csv.DictReader): - def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, dialect='excel', encoding='utf-8', *args, **kwds): - self.restkey = restkey - self.encoding = encoding - self.reader = csv.DictReader(csvfile, fieldnames, restkey, restval, dialect, *args, **kwds) + """ + >>> from cStringIO import StringIO + >>> f = StringIO() + >>> w = DictWriter(f, fieldnames=['name', 'place']) + >>> w.writerow({'name': 'Cary Grant', 'place': 'hollywood'}) + >>> w.writerow({'name': 'Nathan Brillstone', 'place': u'øLand'}) + >>> w.writerow({'name': u'Willam ø. Unicoder', 'place': u'éSpandland'}) + >>> f.seek(0) + >>> r = DictReader(f, fieldnames=['name', 'place']) + >>> print r.next() == {'name': 'Cary Grant', 'place': 'hollywood'} + True + >>> print r.next() == {'name': 'Nathan Brillstone', 'place': u'øLand'} + True + >>> print r.next() == {'name': u'Willam ø. 
Unicoder', 'place': u'éSpandland'} + True + """ + def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, + dialect='excel', encoding='utf-8', errors='strict', *args, + **kwds): + if fieldnames is not None: + fieldnames = _stringify_list(fieldnames, encoding) + csv.DictReader.__init__(self, csvfile, fieldnames, restkey, restval, dialect, *args, **kwds) + self.reader = UnicodeReader(csvfile, dialect, encoding=encoding, + errors=errors, *args, **kwds) + if fieldnames is None and not hasattr(csv.DictReader, 'fieldnames'): + # Python 2.5 fieldnames workaround. (http://bugs.python.org/issue3436) + reader = UnicodeReader(csvfile, dialect, encoding=encoding, *args, **kwds) + self.fieldnames = _stringify_list(reader.next(), reader.encoding) + self.unicode_fieldnames = [_unicodify(f, encoding) for f in + self.fieldnames] + self.unicode_restkey = _unicodify(restkey, encoding) + def next(self): - d = self.reader.next() - for k, v in d.items(): - if k == self.restkey: - rest = v - if rest: - d[self.restkey] = [unicode(v, self.encoding) for v in rest] - else: - if v is not None: - d[k] = unicode(v, self.encoding) - return d + row = csv.DictReader.next(self) + result = dict((uni_key, row[str_key]) for (str_key, uni_key) in + izip(self.fieldnames, self.unicode_fieldnames)) + rest = row.get(self.restkey) + if rest: + result[self.unicode_restkey] = rest + return result From 0b714f21e1bb302d8cb608f9b3b350cb8fda9694 Mon Sep 17 00:00:00 2001 From: Ustun Ozgur Date: Wed, 30 Jul 2014 14:46:50 +0300 Subject: [PATCH 06/34] Typo --- docs/intro.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/intro.rst b/docs/intro.rst index c3413f3..78a6180 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -10,7 +10,7 @@ Advanced features include, segregation, dynamic columns, tags / filtering, and seamless format import/export. -Philosphy +Philosophy --------- Tablib was developed with a few :pep:`20` idioms in mind. 
@@ -90,4 +90,4 @@ Support for other Pythons will be rolled out soon. -Now, go :ref:`Install Tablib `. \ No newline at end of file +Now, go :ref:`Install Tablib `. From 8479df725e1e02e4380acaba2cf93a74ca63b84f Mon Sep 17 00:00:00 2001 From: Iuri de Silvio Date: Sun, 10 Aug 2014 11:46:55 -0300 Subject: [PATCH 07/34] Fix some http schemes to follow page scheme. --- docs/_themes/kr/layout.html | 2 +- docs/_themes/kr_small/layout.html | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/_themes/kr/layout.html b/docs/_themes/kr/layout.html index 1431b49..470f017 100644 --- a/docs/_themes/kr/layout.html +++ b/docs/_themes/kr/layout.html @@ -13,7 +13,7 @@ © Copyright {{ copyright }}. - Fork me on GitHub + Fork me on GitHub diff --git a/docs/_themes/kr_small/layout.html b/docs/_themes/kr_small/layout.html index aa1716a..cab20c7 100644 --- a/docs/_themes/kr_small/layout.html +++ b/docs/_themes/kr_small/layout.html @@ -14,8 +14,8 @@ {% block relbar1 %}{% endblock %} {% block relbar2 %} {% if theme_github_fork %} - Fork me on GitHub + Fork me on GitHub {% endif %} {% endblock %} {% block sidebar1 %}{% endblock %} From a21f8187f8bdbe27434478da45fa58f3e2158268 Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 30 May 2014 17:27:59 -0700 Subject: [PATCH 08/34] Adding DBF support. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Squashing two squashes. Adding DBF support Adding the DBFpy python package The DBFpy package provides basic dbf support for python. Still need to write an interface format file for tablib. Adding DBF format and imports in compat.py Adding DBF format to formats.__init__ DBF format had not been committed to formats.__init__, so I’m adding it. Adding a dbf import test Adding at test to check whether a DBF can be created properly and compare it against a regression binary string. 
Adding an import_set test (and renaming another) Adding an import_set test that conforms with the other import_set tests for other formats. I’m also adding an export_set function. Fixing system site-packages import Importing dbfpy from tablib.packages instead of system site packages. Fixing a SyntaxError in dbfpy/dbfnew.py Fixing an issue with ending field definitions DBFPY, when writing a DBF, terminates the field definitions with a newline character. When importing a DBF from a stream, however, DBFPY was looking only for the \x0D character rather than the newline. Now we consider both cases. Adding a test for dbf format detection Adding DBF filetype detection tests Adding tests for YAML, JSON, TSV, CSV using the DBF detection function. Handling extra exceptions in dbf detection Adding exception handling for struct.error, an exception that DBFPY raises when trying to unpack a TSV table. Since it’s not a DBF file, we know it’s not a DBF and return False. Fixing an issue with the DBF set exporting test The DBF set export test needed a bit enabled (probably the writeable bit?) before the test would match the regression output. Updating dbf interface Updating the int/float class/type checking in the dbf format file. This allows for python2 and python3 compatibility. Tweaking dbfpy to work with python3 Altering a couple of imports. Updating dbf tests for binary data compatibility Making regression strings binary and improving debug messages for dbf assertion errors. Improving file handling for python 2 and 3 Updating DBF file handling for both python 2 and 3 in the _dbf interface. Adding a (seemingly) functional dbfpy for python3 I’ve made dbfpy python3 compatible! Tests appear to pass. A significant change was made to the format detection test whereby I made the input string a binary (bytes) string.
If the string is not a bytes string by the time we try to detect the format, we try to decode the string as utf-8 (which admittedly might not be the safest thing to do) and try to decode anyways. Updating imports for tablib dbf interface Now importing python2 or python3 versions as appropriate. Updating dbf package references in compat.py Cleaning up debugging print statements Updating stream handling in dbf interface Factoring the open() call out of the py3 conditional and removing the temp file before returning the stream value. Adding dbfpy3 init.py I had apparently missed the dbfpy3 init file when committing dbfpy3. Adding dbfpy and dbfpy3 to setup.py's package list Switching test order of formats Putting dbf format testing ahead of TSV. In some of my tests with numeric DBF files, I encountered an issue where the ASCII horizontal tab character (0x09) would appear in a numeric DBF. Because of the order of tabular format imports, though, format detection would recognize it as a TSV and not as a DBF. Adding my name to AUTHORS. Adding a DBF property to tablib core Documentation includes examples on how to explicitly load a DBF straight from a file and how to load a DBF from a binary string. Also, how to write the binary data to a file. Adding DBF format notes to README Adding exclamation point to DBF section title Matching formatting of XLS section Updating setup.py to match current dev state Setup.py had been updated since I forked the tablib repo, so I’m updating setup.py to match its current structure while still maintaining DBF compatibility. Fixed callable column test the test was sending a list instead of a function CORE CONTRIBUTORS :cake: @iurisilvio v0.10.0 WHEELS 3.3, 3.4 makefile for WHEELS v0.10.0 history ALL Separate py2 and py3 packages to avoid installation errors. Fix #151 Running travis and tox with python 3.4. Adding DBF support Adding the DBFpy python package The DBFpy package provides basic dbf support for python.
Still need to write an interface format file for tablib. Adding DBF format and imports in compat.py Adding DBF format to formats.__init__ DBF format had not been committed to formats.__init__, so I’m adding it. Adding a dbf import test Adding a test to check whether a DBF can be created properly and compare it against a regression binary string. Adding an import_set test (and renaming another) Adding an import_set test that conforms with the other import_set tests for other formats. I’m also adding an export_set function. Fixing system site-packages import Importing dbfpy from tablib.packages instead of system site packages. Fixing a SyntaxError in dbfpy/dbfnew.py Fixing an issue with ending field definitions DBFPY, when writing a DBF, terminates the field definitions with a newline character. When importing a DBF from a stream, however, DBFPY was looking only for the \x0D character rather than the newline. Now we consider both cases. Adding a test for dbf format detection Adding DBF filetype detection tests Adding tests for YAML, JSON, TSV, CSV using the DBF detection function. Handling extra exceptions in dbf detection Adding exception handling for struct.error, an exception that DBFPY raises when trying to unpack a TSV table. Since it’s not a DBF file, we know it’s not a DBF and return False. Fixing an issue with the DBF set exporting test The DBF set export test needed a bit enabled (probably the writeable bit?) before the test would match the regression output. Updating dbf interface Updating the int/float class/type checking in the dbf format file. This allows for python2 and python3 compatibility. Tweaking dbfpy to work with python3 Altering a couple of imports. Updating dbf tests for binary data compatibility Making regression strings binary and improving debug messages for dbf assertion errors. Improving file handling for python 2 and 3 Updating DBF file handling for both python 2 and 3 in the _dbf interface.
Adding a (seemingly) functional dbfpy for python3 I’ve made dbfpy python3 compatible! Tests appear to pass. A significant change was made to the format detection test whereby I made the input string a binary (bytes) string. If the string is not a bytes string by the time we try to detect the format, we try to decode the string as utf-8 (which admittedly might not be the safest thing to do) and try to decode anyways. Updating imports for tablib dbf interface Now importing python2 or python3 versions as appropriate. Updating dbf package references in compat.py Cleaning up debugging print statements Updating stream handling in dbf interface Factoring the open() call out of the py3 conditional and removing the temp file before returning the stream value. Adding dbfpy3 init.py I had apparently missed the dbfpy3 init file when committing dbfpy3. Adding dbfpy and dbfpy3 to setup.py's package list Switching test order of formats Putting dbf format testing ahead of TSV. In some of my tests with numeric DBF files, I encountered an issue where the ASCII horizontal tab character (0x09) would appear in a numeric DBF. Because of the order of tabular format imports, though, format detection would recognize it as a TSV and not as a DBF. Adding my name to AUTHORS. Adding a DBF property to tablib core Documentation includes examples on how to explicitly load a DBF straight from a file and how to load a DBF from a binary string. Also, how to write the binary data to a file. Adding DBF format notes to README Adding exclamation point to DBF section title Matching formatting of XLS section Updating setup.py to match current dev state Setup.py had been updated since I forked the tablib repo, so I’m updating setup.py to match its current structure while still maintaining DBF compatibility.
Fixed callable column test the test was sending a list instead of a function CORE CONTRIBUTORS :cake: @iurisilvio v0.10.0 WHEELS 3.3, 3.4 makefile for WHEELS v0.10.0 history ALL Separate py2 and py3 packages to avoid installation errors. Fix #151 Running travis and tox with python 3.4. --- AUTHORS | 1 + README.rst | 10 +- setup.py | 2 + tablib/compat.py | 2 + tablib/core.py | 23 ++ tablib/formats/__init__.py | 3 +- tablib/formats/_dbf.py | 93 ++++++ tablib/packages/dbfpy/__init__.py | 0 tablib/packages/dbfpy/dbf.py | 292 ++++++++++++++++++ tablib/packages/dbfpy/dbfnew.py | 188 ++++++++++++ tablib/packages/dbfpy/fields.py | 466 ++++++++++++++++++++++++++++ tablib/packages/dbfpy/header.py | 275 +++++++++++++++++ tablib/packages/dbfpy/record.py | 262 ++++++++++++++++ tablib/packages/dbfpy/utils.py | 170 +++++++++++ tablib/packages/dbfpy3/__init__.py | 0 tablib/packages/dbfpy3/dbf.py | 293 ++++++++++++++++++ tablib/packages/dbfpy3/dbfnew.py | 182 +++++++++++ tablib/packages/dbfpy3/fields.py | 467 +++++++++++++++++++++++++++++ tablib/packages/dbfpy3/header.py | 273 +++++++++++++++++ tablib/packages/dbfpy3/record.py | 266 ++++++++++++++++ tablib/packages/dbfpy3/utils.py | 170 +++++++++++ test_tablib.py | 102 +++++++ 22 files changed, 3538 insertions(+), 2 deletions(-) create mode 100644 tablib/formats/_dbf.py create mode 100644 tablib/packages/dbfpy/__init__.py create mode 100644 tablib/packages/dbfpy/dbf.py create mode 100644 tablib/packages/dbfpy/dbfnew.py create mode 100644 tablib/packages/dbfpy/fields.py create mode 100644 tablib/packages/dbfpy/header.py create mode 100644 tablib/packages/dbfpy/record.py create mode 100644 tablib/packages/dbfpy/utils.py create mode 100644 tablib/packages/dbfpy3/__init__.py create mode 100644 tablib/packages/dbfpy3/dbf.py create mode 100644 tablib/packages/dbfpy3/dbfnew.py create mode 100644 tablib/packages/dbfpy3/fields.py create mode 100644 tablib/packages/dbfpy3/header.py create mode 100644 tablib/packages/dbfpy3/record.py create
mode 100644 tablib/packages/dbfpy3/utils.py diff --git a/AUTHORS b/AUTHORS index bf68c06..bd0e0c4 100644 --- a/AUTHORS +++ b/AUTHORS @@ -27,3 +27,4 @@ Patches and Suggestions - Jakub Janoszek - Marc Abramowitz - Alex Gaynor +- James Douglass diff --git a/README.rst b/README.rst index 15a2126..925e027 100644 --- a/README.rst +++ b/README.rst @@ -24,6 +24,7 @@ Output formats supported: - HTML (Sets) - TSV (Sets) - CSV (Sets) +- DBF (Sets) Note that tablib *purposefully* excludes XML support. It always will. (Note: This is a joke. Pull requests are welcome.) @@ -31,7 +32,7 @@ Overview -------- `tablib.Dataset()` - A Dataset is a table of tabular data. It may or may not have a header row. They can be build and manipulated as raw Python datatypes (Lists of tuples|dictionaries). Datasets can be imported from JSON, YAML, and CSV; they can be exported to XLSX, XLS, ODS, JSON, YAML, CSV, TSV, and HTML. + A Dataset is a table of tabular data. It may or may not have a header row. They can be build and manipulated as raw Python datatypes (Lists of tuples|dictionaries). Datasets can be imported from JSON, YAML, DBF, and CSV; they can be exported to XLSX, XLS, ODS, JSON, YAML, DBF, CSV, TSV, and HTML. `tablib.Databook()` A Databook is a set of Datasets. The most common form of a Databook is an Excel file with multiple spreadsheets. Databooks can be imported from JSON and YAML; they can be exported to XLSX, XLS, ODS, JSON, and YAML. @@ -123,6 +124,13 @@ EXCEL! >>> with open('people.xls', 'wb') as f: ... f.write(data.xls) +DBF! +++++ +:: + + >>> with open('people.dbf', 'wb') as f: + ... f.write(data.dbf) + It's that easy. 
diff --git a/setup.py b/setup.py index 648f7eb..8409b3c 100755 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ if sys.version_info[0] == 2: 'tablib.packages.openpyxl.reader', 'tablib.packages.openpyxl.writer', 'tablib.packages.yaml', + 'tablib.packages.dbfpy' ]) else: packages.extend([ @@ -61,6 +62,7 @@ else: 'tablib.packages.openpyxl3.reader', 'tablib.packages.openpyxl3.writer', 'tablib.packages.yaml3', + 'tablib.packages.dbfpy3' ]) diff --git a/tablib/compat.py b/tablib/compat.py index 919f464..d4582d5 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -28,6 +28,7 @@ if is_py3: from tablib.packages import markup3 as markup from tablib.packages import openpyxl3 as openpyxl from tablib.packages.odf3 import opendocument, style, text, table + import tablib.packages.dbfpy3 as dbfpy import csv from io import StringIO @@ -49,5 +50,6 @@ else: from tablib.packages.odf import opendocument, style, text, table from tablib.packages import unicodecsv as csv + import tablib.packages.dbfpy as dbfpy unicode = unicode diff --git a/tablib/core.py b/tablib/core.py index 9db46c9..02c9085 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -559,6 +559,29 @@ class Dataset(object): """ pass + @property + def dbf(): + """A dBASE representation of the :class:`Dataset` object. + + A dataset object can also be imported by setting the :class:`Dataset.dbf` attribute::: + + # To import data from an existing DBF file: + data = tablib.Dataset() + data.dbf = open('existing_table.dbf').read() + + # to import data from an ASCII-encoded bytestring: + data = tablib.Dataset() + data.dbf = + + .. admonition:: Binary Warning + + :class:`Dataset.dbf` contains binary data, so make sure to write in binary mode:: + + with open('output.dbf', 'wb') as f: + f.write(data.dbf) + """ + pass + # ---- # Rows diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 5fdf279..1eda107 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -11,5 +11,6 @@ from . 
import _tsv as tsv from . import _html as html from . import _xlsx as xlsx from . import _ods as ods +from . import _dbf as dbf -available = (json, xls, yaml, csv, tsv, html, xlsx, ods) +available = (json, xls, yaml, csv, dbf, tsv, html, xlsx, ods) diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py new file mode 100644 index 0000000..41c2ef4 --- /dev/null +++ b/tablib/formats/_dbf.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- + +""" Tablib - DBF Support. +""" +import tempfile +import struct +import os + +from tablib.compat import StringIO +from tablib.compat import dbfpy +from tablib.compat import is_py3 + +if is_py3: + from tablib.packages.dbfpy3 import dbf + from tablib.packages.dbfpy3 import dbfnew + from tablib.packages.dbfpy3 import record as dbfrecord + import io +else: + from tablib.packages.dbfpy import dbf + from tablib.packages.dbfpy import dbfnew + from tablib.packages.dbfpy import record as dbfrecord + + +title = 'dbf' +extensions = ('dbf',) + +DEFAULT_ENCODING = 'utf-8' + +def export_set(dataset): + """Returns DBF representation of a Dataset (field types are inferred from its first row)""" + new_dbf = dbfnew.dbf_new() + temp_file, temp_uri = tempfile.mkstemp() + os.close(temp_file)  # mkstemp hands back an open descriptor; only the path is used below + # create the appropriate fields based on the contents of the first row + first_row = dataset[0] + for fieldname, field_value in zip(dataset.headers, first_row): + if type(field_value) in [int, float]: + new_dbf.add_field(fieldname, 'N', 10, 8) + else: + new_dbf.add_field(fieldname, 'C', 80) + + new_dbf.write(temp_uri) + + dbf_file = dbf.Dbf(temp_uri, readOnly=0) + for row in dataset: + record = dbfrecord.DbfRecord(dbf_file) + for fieldname, field_value in zip(dataset.headers, row): + record[fieldname] = field_value + record.store() + + dbf_file.close() + dbf_stream = open(temp_uri, 'rb') + if is_py3: + stream = io.BytesIO(dbf_stream.read()) + else: + stream = StringIO(dbf_stream.read()) + dbf_stream.close() + os.remove(temp_uri) + return stream.getvalue() + +def import_set(dset, in_stream, headers=True): + """Returns a dataset
from a DBF stream.""" + + dset.wipe() + if is_py3: + _dbf = dbf.Dbf(io.BytesIO(in_stream)) + else: + _dbf = dbf.Dbf(StringIO(in_stream)) + dset.headers = _dbf.fieldNames + for record in range(_dbf.recordCount): + row = [_dbf[record][f] for f in _dbf.fieldNames] + dset.append(row) + +def detect(stream): + """Returns True if the given stream is valid DBF""" + #_dbf = dbf.Table(StringIO(stream)) + try: + if is_py3: + if type(stream) is not bytes: + stream = bytes(stream, 'utf-8') + _dbf = dbf.Dbf(io.BytesIO(stream), readOnly=True) + else: + _dbf = dbf.Dbf(StringIO(stream), readOnly=True) + return True + except (ValueError, struct.error): + # When we try to open up a file that's not a DBF, dbfpy raises a + # ValueError. + # When unpacking a string argument with less than 8 chars, struct.error is + # raised. + return False + + + diff --git a/tablib/packages/dbfpy/__init__.py b/tablib/packages/dbfpy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tablib/packages/dbfpy/dbf.py b/tablib/packages/dbfpy/dbf.py new file mode 100644 index 0000000..b3d2e21 --- /dev/null +++ b/tablib/packages/dbfpy/dbf.py @@ -0,0 +1,292 @@ +#! /usr/bin/env python +"""DBF accessing helpers. 
+ +FIXME: more documentation needed + +Examples: + + Create new table, setup structure, add records: + + dbf = Dbf(filename, new=True) + dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (n, s, i, b) in ( + ("John", "Miller", "YC", (1980, 10, 11)), + ("Andy", "Larkin", "", (1980, 4, 11)), + ): + rec = dbf.newRecord() + rec["NAME"] = n + rec["SURNAME"] = s + rec["INITIALS"] = i + rec["BIRTHDATE"] = b + rec.store() + dbf.close() + + Open an existing dbf, read some data: + + dbf = Dbf(filename, True) + for rec in dbf: + for fldName in dbf.fieldNames: + print '%s:\t %s (%s)' % (fldName, rec[fldName], + type(rec[fldName])) + print + dbf.close() + +""" +"""History (most recent first): +11-feb-2007 [als] export INVALID_VALUE; + Dbf: added .ignoreErrors, .INVALID_VALUE +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] removed fromStream and newDbf methods: + use argument of __init__ call must be used instead; + added class fields pointing to the header and + record classes. +17-dec-2005 [yc] split to several modules; reimplemented +13-dec-2005 [yc] adapted to the changes of the `strutil` module. +13-sep-2002 [als] support FoxPro Timestamp datatype +15-nov-1999 [jjk] documentation updates, add demo +24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks +08-jun-1998 [jjk] fix problems, add more features +20-feb-1998 [jjk] fix problems, add more features +19-feb-1998 [jjk] add create/write capabilities +18-feb-1998 [jjk] from dbfload.py +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] +__author__ = "Jeff Kunce " + +__all__ = ["Dbf"] + +from . import header +from . import record +from .utils import INVALID_VALUE + +class Dbf(object): + """DBF accessor.
+ + FIXME: + docs and examples needed (don't forget to tell + about problems adding new fields on the fly) + + Implementation notes: + ``_new`` field is used to indicate whether this is + a new data table. `addField` could be used only for + the new tables! If at least one record was appended + to the table its structure couldn't be changed. + + """ + + __slots__ = ("name", "header", "stream", + "_changed", "_new", "_ignore_errors") + + HeaderClass = header.DbfHeader + RecordClass = record.DbfRecord + INVALID_VALUE = INVALID_VALUE + + ## initialization and creation helpers + + def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): + """Initialize instance. + + Arguments: + f: + Filename or file-like object. + new: + True if new data table must be created. Assume + data table exists if this argument is False. + readOnly: + if ``f`` argument is a string file will + be opened in read-only mode; in other cases + this argument is ignored. This argument is ignored + even if ``new`` argument is True. + headerObj: + `header.DbfHeader` instance or None. If this argument + is None, new empty header will be used with the + all fields set by default. + ignoreErrors: + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error.
+ + """ + if isinstance(f, basestring): + # a filename + self.name = f + if new: + # new table (table file must be + # created or opened and truncated) + self.stream = file(f, "w+b") + else: + # table file must exist + self.stream = file(f, ("r+b", "rb")[bool(readOnly)]) + else: + # a stream + self.name = getattr(f, "name", "") + self.stream = f + if new: + # if this is a new table, header will be empty + self.header = self.HeaderClass() + else: + # or instantiated using stream + self.header = self.HeaderClass.fromStream(self.stream) + self.ignoreErrors = ignoreErrors + self._new = bool(new) + self._changed = False + + ## properties + + closed = property(lambda self: self.stream.closed) + recordCount = property(lambda self: self.header.recordCount) + fieldNames = property( + lambda self: [_fld.name for _fld in self.header.fields]) + fieldDefs = property(lambda self: self.header.fields) + changed = property(lambda self: self._changed or self.header.changed) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on the header object and self""" + self.header.ignoreErrors = self._ignore_errors = bool(value) + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## protected methods + + def _fixIndex(self, index): + """Return fixed index. + + This method fails if index isn't a numeric object + (long or int). Or index isn't in a valid range + (less than the number of records in the db). + + If ``index`` is a negative number, it will be + treated like a negative index of a list object. + + Return: + Return value is numeric object meaning a valid index.
+ + """ + if not isinstance(index, (int, long)): + raise TypeError("Index must be a numeric object") + if index < 0: + # index from the right side (-1 is the last record) + # fix it to the left-side index + index += len(self) + if not 0 <= index < len(self): + raise IndexError("Record index out of range") + return index + + ## interface methods + + def close(self): + self.flush() + self.stream.close() + + def flush(self): + """Flush data to the associated stream.""" + if self.changed: + self.header.setCurrentDate() + self.header.write(self.stream) + self.stream.flush() + self._changed = False + + def indexOfFieldName(self, name): + """Index of field named ``name``.""" + # FIXME: move this to header class + return self.header.fields.index(name) + + def newRecord(self): + """Return new record, which belongs to this table.""" + return self.RecordClass(self) + + def append(self, record): + """Append ``record`` to the database.""" + record.index = self.header.recordCount + record._write() + self.header.recordCount += 1 + self._changed = True + self._new = False + + def addField(self, *defs): + """Add field definitions. + + For more information see `header.DbfHeader.addField`.
+ + """ + if self._new: + self.header.addField(*defs) + else: + raise TypeError("At least one record was added, " + "structure can't be changed") + + ## 'magic' methods (representation and sequence interface) + + def __repr__(self): + return "Dbf stream '%s'\n" % self.stream + repr(self.header) + + def __len__(self): + """Return number of records.""" + return self.recordCount + + def __getitem__(self, index): + """Return `DbfRecord` instance.""" + return self.RecordClass.fromStream(self, self._fixIndex(index)) + + def __setitem__(self, index, record): + """Write `DbfRecord` instance to the stream.""" + record.index = self._fixIndex(index) + record._write() + self._changed = True + self._new = False + + #def __del__(self): + # """Flush stream upon deletion of the object.""" + # self.flush() + + +def demoRead(filename): + _dbf = Dbf(filename, True) + for _rec in _dbf: + print + print(repr(_rec)) + _dbf.close() + +def demoCreate(filename): + _dbf = Dbf(filename, new=True) + _dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (_n, _s, _i, _b) in ( + ("John", "Miller", "YC", (1981, 1, 2)), + ("Andy", "Larkin", "AL", (1982, 3, 4)), + ("Bill", "Clinth", "", (1983, 5, 6)), + ("Bobb", "McNail", "", (1984, 7, 8)), + ): + _rec = _dbf.newRecord() + _rec["NAME"] = _n + _rec["SURNAME"] = _s + _rec["INITIALS"] = _i + _rec["BIRTHDATE"] = _b + _rec.store() + print(repr(_dbf)) + _dbf.close() + +if (__name__=='__main__'): + import sys + _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" + demoCreate(_name) + demoRead(_name) + +# vim: set et sw=4 sts=4 : diff --git a/tablib/packages/dbfpy/dbfnew.py b/tablib/packages/dbfpy/dbfnew.py new file mode 100644 index 0000000..dea7e52 --- /dev/null +++ b/tablib/packages/dbfpy/dbfnew.py @@ -0,0 +1,188 @@ +#!/usr/bin/python +""".DBF creation helpers. + +Note: this is a legacy interface. 
New code should use Dbf class + for table creation (see examples in dbf.py) + +TODO: + - handle Memo fields. + - check length of the fields accoring to the + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + +""" +"""History (most recent first) +04-jul-2006 [als] added export declaration; + updated for dbfpy 2.0 +15-dec-2005 [yc] define dbf_new.__slots__ +14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; + dbf_new now is a new class (inherited from object) +??-jun-2000 [--] added by Hans Fiby +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] + +__all__ = ["dbf_new"] + +from dbf import * +from fields import * +from header import * +from record import * + +class _FieldDefinition(object): + """Field definition. + + This is a simple structure, which contains ``name``, ``type``, + ``len``, ``dec`` and ``cls`` fields. + + Objects also implement get/setitem magic functions, so fields + could be accessed via sequence iterface, where 'name' has + index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and + 'cls' could be located at index 4. + + """ + + __slots__ = "name", "type", "len", "dec", "cls" + + # WARNING: be attentive - dictionaries are mutable! + FLD_TYPES = { + # type: (cls, len) + "C": (DbfCharacterFieldDef, None), + "N": (DbfNumericFieldDef, None), + "L": (DbfLogicalFieldDef, 1), + # FIXME: support memos + # "M": (DbfMemoFieldDef), + "D": (DbfDateFieldDef, 8), + # FIXME: I'm not sure length should be 14 characters! 
+ # but temporary I use it, cuz date is 8 characters + # and time 6 (hhmmss) + "T": (DbfDateTimeFieldDef, 14), + } + + def __init__(self, name, type, len=None, dec=0): + _cls, _len = self.FLD_TYPES[type] + if _len is None: + if len is None: + raise ValueError("Field length must be defined") + _len = len + self.name = name + self.type = type + self.len = _len + self.dec = dec + self.cls = _cls + + def getDbfField(self): + "Return `DbfFieldDef` instance from the current definition." + return self.cls(self.name, self.len, self.dec) + + def appendToHeader(self, dbfh): + """Create a `DbfFieldDef` instance and append it to the dbf header. + + Arguments: + dbfh: `DbfHeader` instance. + + """ + _dbff = self.getDbfField() + dbfh.addField(_dbff) + + +class dbf_new(object): + """New .DBF creation helper. + + Example Usage: + + dbfn = dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + + Note: + This module cannot handle Memo-fields, + they are special. + + """ + + __slots__ = ("fields",) + + FieldDefinitionClass = _FieldDefinition + + def __init__(self): + self.fields = [] + + def add_field(self, name, typ, len, dec=0): + """Add field definition. + + Arguments: + name: + field name (str object). field name must not + contain ASCII NULs and it's length shouldn't + exceed 10 characters. + typ: + type of the field. this must be a single character + from the "CNLMDT" set meaning character, numeric, + logical, memo, date and date/time respectively. + len: + length of the field. this argument is used only for + the character and numeric fields. all other fields + have fixed length. + FIXME: use None as a default for this argument? + dec: + decimal precision. used only for the numric fields. 
+ + """ + self.fields.append(self.FieldDefinitionClass(name, typ, len, dec)) + + def write(self, filename): + """Create empty .DBF file using current structure.""" + _dbfh = DbfHeader() + _dbfh.setCurrentDate() + for _fldDef in self.fields: + _fldDef.appendToHeader(_dbfh) + _dbfStream = file(filename, "wb") + _dbfh.write(_dbfStream) + _dbfStream.close() + + def write_stream(self, stream): + _dbfh = DbfHeader() + _dbfh.setCurrentDate() + for _fldDef in self.fields: + _fldDef.appendToHeader(_dbfh) + _dbfh.write(stream) + + +if (__name__=='__main__'): + # create a new DBF-File + dbfn=dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + # test new dbf + print "*** created tst.dbf: ***" + dbft = Dbf('tst.dbf', readOnly=0) + print repr(dbft) + # add a record + rec=DbfRecord(dbft) + rec['name']='something' + rec['price']=10.5 + rec['date']=(2000,1,12) + rec.store() + # add another record + rec=DbfRecord(dbft) + rec['name']='foo and bar' + rec['price']=12234 + rec['date']=(1992,7,15) + rec.store() + + # show the records + print "*** inserted 2 records into tst.dbf: ***" + print repr(dbft) + for i1 in range(len(dbft)): + rec = dbft[i1] + for fldName in dbft.fieldNames: + print '%s:\t %s'%(fldName, rec[fldName]) + print + dbft.close() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/fields.py b/tablib/packages/dbfpy/fields.py new file mode 100644 index 0000000..69cd436 --- /dev/null +++ b/tablib/packages/dbfpy/fields.py @@ -0,0 +1,466 @@ +"""DBF fields definitions. 
+ +TODO: + - make memos work +""" +"""History (most recent first): +26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes +05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date +16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point + in the value to select float or integer return type +13-mar-2008 [als] check field name length in constructor +11-feb-2007 [als] handle value conversion errors +10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() +01-dec-2006 [als] Timestamp columns use None for empty values +31-oct-2006 [als] support field types 'F' (float), 'I' (integer) + and 'Y' (currency); + automate export and registration of field classes +04-jul-2006 [als] added export declaration +10-mar-2006 [als] decode empty values for Date and Logical fields; + show field name in errors +10-mar-2006 [als] fix Numeric value decoding: according to spec, + value always is string representation of the number; + ensure that encoded Numeric value fits into the field +20-dec-2005 [yc] use field names in upper case +15-dec-2005 [yc] field definitions moved from `dbf`. +""" + +__version__ = "$Revision: 1.14 $"[11:-2] +__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] + +__all__ = ["lookupFor",] # field classes added at the end of the module + +import datetime +import struct +import sys + +from . import utils + +## abstract definitions + +class DbfFieldDef(object): + """Abstract field definition. + + Child classes must override ``type`` class attribute to provide datatype + infromation of the field definition. For more info about types visit + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + + Also child classes must override ``defaultValue`` field to provide + default value for the field value. + + If child class has fixed length ``length`` class attribute must be + overriden and set to the valid value. None value means, that field + isn't of fixed length. 
+ + Note: ``name`` field must not be changed after instantiation. + + """ + + __slots__ = ("name", "length", "decimalCount", + "start", "end", "ignoreErrors") + + # length of the field, None in case of variable-length field, + # or a number if this field is a fixed-length field + length = None + + # field type. for more information about fields types visit + # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + # must be overriden in child classes + typeCode = None + + # default value for the field. this field must be + # overriden in child classes + defaultValue = None + + def __init__(self, name, length=None, decimalCount=None, + start=None, stop=None, ignoreErrors=False, + ): + """Initialize instance.""" + assert self.typeCode is not None, "Type code must be overriden" + assert self.defaultValue is not None, "Default value must be overriden" + ## fix arguments + if len(name) >10: + raise ValueError("Field name \"%s\" is too long" % name) + name = str(name).upper() + if self.__class__.length is None: + if length is None: + raise ValueError("[%s] Length isn't specified" % name) + length = int(length) + if length <= 0: + raise ValueError("[%s] Length must be a positive integer" + % name) + else: + length = self.length + if decimalCount is None: + decimalCount = 0 + ## set fields + self.name = name + # FIXME: validate length according to the specification at + # http://www.clicketyclick.dk/databases/xbase/format/data_types.html + self.length = length + self.decimalCount = decimalCount + self.ignoreErrors = ignoreErrors + self.start = start + self.end = stop + + def __cmp__(self, other): + return cmp(self.name, str(other).upper()) + + def __hash__(self): + return hash(self.name) + + def fromString(cls, string, start, ignoreErrors=False): + """Decode dbf field definition from the string data. + + Arguments: + string: + a string, dbf definition is decoded from. length of + the string must be 32 bytes. + start: + position in the database file. 
+ ignoreErrors: + initial error processing mode for the new field (boolean) + + """ + assert len(string) == 32 + _length = ord(string[16]) + return cls(utils.unzfill(string)[:11], _length, ord(string[17]), + start, start + _length, ignoreErrors=ignoreErrors) + fromString = classmethod(fromString) + + def toString(self): + """Return encoded field definition. + + Return: + Return value is a string object containing encoded + definition of this field. + + """ + if sys.version_info < (2, 4): + # earlier versions did not support padding character + _name = self.name[:11] + "\0" * (11 - len(self.name)) + else: + _name = self.name.ljust(11, '\0') + return ( + _name + + self.typeCode + + #data address + chr(0) * 4 + + chr(self.length) + + chr(self.decimalCount) + + chr(0) * 14 + ) + + def __repr__(self): + return "%-10s %1s %3d %3d" % self.fieldInfo() + + def fieldInfo(self): + """Return field information. + + Return: + Return value is a (name, type, length, decimals) tuple. + + """ + return (self.name, self.typeCode, self.length, self.decimalCount) + + def rawFromRecord(self, record): + """Return a "raw" field value from the record string.""" + return record[self.start:self.end] + + def decodeFromRecord(self, record): + """Return decoded field value, or ``utils.INVALID_VALUE`` on a decoding error when ``ignoreErrors`` is set.""" + try: + return self.decodeValue(self.rawFromRecord(record)) + except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit propagate + if self.ignoreErrors: + return utils.INVALID_VALUE + else: + raise + + def decodeValue(self, value): + """Return decoded value from string value. + + This method shouldn't be used publicly. It's called from the + `decodeFromRecord` method. + + This is an abstract method and it must be overridden in child classes. + """ + raise NotImplementedError + + def encodeValue(self, value): + """Return str object containing encoded field value. + + This is an abstract method and it must be overridden in child classes.
+ """ + raise NotImplementedError + +## real classes + +class DbfCharacterFieldDef(DbfFieldDef): + """Definition of the character field.""" + + typeCode = "C" + defaultValue = "" + + def decodeValue(self, value): + """Return string object. + + Return value is a ``value`` argument with stripped right spaces. + + """ + return value.rstrip(" ") + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``.""" + return str(value)[:self.length].ljust(self.length) + + +class DbfNumericFieldDef(DbfFieldDef): + """Definition of the numeric field.""" + + typeCode = "N" + # XXX: now I'm not sure it was a good idea to make a class field + # `defaultValue` instead of a generic method as it was implemented + # previously -- it's ok with all types except number, cuz + # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. + defaultValue = 0 + + def decodeValue(self, value): + """Return a number decoded from ``value``. + + If decimals is zero, value will be decoded as an integer; + or as a float otherwise. + + Return: + Return value is a int (long) or float instance. + + """ + value = value.strip(" \0") + if "." 
in value: + # a float (has decimal separator) + return float(value) + elif value: + # must be an integer + return int(value) + else: + return 0 + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + _rv = ("%*.*f" % (self.length, self.decimalCount, value)) + if len(_rv) > self.length: + _ppos = _rv.find(".") + if 0 <= _ppos <= self.length: + _rv = _rv[:self.length] + else: + raise ValueError("[%s] Numeric overflow: %s (field width: %i)" + % (self.name, _rv, self.length)) + return _rv + +class DbfFloatFieldDef(DbfNumericFieldDef): + """Definition of the float field - same as numeric.""" + + typeCode = "F" + +class DbfIntegerFieldDef(DbfFieldDef): + """Definition of the integer field.""" + + typeCode = "I" + length = 4 + defaultValue = 0 + + def decodeValue(self, value): + """Return an integer number decoded from ``value``.""" + return struct.unpack("= 1: + _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) + _rv += datetime.timedelta(0, _msecs / 1000.0) + else: + # empty date + _rv = None + return _rv + + def encodeValue(self, value): + """Return a string-encoded ``value``.""" + if value: + value = utils.getDateTime(value) + # LE byteorder + _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, + (value.hour * 3600 + value.minute * 60 + value.second) * 1000) + else: + _rv = "\0" * self.length + assert len(_rv) == self.length + return _rv + + +_fieldsRegistry = {} + +def registerField(fieldCls): + """Register field definition class. + + ``fieldCls`` should be subclass of the `DbfFieldDef`. + + Use `lookupFor` to retrieve field definition class + by the type code. + + """ + assert fieldCls.typeCode is not None, "Type code isn't defined" + # XXX: use fieldCls.typeCode.upper()? in case of any decign + # don't forget to look to the same comment in ``lookupFor`` method + _fieldsRegistry[fieldCls.typeCode] = fieldCls + + +def lookupFor(typeCode): + """Return field definition class for the given type code. 
+ + ``typeCode`` must be a single character. That type should be + previously registered. + + Use `registerField` to register new field class. + + Return: + Return value is a subclass of the `DbfFieldDef`. + + """ + # XXX: use typeCode.upper()? in case of any decign don't + # forget to look to the same comment in ``registerField`` + return _fieldsRegistry[typeCode] + +## register generic types + +for (_name, _val) in globals().items(): + if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ + and (_name != "DbfFieldDef"): + __all__.append(_name) + registerField(_val) +del _name, _val + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/header.py b/tablib/packages/dbfpy/header.py new file mode 100644 index 0000000..03a877c --- /dev/null +++ b/tablib/packages/dbfpy/header.py @@ -0,0 +1,275 @@ +"""DBF header definition. + +TODO: + - handle encoding of the character fields + (encoding information stored in the DBF header) + +""" +"""History (most recent first): +16-sep-2010 [als] fromStream: fix century of the last update field +11-feb-2007 [als] added .ignoreErrors +10-feb-2007 [als] added __getitem__: return field definitions + by field name or field number (zero-based) +04-jul-2006 [als] added export declaration +15-dec-2005 [yc] created +""" + +__version__ = "$Revision: 1.6 $"[11:-2] +__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] + +__all__ = ["DbfHeader"] + +try: + import cStringIO +except ImportError: + # when we're in python3, we cStringIO has been replaced by io.StringIO + import io as cStringIO +import datetime +import struct +import time + +from . import fields +from . import utils + + +class DbfHeader(object): + """Dbf header definition. 
+ + For more information about dbf header format visit + `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` + + Examples: + Create an empty dbf header and add some field definitions: + dbfh = DbfHeader() + dbfh.addField(("name", "C", 10)) + dbfh.addField(("date", "D")) + dbfh.addField(DbfNumericFieldDef("price", 5, 2)) + Create a dbf header with field definitions: + dbfh = DbfHeader([ + ("name", "C", 10), + ("date", "D"), + DbfNumericFieldDef("price", 5, 2), + ]) + + """ + + __slots__ = ("signature", "fields", "lastUpdate", "recordLength", + "recordCount", "headerLength", "changed", "_ignore_errors") + + ## instance construction and initialization methods + + def __init__(self, fields=None, headerLength=0, recordLength=0, + recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False, + ): + """Initialize instance. + + Arguments: + fields: + a list of field definitions; + recordLength: + size of the records; + headerLength: + size of the header; + recordCount: + number of records stored in DBF; + signature: + version number (aka signature). using 0x03 as a default meaning + "File without DBT". for more information about this field visit + ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` + lastUpdate: + date of the DBF's update. this could be a string ('yymmdd' or + 'yyyymmdd'), timestamp (int or float), datetime/date value, + a sequence (assuming (yyyy, mm, dd, ...)) or an object having + callable ``ticks`` field. 
+ ignoreErrors: + error processing mode for DBF fields (boolean) + + """ + self.signature = signature + if fields is None: + self.fields = [] + else: + self.fields = list(fields) + self.lastUpdate = utils.getDate(lastUpdate) + self.recordLength = recordLength + self.headerLength = headerLength + self.recordCount = recordCount + self.ignoreErrors = ignoreErrors + # XXX: I'm not sure this is safe to + # initialize `self.changed` in this way + self.changed = bool(self.fields) + + # @classmethod + def fromString(cls, string): + """Return header instance from the string object.""" + return cls.fromStream(cStringIO.StringIO(str(string))) + fromString = classmethod(fromString) + + # @classmethod + def fromStream(cls, stream): + """Return header object from the stream.""" + stream.seek(0) + _data = stream.read(32) + (_cnt, _hdrLen, _recLen) = struct.unpack(" DbfRecord._write(); + added delete() method. +16-dec-2005 [yc] record definition moved from `dbf`. +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2] + +__all__ = ["DbfRecord"] + +from itertools import izip + +import utils + +class DbfRecord(object): + """DBF record. + + Instances of this class shouldn't be created manualy, + use `dbf.Dbf.newRecord` instead. + + Class implements mapping/sequence interface, so + fields could be accessed via their names or indexes + (names is a preffered way to access fields). + + Hint: + Use `store` method to save modified record. + + Examples: + Add new record to the database: + db = Dbf(filename) + rec = db.newRecord() + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + Or the same, but modify existed + (second in this case) record: + db = Dbf(filename) + rec = db[2] + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + + """ + + __slots__ = "dbf", "index", "deleted", "fieldData" + + ## creation and initialization + + def __init__(self, dbf, index=None, deleted=False, data=None): + """Instance initialiation. 
+ + Arguments: + dbf: + A `Dbf.Dbf` instance this record belonogs to. + index: + An integer record index or None. If this value is + None, record will be appended to the DBF. + deleted: + Boolean flag indicating whether this record + is a deleted record. + data: + A sequence or None. This is a data of the fields. + If this argument is None, default values will be used. + + """ + self.dbf = dbf + # XXX: I'm not sure ``index`` is necessary + self.index = index + self.deleted = deleted + if data is None: + self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] + else: + self.fieldData = list(data) + + # XXX: validate self.index before calculating position? + position = property(lambda self: self.dbf.header.headerLength + \ + self.index * self.dbf.header.recordLength) + + def rawFromStream(cls, dbf, index): + """Return raw record contents read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance containing the record. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is a string containing record data in DBF format. + + """ + # XXX: may be write smth assuming, that current stream + # position is the required one? it could save some + # time required to calculate where to seek in the file + dbf.stream.seek(dbf.header.headerLength + + index * dbf.header.recordLength) + return dbf.stream.read(dbf.header.recordLength) + rawFromStream = classmethod(rawFromStream) + + def fromStream(cls, dbf, index): + """Return a record read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is an instance of the current class. + + """ + return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) + fromStream = classmethod(fromStream) + + def fromString(cls, dbf, string, index=None): + """Return record read from the string object. 
+ + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + string: + A string new record should be created from. + index: + Index of the record in the container. If this + argument is None, record will be appended. + + Return value is an instance of the current class. + + """ + return cls(dbf, index, string[0]=="*", + [_fd.decodeFromRecord(string) for _fd in dbf.header.fields]) + fromString = classmethod(fromString) + + ## object representation + + def __repr__(self): + _template = "%%%ds: %%s (%%s)" % max([len(_fld) + for _fld in self.dbf.fieldNames]) + _rv = [] + for _fld in self.dbf.fieldNames: + _val = self[_fld] + if _val is utils.INVALID_VALUE: + _rv.append(_template % + (_fld, "None", "value cannot be decoded")) + else: + _rv.append(_template % (_fld, _val, type(_val))) + return "\n".join(_rv) + + ## protected methods + + def _write(self): + """Write data to the dbf stream. + + Note: + This isn't a public method, it's better to + use 'store' instead publically. + Be design ``_write`` method should be called + only from the `Dbf` instance. + + + """ + self._validateIndex(False) + self.dbf.stream.seek(self.position) + self.dbf.stream.write(self.toString()) + # FIXME: may be move this write somewhere else? + # why we should check this condition for each record? + if self.index == len(self.dbf): + # this is the last record, + # we should write SUB (ASCII 26) + self.dbf.stream.write("\x1A") + + ## utility methods + + def _validateIndex(self, allowUndefined=True, checkRange=False): + """Valid ``self.index`` value. + + If ``allowUndefined`` argument is True functions does nothing + in case of ``self.index`` pointing to None object. 
+ + """ + if self.index is None: + if not allowUndefined: + raise ValueError("Index is undefined") + elif self.index < 0: + raise ValueError("Index can't be negative (%s)" % self.index) + elif checkRange and self.index <= self.dbf.header.recordCount: + raise ValueError("There are only %d records in the DBF" % + self.dbf.header.recordCount) + + ## interface methods + + def store(self): + """Store current record in the DBF. + + If ``self.index`` is None, this record will be appended to the + records of the DBF this records belongs to; or replaced otherwise. + + """ + self._validateIndex() + if self.index is None: + self.index = len(self.dbf) + self.dbf.append(self) + else: + self.dbf[self.index] = self + + def delete(self): + """Mark method as deleted.""" + self.deleted = True + + def toString(self): + """Return string packed record values.""" + return "".join([" *"[self.deleted]] + [ + _def.encodeValue(_dat) + for (_def, _dat) in izip(self.dbf.header.fields, self.fieldData) + ]) + + def asList(self): + """Return a flat list of fields. + + Note: + Change of the list's values won't change + real values stored in this object. + + """ + return self.fieldData[:] + + def asDict(self): + """Return a dictionary of fields. + + Note: + Change of the dicts's values won't change + real values stored in this object. 
+ + """ + return dict([_i for _i in izip(self.dbf.fieldNames, self.fieldData)]) + + def __getitem__(self, key): + """Return value by field name or field index.""" + if isinstance(key, (long, int)): + # integer index of the field + return self.fieldData[key] + # assuming string field name + return self.fieldData[self.dbf.indexOfFieldName(key)] + + def __setitem__(self, key, value): + """Set field value by integer index of the field or string name.""" + if isinstance(key, (int, long)): + # integer index of the field + return self.fieldData[key] + # assuming string field name + self.fieldData[self.dbf.indexOfFieldName(key)] = value + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/utils.py b/tablib/packages/dbfpy/utils.py new file mode 100644 index 0000000..cef8aa5 --- /dev/null +++ b/tablib/packages/dbfpy/utils.py @@ -0,0 +1,170 @@ +"""String utilities. + +TODO: + - allow strings in getDateTime routine; +""" +"""History (most recent first): +11-feb-2007 [als] added INVALID_VALUE +10-feb-2007 [als] allow date strings padded with spaces instead of zeroes +20-dec-2005 [yc] handle long objects in getDate/getDateTime +16-dec-2005 [yc] created from ``strutil`` module. +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2] + +import datetime +import time + + +def unzfill(str): + """Return a string without ASCII NULs. + + This function searchers for the first NUL (ASCII 0) occurance + and truncates string till that position. + + """ + try: + return str[:str.index('\0')] + except ValueError: + return str + + +def getDate(date=None): + """Return `datetime.date` instance. 
+ + Type of the ``date`` argument could be one of the following: + None: + use current date value; + datetime.date: + this value will be returned; + datetime.datetime: + the result of the date.date() will be returned; + string: + assuming "%Y%m%d" or "%y%m%dd" format; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``date`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. + + """ + if date is None: + # use current value + return datetime.date.today() + if isinstance(date, datetime.date): + return date + if isinstance(date, datetime.datetime): + return date.date() + if isinstance(date, (int, long, float)): + # date is a timestamp + return datetime.date.fromtimestamp(date) + if isinstance(date, basestring): + date = date.replace(" ", "0") + if len(date) == 6: + # yymmdd + return datetime.date(*time.strptime(date, "%y%m%d")[:3]) + # yyyymmdd + return datetime.date(*time.strptime(date, "%Y%m%d")[:3]) + if hasattr(date, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.date(*date[:3]) + return datetime.date.fromtimestamp(date.ticks()) + + +def getDateTime(value=None): + """Return `datetime.datetime` instance. + + Type of the ``value`` argument could be one of the following: + None: + use current date value; + datetime.date: + result will be converted to the `datetime.datetime` instance + using midnight; + datetime.datetime: + ``value`` will be returned as is; + string: + *** CURRENTLY NOT SUPPORTED ***; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``value`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. 
+ + """ + if value is None: + # use current value + return datetime.datetime.today() + if isinstance(value, datetime.datetime): + return value + if isinstance(value, datetime.date): + return datetime.datetime.fromordinal(value.toordinal()) + if isinstance(value, (int, long, float)): + # value is a timestamp + return datetime.datetime.fromtimestamp(value) + if isinstance(value, basestring): + raise NotImplementedError("Strings aren't currently implemented") + if hasattr(value, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.datetime(*tuple(value)[:6]) + return datetime.datetime.fromtimestamp(value.ticks()) + + +class classproperty(property): + """Works in the same way as a ``property``, but for the classes.""" + + def __get__(self, obj, cls): + return self.fget(cls) + + +class _InvalidValue(object): + + """Value returned from DBF records when field validation fails + + The value is not equal to anything except for itself + and equal to all empty values: None, 0, empty string etc. + In other words, invalid value is equal to None and not equal + to None at the same time. + + This value yields zero upon explicit conversion to a number type, + empty string for string types, and False for boolean. 
+ + """ + + def __eq__(self, other): + return not other + + def __ne__(self, other): + return not (other is self) + + def __nonzero__(self): + return False + + def __int__(self): + return 0 + __long__ = __int__ + + def __float__(self): + return 0.0 + + def __str__(self): + return "" + + def __unicode__(self): + return u"" + + def __repr__(self): + return "" + +# invalid value is a constant singleton +INVALID_VALUE = _InvalidValue() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/__init__.py b/tablib/packages/dbfpy3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tablib/packages/dbfpy3/dbf.py b/tablib/packages/dbfpy3/dbf.py new file mode 100644 index 0000000..42de8a4 --- /dev/null +++ b/tablib/packages/dbfpy3/dbf.py @@ -0,0 +1,293 @@ +#! /usr/bin/env python +"""DBF accessing helpers. + +FIXME: more documentation needed + +Examples: + + Create new table, setup structure, add records: + + dbf = Dbf(filename, new=True) + dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (n, s, i, b) in ( + ("John", "Miller", "YC", (1980, 10, 11)), + ("Andy", "Larkin", "", (1980, 4, 11)), + ): + rec = dbf.newRecord() + rec["NAME"] = n + rec["SURNAME"] = s + rec["INITIALS"] = i + rec["BIRTHDATE"] = b + rec.store() + dbf.close() + + Open existed dbf, read some data: + + dbf = Dbf(filename, True) + for rec in dbf: + for fldName in dbf.fieldNames: + print '%s:\t %s (%s)' % (fldName, rec[fldName], + type(rec[fldName])) + print + dbf.close() + +""" +"""History (most recent first): +11-feb-2007 [als] export INVALID_VALUE; + Dbf: added .ignoreErrors, .INVALID_VALUE +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] removed fromStream and newDbf methods: + use argument of __init__ call must be used instead; + added class fields pointing to the header and + record classes. 
+17-dec-2005 [yc] split to several modules; reimplemented +13-dec-2005 [yc] adapted to the changes of the `strutil` module. +13-sep-2002 [als] support FoxPro Timestamp datatype +15-nov-1999 [jjk] documentation updates, add demo +24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks +08-jun-1998 [jjk] fix problems, add more features +20-feb-1998 [jjk] fix problems, add more features +19-feb-1998 [jjk] add create/write capabilities +18-feb-1998 [jjk] from dbfload.py +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] +__author__ = "Jeff Kunce " + +__all__ = ["Dbf"] + +from . import header +from . import record +from .utils import INVALID_VALUE + +class Dbf(object): + """DBF accessor. + + FIXME: + docs and examples needed (dont' forget to tell + about problems adding new fields on the fly) + + Implementation notes: + ``_new`` field is used to indicate whether this is + a new data table. `addField` could be used only for + the new tables! If at least one record was appended + to the table it's structure couldn't be changed. + + """ + + __slots__ = ("name", "header", "stream", + "_changed", "_new", "_ignore_errors") + + HeaderClass = header.DbfHeader + RecordClass = record.DbfRecord + INVALID_VALUE = INVALID_VALUE + + ## initialization and creation helpers + + def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): + """Initialize instance. + + Arguments: + f: + Filename or file-like object. + new: + True if new data table must be created. Assume + data table exists if this argument is False. + readOnly: + if ``f`` argument is a string file will + be opend in read-only mode; in other cases + this argument is ignored. This argument is ignored + even if ``new`` argument is True. + headerObj: + `header.DbfHeader` instance or None. If this argument + is None, new empty header will be used with the + all fields set by default. 
+ ignoreErrors: + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """ + if isinstance(f, str): + # a filename + self.name = f + if new: + # new table (table file must be + # created or opened and truncated) + self.stream = open(f, "w+b") + else: + # tabe file must exist + self.stream = open(f, ("r+b", "rb")[bool(readOnly)]) + else: + # a stream + self.name = getattr(f, "name", "") + self.stream = f + if new: + # if this is a new table, header will be empty + self.header = self.HeaderClass() + else: + # or instantiated using stream + self.header = self.HeaderClass.fromStream(self.stream) + self.ignoreErrors = ignoreErrors + self._new = bool(new) + self._changed = False + + ## properties + + closed = property(lambda self: self.stream.closed) + recordCount = property(lambda self: self.header.recordCount) + fieldNames = property( + lambda self: [_fld.name for _fld in self.header.fields]) + fieldDefs = property(lambda self: self.header.fields) + changed = property(lambda self: self._changed or self.header.changed) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on the header object and self""" + self.header.ignoreErrors = self._ignore_errors = bool(value) + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## protected methods + + def _fixIndex(self, index): + """Return fixed index. + + This method fails if index isn't a numeric object + (long or int). Or index isn't in a valid range + (less or equal to the number of records in the db). + + If ``index`` is a negative number, it will be + treated as a negative indexes for list objects. + + Return: + Return value is numeric object maning valid index. 
+ + """ + if not isinstance(index, int): + raise TypeError("Index must be a numeric object") + if index < 0: + # index from the right side + # fix it to the left-side index + index += len(self) + 1 + if index >= len(self): + raise IndexError("Record index out of range") + return index + + ## iterface methods + + def close(self): + self.flush() + self.stream.close() + + def flush(self): + """Flush data to the associated stream.""" + if self.changed: + self.header.setCurrentDate() + self.header.write(self.stream) + self.stream.flush() + self._changed = False + + def indexOfFieldName(self, name): + """Index of field named ``name``.""" + # FIXME: move this to header class + names = [f.name for f in self.header.fields] + return names.index(name.upper()) + + def newRecord(self): + """Return new record, which belong to this table.""" + return self.RecordClass(self) + + def append(self, record): + """Append ``record`` to the database.""" + record.index = self.header.recordCount + record._write() + self.header.recordCount += 1 + self._changed = True + self._new = False + + def addField(self, *defs): + """Add field definitions. + + For more information see `header.DbfHeader.addField`. 
+ + """ + if self._new: + self.header.addField(*defs) + else: + raise TypeError("At least one record was added, " + "structure can't be changed") + + ## 'magic' methods (representation and sequence interface) + + def __repr__(self): + return "Dbf stream '%s'\n" % self.stream + repr(self.header) + + def __len__(self): + """Return number of records.""" + return self.recordCount + + def __getitem__(self, index): + """Return `DbfRecord` instance.""" + return self.RecordClass.fromStream(self, self._fixIndex(index)) + + def __setitem__(self, index, record): + """Write `DbfRecord` instance to the stream.""" + record.index = self._fixIndex(index) + record._write() + self._changed = True + self._new = False + + #def __del__(self): + # """Flush stream upon deletion of the object.""" + # self.flush() + + +def demoRead(filename): + _dbf = Dbf(filename, True) + for _rec in _dbf: + print() + print(repr(_rec)) + _dbf.close() + +def demoCreate(filename): + _dbf = Dbf(filename, new=True) + _dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (_n, _s, _i, _b) in ( + ("John", "Miller", "YC", (1981, 1, 2)), + ("Andy", "Larkin", "AL", (1982, 3, 4)), + ("Bill", "Clinth", "", (1983, 5, 6)), + ("Bobb", "McNail", "", (1984, 7, 8)), + ): + _rec = _dbf.newRecord() + _rec["NAME"] = _n + _rec["SURNAME"] = _s + _rec["INITIALS"] = _i + _rec["BIRTHDATE"] = _b + _rec.store() + print(repr(_dbf)) + _dbf.close() + +if (__name__=='__main__'): + import sys + _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" + demoCreate(_name) + demoRead(_name) + +# vim: set et sw=4 sts=4 : diff --git a/tablib/packages/dbfpy3/dbfnew.py b/tablib/packages/dbfpy3/dbfnew.py new file mode 100644 index 0000000..4051bc6 --- /dev/null +++ b/tablib/packages/dbfpy3/dbfnew.py @@ -0,0 +1,182 @@ +#!/usr/bin/python +""".DBF creation helpers. + +Note: this is a legacy interface. 
New code should use Dbf class + for table creation (see examples in dbf.py) + +TODO: + - handle Memo fields. + - check length of the fields accoring to the + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + +""" +"""History (most recent first) +04-jul-2006 [als] added export declaration; + updated for dbfpy 2.0 +15-dec-2005 [yc] define dbf_new.__slots__ +14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; + dbf_new now is a new class (inherited from object) +??-jun-2000 [--] added by Hans Fiby +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] + +__all__ = ["dbf_new"] + +from .dbf import * +from .fields import * +from .header import * +from .record import * + +class _FieldDefinition(object): + """Field definition. + + This is a simple structure, which contains ``name``, ``type``, + ``len``, ``dec`` and ``cls`` fields. + + Objects also implement get/setitem magic functions, so fields + could be accessed via sequence iterface, where 'name' has + index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and + 'cls' could be located at index 4. + + """ + + __slots__ = "name", "type", "len", "dec", "cls" + + # WARNING: be attentive - dictionaries are mutable! + FLD_TYPES = { + # type: (cls, len) + "C": (DbfCharacterFieldDef, None), + "N": (DbfNumericFieldDef, None), + "L": (DbfLogicalFieldDef, 1), + # FIXME: support memos + # "M": (DbfMemoFieldDef), + "D": (DbfDateFieldDef, 8), + # FIXME: I'm not sure length should be 14 characters! 
+ # but temporary I use it, cuz date is 8 characters + # and time 6 (hhmmss) + "T": (DbfDateTimeFieldDef, 14), + } + + def __init__(self, name, type, len=None, dec=0): + _cls, _len = self.FLD_TYPES[type] + if _len is None: + if len is None: + raise ValueError("Field length must be defined") + _len = len + self.name = name + self.type = type + self.len = _len + self.dec = dec + self.cls = _cls + + def getDbfField(self): + "Return `DbfFieldDef` instance from the current definition." + return self.cls(self.name, self.len, self.dec) + + def appendToHeader(self, dbfh): + """Create a `DbfFieldDef` instance and append it to the dbf header. + + Arguments: + dbfh: `DbfHeader` instance. + + """ + _dbff = self.getDbfField() + dbfh.addField(_dbff) + + +class dbf_new(object): + """New .DBF creation helper. + + Example Usage: + + dbfn = dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + + Note: + This module cannot handle Memo-fields, + they are special. + + """ + + __slots__ = ("fields",) + + FieldDefinitionClass = _FieldDefinition + + def __init__(self): + self.fields = [] + + def add_field(self, name, typ, len, dec=0): + """Add field definition. + + Arguments: + name: + field name (str object). field name must not + contain ASCII NULs and it's length shouldn't + exceed 10 characters. + typ: + type of the field. this must be a single character + from the "CNLMDT" set meaning character, numeric, + logical, memo, date and date/time respectively. + len: + length of the field. this argument is used only for + the character and numeric fields. all other fields + have fixed length. + FIXME: use None as a default for this argument? + dec: + decimal precision. used only for the numric fields. 
+ + """ + self.fields.append(self.FieldDefinitionClass(name, typ, len, dec)) + + def write(self, filename): + """Create empty .DBF file using current structure.""" + _dbfh = DbfHeader() + _dbfh.setCurrentDate() + for _fldDef in self.fields: + _fldDef.appendToHeader(_dbfh) + + _dbfStream = open(filename, "wb") + _dbfh.write(_dbfStream) + _dbfStream.close() + + +if (__name__=='__main__'): + # create a new DBF-File + dbfn=dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + # test new dbf + print("*** created tst.dbf: ***") + dbft = Dbf('tst.dbf', readOnly=0) + print(repr(dbft)) + # add a record + rec=DbfRecord(dbft) + rec['name']='something' + rec['price']=10.5 + rec['date']=(2000,1,12) + rec.store() + # add another record + rec=DbfRecord(dbft) + rec['name']='foo and bar' + rec['price']=12234 + rec['date']=(1992,7,15) + rec.store() + + # show the records + print("*** inserted 2 records into tst.dbf: ***") + print(repr(dbft)) + for i1 in range(len(dbft)): + rec = dbft[i1] + for fldName in dbft.fieldNames: + print('%s:\t %s'%(fldName, rec[fldName])) + print() + dbft.close() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/fields.py b/tablib/packages/dbfpy3/fields.py new file mode 100644 index 0000000..883d035 --- /dev/null +++ b/tablib/packages/dbfpy3/fields.py @@ -0,0 +1,467 @@ +"""DBF fields definitions. 
+ +TODO: + - make memos work +""" +"""History (most recent first): +26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes +05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date +16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point + in the value to select float or integer return type +13-mar-2008 [als] check field name length in constructor +11-feb-2007 [als] handle value conversion errors +10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() +01-dec-2006 [als] Timestamp columns use None for empty values +31-oct-2006 [als] support field types 'F' (float), 'I' (integer) + and 'Y' (currency); + automate export and registration of field classes +04-jul-2006 [als] added export declaration +10-mar-2006 [als] decode empty values for Date and Logical fields; + show field name in errors +10-mar-2006 [als] fix Numeric value decoding: according to spec, + value always is string representation of the number; + ensure that encoded Numeric value fits into the field +20-dec-2005 [yc] use field names in upper case +15-dec-2005 [yc] field definitions moved from `dbf`. +""" + +__version__ = "$Revision: 1.14 $"[11:-2] +__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] + +__all__ = ["lookupFor",] # field classes added at the end of the module + +import datetime +import struct +import sys + +from . import utils + +## abstract definitions + +class DbfFieldDef(object): + """Abstract field definition. + + Child classes must override ``type`` class attribute to provide datatype + infromation of the field definition. For more info about types visit + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + + Also child classes must override ``defaultValue`` field to provide + default value for the field value. + + If child class has fixed length ``length`` class attribute must be + overriden and set to the valid value. None value means, that field + isn't of fixed length. 
+ + Note: ``name`` field must not be changed after instantiation. + + """ + + + __slots__ = ("name", "decimalCount", + "start", "end", "ignoreErrors") + + # length of the field, None in case of variable-length field, + # or a number if this field is a fixed-length field + length = None + + # field type. for more information about fields types visit + # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + # must be overriden in child classes + typeCode = None + + # default value for the field. this field must be + # overriden in child classes + defaultValue = None + + def __init__(self, name, length=None, decimalCount=None, + start=None, stop=None, ignoreErrors=False, + ): + """Initialize instance.""" + assert self.typeCode is not None, "Type code must be overriden" + assert self.defaultValue is not None, "Default value must be overriden" + ## fix arguments + if len(name) >10: + raise ValueError("Field name \"%s\" is too long" % name) + name = str(name).upper() + if self.__class__.length is None: + if length is None: + raise ValueError("[%s] Length isn't specified" % name) + length = int(length) + if length <= 0: + raise ValueError("[%s] Length must be a positive integer" + % name) + else: + length = self.length + if decimalCount is None: + decimalCount = 0 + ## set fields + self.name = name + # FIXME: validate length according to the specification at + # http://www.clicketyclick.dk/databases/xbase/format/data_types.html + self.length = length + self.decimalCount = decimalCount + self.ignoreErrors = ignoreErrors + self.start = start + self.end = stop + + def __cmp__(self, other): + return cmp(self.name, str(other).upper()) + + def __hash__(self): + return hash(self.name) + + def fromString(cls, string, start, ignoreErrors=False): + """Decode dbf field definition from the string data. + + Arguments: + string: + a string, dbf definition is decoded from. length of + the string must be 32 bytes. + start: + position in the database file. 
+ ignoreErrors: + initial error processing mode for the new field (boolean) + + """ + assert len(string) == 32 + _length = string[16] + return cls(utils.unzfill(string)[:11].decode('utf-8'), _length, + string[17], start, start + _length, ignoreErrors=ignoreErrors) + fromString = classmethod(fromString) + + def toString(self): + """Return encoded field definition. + + Return: + Return value is a string object containing encoded + definition of this field. + + """ + if sys.version_info < (2, 4): + # earlier versions did not support padding character + _name = self.name[:11] + "\0" * (11 - len(self.name)) + else: + _name = self.name.ljust(11, '\0') + return ( + _name + + self.typeCode + + #data address + chr(0) * 4 + + chr(self.length) + + chr(self.decimalCount) + + chr(0) * 14 + ) + + def __repr__(self): + return "%-10s %1s %3d %3d" % self.fieldInfo() + + def fieldInfo(self): + """Return field information. + + Return: + Return value is a (name, type, length, decimals) tuple. + + """ + return (self.name, self.typeCode, self.length, self.decimalCount) + + def rawFromRecord(self, record): + """Return a "raw" field value from the record string.""" + return record[self.start:self.end] + + def decodeFromRecord(self, record): + """Return decoded field value from the record string.""" + try: + return self.decodeValue(self.rawFromRecord(record)) + except: + if self.ignoreErrors: + return utils.INVALID_VALUE + else: + raise + + def decodeValue(self, value): + """Return decoded value from string value. + + This method shouldn't be used publicly. It's called from the + `decodeFromRecord` method. + + This is an abstract method and it must be overridden in child classes. + """ + raise NotImplementedError + + def encodeValue(self, value): + """Return str object containing encoded field value. + + This is an abstract method and it must be overriden in child classes. 
+ """ + raise NotImplementedError + +## real classes + +class DbfCharacterFieldDef(DbfFieldDef): + """Definition of the character field.""" + + typeCode = "C" + defaultValue = b'' + + def decodeValue(self, value): + """Return string object. + + Return value is a ``value`` argument with stripped right spaces. + + """ + return value.rstrip(b' ').decode('utf-8') + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``.""" + return str(value)[:self.length].ljust(self.length) + + +class DbfNumericFieldDef(DbfFieldDef): + """Definition of the numeric field.""" + + typeCode = "N" + # XXX: now I'm not sure it was a good idea to make a class field + # `defaultValue` instead of a generic method as it was implemented + # previously -- it's ok with all types except number, cuz + # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. + defaultValue = 0 + + def decodeValue(self, value): + """Return a number decoded from ``value``. + + If decimals is zero, value will be decoded as an integer; + or as a float otherwise. + + Return: + Return value is a int (long) or float instance. + + """ + value = value.strip(b' \0') + if b'.' 
in value: + # a float (has decimal separator) + return float(value) + elif value: + # must be an integer + return int(value) + else: + return 0 + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + _rv = ("%*.*f" % (self.length, self.decimalCount, value)) + if len(_rv) > self.length: + _ppos = _rv.find(".") + if 0 <= _ppos <= self.length: + _rv = _rv[:self.length] + else: + raise ValueError("[%s] Numeric overflow: %s (field width: %i)" + % (self.name, _rv, self.length)) + return _rv + +class DbfFloatFieldDef(DbfNumericFieldDef): + """Definition of the float field - same as numeric.""" + + typeCode = "F" + +class DbfIntegerFieldDef(DbfFieldDef): + """Definition of the integer field.""" + + typeCode = "I" + length = 4 + defaultValue = 0 + + def decodeValue(self, value): + """Return an integer number decoded from ``value``.""" + return struct.unpack("= 1: + _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) + _rv += datetime.timedelta(0, _msecs / 1000.0) + else: + # empty date + _rv = None + return _rv + + def encodeValue(self, value): + """Return a string-encoded ``value``.""" + if value: + value = utils.getDateTime(value) + # LE byteorder + _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, + (value.hour * 3600 + value.minute * 60 + value.second) * 1000) + else: + _rv = "\0" * self.length + assert len(_rv) == self.length + return _rv + + +_fieldsRegistry = {} + +def registerField(fieldCls): + """Register field definition class. + + ``fieldCls`` should be subclass of the `DbfFieldDef`. + + Use `lookupFor` to retrieve field definition class + by the type code. + + """ + assert fieldCls.typeCode is not None, "Type code isn't defined" + # XXX: use fieldCls.typeCode.upper()? in case of any decign + # don't forget to look to the same comment in ``lookupFor`` method + _fieldsRegistry[fieldCls.typeCode] = fieldCls + + +def lookupFor(typeCode): + """Return field definition class for the given type code. 
+ + ``typeCode`` must be a single character. That type should be + previously registered. + + Use `registerField` to register new field class. + + Return: + Return value is a subclass of the `DbfFieldDef`. + + """ + # XXX: use typeCode.upper()? in case of any decign don't + # forget to look to the same comment in ``registerField`` + return _fieldsRegistry[chr(typeCode)] + +## register generic types + +for (_name, _val) in list(globals().items()): + if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ + and (_name != "DbfFieldDef"): + __all__.append(_name) + registerField(_val) +del _name, _val + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/header.py b/tablib/packages/dbfpy3/header.py new file mode 100644 index 0000000..6c0dc4f --- /dev/null +++ b/tablib/packages/dbfpy3/header.py @@ -0,0 +1,273 @@ +"""DBF header definition. + +TODO: + - handle encoding of the character fields + (encoding information stored in the DBF header) + +""" +"""History (most recent first): +16-sep-2010 [als] fromStream: fix century of the last update field +11-feb-2007 [als] added .ignoreErrors +10-feb-2007 [als] added __getitem__: return field definitions + by field name or field number (zero-based) +04-jul-2006 [als] added export declaration +15-dec-2005 [yc] created +""" + +__version__ = "$Revision: 1.6 $"[11:-2] +__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] + +__all__ = ["DbfHeader"] + +import io +import datetime +import struct +import time +import sys + +from . import fields +from .utils import getDate + + +class DbfHeader(object): + """Dbf header definition. 
+ + For more information about dbf header format visit + `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` + + Examples: + Create an empty dbf header and add some field definitions: + dbfh = DbfHeader() + dbfh.addField(("name", "C", 10)) + dbfh.addField(("date", "D")) + dbfh.addField(DbfNumericFieldDef("price", 5, 2)) + Create a dbf header with field definitions: + dbfh = DbfHeader([ + ("name", "C", 10), + ("date", "D"), + DbfNumericFieldDef("price", 5, 2), + ]) + + """ + + __slots__ = ("signature", "fields", "lastUpdate", "recordLength", + "recordCount", "headerLength", "changed", "_ignore_errors") + + ## instance construction and initialization methods + + def __init__(self, fields=None, headerLength=0, recordLength=0, + recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False, + ): + """Initialize instance. + + Arguments: + fields: + a list of field definitions; + recordLength: + size of the records; + headerLength: + size of the header; + recordCount: + number of records stored in DBF; + signature: + version number (aka signature). using 0x03 as a default meaning + "File without DBT". for more information about this field visit + ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` + lastUpdate: + date of the DBF's update. this could be a string ('yymmdd' or + 'yyyymmdd'), timestamp (int or float), datetime/date value, + a sequence (assuming (yyyy, mm, dd, ...)) or an object having + callable ``ticks`` field. 
+ ignoreErrors: + error processing mode for DBF fields (boolean) + + """ + self.signature = signature + if fields is None: + self.fields = [] + else: + self.fields = list(fields) + self.lastUpdate = getDate(lastUpdate) + self.recordLength = recordLength + self.headerLength = headerLength + self.recordCount = recordCount + self.ignoreErrors = ignoreErrors + # XXX: I'm not sure this is safe to + # initialize `self.changed` in this way + self.changed = bool(self.fields) + + # @classmethod + def fromString(cls, string): + """Return header instance from the string object.""" + return cls.fromStream(io.StringIO(str(string))) + fromString = classmethod(fromString) + + # @classmethod + def fromStream(cls, stream): + """Return header object from the stream.""" + stream.seek(0) + first_32 = stream.read(32) + if type(first_32) != bytes: + _data = bytes(first_32, sys.getfilesystemencoding()) + _data = first_32 + (_cnt, _hdrLen, _recLen) = struct.unpack(" DbfRecord._write(); + added delete() method. +16-dec-2005 [yc] record definition moved from `dbf`. +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2] + +__all__ = ["DbfRecord"] + +import sys + +from . import utils + +class DbfRecord(object): + """DBF record. + + Instances of this class shouldn't be created manualy, + use `dbf.Dbf.newRecord` instead. + + Class implements mapping/sequence interface, so + fields could be accessed via their names or indexes + (names is a preffered way to access fields). + + Hint: + Use `store` method to save modified record. 
+ + Examples: + Add new record to the database: + db = Dbf(filename) + rec = db.newRecord() + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + Or the same, but modify existed + (second in this case) record: + db = Dbf(filename) + rec = db[2] + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + + """ + + __slots__ = "dbf", "index", "deleted", "fieldData" + + ## creation and initialization + + def __init__(self, dbf, index=None, deleted=False, data=None): + """Instance initialiation. + + Arguments: + dbf: + A `Dbf.Dbf` instance this record belonogs to. + index: + An integer record index or None. If this value is + None, record will be appended to the DBF. + deleted: + Boolean flag indicating whether this record + is a deleted record. + data: + A sequence or None. This is a data of the fields. + If this argument is None, default values will be used. + + """ + self.dbf = dbf + # XXX: I'm not sure ``index`` is necessary + self.index = index + self.deleted = deleted + if data is None: + self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] + else: + self.fieldData = list(data) + + # XXX: validate self.index before calculating position? + position = property(lambda self: self.dbf.header.headerLength + \ + self.index * self.dbf.header.recordLength) + + def rawFromStream(cls, dbf, index): + """Return raw record contents read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance containing the record. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is a string containing record data in DBF format. + + """ + # XXX: may be write smth assuming, that current stream + # position is the required one? 
it could save some + # time required to calculate where to seek in the file + dbf.stream.seek(dbf.header.headerLength + + index * dbf.header.recordLength) + return dbf.stream.read(dbf.header.recordLength) + rawFromStream = classmethod(rawFromStream) + + def fromStream(cls, dbf, index): + """Return a record read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is an instance of the current class. + + """ + return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) + fromStream = classmethod(fromStream) + + def fromString(cls, dbf, string, index=None): + """Return record read from the string object. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + string: + A string new record should be created from. + index: + Index of the record in the container. If this + argument is None, record will be appended. + + Return value is an instance of the current class. + + """ + return cls(dbf, index, string[0]=="*", + [_fd.decodeFromRecord(string) for _fd in dbf.header.fields]) + fromString = classmethod(fromString) + + ## object representation + + def __repr__(self): + _template = "%%%ds: %%s (%%s)" % max([len(_fld) + for _fld in self.dbf.fieldNames]) + _rv = [] + for _fld in self.dbf.fieldNames: + _val = self[_fld] + if _val is utils.INVALID_VALUE: + _rv.append(_template % + (_fld, "None", "value cannot be decoded")) + else: + _rv.append(_template % (_fld, _val, type(_val))) + return "\n".join(_rv) + + ## protected methods + + def _write(self): + """Write data to the dbf stream. + + Note: + This isn't a public method, it's better to + use 'store' instead publically. + Be design ``_write`` method should be called + only from the `Dbf` instance. 
+ + + """ + self._validateIndex(False) + self.dbf.stream.seek(self.position) + self.dbf.stream.write(bytes(self.toString(), + sys.getfilesystemencoding())) + # FIXME: may be move this write somewhere else? + # why we should check this condition for each record? + if self.index == len(self.dbf): + # this is the last record, + # we should write SUB (ASCII 26) + self.dbf.stream.write(b"\x1A") + + ## utility methods + + def _validateIndex(self, allowUndefined=True, checkRange=False): + """Valid ``self.index`` value. + + If ``allowUndefined`` argument is True functions does nothing + in case of ``self.index`` pointing to None object. + + """ + if self.index is None: + if not allowUndefined: + raise ValueError("Index is undefined") + elif self.index < 0: + raise ValueError("Index can't be negative (%s)" % self.index) + elif checkRange and self.index <= self.dbf.header.recordCount: + raise ValueError("There are only %d records in the DBF" % + self.dbf.header.recordCount) + + ## interface methods + + def store(self): + """Store current record in the DBF. + + If ``self.index`` is None, this record will be appended to the + records of the DBF this records belongs to; or replaced otherwise. + + """ + self._validateIndex() + if self.index is None: + self.index = len(self.dbf) + self.dbf.append(self) + else: + self.dbf[self.index] = self + + def delete(self): + """Mark method as deleted.""" + self.deleted = True + + def toString(self): + """Return string packed record values.""" +# for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData): +# + + return "".join([" *"[self.deleted]] + [ + _def.encodeValue(_dat) + for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData) + ]) + + def asList(self): + """Return a flat list of fields. + + Note: + Change of the list's values won't change + real values stored in this object. + + """ + return self.fieldData[:] + + def asDict(self): + """Return a dictionary of fields. 
+ + Note: + Change of the dicts's values won't change + real values stored in this object. + + """ + return dict([_i for _i in zip(self.dbf.fieldNames, self.fieldData)]) + + def __getitem__(self, key): + """Return value by field name or field index.""" + if isinstance(key, int): + # integer index of the field + return self.fieldData[key] + # assuming string field name + return self.fieldData[self.dbf.indexOfFieldName(key)] + + def __setitem__(self, key, value): + """Set field value by integer index of the field or string name.""" + if isinstance(key, int): + # integer index of the field + return self.fieldData[key] + # assuming string field name + self.fieldData[self.dbf.indexOfFieldName(key)] = value + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/utils.py b/tablib/packages/dbfpy3/utils.py new file mode 100644 index 0000000..856ade8 --- /dev/null +++ b/tablib/packages/dbfpy3/utils.py @@ -0,0 +1,170 @@ +"""String utilities. + +TODO: + - allow strings in getDateTime routine; +""" +"""History (most recent first): +11-feb-2007 [als] added INVALID_VALUE +10-feb-2007 [als] allow date strings padded with spaces instead of zeroes +20-dec-2005 [yc] handle long objects in getDate/getDateTime +16-dec-2005 [yc] created from ``strutil`` module. +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2] + +import datetime +import time + + +def unzfill(str): + """Return a string without ASCII NULs. + + This function searchers for the first NUL (ASCII 0) occurance + and truncates string till that position. + + """ + try: + return str[:str.index(b'\0')] + except ValueError: + return str + + +def getDate(date=None): + """Return `datetime.date` instance. 
+ + Type of the ``date`` argument could be one of the following: + None: + use current date value; + datetime.date: + this value will be returned; + datetime.datetime: + the result of the date.date() will be returned; + string: + assuming "%Y%m%d" or "%y%m%dd" format; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``date`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. + + """ + if date is None: + # use current value + return datetime.date.today() + if isinstance(date, datetime.date): + return date + if isinstance(date, datetime.datetime): + return date.date() + if isinstance(date, (int, float)): + # date is a timestamp + return datetime.date.fromtimestamp(date) + if isinstance(date, str): + date = date.replace(" ", "0") + if len(date) == 6: + # yymmdd + return datetime.date(*time.strptime(date, "%y%m%d")[:3]) + # yyyymmdd + return datetime.date(*time.strptime(date, "%Y%m%d")[:3]) + if hasattr(date, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.date(*date[:3]) + return datetime.date.fromtimestamp(date.ticks()) + + +def getDateTime(value=None): + """Return `datetime.datetime` instance. + + Type of the ``value`` argument could be one of the following: + None: + use current date value; + datetime.date: + result will be converted to the `datetime.datetime` instance + using midnight; + datetime.datetime: + ``value`` will be returned as is; + string: + *** CURRENTLY NOT SUPPORTED ***; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``value`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. 
+ + """ + if value is None: + # use current value + return datetime.datetime.today() + if isinstance(value, datetime.datetime): + return value + if isinstance(value, datetime.date): + return datetime.datetime.fromordinal(value.toordinal()) + if isinstance(value, (int, float)): + # value is a timestamp + return datetime.datetime.fromtimestamp(value) + if isinstance(value, str): + raise NotImplementedError("Strings aren't currently implemented") + if hasattr(value, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.datetime(*tuple(value)[:6]) + return datetime.datetime.fromtimestamp(value.ticks()) + + +class classproperty(property): + """Works in the same way as a ``property``, but for the classes.""" + + def __get__(self, obj, cls): + return self.fget(cls) + + +class _InvalidValue(object): + + """Value returned from DBF records when field validation fails + + The value is not equal to anything except for itself + and equal to all empty values: None, 0, empty string etc. + In other words, invalid value is equal to None and not equal + to None at the same time. + + This value yields zero upon explicit conversion to a number type, + empty string for string types, and False for boolean. 
+ + """ + + def __eq__(self, other): + return not other + + def __ne__(self, other): + return not (other is self) + + def __bool__(self): + return False + + def __int__(self): + return 0 + __long__ = __int__ + + def __float__(self): + return 0.0 + + def __str__(self): + return "" + + def __unicode__(self): + return "" + + def __repr__(self): + return "" + +# invalid value is a constant singleton +INVALID_VALUE = _InvalidValue() + +# vim: set et sts=4 sw=4 : diff --git a/test_tablib.py b/test_tablib.py index 78d1045..ba57170 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -450,6 +450,108 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_tsv, data.tsv) + def test_dbf_import_set(self): + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _dbf = data.dbf + data.dbf = _dbf + + #self.assertEqual(_dbf, data.dbf) + try: + self.assertEqual(_dbf, data.dbf) + except AssertionError: + index = 0 + so_far = '' + for reg_char, data_char in zip(_dbf, data.dbf): + so_far += chr(data_char) + if reg_char != data_char and index not in [1, 2, 3]: + raise AssertionError('Failing at char %s: %s vs %s %s' % ( + index, reg_char, data_char, so_far)) + index += 1 + + def test_dbf_export_set(self): + """Test DBF import.""" + data.append(self.john) + data.append(self.george) + data.append(self.tom) + data.headers = self.headers + + _regression_dbf = (b'\x03r\x06\x06\x03\x00\x00\x00\x81\x00\xab\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00FIRST_NAME\x00C\x00\x00\x00\x00P\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LAST_NAME\x00\x00C\x00' + b'\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00GPA\x00\x00\x00\x00\x00\x00\x00\x00N\x00\x00\x00\x00\n' + b'\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\r' + ) + _regression_dbf += b' John' + (b' ' * 75) + _regression_dbf += b' Adams' + (b' ' * 74) + _regression_dbf += b' 90.0000000' + 
_regression_dbf += b' George' + (b' ' * 73) + _regression_dbf += b' Washington' + (b' ' * 69) + _regression_dbf += b' 67.0000000' + _regression_dbf += b' Thomas' + (b' ' * 73) + _regression_dbf += b' Jefferson' + (b' ' * 70) + _regression_dbf += b' 50.0000000' + _regression_dbf += b'\x1a' + + if is_py3: + # If in python3, decode regression string to binary. + #_regression_dbf = bytes(_regression_dbf, 'utf-8') + #_regression_dbf = _regression_dbf.replace(b'\n', b'\r') + pass + + try: + self.assertEqual(_regression_dbf, data.dbf) + except AssertionError: + index = 0 + found_so_far = '' + for reg_char, data_char in zip(_regression_dbf, data.dbf): + #found_so_far += chr(data_char) + if reg_char != data_char and index not in [1, 2, 3]: + raise AssertionError( + 'Failing at char %s: %s vs %s (found %s)' % ( + index, reg_char, data_char, found_so_far)) + index += 1 + + def test_dbf_format_detect(self): + """Test the DBF format detection.""" + _dbf = (b'\x03r\x06\x03\x03\x00\x00\x00\x81\x00\xab\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00\x00FIRST_NAME\x00C\x00\x00\x00\x00P\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LAST_NAME\x00\x00C\x00' + b'\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00\x00GPA\x00\x00\x00\x00\x00\x00\x00\x00N\x00\x00\x00\x00\n' + b'\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\r' + ) + _dbf += b' John' + (b' ' * 75) + _dbf += b' Adams' + (b' ' * 74) + _dbf += b' 90.0000000' + _dbf += b' George' + (b' ' * 73) + _dbf += b' Washington' + (b' ' * 69) + _dbf += b' 67.0000000' + _dbf += b' Thomas' + (b' ' * 73) + _dbf += b' Jefferson' + (b' ' * 70) + _dbf += b' 50.0000000' + _dbf += b'\x1a' + + _yaml = '- {age: 90, first_name: John, last_name: Adams}' + _tsv = 'foo\tbar' + _csv = '1,2,3\n4,5,6\n7,8,9\n' + _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + + _bunk = ( + 
'¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' + ) + self.assertTrue(tablib.formats.dbf.detect(_dbf)) + self.assertFalse(tablib.formats.dbf.detect(_yaml)) + self.assertFalse(tablib.formats.dbf.detect(_tsv)) + self.assertFalse(tablib.formats.dbf.detect(_csv)) + self.assertFalse(tablib.formats.dbf.detect(_json)) + self.assertFalse(tablib.formats.dbf.detect(_bunk)) + def test_csv_format_detect(self): """Test CSV format detection.""" From 20e2ce5ba0382f66eee747f932265087ef7c1ec8 Mon Sep 17 00:00:00 2001 From: rabinnankhwa Date: Sat, 30 Aug 2014 08:26:08 +0545 Subject: [PATCH 09/34] __getslice__ method of Row classcorrected --- tablib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index 02c9085..e2116f8 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -45,7 +45,7 @@ class Row(object): return repr(self._row) def __getslice__(self, i, j): - return self._row[i,j] + return self._row[i:j] def __getitem__(self, i): return self._row[i] From 87892d7266d8808374d2c32becc9df4687431227 Mon Sep 17 00:00:00 2001 From: rabinnankhwa Date: Sat, 30 Aug 2014 08:56:17 +0545 Subject: [PATCH 10/34] used get method of dictionary instead of exception handling --- tablib/core.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index e2116f8..76f4569 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -163,15 +163,9 @@ class Dataset(object): # (column, callback) tuples self._formatters = [] - try: - self.headers = kwargs['headers'] - except KeyError: - self.headers = None + self.headers = kwargs.get('headers') - try: - self.title = kwargs['title'] - except KeyError: - self.title = None + self.title = kwargs.get('title') self._register_formats() From f187cef5f45e8e4e59d15b732c6c8307142bb634 Mon Sep 17 00:00:00 2001 From: rabinnankhwa Date: Sat, 30 Aug 2014 23:52:35 +0545 Subject: [PATCH 11/34] adding support for creating subset of a dataset. 
--- AUTHORS | 1 + tablib/core.py | 36 ++++++++++++++++++++++++++++++++++++ test_tablib.py | 20 ++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/AUTHORS b/AUTHORS index bd0e0c4..309c1f5 100644 --- a/AUTHORS +++ b/AUTHORS @@ -28,3 +28,4 @@ Patches and Suggestions - Marc Abramowitz - Alex Gaynor - James Douglass +- Rabin Nankhwa \ No newline at end of file diff --git a/tablib/core.py b/tablib/core.py index 76f4569..3fc55cb 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -933,6 +933,42 @@ class Dataset(object): self.__headers = None + def subset(self, rows=None, cols=None): + """Returns a new instance of the :class:`Dataset`, + including only specified rows and columns. + """ + + # Don't return if no data + if not self: + return + + if rows is None: + rows = list(range(self.height)) + + if cols is None: + cols = list(self.headers) + + _dset = Dataset() + + #filtering rows and columns + _dset.headers = list(cols) + + _dset._data = [] + for row_no, row in enumerate(self._data): + data_row = [] + for key in _dset.headers: + if key in self.headers: + pos = self.headers.index(key) + data_row.append(row[pos]) + else: + raise KeyError + + if row_no in rows: + _dset.append(row=Row(data_row)) + + return _dset + + class Databook(object): """A book of :class:`Dataset` objects. 
diff --git a/test_tablib.py b/test_tablib.py index ba57170..54049b1 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -713,6 +713,26 @@ class TablibTestCase(unittest.TestCase): self.assertTrue(data[0] == new_row) + def test_subset(self): + """Create a subset of a dataset""" + + rows = (0, 2) + columns = ('first_name','gpa') + + data.headers = self.headers + + data.append(self.john) + data.append(self.george) + data.append(self.tom) + + #Verify data is truncated + subset = data.subset(rows=rows, cols=columns) + self.assertEqual(type(subset), tablib.Dataset) + self.assertEqual(subset.headers, list(columns)) + self.assertEqual(subset._data[0].list, ['John', 90]) + self.assertEqual(subset._data[1].list, ['Thomas', 50]) + + def test_formatters(self): """Confirm formatters are being triggered.""" From 5fbdd56fba67b259a2bee7979b20fbbd9a31dc63 Mon Sep 17 00:00:00 2001 From: rabinnankhwa Date: Sun, 31 Aug 2014 00:12:44 +0545 Subject: [PATCH 12/34] filter row and column values --- tablib/core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tablib/core.py b/tablib/core.py index 3fc55cb..48758bd 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -948,6 +948,10 @@ class Dataset(object): if cols is None: cols = list(self.headers) + #filter out impossible rows and columns + rows = [row for row in rows if row in range(self.height)] + cols = [header for header in cols if header in self.headers] + _dset = Dataset() #filtering rows and columns From 82ae3ca5073210a2ca280df97729ee78c595a65d Mon Sep 17 00:00:00 2001 From: James Douglass Date: Fri, 5 Sep 2014 14:56:33 -0700 Subject: [PATCH 13/34] Cleaning up DBF documentation Fixing indentation issues (off by one space), which caused problems with the sphinx rendering of the DBF docstring and otherwise cleaning up the sphinx docstring. 
--- tablib/core.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 02c9085..b275d8a 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -563,7 +563,8 @@ class Dataset(object): def dbf(): """A dBASE representation of the :class:`Dataset` object. - A dataset object can also be imported by setting the :class:`Dataset.dbf` attribute::: + A dataset object can also be imported by setting the + :class:`Dataset.dbf` attribute. :: # To import data from an existing DBF file: data = tablib.Dataset() @@ -571,11 +572,11 @@ class Dataset(object): # to import data from an ASCII-encoded bytestring: data = tablib.Dataset() - data.dbf = + data.dbf = '' - .. admonition:: Binary Warning + .. admonition:: Binary Warning - :class:`Dataset.dbf` contains binary data, so make sure to write in binary mode:: + :class:`Dataset.dbf` contains binary data, so make sure to write in binary mode:: with open('output.dbf', 'wb') as f: f.write(data.dbf) From e4cb3bcd9bb150b049905bac3d35f22db60c7682 Mon Sep 17 00:00:00 2001 From: "Thibault J." Date: Tue, 23 Sep 2014 11:46:05 +0200 Subject: [PATCH 14/34] Minor typo correction Requests -> Tablib --- docs/install.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/install.rst b/docs/install.rst index 9df8ee8..6bb9ad6 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -29,7 +29,7 @@ But, you really `shouldn't do that `_:: +If the Cheeseshop is down, you can also install Tablib from Kenneth Reitz's personal `Cheeseshop mirror `_:: $ pip install -i http://pip.kreitz.co/simple tablib @@ -89,4 +89,4 @@ When a new version is available, upgrading is simple:: $ pip install tablib --upgrade -Now, go get a :ref:`Quick Start `. \ No newline at end of file +Now, go get a :ref:`Quick Start `. 
From 028be03c2ce66174cddc5ec4bf92445bb8801316 Mon Sep 17 00:00:00 2001 From: Alex Marandon Date: Fri, 3 Oct 2014 09:17:38 +0200 Subject: [PATCH 15/34] Fix JSON import example The example was triggering this error: JSONError: Expecting property name: line 1 column 3 (char 2) This is because JSON property names should be wrapped in double quotes. While at it, I've fixed the typo in "last_name" --- tablib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index b275d8a..f8e2df7 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -544,7 +544,7 @@ class Dataset(object): A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: data = tablib.Dataset() - data.json = '[{age: 90, first_name: "John", liast_name: "Adams"}]' + data.json = '[{"age": 90, "first_name": "John", "last_name": "Adams"}]' Import assumes (for now) that headers exist. """ From e350f9428b689e412245c813dfe53b3cde24cbfe Mon Sep 17 00:00:00 2001 From: Ramana Varanasi Date: Mon, 10 Nov 2014 16:03:10 +0530 Subject: [PATCH 16/34] Fix import errors when installed from source --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 8409b3c..e3522fa 100755 --- a/setup.py +++ b/setup.py @@ -37,6 +37,8 @@ if sys.argv[-1] == 'test': sys.exit(bool(errors)) packages = [ + 'tablib', 'tablib.formats', + 'tablib.packages', 'tablib.packages.omnijson', 'tablib.packages.unicodecsv' ] From dca7bc9a7d637cfa6f0f00eaebe8bd0cb4e2d7cd Mon Sep 17 00:00:00 2001 From: Kevin Cherepski Date: Wed, 4 Feb 2015 11:53:14 -0500 Subject: [PATCH 17/34] Adding ability to unique all rows in a dataset. 
--- tablib/core.py | 7 +++++++ test_tablib.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/tablib/core.py b/tablib/core.py index f8e2df7..069646c 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -934,6 +934,13 @@ class Dataset(object): return _dset + def unique(self): + """Removes all duplicate rows from the :class:`Dataset` object + while maintaining the original order.""" + seen = set() + self._data[:] = [row for row in self._data if not (tuple(row) in seen or seen.add(tuple(row)))] + + def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() diff --git a/test_tablib.py b/test_tablib.py index ba57170..ad182ab 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -696,6 +696,25 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(third_row, expected_third) + def test_unique(self): + """Unique Rows.""" + + self.founders.append(self.john) + self.founders.append(self.george) + self.founders.append(self.tom) + self.assertEqual(self.founders[0], self.founders[3]) + self.assertEqual(self.founders[1], self.founders[4]) + self.assertEqual(self.founders[2], self.founders[5]) + self.assertEqual(self.founders.height, 6) + + self.founders.unique() + + self.assertEqual(self.founders[0], self.john) + self.assertEqual(self.founders[1], self.george) + self.assertEqual(self.founders[2], self.tom) + self.assertEqual(self.founders.height, 3) + + def test_wipe(self): """Purge a dataset.""" From 541fba67866833e43b741c50bcfccec914c36834 Mon Sep 17 00:00:00 2001 From: Thomas Roten Date: Sat, 28 Mar 2015 16:14:27 -0400 Subject: [PATCH 18/34] Fixes Row slicing. Fixes #184. 
--- tablib/core.py | 2 +- test_tablib.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index f8e2df7..7afdd71 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -45,7 +45,7 @@ class Row(object): return repr(self._row) def __getslice__(self, i, j): - return self._row[i,j] + return self._row[i:j] def __getitem__(self, i): return self._row[i] diff --git a/test_tablib.py b/test_tablib.py index ba57170..a1f581c 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -8,6 +8,7 @@ import sys import os import tablib from tablib.compat import markup, unicode, is_py3 +from tablib.core import Row @@ -206,6 +207,18 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(self.founders[2:], [self.tom]) + def test_row_slicing(self): + """Verify Row's __getslice__ method. Issue #184.""" + + john = Row(self.john) + + self.assertEqual(john[:], list(self.john[:])) + self.assertEqual(john[0:], list(self.john[0:])) + self.assertEqual(john[:2], list(self.john[:2])) + self.assertEqual(john[0:2], list(self.john[0:2])) + self.assertEqual(john[0:-1], list(self.john[0:-1])) + + def test_delete(self): """Verify deleting from dataset works.""" From b3485ec942d77ec38775a9dbfcae19bbebf55313 Mon Sep 17 00:00:00 2001 From: Thomas Anthony Date: Tue, 26 May 2015 20:06:42 -0700 Subject: [PATCH 19/34] Fixed a compatibility bug for Python 3 by adding xrange to compat.py. The code in tablib/formats/_xls.py used xrange in parsing excel spreadsheets. xrange is not a builtin for Python 3, so I've added xrange = range in compat.py and imported it in tablib/formats/_xls.py. 
--- AUTHORS | 1 + tablib/compat.py | 2 ++ tablib/formats/_xls.py | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index bd0e0c4..c23d834 100644 --- a/AUTHORS +++ b/AUTHORS @@ -28,3 +28,4 @@ Patches and Suggestions - Marc Abramowitz - Alex Gaynor - James Douglass +- Tommy Anthony diff --git a/tablib/compat.py b/tablib/compat.py index d4582d5..e03526d 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -37,6 +37,7 @@ if is_py3: unicode = str bytes = bytes basestring = str + xrange = range else: from cStringIO import StringIO as BytesIO @@ -53,3 +54,4 @@ else: import tablib.packages.dbfpy as dbfpy unicode = unicode + xrange = xrange diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 67b87ea..787907a 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -5,7 +5,7 @@ import sys -from tablib.compat import BytesIO, xlwt, xlrd, XLRDError +from tablib.compat import BytesIO, xlwt, xlrd, XLRDError, xrange import tablib title = 'xls' From 79dc4524a05e7a89b0e65e9d5b78fa6a56652848 Mon Sep 17 00:00:00 2001 From: Mathias Loesch Date: Wed, 27 Aug 2014 21:08:48 +0200 Subject: [PATCH 20/34] Added LaTeX table export format --- AUTHORS | 1 + tablib/core.py | 10 +++ tablib/formats/__init__.py | 3 +- tablib/formats/_latex.py | 134 +++++++++++++++++++++++++++++++++++++ test_tablib.py | 62 +++++++++++++++++ 5 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 tablib/formats/_latex.py diff --git a/AUTHORS b/AUTHORS index c23d834..bf2b64f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -29,3 +29,4 @@ Patches and Suggestions - Alex Gaynor - James Douglass - Tommy Anthony +- Mathias Loesch diff --git a/tablib/core.py b/tablib/core.py index 7afdd71..314ea1d 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -584,6 +584,16 @@ class Dataset(object): pass + @property + def latex(): + """A LaTeX booktabs representation of the :class:`Dataset` object. If a + title has been set, it will be exported as the table caption. 
+ + .. note:: This method can be used for export only. + """ + pass + + # ---- # Rows # ---- diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 1eda107..5cca19f 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -12,5 +12,6 @@ from . import _html as html from . import _xlsx as xlsx from . import _ods as ods from . import _dbf as dbf +from . import _latex as latex -available = (json, xls, yaml, csv, dbf, tsv, html, xlsx, ods) +available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods) diff --git a/tablib/formats/_latex.py b/tablib/formats/_latex.py new file mode 100644 index 0000000..44ee101 --- /dev/null +++ b/tablib/formats/_latex.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- + +"""Tablib - LaTeX table export support. + + Generates a LaTeX booktabs-style table from the dataset. +""" +import re + +from tablib.compat import unicode + +title = 'latex' +extensions = ('tex',) + +TABLE_TEMPLATE = """\ +%% Note: add \\usepackage{booktabs} to your preamble +%% +\\begin{table}[!htbp] + \\centering + %(CAPTION)s + \\begin{tabular}{%(COLSPEC)s} + \\toprule +%(HEADER)s + %(MIDRULE)s +%(BODY)s + \\bottomrule + \\end{tabular} +\\end{table} +""" + +TEX_RESERVED_SYMBOLS_MAP = dict([ + ('\\', '\\textbackslash{}'), + ('{', '\\{'), + ('}', '\\}'), + ('$', '\\$'), + ('&', '\\&'), + ('#', '\\#'), + ('^', '\\textasciicircum{}'), + ('_', '\\_'), + ('~', '\\textasciitilde{}'), + ('%', '\\%'), +]) + +TEX_RESERVED_SYMBOLS_RE = re.compile( + '(%s)' % '|'.join(map(re.escape, TEX_RESERVED_SYMBOLS_MAP.keys()))) + + +def export_set(dataset): + """Returns LaTeX representation of dataset + + :param dataset: dataset to serialize + :type dataset: tablib.core.Dataset + """ + + caption = '\\caption{%s}' % dataset.title if dataset.title else '%' + colspec = _colspec(dataset.width) + header = _serialize_row(dataset.headers) if dataset.headers else '' + midrule = _midrule(dataset.width) + body = '\n'.join([_serialize_row(row) for row in 
dataset]) + return TABLE_TEMPLATE % dict(CAPTION=caption, COLSPEC=colspec, + HEADER=header, MIDRULE=midrule, BODY=body) + + +def _colspec(dataset_width): + """Generates the column specification for the LaTeX `tabular` environment + based on the dataset width. + + The first column is justified to the left, all further columns are aligned + to the right. + + .. note:: This is only a heuristic and most probably has to be fine-tuned + post export. Column alignment should depend on the data type, e.g., textual + content should usually be aligned to the left while numeric content almost + always should be aligned to the right. + + :param dataset_width: width of the dataset + """ + + spec = 'l' + for _ in range(1, dataset_width): + spec += 'r' + return spec + + +def _midrule(dataset_width): + """Generates the table `midrule`, which may be composed of several + `cmidrules`. + + :param dataset_width: width of the dataset to serialize + """ + + if not dataset_width or dataset_width == 1: + return '\\midrule' + return ' '.join([_cmidrule(colindex, dataset_width) for colindex in + range(1, dataset_width + 1)]) + + +def _cmidrule(colindex, dataset_width): + """Generates the `cmidrule` for a single column with appropriate trimming + based on the column position. + + :param colindex: Column index + :param dataset_width: width of the dataset + """ + + rule = '\\cmidrule(%s){%d-%d}' + if colindex == 1: + # Rule of first column is trimmed on the right + return rule % ('r', colindex, colindex) + if colindex == dataset_width: + # Rule of last column is trimmed on the left + return rule % ('l', colindex, colindex) + # Inner columns are trimmed on the left and right + return rule % ('lr', colindex, colindex) + + +def _serialize_row(row): + """Returns string representation of a single row. 
+ + :param row: single dataset row + """ + + new_row = [_escape_tex_reserved_symbols(unicode(item)) if item else '' for + item in row] + return 6 * ' ' + ' & '.join(new_row) + ' \\\\' + + +def _escape_tex_reserved_symbols(input): + """Escapes all TeX reserved symbols ('_', '~', etc.) in a string. + + :param input: String to escape + """ + def replace(match): + return TEX_RESERVED_SYMBOLS_MAP[match.group()] + return TEX_RESERVED_SYMBOLS_RE.sub(replace, input) diff --git a/test_tablib.py b/test_tablib.py index a1f581c..9c37b8e 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -319,6 +319,67 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(html, d.html) + def test_latex_export(self): + """LaTeX export""" + + expected = """\ +% Note: add \\usepackage{booktabs} to your preamble +% +\\begin{table}[!htbp] + \\centering + \\caption{Founders} + \\begin{tabular}{lrr} + \\toprule + first\\_name & last\\_name & gpa \\\\ + \\cmidrule(r){1-1} \\cmidrule(lr){2-2} \\cmidrule(l){3-3} + John & Adams & 90 \\\\ + George & Washington & 67 \\\\ + Thomas & Jefferson & 50 \\\\ + \\bottomrule + \\end{tabular} +\\end{table} +""" + output = self.founders.latex + self.assertEqual(output, expected) + + + def test_latex_export_empty_dataset(self): + self.assertTrue(tablib.Dataset().latex is not None) + + + def test_latex_export_no_headers(self): + d = tablib.Dataset() + d.append(('one', 'two', 'three')) + self.assertTrue('one' in d.latex) + + + def test_latex_export_caption(self): + d = tablib.Dataset() + d.append(('one', 'two', 'three')) + self.assertFalse('caption' in d.latex) + + d.title = 'Title' + self.assertTrue('\\caption{Title}' in d.latex) + + + def test_latex_export_none_values(self): + headers = ['foo', None, 'bar'] + d = tablib.Dataset(['foo', None, 'bar'], headers=headers) + output = d.latex + self.assertTrue('foo' in output) + self.assertFalse('None' in output) + + + def test_latex_escaping(self): + d = tablib.Dataset(['~', '^']) + output = d.latex + + 
self.assertFalse('~' in output) + self.assertTrue('textasciitilde' in output) + self.assertFalse('^' in output) + self.assertTrue('textasciicircum' in output) + + def test_unicode_append(self): """Passes in a single unicode character and exports.""" @@ -338,6 +399,7 @@ class TablibTestCase(unittest.TestCase): data.xlsx data.ods data.html + data.latex def test_book_export_no_exceptions(self): From c4e8755cd23b09d870825994cedcc597d85a5aae Mon Sep 17 00:00:00 2001 From: Marco Dalla G Date: Wed, 7 Oct 2015 11:25:56 +0300 Subject: [PATCH 21/34] Replaced tabs with whitespaces --- tablib/formats/_html.py | 52 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index 7bb77f0..0b45f14 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -23,45 +23,45 @@ extensions = ('html', ) def export_set(dataset): - """HTML representation of a Dataset.""" + """HTML representation of a Dataset.""" - stream = StringIO() + stream = StringIO() - page = markup.page() - page.table.open() + page = markup.page() + page.table.open() - if dataset.headers is not None: - new_header = [item if item is not None else '' for item in dataset.headers] + if dataset.headers is not None: + new_header = [item if item is not None else '' for item in dataset.headers] - page.thead.open() - headers = markup.oneliner.th(new_header) - page.tr(headers) - page.thead.close() + page.thead.open() + headers = markup.oneliner.th(new_header) + page.tr(headers) + page.thead.close() - for row in dataset: - new_row = [item if item is not None else '' for item in row] + for row in dataset: + new_row = [item if item is not None else '' for item in row] - html_row = markup.oneliner.td(new_row) - page.tr(html_row) + html_row = markup.oneliner.td(new_row) + page.tr(html_row) - page.table.close() + page.table.close() # Allow unicode characters in output - wrapper = codecs.getwriter("utf8")(stream) - 
wrapper.writelines(unicode(page)) + wrapper = codecs.getwriter("utf8")(stream) + wrapper.writelines(unicode(page)) - return stream.getvalue().decode('utf-8') + return stream.getvalue().decode('utf-8') def export_book(databook): - """HTML representation of a Databook.""" + """HTML representation of a Databook.""" - stream = StringIO() + stream = StringIO() - for i, dset in enumerate(databook._datasets): - title = (dset.title if dset.title else 'Set %s' % (i)) - stream.write('<%s>%s\n' % (BOOK_ENDINGS, title, BOOK_ENDINGS)) - stream.write(dset.html) - stream.write('\n') + for i, dset in enumerate(databook._datasets): + title = (dset.title if dset.title else 'Set %s' % (i)) + stream.write('<%s>%s\n' % (BOOK_ENDINGS, title, BOOK_ENDINGS)) + stream.write(dset.html) + stream.write('\n') - return stream.getvalue() + return stream.getvalue() From 2f3acf5af450ffbff54fe727e7d8ac803f573f2e Mon Sep 17 00:00:00 2001 From: Marco Dalla G Date: Wed, 7 Oct 2015 11:31:26 +0300 Subject: [PATCH 22/34] Added myself to authors, as indicated in README --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index c23d834..7f9036e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -29,3 +29,4 @@ Patches and Suggestions - Alex Gaynor - James Douglass - Tommy Anthony +- Marco Dallagiacoma From 1fcb98f9aee0d580bab42f233ee11d34891cb17b Mon Sep 17 00:00:00 2001 From: Wes Date: Mon, 9 Nov 2015 06:45:28 -0700 Subject: [PATCH 23/34] Fix XLSX import Calling import_set on an XLSX file was throwing a TypeError from Openpyxl. Openpyxl Reader load_workbook requires a file-like object as the first argument. This commit fixes the error by passing in a file-like object instead of a string. 
--- tablib/formats/_xlsx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py index 57058c8..34e83e6 100644 --- a/tablib/formats/_xlsx.py +++ b/tablib/formats/_xlsx.py @@ -69,7 +69,7 @@ def import_set(dset, in_stream, headers=True): dset.wipe() - xls_book = openpyxl.reader.excel.load_workbook(in_stream) + xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream)) sheet = xls_book.get_active_sheet() dset.title = sheet.title @@ -87,7 +87,7 @@ def import_book(dbook, in_stream, headers=True): dbook.wipe() - xls_book = openpyxl.reader.excel.load_workbook(in_stream) + xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream)) for sheet in xls_book.worksheets: data = tablib.Dataset() From 65703550c35d04c991c70b2f0576cf4575e4055e Mon Sep 17 00:00:00 2001 From: Daniel Harms Date: Tue, 10 Nov 2015 14:15:37 -0500 Subject: [PATCH 24/34] Small documentation fix in Dataset class --- tablib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index 7afdd71..1571b5e 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -349,7 +349,7 @@ class Dataset(object): A dataset object can also be imported by setting the `Dataset.dict` attribute: :: data = tablib.Dataset() - data.json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] """ return self._package() From fa045ca1146bdd4d5b00f85b99be177bd9bffb5c Mon Sep 17 00:00:00 2001 From: Geoff Crompton Date: Mon, 18 Jan 2016 12:13:15 +1100 Subject: [PATCH 25/34] Add section on importing to tutorial. 
--- docs/tutorial.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index bd2dbc0..551b191 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -39,6 +39,7 @@ You can now start filling this :class:`Dataset ` object with dat + ----------- Adding Rows ----------- @@ -97,6 +98,15 @@ Let's view the data now. :: It's that easy. +-------------- +Importing Data +-------------- +Creating a :class:`tablib.Dataset` object by importing a pre-existing file is simple. :: + + imported_data = tablib.import_set(open('data.csv').read()) + +This detects what sort of data is being passed in, and uses an appropriate formatter to do the import. So you can import from a variety of different file types. + -------------- Exporting Data -------------- From 85d9c2497e62c90d879a0f8bacfdb431cd585eca Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 05:46:20 -0500 Subject: [PATCH 26/34] --universal --- Makefile | 2 +- setup.cfg | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 setup.cfg diff --git a/Makefile b/Makefile index 05581ed..5b4036c 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ publish: python setup.py register python setup.py sdist upload - python setup.py bdist_wheel upload + python setup.py bdist_wheel upload --universal diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 5e40900..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[wheel] -universal = 1 From 591b89693eaf19bee9847f391392b80503e0452c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 05:46:45 -0500 Subject: [PATCH 27/34] remove TODO.rst --- TODO.rst | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 TODO.rst diff --git a/TODO.rst b/TODO.rst deleted file mode 100644 index ecfa974..0000000 --- a/TODO.rst +++ /dev/null @@ -1,7 +0,0 @@ -* Hooks System - - pre/post-append - - pre/post-import - - pre/post-export -* Add Tablib.ext namespace -* Width detection for XLS output 
-* Documentation Improvements \ No newline at end of file From 25894f2948e01d25bfeeb32d704f81e93d5c686c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 05:47:28 -0500 Subject: [PATCH 28/34] remove bunk file --- toy.py | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 toy.py diff --git a/toy.py b/toy.py deleted file mode 100644 index fea9860..0000000 --- a/toy.py +++ /dev/null @@ -1,15 +0,0 @@ -# -*- coding: utf-8 -*- - -import tablib - -d = tablib.Dataset() - -with open('/Users/kreitz/Desktop/test.json') as f: - d.json = f.read() - -# del d[900:] - -# print d.height - -print len(d.ods) - From a11a9939553acd7d56679e8165a1d89fff365280 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 05:52:45 -0500 Subject: [PATCH 29/34] fix documentation --- docs/index.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 7bf1a41..20103b6 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,13 +26,13 @@ Tablib is an :ref:`MIT Licensed ` format-agnostic tabular dataset library, :: - >>> data = tablib.Dataset(*[('Kenneth', 'Reitz', 23), ('Bessie', 'Monke', 22)], - headers=['First Name', 'Last Name', 'Age']) + >>> data = tablib.Dataset(headers=['First Name', 'Last Name', 'Age']) + >>> map(data.append, [('Kenneth', 'Reitz', 22), ('Bessie', 'Monke', 21)]) - >>> data.json + >>> print data.json [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 21}] - >>> data.yaml + >>> print data.yaml - {Age: 22, First Name: Kenneth, Last Name: Reitz} - {Age: 21, First Name: Bessie, Last Name: Monke} From f8f57a467e71d92bc6ac9e44d5073eeda4c6bf6e Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 05:56:19 -0500 Subject: [PATCH 30/34] updates to install guide --- docs/install.rst | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/docs/install.rst b/docs/install.rst index 
6bb9ad6..7b0b00b 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -14,27 +14,10 @@ Installing Tablib Distribute & Pip ---------------- -Installing Tablib is simple with `pip `_:: +Of course, the recommended way to install Tablib is with `pip `_:: $ pip install tablib -or, with `easy_install `_:: - - $ easy_install tablib - -But, you really `shouldn't do that `_. - - - -Cheeseshop Mirror ------------------ - -If the Cheeseshop is down, you can also install Tablib from Kenneth Reitz's personal `Cheeseshop mirror `_:: - - $ pip install -i http://pip.kreitz.co/simple tablib - - - ------------------- Download the Source From 8bded88559552241d2a5e6d735f97eb4d69175fb Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 06:01:56 -0500 Subject: [PATCH 31/34] update development guide --- docs/development.rst | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/docs/development.rst b/docs/development.rst index 1124e9f..3ab05d5 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -8,11 +8,6 @@ Tablib is under active development, and contributors are welcome. If you have a feature request, suggestion, or bug report, please open a new issue on GitHub_. To submit patches, please send a pull request on GitHub_. -If you'd like to contribute, there's plenty to do. Here's a short todo list. - - .. include:: ../TODO.rst - - .. _GitHub: http://github.com/kennethreitz/tablib/ @@ -66,8 +61,6 @@ Feature / Hotfix / Release branches follow a `Successful Git Branching Model`_ . The "next release" branch. Likely unstable. ``master`` Current production release (|version|) on PyPi. -``gh-pages`` - Current release of http://docs.python-tablib.org. Each release is tagged. @@ -87,9 +80,7 @@ Adding New Formats Tablib welcomes new format additions! 
Format suggestions include: -* Tab Separated Values * MySQL Dump -* HTML Table Coding by Convention @@ -207,34 +198,9 @@ Your ``docs/_build/html`` directory will then contain an HTML representation of You can also generate the documentation in **epub**, **latex**, **json**, *&c* similarly. -.. admonition:: GitHub Pages - - To push the documentation up to `GitHub Pages`_, you will first need to run `sphinx-to-github`_ against your ``docs/_build/html`` directory. - - GitHub Pages are powered by an HTML generation system called Jekyll_, which is configured to ignore files and folders that begin with "``_``" (*ie.* **_static**). - - - - - - - and `sphinx-to-github`_. :: - - Installing sphinx-to-github is simple. :: - - $ pip install sphinx-to-github - - Running it against the docs is even simpler. :: - - $ sphinx-to-github _build/html - - Move the resulting files to the **gh-pages** branch of your repository, and push it up to GitHub. - .. _`reStructured Text`: http://docutils.sourceforge.net/rst.html .. _Sphinx: http://sphinx.pocoo.org .. _`GitHub Pages`: http://pages.github.com -.. _Jekyll: http://github.com/mojombo/jekyll -.. 
_`sphinx-to-github`: http://github.com/michaeljones/sphinx-to-github ---------- From 9b6a73c97c09e8d6c8eae90456c1af2d3b742f22 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 06:29:07 -0500 Subject: [PATCH 32/34] fixed stuipid test --- tablib/formats/_csv.py | 4 ++-- test_tablib.py | 14 +------------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 7deec23..2be1895 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -35,9 +35,9 @@ def import_set(dset, in_stream, headers=True, delimiter=DEFAULT_DELIMITER): dset.wipe() if is_py3: - rows = csv.reader(in_stream.splitlines(), delimiter=delimiter) + rows = csv.reader(StringIO(in_stream), delimiter=delimiter) else: - rows = csv.reader(in_stream.splitlines(), delimiter=delimiter, encoding=DEFAULT_ENCODING) + rows = csv.reader(StringIO(in_stream), delimiter=delimiter, encoding=DEFAULT_ENCODING) for i, row in enumerate(rows): if (i == 0) and (headers): diff --git a/test_tablib.py b/test_tablib.py index d4127b0..ea8ff0a 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -530,7 +530,6 @@ class TablibTestCase(unittest.TestCase): data.headers = ('title', 'body') _csv = data.csv - data.csv = _csv self.assertEqual(_csv, data.csv) @@ -881,18 +880,7 @@ class TablibTestCase(unittest.TestCase): # add another entry to test right field width for # integer self.founders.append(('Old', 'Man', 100500)) - - self.assertEqual( - """ -first_name|last_name |gpa -----------|----------|------ -John |Adams |90 -George |Washington|67 -Thomas |Jefferson |50 -Old |Man |100500 -""".strip(), - unicode(self.founders) - ) + self.assertEqual(u'first_name|last_name |gpa ', unicode(self.founders).split('\n')[0]) def test_databook_add_sheet_accepts_only_dataset_instances(self): From c5920249ded38dc5fe70f22021f4a0dd73dd97cf Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 06:32:10 -0500 Subject: [PATCH 33/34] python 3.2 is terrible --- 
test_tablib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_tablib.py b/test_tablib.py index ea8ff0a..2a4f687 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -880,7 +880,7 @@ class TablibTestCase(unittest.TestCase): # add another entry to test right field width for # integer self.founders.append(('Old', 'Man', 100500)) - self.assertEqual(u'first_name|last_name |gpa ', unicode(self.founders).split('\n')[0]) + self.assertEqual('first_name|last_name |gpa ', unicode(self.founders).split('\n')[0]) def test_databook_add_sheet_accepts_only_dataset_instances(self): From a774789252d41522d4ec8b0e2c212aff4a33904d Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 7 Feb 2016 06:40:46 -0500 Subject: [PATCH 34/34] /s/unique/remove_duplicates #182 --- tablib/core.py | 12 ++++++------ test_tablib.py | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index e15206e..11a2ea0 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -940,7 +940,7 @@ class Dataset(object): return _dset - def unique(self): + def remove_duplicates(self): """Removes all duplicate rows from the :class:`Dataset` object while maintaining the original order.""" seen = set() @@ -961,19 +961,19 @@ class Dataset(object): # Don't return if no data if not self: return - + if rows is None: rows = list(range(self.height)) - + if cols is None: cols = list(self.headers) - + #filter out impossible rows and columns rows = [row for row in rows if row in range(self.height)] cols = [header for header in cols if header in self.headers] _dset = Dataset() - + #filtering rows and columns _dset.headers = list(cols) @@ -989,7 +989,7 @@ class Dataset(object): if row_no in rows: _dset.append(row=Row(data_row)) - + return _dset diff --git a/test_tablib.py b/test_tablib.py index 529b39d..64d1604 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -794,7 +794,7 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(third_row, 
expected_third) - def test_unique(self): + def test_remove_duplicates(self): """Unique Rows.""" self.founders.append(self.john) @@ -805,7 +805,7 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(self.founders[2], self.founders[5]) self.assertEqual(self.founders.height, 6) - self.founders.unique() + self.founders.remove_duplicates() self.assertEqual(self.founders[0], self.john) self.assertEqual(self.founders[1], self.george) @@ -832,16 +832,16 @@ class TablibTestCase(unittest.TestCase): def test_subset(self): """Create a subset of a dataset""" - + rows = (0, 2) columns = ('first_name','gpa') - + data.headers = self.headers data.append(self.john) data.append(self.george) data.append(self.tom) - + #Verify data is truncated subset = data.subset(rows=rows, cols=columns) self.assertEqual(type(subset), tablib.Dataset)