diff --git a/.travis.yml b/.travis.yml index ebf03a6..f9bf7bb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: python python: -- 2.7 - 3.6 - 3.7 - 3.8 diff --git a/AUTHORS b/AUTHORS index d86466b..503f580 100644 --- a/AUTHORS +++ b/AUTHORS @@ -24,6 +24,7 @@ Here is a list of passed and present much-appreciated contributors: Mark Walling Mathias Loesch Mike Waldner + Peyman Salehi Rabin Nankhwa Tommy Anthony Tsuyoshi Hombashi diff --git a/HISTORY.md b/HISTORY.md index 9433322..a36732d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,11 @@ # History +## Unreleased + +### Breaking changes + +- Dropped Python 2 support + ## 0.14.0 (2019-10-19) ### Deprecations diff --git a/docs/intro.rst b/docs/intro.rst index 37a1c60..00562c1 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -74,6 +74,7 @@ THE SOFTWARE. Pythons Supported ----------------- -Python 2.7 and 3.5+ are officially supported. +Python 3.5+ are officially supported. Now, go :ref:`install Tablib `. + diff --git a/setup.py b/setup.py index 912bcb7..5054dea 100755 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- import os import re @@ -11,7 +10,6 @@ from setuptools import find_packages, setup install = [ 'odfpy', 'openpyxl>=2.4.0', - 'backports.csv;python_version<"3.0"', 'markuppy', 'xlrd', 'xlwt', @@ -41,15 +39,14 @@ setup( 'Natural Language :: English', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', ], - python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*', + python_requires='>=3.5', install_requires=install, extras_require={ 'pandas': ['pandas'], diff --git a/src/tablib/compat.py b/src/tablib/compat.py deleted file mode 100644 index 1956e99..0000000 --- a/src/tablib/compat.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -tablib.compat -~~~~~~~~~~~~~ - -Tablib compatibility module. - -""" - -import sys - -is_py3 = (sys.version_info[0] > 2) - - -if is_py3: - from io import StringIO - from statistics import median - from itertools import zip_longest as izip_longest - import csv - import tablib.packages.dbfpy3 as dbfpy - - unicode = str - xrange = range - -else: - from StringIO import StringIO - from tablib.packages.statistics import median - from itertools import izip_longest - from backports import csv - import tablib.packages.dbfpy as dbfpy - - unicode = unicode - xrange = xrange - -from MarkupPy import markup # Kept temporarily to avoid breaking existing imports diff --git a/src/tablib/core.py b/src/tablib/core.py index 0c9131d..46e8444 100644 --- a/src/tablib/core.py +++ b/src/tablib/core.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """ tablib.core ~~~~~~~~~~~ @@ -15,8 +14,6 @@ from operator import itemgetter from tablib import formats -from tablib.compat import unicode - __title__ = 'tablib' __author__ = 'Kenneth Reitz' @@ -25,7 +22,7 @@ __copyright__ = 'Copyright 2017 Kenneth Reitz. 2019 Jazzband.' __docformat__ = 'restructuredtext' -class Row(object): +class Row: """Internal Row object. Mainly used for filtering.""" __slots__ = ['_row', 'tags'] @@ -104,7 +101,7 @@ class Row(object): return bool(len(set(tag) & set(self.tags))) -class Dataset(object): +class Dataset: """The :class:`Dataset` object is the heart of Tablib. It provides all core functionality. @@ -173,7 +170,7 @@ class Dataset(object): return self.height def __getitem__(self, key): - if isinstance(key, (str, unicode)): + if isinstance(key, str): if key in self.headers: pos = self.headers.index(key) # get 'key' index from each data return [row[pos] for row in self._data] @@ -191,7 +188,7 @@ class Dataset(object): self._data[key] = Row(value) def __delitem__(self, key): - if isinstance(key, (str, unicode)): + if isinstance(key, str): if key in self.headers: @@ -213,15 +210,15 @@ class Dataset(object): except AttributeError: return '' - def __unicode__(self): + def __str__(self): result = [] - # Add unicode representation of headers. + # Add str representation of headers. if self.__headers: - result.append([unicode(h) for h in self.__headers]) + result.append([str(h) for h in self.__headers]) - # Add unicode representation of rows. - result.extend(list(map(unicode, row)) for row in self._data) + # Add str representation of rows. + result.extend(list(map(str, row)) for row in self._data) lens = [list(map(len, row)) for row in result] field_lens = list(map(max, zip(*lens))) @@ -234,9 +231,6 @@ class Dataset(object): return '\n'.join(format_string.format(*row) for row in result) - def __str__(self): - return self.__unicode__() - # --------- # Internals # --------- @@ -824,7 +818,7 @@ class Dataset(object): each cell value. """ - if isinstance(col, unicode): + if isinstance(col, str): if col in self.headers: col = self.headers.index(col) # get 'key' index from each data else: @@ -855,7 +849,7 @@ class Dataset(object): sorted. """ - if isinstance(col, (str, unicode)): + if isinstance(col, str): if not self.headers: raise HeadersNeeded @@ -1017,7 +1011,7 @@ class Dataset(object): return _dset -class Databook(object): +class Databook: """A book of :class:`Dataset` objects. """ diff --git a/src/tablib/formats/__init__.py b/src/tablib/formats/__init__.py index 52cf472..9542d9a 100644 --- a/src/tablib/formats/__init__.py +++ b/src/tablib/formats/__init__.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Tablib - formats """ diff --git a/src/tablib/formats/_csv.py b/src/tablib/formats/_csv.py index 5c03d6f..f4c1699 100644 --- a/src/tablib/formats/_csv.py +++ b/src/tablib/formats/_csv.py @@ -1,16 +1,15 @@ -# -*- coding: utf-8 -*- - """ Tablib - *SV Support. """ -from tablib.compat import csv, StringIO, unicode +import csv +from io import StringIO title = 'csv' extensions = ('csv',) -DEFAULT_DELIMITER = unicode(',') +DEFAULT_DELIMITER = ',' def export_stream_set(dataset, **kwargs): diff --git a/src/tablib/formats/_dbf.py b/src/tablib/formats/_dbf.py index 0d1c87b..fecb214 100644 --- a/src/tablib/formats/_dbf.py +++ b/src/tablib/formats/_dbf.py @@ -1,24 +1,13 @@ -# -*- coding: utf-8 -*- - """ Tablib - DBF Support. """ -import tempfile -import struct +import io import os +import struct +import tempfile -from tablib.compat import StringIO -from tablib.compat import dbfpy -from tablib.compat import is_py3 - -if is_py3: - from tablib.packages.dbfpy3 import dbf - from tablib.packages.dbfpy3 import dbfnew - from tablib.packages.dbfpy3 import record as dbfrecord - import io -else: - from tablib.packages.dbfpy import dbf - from tablib.packages.dbfpy import dbfnew - from tablib.packages.dbfpy import record as dbfrecord +from tablib.packages.dbfpy import dbf +from tablib.packages.dbfpy import dbfnew +from tablib.packages.dbfpy import record as dbfrecord title = 'dbf' @@ -50,10 +39,7 @@ def export_set(dataset): dbf_file.close() dbf_stream = open(temp_uri, 'rb') - if is_py3: - stream = io.BytesIO(dbf_stream.read()) - else: - stream = StringIO(dbf_stream.read()) + stream = io.BytesIO(dbf_stream.read()) dbf_stream.close() os.close(temp_file) os.remove(temp_uri) @@ -63,10 +49,7 @@ def import_set(dset, in_stream, headers=True): """Returns a dataset from a DBF stream.""" dset.wipe() - if is_py3: - _dbf = dbf.Dbf(io.BytesIO(in_stream)) - else: - _dbf = dbf.Dbf(StringIO(in_stream)) + _dbf = dbf.Dbf(io.BytesIO(in_stream)) dset.headers = _dbf.fieldNames for record in range(_dbf.recordCount): row = [_dbf[record][f] for f in _dbf.fieldNames] @@ -76,12 +59,9 @@ def detect(stream): """Returns True if the given stream is valid DBF""" #_dbf = dbf.Table(StringIO(stream)) try: - if is_py3: - if type(stream) is not bytes: - stream = bytes(stream, 'utf-8') - _dbf = dbf.Dbf(io.BytesIO(stream), readOnly=True) - else: - _dbf = dbf.Dbf(StringIO(stream), readOnly=True) + if type(stream) is not bytes: + stream = bytes(stream, 'utf-8') + _dbf = dbf.Dbf(io.BytesIO(stream), readOnly=True) return True except Exception: return False diff --git a/src/tablib/formats/_df.py b/src/tablib/formats/_df.py index 0c7ebec..dd319a3 100644 --- a/src/tablib/formats/_df.py +++ b/src/tablib/formats/_df.py @@ -11,7 +11,6 @@ except ImportError: import tablib -from tablib.compat import unicode title = 'df' extensions = ('df', ) diff --git a/src/tablib/formats/_html.py b/src/tablib/formats/_html.py index ac72bd6..952caf2 100644 --- a/src/tablib/formats/_html.py +++ b/src/tablib/formats/_html.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Tablib - HTML export support. """ @@ -9,7 +7,6 @@ from io import BytesIO from MarkupPy import markup import tablib -from tablib.compat import unicode BOOK_ENDINGS = 'h3' @@ -43,7 +40,7 @@ def export_set(dataset): # Allow unicode characters in output wrapper = codecs.getwriter("utf8")(stream) - wrapper.writelines(unicode(page)) + wrapper.writelines(str(page)) return stream.getvalue().decode('utf-8') diff --git a/src/tablib/formats/_jira.py b/src/tablib/formats/_jira.py index 55fce52..99dbf3e 100644 --- a/src/tablib/formats/_jira.py +++ b/src/tablib/formats/_jira.py @@ -1,10 +1,7 @@ -# -*- coding: utf-8 -*- - """Tablib - Jira table export support. Generates a Jira table from the dataset. """ -from tablib.compat import unicode title = 'jira' @@ -35,5 +32,5 @@ def _get_header(headers): def _serialize_row(row, delimiter='|'): return '%s%s%s' % (delimiter, - delimiter.join([unicode(item) if item else ' ' for item in row]), + delimiter.join([str(item) if item else ' ' for item in row]), delimiter) diff --git a/src/tablib/formats/_json.py b/src/tablib/formats/_json.py index 009fb3c..f427ff2 100644 --- a/src/tablib/formats/_json.py +++ b/src/tablib/formats/_json.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Tablib - JSON Support """ import decimal diff --git a/src/tablib/formats/_latex.py b/src/tablib/formats/_latex.py index 44ee101..fae2ceb 100644 --- a/src/tablib/formats/_latex.py +++ b/src/tablib/formats/_latex.py @@ -1,12 +1,9 @@ -# -*- coding: utf-8 -*- - """Tablib - LaTeX table export support. Generates a LaTeX booktabs-style table from the dataset. """ import re -from tablib.compat import unicode title = 'latex' extensions = ('tex',) @@ -119,7 +116,7 @@ def _serialize_row(row): :param row: single dataset row """ - new_row = [_escape_tex_reserved_symbols(unicode(item)) if item else '' for + new_row = [_escape_tex_reserved_symbols(str(item)) if item else '' for item in row] return 6 * ' ' + ' & '.join(new_row) + ' \\\\' diff --git a/src/tablib/formats/_ods.py b/src/tablib/formats/_ods.py index dbf57c4..cd58cd1 100644 --- a/src/tablib/formats/_ods.py +++ b/src/tablib/formats/_ods.py @@ -1,11 +1,8 @@ -# -*- coding: utf-8 -*- - """ Tablib - ODF Support. """ from io import BytesIO from odf import opendocument, style, table, text -from tablib.compat import unicode title = 'ods' extensions = ('ods',) @@ -57,9 +54,9 @@ def dset_sheet(dataset, ws): odf_row = table.TableRow(stylename=bold, defaultcellstylename='bold') for j, col in enumerate(row): try: - col = unicode(col, errors='ignore') + col = str(col, errors='ignore') except TypeError: - ## col is already unicode + ## col is already str pass ws.addElement(table.TableColumn()) diff --git a/src/tablib/formats/_rst.py b/src/tablib/formats/_rst.py index 4b53ad7..27151a3 100644 --- a/src/tablib/formats/_rst.py +++ b/src/tablib/formats/_rst.py @@ -1,19 +1,10 @@ -# -*- coding: utf-8 -*- - """ Tablib - reStructuredText Support """ -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from itertools import zip_longest +from statistics import median from textwrap import TextWrapper -from tablib.compat import ( - median, - unicode, - izip_longest, -) - title = 'rst' extensions = ('rst',) @@ -28,10 +19,10 @@ JUSTIFY_RIGHT = 'right' JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT) -def to_unicode(value): +def to_str(value): if isinstance(value, bytes): return value.decode('utf-8') - return unicode(value) + return str(value) def _max_word_len(text): @@ -60,7 +51,7 @@ def _get_column_string_lengths(dataset): for row in dataset.dict: values = iter(row.values() if hasattr(row, 'values') else row) for i, val in enumerate(values): - text = to_unicode(val) + text = to_str(val) column_lengths[i].append(len(text)) word_lens[i] = max(word_lens[i], _max_word_len(text)) return column_lengths, word_lens @@ -86,10 +77,10 @@ def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT): cells = [] for value, width in zip(values, widths): wrapper.width = width - text = to_unicode(value) + text = to_str(value) cell = wrapper.wrap(text) cells.append(cell) - lines = izip_longest(*cells, fillvalue='') + lines = zip_longest(*cells, fillvalue='') lines = ( (just(cell_line, widths[i]) for i, cell_line in enumerate(line)) for line in lines @@ -206,11 +197,11 @@ def _use_simple_table(head0, col0, width0): """ if head0 is not None: - head0 = to_unicode(head0) + head0 = to_str(head0) if len(head0) > width0: return False for cell in col0: - cell = to_unicode(cell) + cell = to_str(cell) if len(cell) > width0: return False return True diff --git a/src/tablib/formats/_tsv.py b/src/tablib/formats/_tsv.py index 1c6d6a1..8015bff 100644 --- a/src/tablib/formats/_tsv.py +++ b/src/tablib/formats/_tsv.py @@ -1,9 +1,6 @@ -# -*- coding: utf-8 -*- - """ Tablib - TSV (Tab Separated Values) Support. """ -from tablib.compat import unicode from tablib.formats._csv import ( export_set as export_set_wrapper, import_set as import_set_wrapper, @@ -13,7 +10,7 @@ from tablib.formats._csv import ( title = 'tsv' extensions = ('tsv',) -DELIMITER = unicode('\t') +DELIMITER = '\t' def export_set(dataset): """Returns TSV representation of Dataset.""" diff --git a/src/tablib/formats/_xls.py b/src/tablib/formats/_xls.py index 88e8636..71c8245 100644 --- a/src/tablib/formats/_xls.py +++ b/src/tablib/formats/_xls.py @@ -1,12 +1,9 @@ -# -*- coding: utf-8 -*- - """ Tablib - XLS Support. """ import sys from io import BytesIO -from tablib.compat import xrange import tablib import xlrd import xlwt @@ -77,7 +74,7 @@ def import_set(dset, in_stream, headers=True): dset.title = sheet.name - for i in xrange(sheet.nrows): + for i in range(sheet.nrows): if (i == 0) and (headers): dset.headers = sheet.row_values(0) else: @@ -94,7 +91,7 @@ def import_book(dbook, in_stream, headers=True): data = tablib.Dataset() data.title = sheet.name - for i in xrange(sheet.nrows): + for i in range(sheet.nrows): if (i == 0) and (headers): data.headers = sheet.row_values(0) else: diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py index f8f21c2..27757f4 100644 --- a/src/tablib/formats/_xlsx.py +++ b/src/tablib/formats/_xlsx.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Tablib - XLSX Support. """ @@ -13,8 +11,6 @@ Workbook = openpyxl.workbook.Workbook ExcelWriter = openpyxl.writer.excel.ExcelWriter get_column_letter = openpyxl.utils.get_column_letter -from tablib.compat import unicode - title = 'xlsx' extensions = ('xlsx',) @@ -132,7 +128,7 @@ def dset_sheet(dataset, ws, freeze_panes=True): # wrap the rest else: try: - str_col_value = unicode(col) + str_col_value = str(col) except TypeError: str_col_value = '' if '\n' in str_col_value: @@ -141,4 +137,4 @@ def dset_sheet(dataset, ws, freeze_panes=True): try: cell.value = col except (ValueError, TypeError): - cell.value = unicode(col) + cell.value = str(col) diff --git a/src/tablib/formats/_yaml.py b/src/tablib/formats/_yaml.py index 3d17baf..3707876 100644 --- a/src/tablib/formats/_yaml.py +++ b/src/tablib/formats/_yaml.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Tablib - YAML Support. """ diff --git a/src/tablib/packages/dbfpy/dbf.py b/src/tablib/packages/dbfpy/dbf.py index fe0736f..cd9999d 100644 --- a/src/tablib/packages/dbfpy/dbf.py +++ b/src/tablib/packages/dbfpy/dbf.py @@ -64,10 +64,10 @@ __all__ = ["Dbf"] from . import header from . import record -from utils import INVALID_VALUE +from .utils import INVALID_VALUE -class Dbf(object): +class Dbf: """DBF accessor. FIXME: @@ -114,16 +114,16 @@ class Dbf(object): ``INVALID_VALUE`` instead of raising conversion error. """ - if isinstance(f, basestring): + if isinstance(f, str): # a filename self.name = f if new: # new table (table file must be # created or opened and truncated) - self.stream = file(f, "w+b") + self.stream = open(f, "w+b") else: # table file must exist - self.stream = file(f, ("r+b", "rb")[bool(readOnly)]) + self.stream = open(f, ("r+b", "rb")[bool(readOnly)]) else: # a stream self.name = getattr(f, "name", "") @@ -177,7 +177,7 @@ class Dbf(object): Return value is numeric object maning valid index. """ - if not isinstance(index, (int, long)): + if not isinstance(index, int): raise TypeError("Index must be a numeric object") if index < 0: # index from the right side @@ -204,7 +204,8 @@ class Dbf(object): def indexOfFieldName(self, name): """Index of field named ``name``.""" # FIXME: move this to header class - return self.header.fields.index(name) + names = [f.name for f in self.header.fields] + return names.index(name.upper()) def newRecord(self): """Return new record, which belong to this table.""" @@ -294,4 +295,4 @@ if __name__ == '__main__': demo_create(_name) demo_read(_name) - # vim: set et sw=4 sts=4 : +# vim: set et sw=4 sts=4 : diff --git a/src/tablib/packages/dbfpy/dbfnew.py b/src/tablib/packages/dbfpy/dbfnew.py index e9b6ce5..29c09a1 100644 --- a/src/tablib/packages/dbfpy/dbfnew.py +++ b/src/tablib/packages/dbfpy/dbfnew.py @@ -24,13 +24,13 @@ __date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] __all__ = ["dbf_new"] -from dbf import * -from fields import * -from header import * -from record import * +from .dbf import * +from .fields import * +from .header import * +from .record import * -class _FieldDefinition(object): +class _FieldDefinition: """Field definition. This is a simple structure, which contains ``name``, ``type``, @@ -87,7 +87,7 @@ class _FieldDefinition(object): dbfh.addField(_dbff) -class dbf_new(object): +class dbf_new: """New .DBF creation helper. Example Usage: @@ -140,17 +140,11 @@ class dbf_new(object): _dbfh.setCurrentDate() for _fldDef in self.fields: _fldDef.appendToHeader(_dbfh) - _dbfStream = file(filename, "wb") + + _dbfStream = open(filename, "wb") _dbfh.write(_dbfStream) _dbfStream.close() - def write_stream(self, stream): - _dbfh = DbfHeader() - _dbfh.setCurrentDate() - for _fldDef in self.fields: - _fldDef.appendToHeader(_dbfh) - _dbfh.write(stream) - if __name__ == '__main__': # create a new DBF-File @@ -186,4 +180,4 @@ if __name__ == '__main__': print() dbft.close() - # vim: set et sts=4 sw=4 : +# vim: set et sts=4 sw=4 : diff --git a/src/tablib/packages/dbfpy/fields.py b/src/tablib/packages/dbfpy/fields.py index a8f3d9b..bc39d57 100644 --- a/src/tablib/packages/dbfpy/fields.py +++ b/src/tablib/packages/dbfpy/fields.py @@ -38,7 +38,7 @@ from . import utils ## abstract definitions -class DbfFieldDef(object): +class DbfFieldDef: """Abstract field definition. Child classes must override ``type`` class attribute to provide datatype @@ -56,7 +56,7 @@ class DbfFieldDef(object): """ - __slots__ = ("name", "length", "decimalCount", + __slots__ = ("name", "decimalCount", "start", "end", "ignoreErrors") # length of the field, None in case of variable-length field, @@ -123,9 +123,9 @@ class DbfFieldDef(object): """ assert len(string) == 32 - _length = ord(string[16]) - return cls(utils.unzfill(string)[:11], _length, ord(string[17]), - start, start + _length, ignoreErrors=ignoreErrors) + _length = string[16] + return cls(utils.unzfill(string)[:11].decode('utf-8'), _length, + string[17], start, start + _length, ignoreErrors=ignoreErrors) fromString = classmethod(fromString) def toString(self): @@ -200,7 +200,7 @@ class DbfCharacterFieldDef(DbfFieldDef): """Definition of the character field.""" typeCode = "C" - defaultValue = "" + defaultValue = b'' def decodeValue(self, value): """Return string object. @@ -208,7 +208,7 @@ class DbfCharacterFieldDef(DbfFieldDef): Return value is a ``value`` argument with stripped right spaces. """ - return value.rstrip(" ") + return value.rstrip(b' ').decode('utf-8') def encodeValue(self, value): """Return raw data string encoded from a ``value``.""" @@ -235,8 +235,8 @@ class DbfNumericFieldDef(DbfFieldDef): Return value is a int (long) or float instance. """ - value = value.strip(" \0") - if "." in value: + value = value.strip(b' \0') + if b'.' in value: # a float (has decimal separator) return float(value) elif value: @@ -452,11 +452,11 @@ def lookupFor(typeCode): """ # XXX: use typeCode.upper()? in case of any decign don't # forget to look to the same comment in ``registerField`` - return _fieldsRegistry[typeCode] + return _fieldsRegistry[chr(typeCode)] ## register generic types -for (_name, _val) in globals().items(): +for (_name, _val) in list(globals().items()): if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ and (_name != "DbfFieldDef"): __all__.append(_name) diff --git a/src/tablib/packages/dbfpy/header.py b/src/tablib/packages/dbfpy/header.py index 03a877c..cb0050a 100644 --- a/src/tablib/packages/dbfpy/header.py +++ b/src/tablib/packages/dbfpy/header.py @@ -19,20 +19,17 @@ __date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] __all__ = ["DbfHeader"] -try: - import cStringIO -except ImportError: - # when we're in python3, we cStringIO has been replaced by io.StringIO - import io as cStringIO +import io import datetime import struct import time +import sys from . import fields -from . import utils +from .utils import getDate -class DbfHeader(object): +class DbfHeader: """Dbf header definition. For more information about dbf header format visit @@ -90,7 +87,7 @@ class DbfHeader(object): self.fields = [] else: self.fields = list(fields) - self.lastUpdate = utils.getDate(lastUpdate) + self.lastUpdate = getDate(lastUpdate) self.recordLength = recordLength self.headerLength = headerLength self.recordCount = recordCount @@ -102,17 +99,20 @@ class DbfHeader(object): # @classmethod def fromString(cls, string): """Return header instance from the string object.""" - return cls.fromStream(cStringIO.StringIO(str(string))) + return cls.fromStream(io.StringIO(str(string))) fromString = classmethod(fromString) # @classmethod def fromStream(cls, stream): """Return header object from the stream.""" stream.seek(0) - _data = stream.read(32) + first_32 = stream.read(32) + if type(first_32) != bytes: + _data = bytes(first_32, sys.getfilesystemencoding()) + _data = first_32 (_cnt, _hdrLen, _recLen) = struct.unpack("" diff --git a/src/tablib/packages/dbfpy3/__init__.py b/src/tablib/packages/dbfpy3/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/tablib/packages/dbfpy3/dbf.py b/src/tablib/packages/dbfpy3/dbf.py deleted file mode 100644 index 48580d0..0000000 --- a/src/tablib/packages/dbfpy3/dbf.py +++ /dev/null @@ -1,297 +0,0 @@ -#! /usr/bin/env python -"""DBF accessing helpers. - -FIXME: more documentation needed - -Examples: - - Create new table, setup structure, add records: - - dbf = Dbf(filename, new=True) - dbf.addField( - ("NAME", "C", 15), - ("SURNAME", "C", 25), - ("INITIALS", "C", 10), - ("BIRTHDATE", "D"), - ) - for (n, s, i, b) in ( - ("John", "Miller", "YC", (1980, 10, 11)), - ("Andy", "Larkin", "", (1980, 4, 11)), - ): - rec = dbf.newRecord() - rec["NAME"] = n - rec["SURNAME"] = s - rec["INITIALS"] = i - rec["BIRTHDATE"] = b - rec.store() - dbf.close() - - Open existed dbf, read some data: - - dbf = Dbf(filename, True) - for rec in dbf: - for fldName in dbf.fieldNames: - print('%s:\t %s (%s)' % (fldName, rec[fldName], - type(rec[fldName]))) - dbf.close() - -""" -"""History (most recent first): -11-feb-2007 [als] export INVALID_VALUE; - Dbf: added .ignoreErrors, .INVALID_VALUE -04-jul-2006 [als] added export declaration -20-dec-2005 [yc] removed fromStream and newDbf methods: - use argument of __init__ call must be used instead; - added class fields pointing to the header and - record classes. -17-dec-2005 [yc] split to several modules; reimplemented -13-dec-2005 [yc] adapted to the changes of the `strutil` module. -13-sep-2002 [als] support FoxPro Timestamp datatype -15-nov-1999 [jjk] documentation updates, add demo -24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks -08-jun-1998 [jjk] fix problems, add more features -20-feb-1998 [jjk] fix problems, add more features -19-feb-1998 [jjk] add create/write capabilities -18-feb-1998 [jjk] from dbfload.py -""" - -__version__ = "$Revision: 1.7 $"[11:-2] -__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] -__author__ = "Jeff Kunce " - -__all__ = ["Dbf"] - -from . import header -from . import record -from .utils import INVALID_VALUE - - -class Dbf(object): - """DBF accessor. - - FIXME: - docs and examples needed (dont' forget to tell - about problems adding new fields on the fly) - - Implementation notes: - ``_new`` field is used to indicate whether this is - a new data table. `addField` could be used only for - the new tables! If at least one record was appended - to the table it's structure couldn't be changed. - - """ - - __slots__ = ("name", "header", "stream", - "_changed", "_new", "_ignore_errors") - - HeaderClass = header.DbfHeader - RecordClass = record.DbfRecord - INVALID_VALUE = INVALID_VALUE - - # initialization and creation helpers - - def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): - """Initialize instance. - - Arguments: - f: - Filename or file-like object. - new: - True if new data table must be created. Assume - data table exists if this argument is False. - readOnly: - if ``f`` argument is a string file will - be opend in read-only mode; in other cases - this argument is ignored. This argument is ignored - even if ``new`` argument is True. - headerObj: - `header.DbfHeader` instance or None. If this argument - is None, new empty header will be used with the - all fields set by default. - ignoreErrors: - if set, failing field value conversion will return - ``INVALID_VALUE`` instead of raising conversion error. - - """ - if isinstance(f, str): - # a filename - self.name = f - if new: - # new table (table file must be - # created or opened and truncated) - self.stream = open(f, "w+b") - else: - # table file must exist - self.stream = open(f, ("r+b", "rb")[bool(readOnly)]) - else: - # a stream - self.name = getattr(f, "name", "") - self.stream = f - if new: - # if this is a new table, header will be empty - self.header = self.HeaderClass() - else: - # or instantiated using stream - self.header = self.HeaderClass.fromStream(self.stream) - self.ignoreErrors = ignoreErrors - self._new = bool(new) - self._changed = False - - # properties - - closed = property(lambda self: self.stream.closed) - recordCount = property(lambda self: self.header.recordCount) - fieldNames = property( - lambda self: [_fld.name for _fld in self.header.fields]) - fieldDefs = property(lambda self: self.header.fields) - changed = property(lambda self: self._changed or self.header.changed) - - def ignoreErrors(self, value): - """Update `ignoreErrors` flag on the header object and self""" - self.header.ignoreErrors = self._ignore_errors = bool(value) - - ignoreErrors = property( - lambda self: self._ignore_errors, - ignoreErrors, - doc="""Error processing mode for DBF field value conversion - - if set, failing field value conversion will return - ``INVALID_VALUE`` instead of raising conversion error. - - """) - - # protected methods - - def _fixIndex(self, index): - """Return fixed index. - - This method fails if index isn't a numeric object - (long or int). Or index isn't in a valid range - (less or equal to the number of records in the db). - - If ``index`` is a negative number, it will be - treated as a negative indexes for list objects. - - Return: - Return value is numeric object maning valid index. - - """ - if not isinstance(index, int): - raise TypeError("Index must be a numeric object") - if index < 0: - # index from the right side - # fix it to the left-side index - index += len(self) + 1 - if index >= len(self): - raise IndexError("Record index out of range") - return index - - # interface methods - - def close(self): - self.flush() - self.stream.close() - - def flush(self): - """Flush data to the associated stream.""" - if self.changed: - self.header.setCurrentDate() - self.header.write(self.stream) - self.stream.flush() - self._changed = False - - def indexOfFieldName(self, name): - """Index of field named ``name``.""" - # FIXME: move this to header class - names = [f.name for f in self.header.fields] - return names.index(name.upper()) - - def newRecord(self): - """Return new record, which belong to this table.""" - return self.RecordClass(self) - - def append(self, record): - """Append ``record`` to the database.""" - record.index = self.header.recordCount - record._write() - self.header.recordCount += 1 - self._changed = True - self._new = False - - def addField(self, *defs): - """Add field definitions. - - For more information see `header.DbfHeader.addField`. - - """ - if self._new: - self.header.addField(*defs) - else: - raise TypeError("At least one record was added, " - "structure can't be changed") - - # 'magic' methods (representation and sequence interface) - - def __repr__(self): - return "Dbf stream '%s'\n" % self.stream + repr(self.header) - - def __len__(self): - """Return number of records.""" - return self.recordCount - - def __getitem__(self, index): - """Return `DbfRecord` instance.""" - return self.RecordClass.fromStream(self, self._fixIndex(index)) - - def __setitem__(self, index, record): - """Write `DbfRecord` instance to the stream.""" - record.index = self._fixIndex(index) - record._write() - self._changed = True - self._new = False - - # def __del__(self): - # """Flush stream upon deletion of the object.""" - # self.flush() - - -def demo_read(filename): - _dbf = Dbf(filename, True) - for _rec in _dbf: - print() - print(repr(_rec)) - _dbf.close() - - -def demo_create(filename): - _dbf = Dbf(filename, new=True) - _dbf.addField( - ("NAME", "C", 15), - ("SURNAME", "C", 25), - ("INITIALS", "C", 10), - ("BIRTHDATE", "D"), - ) - for (_n, _s, _i, _b) in ( - ("John", "Miller", "YC", (1981, 1, 2)), - ("Andy", "Larkin", "AL", (1982, 3, 4)), - ("Bill", "Clinth", "", (1983, 5, 6)), - ("Bobb", "McNail", "", (1984, 7, 8)), - ): - _rec = _dbf.newRecord() - _rec["NAME"] = _n - _rec["SURNAME"] = _s - _rec["INITIALS"] = _i - _rec["BIRTHDATE"] = _b - _rec.store() - print(repr(_dbf)) - _dbf.close() - - -if __name__ == '__main__': - import sys - - _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" - demo_create(_name) - demo_read(_name) - -# vim: set et sw=4 sts=4 : diff --git a/src/tablib/packages/dbfpy3/dbfnew.py b/src/tablib/packages/dbfpy3/dbfnew.py deleted file mode 100644 index 28c54ef..0000000 --- a/src/tablib/packages/dbfpy3/dbfnew.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/python -""".DBF creation helpers. - -Note: this is a legacy interface. New code should use Dbf class - for table creation (see examples in dbf.py) - -TODO: - - handle Memo fields. - - check length of the fields according to the - `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` - -""" -"""History (most recent first) -04-jul-2006 [als] added export declaration; - updated for dbfpy 2.0 -15-dec-2005 [yc] define dbf_new.__slots__ -14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; - dbf_new now is a new class (inherited from object) -??-jun-2000 [--] added by Hans Fiby -""" - -__version__ = "$Revision: 1.4 $"[11:-2] -__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] - -__all__ = ["dbf_new"] - -from .dbf import * -from .fields import * -from .header import * -from .record import * - - -class _FieldDefinition(object): - """Field definition. - - This is a simple structure, which contains ``name``, ``type``, - ``len``, ``dec`` and ``cls`` fields. - - Objects also implement get/setitem magic functions, so fields - could be accessed via sequence interface, where 'name' has - index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and - 'cls' could be located at index 4. - - """ - - __slots__ = "name", "type", "len", "dec", "cls" - - # WARNING: be attentive - dictionaries are mutable! - FLD_TYPES = { - # type: (cls, len) - "C": (DbfCharacterFieldDef, None), - "N": (DbfNumericFieldDef, None), - "L": (DbfLogicalFieldDef, 1), - # FIXME: support memos - # "M": (DbfMemoFieldDef), - "D": (DbfDateFieldDef, 8), - # FIXME: I'm not sure length should be 14 characters! - # but temporary I use it, cuz date is 8 characters - # and time 6 (hhmmss) - "T": (DbfDateTimeFieldDef, 14), - } - - def __init__(self, name, type, len=None, dec=0): - _cls, _len = self.FLD_TYPES[type] - if _len is None: - if len is None: - raise ValueError("Field length must be defined") - _len = len - self.name = name - self.type = type - self.len = _len - self.dec = dec - self.cls = _cls - - def getDbfField(self): - "Return `DbfFieldDef` instance from the current definition." - return self.cls(self.name, self.len, self.dec) - - def appendToHeader(self, dbfh): - """Create a `DbfFieldDef` instance and append it to the dbf header. - - Arguments: - dbfh: `DbfHeader` instance. - - """ - _dbff = self.getDbfField() - dbfh.addField(_dbff) - - -class dbf_new(object): - """New .DBF creation helper. - - Example Usage: - - dbfn = dbf_new() - dbfn.add_field("name",'C',80) - dbfn.add_field("price",'N',10,2) - dbfn.add_field("date",'D',8) - dbfn.write("tst.dbf") - - Note: - This module cannot handle Memo-fields, - they are special. - - """ - - __slots__ = ("fields",) - - FieldDefinitionClass = _FieldDefinition - - def __init__(self): - self.fields = [] - - def add_field(self, name, typ, len, dec=0): - """Add field definition. - - Arguments: - name: - field name (str object). field name must not - contain ASCII NULs and it's length shouldn't - exceed 10 characters. - typ: - type of the field. this must be a single character - from the "CNLMDT" set meaning character, numeric, - logical, memo, date and date/time respectively. - len: - length of the field. this argument is used only for - the character and numeric fields. all other fields - have fixed length. - FIXME: use None as a default for this argument? - dec: - decimal precision. used only for the numric fields. - - """ - self.fields.append(self.FieldDefinitionClass(name, typ, len, dec)) - - def write(self, filename): - """Create empty .DBF file using current structure.""" - _dbfh = DbfHeader() - _dbfh.setCurrentDate() - for _fldDef in self.fields: - _fldDef.appendToHeader(_dbfh) - - _dbfStream = open(filename, "wb") - _dbfh.write(_dbfStream) - _dbfStream.close() - - -if __name__ == '__main__': - # create a new DBF-File - dbfn = dbf_new() - dbfn.add_field("name", 'C', 80) - dbfn.add_field("price", 'N', 10, 2) - dbfn.add_field("date", 'D', 8) - dbfn.write("tst.dbf") - # test new dbf - print("*** created tst.dbf: ***") - dbft = Dbf('tst.dbf', readOnly=0) - print(repr(dbft)) - # add a record - rec = DbfRecord(dbft) - rec['name'] = 'something' - rec['price'] = 10.5 - rec['date'] = (2000, 1, 12) - rec.store() - # add another record - rec = DbfRecord(dbft) - rec['name'] = 'foo and bar' - rec['price'] = 12234 - rec['date'] = (1992, 7, 15) - rec.store() - - # show the records - print("*** inserted 2 records into tst.dbf: ***") - print(repr(dbft)) - for i1 in range(len(dbft)): - rec = dbft[i1] - for fldName in dbft.fieldNames: - print('%s:\t %s' % (fldName, rec[fldName])) - print() - dbft.close() - -# vim: set et sts=4 sw=4 : diff --git a/src/tablib/packages/dbfpy3/fields.py b/src/tablib/packages/dbfpy3/fields.py deleted file mode 100644 index ad63c28..0000000 --- a/src/tablib/packages/dbfpy3/fields.py +++ /dev/null @@ -1,466 +0,0 @@ -"""DBF fields definitions. - -TODO: - - make memos work -""" -"""History (most recent first): -26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes -05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date -16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point - in the value to select float or integer return type -13-mar-2008 [als] check field name length in constructor -11-feb-2007 [als] handle value conversion errors -10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() -01-dec-2006 [als] Timestamp columns use None for empty values -31-oct-2006 [als] support field types 'F' (float), 'I' (integer) - and 'Y' (currency); - automate export and registration of field classes -04-jul-2006 [als] added export declaration -10-mar-2006 [als] decode empty values for Date and Logical fields; - show field name in errors -10-mar-2006 [als] fix Numeric value decoding: according to spec, - value always is string representation of the number; - ensure that encoded Numeric value fits into the field -20-dec-2005 [yc] use field names in upper case -15-dec-2005 [yc] field definitions moved from `dbf`. -""" - -__version__ = "$Revision: 1.14 $"[11:-2] -__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] - -__all__ = ["lookupFor",] # field classes added at the end of the module - -import datetime -import struct -import sys - -from . import utils - -## abstract definitions - -class DbfFieldDef(object): - """Abstract field definition. - - Child classes must override ``type`` class attribute to provide datatype - information of the field definition. For more info about types visit - `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` - - Also child classes must override ``defaultValue`` field to provide - default value for the field value. - - If child class has fixed length ``length`` class attribute must be - overridden and set to the valid value. None value means, that field - isn't of fixed length. - - Note: ``name`` field must not be changed after instantiation. - - """ - - __slots__ = ("name", "decimalCount", - "start", "end", "ignoreErrors") - - # length of the field, None in case of variable-length field, - # or a number if this field is a fixed-length field - length = None - - # field type. for more information about fields types visit - # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` - # must be overridden in child classes - typeCode = None - - # default value for the field. this field must be - # overridden in child classes - defaultValue = None - - def __init__(self, name, length=None, decimalCount=None, - start=None, stop=None, ignoreErrors=False, - ): - """Initialize instance.""" - assert self.typeCode is not None, "Type code must be overridden" - assert self.defaultValue is not None, "Default value must be overridden" - ## fix arguments - if len(name) >10: - raise ValueError("Field name \"%s\" is too long" % name) - name = str(name).upper() - if self.__class__.length is None: - if length is None: - raise ValueError("[%s] Length isn't specified" % name) - length = int(length) - if length <= 0: - raise ValueError("[%s] Length must be a positive integer" - % name) - else: - length = self.length - if decimalCount is None: - decimalCount = 0 - ## set fields - self.name = name - # FIXME: validate length according to the specification at - # http://www.clicketyclick.dk/databases/xbase/format/data_types.html - self.length = length - self.decimalCount = decimalCount - self.ignoreErrors = ignoreErrors - self.start = start - self.end = stop - - def __cmp__(self, other): - return cmp(self.name, str(other).upper()) - - def __hash__(self): - return hash(self.name) - - def fromString(cls, string, start, ignoreErrors=False): - """Decode dbf field definition from the string data. - - Arguments: - string: - a string, dbf definition is decoded from. length of - the string must be 32 bytes. - start: - position in the database file. - ignoreErrors: - initial error processing mode for the new field (boolean) - - """ - assert len(string) == 32 - _length = string[16] - return cls(utils.unzfill(string)[:11].decode('utf-8'), _length, - string[17], start, start + _length, ignoreErrors=ignoreErrors) - fromString = classmethod(fromString) - - def toString(self): - """Return encoded field definition. - - Return: - Return value is a string object containing encoded - definition of this field. - - """ - if sys.version_info < (2, 4): - # earlier versions did not support padding character - _name = self.name[:11] + "\0" * (11 - len(self.name)) - else: - _name = self.name.ljust(11, '\0') - return ( - _name + - self.typeCode + - #data address - chr(0) * 4 + - chr(self.length) + - chr(self.decimalCount) + - chr(0) * 14 - ) - - def __repr__(self): - return "%-10s %1s %3d %3d" % self.fieldInfo() - - def fieldInfo(self): - """Return field information. - - Return: - Return value is a (name, type, length, decimals) tuple. - - """ - return (self.name, self.typeCode, self.length, self.decimalCount) - - def rawFromRecord(self, record): - """Return a "raw" field value from the record string.""" - return record[self.start:self.end] - - def decodeFromRecord(self, record): - """Return decoded field value from the record string.""" - try: - return self.decodeValue(self.rawFromRecord(record)) - except: - if self.ignoreErrors: - return utils.INVALID_VALUE - else: - raise - - def decodeValue(self, value): - """Return decoded value from string value. - - This method shouldn't be used publicly. It's called from the - `decodeFromRecord` method. - - This is an abstract method and it must be overridden in child classes. - """ - raise NotImplementedError - - def encodeValue(self, value): - """Return str object containing encoded field value. - - This is an abstract method and it must be overridden in child classes. - """ - raise NotImplementedError - -## real classes - -class DbfCharacterFieldDef(DbfFieldDef): - """Definition of the character field.""" - - typeCode = "C" - defaultValue = b'' - - def decodeValue(self, value): - """Return string object. - - Return value is a ``value`` argument with stripped right spaces. - - """ - return value.rstrip(b' ').decode('utf-8') - - def encodeValue(self, value): - """Return raw data string encoded from a ``value``.""" - return str(value)[:self.length].ljust(self.length) - - -class DbfNumericFieldDef(DbfFieldDef): - """Definition of the numeric field.""" - - typeCode = "N" - # XXX: now I'm not sure it was a good idea to make a class field - # `defaultValue` instead of a generic method as it was implemented - # previously -- it's ok with all types except number, cuz - # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. - defaultValue = 0 - - def decodeValue(self, value): - """Return a number decoded from ``value``. - - If decimals is zero, value will be decoded as an integer; - or as a float otherwise. - - Return: - Return value is a int (long) or float instance. - - """ - value = value.strip(b' \0') - if b'.' in value: - # a float (has decimal separator) - return float(value) - elif value: - # must be an integer - return int(value) - else: - return 0 - - def encodeValue(self, value): - """Return string containing encoded ``value``.""" - _rv = ("%*.*f" % (self.length, self.decimalCount, value)) - if len(_rv) > self.length: - _ppos = _rv.find(".") - if 0 <= _ppos <= self.length: - _rv = _rv[:self.length] - else: - raise ValueError("[%s] Numeric overflow: %s (field width: %i)" - % (self.name, _rv, self.length)) - return _rv - -class DbfFloatFieldDef(DbfNumericFieldDef): - """Definition of the float field - same as numeric.""" - - typeCode = "F" - -class DbfIntegerFieldDef(DbfFieldDef): - """Definition of the integer field.""" - - typeCode = "I" - length = 4 - defaultValue = 0 - - def decodeValue(self, value): - """Return an integer number decoded from ``value``.""" - return struct.unpack("= 1: - _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) - _rv += datetime.timedelta(0, _msecs / 1000.0) - else: - # empty date - _rv = None - return _rv - - def encodeValue(self, value): - """Return a string-encoded ``value``.""" - if value: - value = utils.getDateTime(value) - # LE byteorder - _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, - (value.hour * 3600 + value.minute * 60 + value.second) * 1000) - else: - _rv = "\0" * self.length - assert len(_rv) == self.length - return _rv - - -_fieldsRegistry = {} - -def registerField(fieldCls): - """Register field definition class. - - ``fieldCls`` should be subclass of the `DbfFieldDef`. - - Use `lookupFor` to retrieve field definition class - by the type code. - - """ - assert fieldCls.typeCode is not None, "Type code isn't defined" - # XXX: use fieldCls.typeCode.upper()? in case of any decign - # don't forget to look to the same comment in ``lookupFor`` method - _fieldsRegistry[fieldCls.typeCode] = fieldCls - - -def lookupFor(typeCode): - """Return field definition class for the given type code. - - ``typeCode`` must be a single character. That type should be - previously registered. - - Use `registerField` to register new field class. - - Return: - Return value is a subclass of the `DbfFieldDef`. - - """ - # XXX: use typeCode.upper()? in case of any decign don't - # forget to look to the same comment in ``registerField`` - return _fieldsRegistry[chr(typeCode)] - -## register generic types - -for (_name, _val) in list(globals().items()): - if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ - and (_name != "DbfFieldDef"): - __all__.append(_name) - registerField(_val) -del _name, _val - -# vim: et sts=4 sw=4 : diff --git a/src/tablib/packages/dbfpy3/header.py b/src/tablib/packages/dbfpy3/header.py deleted file mode 100644 index 6c0dc4f..0000000 --- a/src/tablib/packages/dbfpy3/header.py +++ /dev/null @@ -1,273 +0,0 @@ -"""DBF header definition. - -TODO: - - handle encoding of the character fields - (encoding information stored in the DBF header) - -""" -"""History (most recent first): -16-sep-2010 [als] fromStream: fix century of the last update field -11-feb-2007 [als] added .ignoreErrors -10-feb-2007 [als] added __getitem__: return field definitions - by field name or field number (zero-based) -04-jul-2006 [als] added export declaration -15-dec-2005 [yc] created -""" - -__version__ = "$Revision: 1.6 $"[11:-2] -__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] - -__all__ = ["DbfHeader"] - -import io -import datetime -import struct -import time -import sys - -from . import fields -from .utils import getDate - - -class DbfHeader(object): - """Dbf header definition. - - For more information about dbf header format visit - `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` - - Examples: - Create an empty dbf header and add some field definitions: - dbfh = DbfHeader() - dbfh.addField(("name", "C", 10)) - dbfh.addField(("date", "D")) - dbfh.addField(DbfNumericFieldDef("price", 5, 2)) - Create a dbf header with field definitions: - dbfh = DbfHeader([ - ("name", "C", 10), - ("date", "D"), - DbfNumericFieldDef("price", 5, 2), - ]) - - """ - - __slots__ = ("signature", "fields", "lastUpdate", "recordLength", - "recordCount", "headerLength", "changed", "_ignore_errors") - - ## instance construction and initialization methods - - def __init__(self, fields=None, headerLength=0, recordLength=0, - recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False, - ): - """Initialize instance. - - Arguments: - fields: - a list of field definitions; - recordLength: - size of the records; - headerLength: - size of the header; - recordCount: - number of records stored in DBF; - signature: - version number (aka signature). using 0x03 as a default meaning - "File without DBT". for more information about this field visit - ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` - lastUpdate: - date of the DBF's update. this could be a string ('yymmdd' or - 'yyyymmdd'), timestamp (int or float), datetime/date value, - a sequence (assuming (yyyy, mm, dd, ...)) or an object having - callable ``ticks`` field. - ignoreErrors: - error processing mode for DBF fields (boolean) - - """ - self.signature = signature - if fields is None: - self.fields = [] - else: - self.fields = list(fields) - self.lastUpdate = getDate(lastUpdate) - self.recordLength = recordLength - self.headerLength = headerLength - self.recordCount = recordCount - self.ignoreErrors = ignoreErrors - # XXX: I'm not sure this is safe to - # initialize `self.changed` in this way - self.changed = bool(self.fields) - - # @classmethod - def fromString(cls, string): - """Return header instance from the string object.""" - return cls.fromStream(io.StringIO(str(string))) - fromString = classmethod(fromString) - - # @classmethod - def fromStream(cls, stream): - """Return header object from the stream.""" - stream.seek(0) - first_32 = stream.read(32) - if type(first_32) != bytes: - _data = bytes(first_32, sys.getfilesystemencoding()) - _data = first_32 - (_cnt, _hdrLen, _recLen) = struct.unpack(" DbfRecord._write(); - added delete() method. -16-dec-2005 [yc] record definition moved from `dbf`. -""" - -__version__ = "$Revision: 1.7 $"[11:-2] -__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2] - -__all__ = ["DbfRecord"] - -import sys - -from . import utils - -class DbfRecord(object): - """DBF record. - - Instances of this class shouldn't be created manually, - use `dbf.Dbf.newRecord` instead. - - Class implements mapping/sequence interface, so - fields could be accessed via their names or indexes - (names is a preferred way to access fields). - - Hint: - Use `store` method to save modified record. - - Examples: - Add new record to the database: - db = Dbf(filename) - rec = db.newRecord() - rec["FIELD1"] = value1 - rec["FIELD2"] = value2 - rec.store() - Or the same, but modify existed - (second in this case) record: - db = Dbf(filename) - rec = db[2] - rec["FIELD1"] = value1 - rec["FIELD2"] = value2 - rec.store() - - """ - - __slots__ = "dbf", "index", "deleted", "fieldData" - - ## creation and initialization - - def __init__(self, dbf, index=None, deleted=False, data=None): - """Instance initialization. - - Arguments: - dbf: - A `Dbf.Dbf` instance this record belonogs to. - index: - An integer record index or None. If this value is - None, record will be appended to the DBF. - deleted: - Boolean flag indicating whether this record - is a deleted record. - data: - A sequence or None. This is a data of the fields. - If this argument is None, default values will be used. - - """ - self.dbf = dbf - # XXX: I'm not sure ``index`` is necessary - self.index = index - self.deleted = deleted - if data is None: - self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] - else: - self.fieldData = list(data) - - # XXX: validate self.index before calculating position? - position = property(lambda self: self.dbf.header.headerLength + \ - self.index * self.dbf.header.recordLength) - - def rawFromStream(cls, dbf, index): - """Return raw record contents read from the stream. - - Arguments: - dbf: - A `Dbf.Dbf` instance containing the record. - index: - Index of the record in the records' container. - This argument can't be None in this call. - - Return value is a string containing record data in DBF format. - - """ - # XXX: may be write smth assuming, that current stream - # position is the required one? it could save some - # time required to calculate where to seek in the file - dbf.stream.seek(dbf.header.headerLength + - index * dbf.header.recordLength) - return dbf.stream.read(dbf.header.recordLength) - rawFromStream = classmethod(rawFromStream) - - def fromStream(cls, dbf, index): - """Return a record read from the stream. - - Arguments: - dbf: - A `Dbf.Dbf` instance new record should belong to. - index: - Index of the record in the records' container. - This argument can't be None in this call. - - Return value is an instance of the current class. - - """ - return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) - fromStream = classmethod(fromStream) - - def fromString(cls, dbf, string, index=None): - """Return record read from the string object. - - Arguments: - dbf: - A `Dbf.Dbf` instance new record should belong to. - string: - A string new record should be created from. - index: - Index of the record in the container. If this - argument is None, record will be appended. - - Return value is an instance of the current class. - - """ - return cls(dbf, index, string[0]=="*", - [_fd.decodeFromRecord(string) for _fd in dbf.header.fields]) - fromString = classmethod(fromString) - - ## object representation - - def __repr__(self): - _template = "%%%ds: %%s (%%s)" % max([len(_fld) - for _fld in self.dbf.fieldNames]) - _rv = [] - for _fld in self.dbf.fieldNames: - _val = self[_fld] - if _val is utils.INVALID_VALUE: - _rv.append(_template % - (_fld, "None", "value cannot be decoded")) - else: - _rv.append(_template % (_fld, _val, type(_val))) - return "\n".join(_rv) - - ## protected methods - - def _write(self): - """Write data to the dbf stream. - - Note: - This isn't a public method, it's better to - use 'store' instead publicly. - Be design ``_write`` method should be called - only from the `Dbf` instance. - - - """ - self._validateIndex(False) - self.dbf.stream.seek(self.position) - self.dbf.stream.write(bytes(self.toString(), - sys.getfilesystemencoding())) - # FIXME: may be move this write somewhere else? - # why we should check this condition for each record? - if self.index == len(self.dbf): - # this is the last record, - # we should write SUB (ASCII 26) - self.dbf.stream.write(b"\x1A") - - ## utility methods - - def _validateIndex(self, allowUndefined=True, checkRange=False): - """Valid ``self.index`` value. - - If ``allowUndefined`` argument is True functions does nothing - in case of ``self.index`` pointing to None object. - - """ - if self.index is None: - if not allowUndefined: - raise ValueError("Index is undefined") - elif self.index < 0: - raise ValueError("Index can't be negative (%s)" % self.index) - elif checkRange and self.index <= self.dbf.header.recordCount: - raise ValueError("There are only %d records in the DBF" % - self.dbf.header.recordCount) - - ## interface methods - - def store(self): - """Store current record in the DBF. - - If ``self.index`` is None, this record will be appended to the - records of the DBF this records belongs to; or replaced otherwise. - - """ - self._validateIndex() - if self.index is None: - self.index = len(self.dbf) - self.dbf.append(self) - else: - self.dbf[self.index] = self - - def delete(self): - """Mark method as deleted.""" - self.deleted = True - - def toString(self): - """Return string packed record values.""" -# for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData): -# - - return "".join([" *"[self.deleted]] + [ - _def.encodeValue(_dat) - for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData) - ]) - - def asList(self): - """Return a flat list of fields. - - Note: - Change of the list's values won't change - real values stored in this object. - - """ - return self.fieldData[:] - - def asDict(self): - """Return a dictionary of fields. - - Note: - Change of the dicts's values won't change - real values stored in this object. - - """ - return dict([_i for _i in zip(self.dbf.fieldNames, self.fieldData)]) - - def __getitem__(self, key): - """Return value by field name or field index.""" - if isinstance(key, int): - # integer index of the field - return self.fieldData[key] - # assuming string field name - return self.fieldData[self.dbf.indexOfFieldName(key)] - - def __setitem__(self, key, value): - """Set field value by integer index of the field or string name.""" - if isinstance(key, int): - # integer index of the field - return self.fieldData[key] - # assuming string field name - self.fieldData[self.dbf.indexOfFieldName(key)] = value - -# vim: et sts=4 sw=4 : diff --git a/src/tablib/packages/dbfpy3/utils.py b/src/tablib/packages/dbfpy3/utils.py deleted file mode 100644 index ac63e32..0000000 --- a/src/tablib/packages/dbfpy3/utils.py +++ /dev/null @@ -1,170 +0,0 @@ -"""String utilities. - -TODO: - - allow strings in getDateTime routine; -""" -"""History (most recent first): -11-feb-2007 [als] added INVALID_VALUE -10-feb-2007 [als] allow date strings padded with spaces instead of zeroes -20-dec-2005 [yc] handle long objects in getDate/getDateTime -16-dec-2005 [yc] created from ``strutil`` module. -""" - -__version__ = "$Revision: 1.4 $"[11:-2] -__date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2] - -import datetime -import time - - -def unzfill(str): - """Return a string without ASCII NULs. - - This function searchers for the first NUL (ASCII 0) occurrence - and truncates string till that position. - - """ - try: - return str[:str.index(b'\0')] - except ValueError: - return str - - -def getDate(date=None): - """Return `datetime.date` instance. - - Type of the ``date`` argument could be one of the following: - None: - use current date value; - datetime.date: - this value will be returned; - datetime.datetime: - the result of the date.date() will be returned; - string: - assuming "%Y%m%d" or "%y%m%dd" format; - number: - assuming it's a timestamp (returned for example - by the time.time() call; - sequence: - assuming (year, month, day, ...) sequence; - - Additionally, if ``date`` has callable ``ticks`` attribute, - it will be used and result of the called would be treated - as a timestamp value. - - """ - if date is None: - # use current value - return datetime.date.today() - if isinstance(date, datetime.date): - return date - if isinstance(date, datetime.datetime): - return date.date() - if isinstance(date, (int, float)): - # date is a timestamp - return datetime.date.fromtimestamp(date) - if isinstance(date, str): - date = date.replace(" ", "0") - if len(date) == 6: - # yymmdd - return datetime.date(*time.strptime(date, "%y%m%d")[:3]) - # yyyymmdd - return datetime.date(*time.strptime(date, "%Y%m%d")[:3]) - if hasattr(date, "__getitem__"): - # a sequence (assuming date/time tuple) - return datetime.date(*date[:3]) - return datetime.date.fromtimestamp(date.ticks()) - - -def getDateTime(value=None): - """Return `datetime.datetime` instance. - - Type of the ``value`` argument could be one of the following: - None: - use current date value; - datetime.date: - result will be converted to the `datetime.datetime` instance - using midnight; - datetime.datetime: - ``value`` will be returned as is; - string: - *** CURRENTLY NOT SUPPORTED ***; - number: - assuming it's a timestamp (returned for example - by the time.time() call; - sequence: - assuming (year, month, day, ...) sequence; - - Additionally, if ``value`` has callable ``ticks`` attribute, - it will be used and result of the called would be treated - as a timestamp value. - - """ - if value is None: - # use current value - return datetime.datetime.today() - if isinstance(value, datetime.datetime): - return value - if isinstance(value, datetime.date): - return datetime.datetime.fromordinal(value.toordinal()) - if isinstance(value, (int, float)): - # value is a timestamp - return datetime.datetime.fromtimestamp(value) - if isinstance(value, str): - raise NotImplementedError("Strings aren't currently implemented") - if hasattr(value, "__getitem__"): - # a sequence (assuming date/time tuple) - return datetime.datetime(*tuple(value)[:6]) - return datetime.datetime.fromtimestamp(value.ticks()) - - -class classproperty(property): - """Works in the same way as a ``property``, but for the classes.""" - - def __get__(self, obj, cls): - return self.fget(cls) - - -class _InvalidValue(object): - - """Value returned from DBF records when field validation fails - - The value is not equal to anything except for itself - and equal to all empty values: None, 0, empty string etc. - In other words, invalid value is equal to None and not equal - to None at the same time. - - This value yields zero upon explicit conversion to a number type, - empty string for string types, and False for boolean. - - """ - - def __eq__(self, other): - return not other - - def __ne__(self, other): - return not (other is self) - - def __bool__(self): - return False - - def __int__(self): - return 0 - __long__ = __int__ - - def __float__(self): - return 0.0 - - def __str__(self): - return "" - - def __unicode__(self): - return "" - - def __repr__(self): - return "" - -# invalid value is a constant singleton -INVALID_VALUE = _InvalidValue() - -# vim: set et sts=4 sw=4 : diff --git a/src/tablib/packages/statistics.py b/src/tablib/packages/statistics.py deleted file mode 100644 index e97a6c9..0000000 --- a/src/tablib/packages/statistics.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import division - - -def median(data): - """ - Return the median (middle value) of numeric data, using the common - "mean of middle two" method. If data is empty, ValueError is raised. - - Mimics the behaviour of Python3's statistics.median - - >>> median([1, 3, 5]) - 3 - >>> median([1, 3, 5, 7]) - 4.0 - - """ - data = sorted(data) - n = len(data) - if not n: - raise ValueError("No median for empty data") - i = n // 2 - if n % 2: - return data[i] - return (data[i - 1] + data[i]) / 2 diff --git a/tests/requirements.txt b/tests/requirements.txt index cd43514..dc91fc9 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,6 +1,5 @@ pytest pytest-cov -backports.csv; python_version < '3.0' MarkupPy odfpy openpyxl>=2.4.0 diff --git a/tests/test_tablib.py b/tests/test_tablib.py index 186d83e..943e1e7 100755 --- a/tests/test_tablib.py +++ b/tests/test_tablib.py @@ -1,7 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """Tests for Tablib.""" -from __future__ import unicode_literals import datetime import doctest @@ -12,7 +10,6 @@ from uuid import uuid4 from MarkupPy import markup import tablib -from tablib.compat import unicode, is_py3 from tablib.core import Row, detect_format from tablib.formats import _csv as csv_module @@ -251,10 +248,7 @@ class TablibTestCase(BaseTestCase): def test_unicode_append(self): """Passes in a single unicode character and exports.""" - if is_py3: - new_row = ('å', 'é') - else: - exec ("new_row = (u'å', u'é')") + new_row = ('å', 'é') data.append(new_row) self._test_export_data_in_all_formats(data) @@ -458,10 +452,10 @@ class TablibTestCase(BaseTestCase): # add another entry to test right field width for # integer self.founders.append(('Old', 'Man', 100500)) - self.assertEqual('first_name|last_name |gpa ', unicode(self.founders).split('\n')[0]) + self.assertEqual('first_name|last_name |gpa ', str(self.founders).split('\n')[0]) def test_databook_add_sheet_accepts_only_dataset_instances(self): - class NotDataset(object): + class NotDataset: def append(self, item): pass @@ -684,10 +678,7 @@ class CSVTests(BaseTestCase): data = tablib.Dataset() - if sys.version_info[0] > 2: - data.append(['\xfc', '\xfd']) - else: - exec ("data.append([u'\xfc', u'\xfd'])") + data.append(['\xfc', '\xfd']) data.csv @@ -698,7 +689,7 @@ class CSVTests(BaseTestCase): data.csv = self.founders.csv headers = data.headers - self.assertTrue(isinstance(headers[0], unicode)) + self.assertTrue(isinstance(headers[0], str)) orig_first_name = self.founders[self.headers[0]] csv_first_name = data[headers[0]] @@ -711,7 +702,7 @@ class CSVTests(BaseTestCase): data.csv = self.founders.csv target_header = data.headers[0] - self.assertTrue(isinstance(target_header, unicode)) + self.assertTrue(isinstance(target_header, str)) del data[target_header] @@ -1034,11 +1025,9 @@ class DBFTests(BaseTestCase): _regression_dbf += b' 50.0000000' _regression_dbf += b'\x1a' - if is_py3: - # If in python3, decode regression string to binary. - # _regression_dbf = bytes(_regression_dbf, 'utf-8') - # _regression_dbf = _regression_dbf.replace(b'\n', b'\r') - pass + # If in python3, decode regression string to binary. + # _regression_dbf = bytes(_regression_dbf, 'utf-8') + # _regression_dbf = _regression_dbf.replace(b'\n', b'\r') try: self.assertEqual(_regression_dbf, data.dbf) diff --git a/tox.ini b/tox.ini index ebd8f53..c3eaa2a 100644 --- a/tox.ini +++ b/tox.ini @@ -2,12 +2,11 @@ usedevelop = true minversion = 2.4 envlist = - py{27,35,36,37,38}-tests, + py{35,36,37,38}-tests, py37-{docs,lint} [testenv] basepython = - py27: python2.7 py35: python3.5 py36: python3.6 py37: python3.7