Compare commits

...

26 Commits

Author SHA1 Message Date
dmosberger e8f54811c7 Expose 'read_only' parameter for 'import_set' and 'import_book' (#483) 2020-12-04 10:10:02 +02:00
Nuno André e8774043ed Substitute tuples for dicts in __getstate__/__setstate__ to speed up the pickling 2020-11-29 22:11:46 +01:00
Jannis Leidel dc1729fc6f Move releases to GitHub actions. 2020-11-23 13:14:21 +01:00
Hugo van Kemenade 3dc62685f8 Reduce Travis CI testing (#479) 2020-11-23 11:01:10 +01:00
Hugo van Kemenade 22c88de90d Upload coverage from GHA (#480)
* Upload coverage from GHA

* Fix PytestConfigWarning: Unknown config option: python_paths
2020-11-14 23:51:05 +02:00
Hugo van Kemenade 615e308559 Docs: Add link to changelog/history (#478) 2020-11-12 10:30:57 +02:00
Hugo van Kemenade 8c5404591b Add support for Python 3.9, drop EOL 3.5 (#477) 2020-10-30 19:01:48 +02:00
Hugo van Kemenade 5fa4496f9d Suggest quotes when pip installing with optional dependencies (#474) 2020-08-12 16:12:57 +03:00
Ran Benita bc8438bda4 Stop using pkg_resources
tablib imports pkg_resources in order to find its own version. Importing
pkg_resources is very slow (100ms-250ms is common).

Avoid it by letting setuptools-scm generate a file with the version
instead.
2020-08-10 15:49:51 +02:00
Claude Paroz ce79e44d14 Fixes #469 - Prevented rst crash with only-space strings (#470)
Thanks nexone for the report.
2020-06-15 08:42:51 +03:00
Claude Paroz 985c3d98b0 Set the release date for 2.0.0 2020-05-16 14:04:19 +02:00
Claude Paroz 6d097c0214 Fixes #465 - Allow importing 'ragged' .xlsx files (#466) 2020-05-16 09:07:32 +03:00
dragonworks 16b5565354 Fixes #462 - Update xlsx import to read cell values instead of cell formulas
Co-authored-by: Claude Paroz <claude@2xlibre.net>
2020-03-11 09:05:43 +01:00
Claude Paroz c25fe54b6f Refs #373 - Import dates from xls files as Python datetime objects 2020-03-09 17:05:32 +01:00
Tim Gates b39aefb8d8 Fix simple typo: belonogs -> belongs (#460)
Closes #459
2020-02-21 10:26:58 +02:00
Claude Paroz a442758729 Fixes #457 - Bumped openpyxl dependency to 2.6.0 (#458) 2020-02-16 15:05:20 +02:00
Claude Paroz 21479001a7 Fixes #453 - Reversing behavior of Row.lpush/Row.rpush (#454)
Co-authored-by: chim <chenpan@xiaomai5.com>
2020-02-13 20:51:49 +02:00
Claude Paroz f7e39c1ad5 Set the 1.1.0 release date 2020-02-13 18:56:15 +01:00
Claude Paroz aaeb5c8360 Fixes #226 - Allow importing ragged CSV files (#456) 2020-02-12 21:12:53 +02:00
Hugo 7a6c623cca Document upcoming breaking change in 2.0 2020-02-12 19:04:51 +01:00
Hugo 0c31fcb3e4 Test on Python 3.8 2020-02-02 16:44:26 +01:00
Hugo fa7fdb0443 pre-commit autoupdate 2020-02-02 16:44:26 +01:00
Hugo 8e19479cea Simplify config: uses the interpreter tox is installed to 2020-02-02 16:44:26 +01:00
Claude Paroz 8f39ac5055 Optimize xlsx detection (#448)
Reading the whole file is a bit too much to detect if the file
looks like an xlsx file.
2020-01-26 22:02:52 +02:00
Hugo 8d02934c53 Fix tox config 2020-01-26 20:48:20 +01:00
Claude Paroz d0963c206f Fix the missing xls dependencies message 2020-01-14 17:58:32 +01:00
38 changed files with 348 additions and 177 deletions
+21 -5
View File
@@ -2,23 +2,39 @@ name: Docs and lint
on: [push, pull_request]
env:
FORCE_COLOR: 1
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.8]
env:
- TOXENV: docs
- TOXENV: lint
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
python-version: 3.9
- name: Get pip cache dir
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
- name: Cache
uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
key:
${{ matrix.os }}-${{ matrix.python-version }}-v1-${{ hashFiles('**/setup.py') }}
restore-keys: |
${{ matrix.os }}-${{ matrix.python-version }}-v1-
- name: Install dependencies
run: |
+56
View File
@@ -0,0 +1,56 @@
# Builds the sdist and wheel on every push to master and, when a GitHub
# release is published, uploads the built distributions to Jazzband.
name: Release

on:
  push:
    branches:
      - master
  release:
    types:
      - published

jobs:
  build:
    # Skip forks: only the canonical repository builds and uploads releases.
    if: github.repository == 'jazzband/tablib'
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
        with:
          # Fetch full history instead of a shallow clone; presumably needed
          # so setuptools-scm can derive the version from tags (confirm).
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is never parsed as a float.
          python-version: "3.8"

      - name: Get pip cache dir
        id: pip-cache
        # Publish the step output through the GITHUB_OUTPUT file; the older
        # `::set-output` workflow command is deprecated.
        run: |
          echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"

      - name: Cache
        uses: actions/cache@v2
        with:
          path: ${{ steps.pip-cache.outputs.dir }}
          key: release-${{ hashFiles('**/setup.py') }}
          restore-keys: |
            release-

      - name: Install dependencies
        run: |
          python -m pip install -U pip
          python -m pip install -U setuptools twine wheel

      - name: Build package
        run: |
          python setup.py --version
          python setup.py sdist --format=gztar bdist_wheel
          twine check dist/*

      - name: Upload packages to Jazzband
        # Only upload for published releases, not for plain pushes to master.
        if: github.event.action == 'published'
        # Pinned to the maintained release branch rather than the mutable
        # `master` ref.
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: jazzband
          password: ${{ secrets.JAZZBAND_RELEASE_KEY }}
          repository-url: https://jazzband.co/projects/tablib/upload
+26 -6
View File
@@ -2,24 +2,40 @@ name: Test
on: [push, pull_request]
env:
FORCE_COLOR: 1
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
python-version: [3.5, 3.6, 3.7]
python-version: [3.6, 3.7, 3.8, 3.9]
os: [ubuntu-latest, macOS-latest, windows-latest]
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Get pip cache dir
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
- name: Cache
uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
key:
${{ matrix.os }}-${{ matrix.python-version }}-v1-${{ hashFiles('**/setup.py') }}
restore-keys: |
${{ matrix.os }}-${{ matrix.python-version }}-v1-
- name: Install dependencies
run: |
python -m pip install --upgrade pip
@@ -28,6 +44,10 @@ jobs:
- name: Tox tests
shell: bash
# Drop the dot: py3.7-tests -> py37-tests
run: |
tox -e py`echo ${{ matrix.python-version }} | tr -d .`-tests
tox -e py
- name: Upload coverage
uses: codecov/codecov-action@v1
with:
name: ${{ matrix.os }} Python ${{ matrix.python-version }}
+3
View File
@@ -38,3 +38,6 @@ htmlcov
# setuptools noise
.eggs
*.egg-info
# generated by setuptools-scm
/src/tablib/_version.py
+6 -6
View File
@@ -1,24 +1,24 @@
repos:
- repo: https://github.com/asottile/pyupgrade
rev: v1.25.2
rev: v2.7.3
hooks:
- id: pyupgrade
args: ["--py3-plus"]
args: ["--py36-plus"]
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.21
- repo: https://github.com/PyCQA/isort
rev: 5.6.4
hooks:
- id: isort
additional_dependencies: [toml]
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.4.2
rev: v1.7.0
hooks:
- id: python-check-blanket-noqa
- id: rst-backticks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.4.0
rev: v3.3.0
hooks:
- id: check-merge-conflict
- id: check-toml
-38
View File
@@ -1,38 +0,0 @@
language: python
cache:
pip: true
directories:
- $HOME/.cache/pre-commit
matrix:
fast_finish: true
include:
- python: 3.8
env: TOXENV=docs
- python: 3.8
env: TOXENV=lint
- python: 3.8
- python: 3.7
- python: 3.6
install: travis_retry pip install tox-travis
script: tox
after_success:
- |
if [[ "$TOXENV" != "docs" && "$TOXENV" != "lint" ]]; then
bash <(curl -s https://codecov.io/bash)
fi
deploy:
provider: pypi
user: jazzband
server: https://jazzband.co/projects/tablib/upload
distributions: sdist bdist_wheel
password:
secure: svV4fYtodwW+iTyFOm5ISEfhVwcA+6vTskD3x6peznc40TdMV9Ek8nT3Q/NB4lCbXoUw2qR4H6uhLCjesnv/VvVk/qbitCyD8ySlgwOV5n7NzJs8lC8EYaHSjGQjatTwJAokfGVYkPawkI7HXDqtDggLUQBK+Ag8HDW+XBSbQIU=
on:
tags: true
repo: jazzband/tablib
python: 3.7
+47
View File
@@ -1,5 +1,52 @@
# History
## Unreleased
### Breaking changes
- Dropped Python 3.5 support
### Improvements
- Added Python 3.9 support
- Added read_only option to xlsx file reader (#482).
### Bugfixes
- Prevented crash in rst export with only-space strings (#469).
## 2.0.0 (2020-05-16)
### Breaking changes
- The `Row.lpush/rpush` logic was reversed. `lpush` was appending while `rpush`
and `append` were prepending. This was fixed (reversed behavior). If you
counted on the broken behavior, please update your code (#453).
### Bugfixes
- Fixed minimal openpyxl dependency version to 2.6.0 (#457).
- Dates from xls files are now read as Python datetime objects (#373).
- Allow import of "ragged" xlsx files (#465).
### Improvements
- When importing an xlsx file, Tablib will now read cell values instead of formulas (#462).
## 1.1.0 (2020-02-13)
### Deprecations
- Upcoming breaking change in Tablib 2.0.0: the `Row.lpush/rpush` logic is reversed.
`lpush` is appending while `rpush` and `append` are prepending. The broken behavior
will remain in Tablib 1.x and will be fixed (reversed) in Tablib 2.0.0 (#453). If you
count on the broken behavior, please update your code when you upgrade to Tablib 2.x.
### Improvements
- Tablib is now able to import CSV content where not all rows have the same
length. Missing columns on any line receive the empty string (#226).
## 1.0.0 (2020-01-13)
### Breaking changes
-1
View File
@@ -4,7 +4,6 @@
[![PyPI version](https://img.shields.io/pypi/v/tablib.svg)](https://pypi.org/project/tablib/)
[![Supported Python versions](https://img.shields.io/pypi/pyversions/tablib.svg)](https://pypi.org/project/tablib/)
[![PyPI downloads](https://img.shields.io/pypi/dm/tablib.svg)](https://pypistats.org/packages/tablib)
[![Travus CI status](https://img.shields.io/travis/jazzband/tablib/master?label=Travis%20CI&logo=travis)](https://travis-ci.org/jazzband/tablib)
[![GitHub Actions status](https://github.com/jazzband/tablib/workflows/Test/badge.svg)](https://github.com/jazzband/tablib/actions)
[![codecov](https://codecov.io/gh/jazzband/tablib/branch/master/graph/badge.svg)](https://codecov.io/gh/jazzband/tablib)
[![GitHub](https://img.shields.io/github/license/jazzband/tablib.svg)](LICENSE)
+3 -3
View File
@@ -3,9 +3,9 @@
Jazzband guidelines: https://jazzband.co/about/releases
* [ ] Get master to the appropriate code release state.
[Travis CI](https://travis-ci.org/jazzband/tablib)
[GitHub Actions](https://github.com/jazzband/tablib/actions)
should pass on master.
[![Build Status](https://travis-ci.org/jazzband/tablib.svg?branch=master)](https://travis-ci.org/jazzband/tablib)
[![GitHub Actions status](https://github.com/jazzband/tablib/workflows/Test/badge.svg)](https://github.com/jazzband/tablib/actions)
* [ ] Check [HISTORY.md](https://github.com/jazzband/tablib/blob/master/HISTORY.md),
update version number and release date
@@ -16,7 +16,7 @@ git tag -a v0.14.0 -m v0.14.0
git push --tags
```
* [ ] Once Travis CI has built and uploaded distributions, check files at
* [ ] Once GitHub Actions has built and uploaded distributions, check files at
[Jazzband](https://jazzband.co/projects/tablib) and release to
[PyPI](https://pypi.org/pypi/tablib)
+1
View File
@@ -8,4 +8,5 @@
<li><a href="https://pypi.org/project/tablib">Tablib @ PyPI</a></li>
<li><a href="https://github.com/jazzband/tablib">Tablib @ GitHub</a></li>
<li><a href="https://github.com/jazzband/tablib/issues">Issue Tracker</a></li>
<li><a href="https://github.com/jazzband/tablib/blob/master/HISTORY.md">Changelog</a></li>
</ul>
+3 -3
View File
@@ -9,7 +9,7 @@
#
# All configuration values have a default; values that are commented out
# serve to show the default.
from pkg_resources import get_distribution
import tablib
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -49,9 +49,9 @@ copyright = '2019 Jazzband'
# built documents.
#
# The full version, including alpha/beta/rc tags.
release = get_distribution('tablib').version
release = tablib.__version__
# The short X.Y version.
version = '.'.join(release.split('.')[:2])
version = '.'.join(tablib.__version__.split('.')[:2])
# for example take major/minor
# The language for content autogenerated by Sphinx. Refer to documentation
+3 -3
View File
@@ -163,16 +163,16 @@ the easiest way to test your changes for potential issues is to simply run the t
Continuous Integration
----------------------
Every pull request is automatically tested and inspected upon receipt with `Travis CI`_.
Every pull request is automatically tested and inspected upon receipt with `GitHub Actions`_.
If you broke the build, you will receive an email accordingly.
Anyone may view the build status and history at any time.
https://travis-ci.org/jazzband/tablib
https://github.com/jazzband/tablib/actions
Additional reports will also be included here in the future, including :pep:`8` checks and stress reports for extremely large datasets.
.. _`Travis CI`: https://travis-ci.org/
.. _`GitHub Actions`: https://github.com/jazzband/tablib/actions
.. _docs:
+25 -7
View File
@@ -27,7 +27,7 @@ For example::
dataset.export("cli", tablefmt="github")
dataset.export("cli", tablefmt="grid")
This format is optional, install Tablib with ``pip install tablib[cli]`` to
This format is optional, install Tablib with ``pip install "tablib[cli]"`` to
make the format available.
csv
@@ -83,7 +83,7 @@ df (DataFrame)
==============
Import/export using the pandas_ DataFrame format. This format is optional,
install Tablib with ``pip install tablib[pandas]`` to make the format available.
install Tablib with ``pip install "tablib[pandas]"`` to make the format available.
.. _pandas: https://pandas.pydata.org/
@@ -94,7 +94,7 @@ The ``html`` format is currently export-only. The exports produce an HTML page
with the data in a ``<table>``. If headers have been set, they will be used as
table headers.
This format is optional, install Tablib with ``pip install tablib[html]`` to
This format is optional, install Tablib with ``pip install "tablib[html]"`` to
make the format available.
jira
@@ -132,7 +132,7 @@ ods
Export data in OpenDocument Spreadsheet format. The ``ods`` format is currently
export-only.
This format is optional, install Tablib with ``pip install tablib[ods]`` to
This format is optional, install Tablib with ``pip install "tablib[ods]"`` to
make the format available.
.. admonition:: Binary Warning
@@ -183,7 +183,7 @@ xls
Import/export data in Legacy Excel Spreadsheet representation.
This format is optional, install Tablib with ``pip install tablib[xls]`` to
This format is optional, install Tablib with ``pip install "tablib[xls]"`` to
make the format available.
.. note::
@@ -203,9 +203,27 @@ xlsx
Import/export data in Excel 07+ Spreadsheet representation.
This format is optional, install Tablib with ``pip install tablib[xlsx]`` to
This format is optional, install Tablib with ``pip install "tablib[xlsx]"`` to
make the format available.
The ``import_set()`` and ``import_book()`` methods accept keyword
argument ``read_only``. If its value is ``True`` (the default), the
XLSX data source is read lazily. Lazy reading generally reduces time
and memory consumption, especially for large spreadsheets. However,
it relies on the XLSX data source declaring correct dimensions. Some
programs generate XLSX files with incorrect dimensions. Such files
may need to be loaded with this optimization turned off by passing
``read_only=False``.
.. note::
When reading an ``xlsx`` file containing formulas in its cells, Tablib will
read the cell values, not the cell formulas.
.. versionchanged:: 2.0.0
Reads cell values instead of formulas.
.. admonition:: Binary Warning
The ``xlsx`` file format is binary, so make sure to write in binary mode::
@@ -223,7 +241,7 @@ returned instead.
Import assumes (for now) that headers exist.
This format is optional, install Tablib with ``pip install tablib[yaml]`` to
This format is optional, install Tablib with ``pip install "tablib[yaml]"`` to
make the format available.
.. _YAML: https://yaml.org
+3 -3
View File
@@ -26,19 +26,19 @@ formats available:
.. code-block:: console
$ pip install tablib[xlsx]
$ pip install "tablib[xlsx]"
Or all possible formats:
.. code-block:: console
$ pip install tablib[all]
$ pip install "tablib[all]"
which is equivalent to:
.. code-block:: console
$ pip install tablib[html, pandas, ods, xls, xlsx, yaml]
$ pip install "tablib[html, pandas, ods, xls, xlsx, yaml]"
-------------------
Download the Source
+1 -1
View File
@@ -57,7 +57,7 @@ THE SOFTWARE.
Pythons Supported
-----------------
Python 3.5+ is officially supported.
Python 3.6+ is officially supported.
Now, go :ref:`install Tablib <install>`.
+1 -6
View File
@@ -1,7 +1,2 @@
[tool.isort]
force_grid_wrap = 0
include_trailing_comma = true
known_third_party = ["MarkupPy", "odf", "openpyxl", "pkg_resources", "setuptools", "tablib", "xlrd", "xlwt", "yaml"]
line_length = 88
multi_line_output = 3
use_parentheses = true
profile = "black"
-1
View File
@@ -1,4 +1,3 @@
[pytest]
norecursedirs = .git .*
addopts = -rsxX --showlocals --tb=native --cov=tablib --cov=tests --cov-report xml --cov-report term --cov-report html
python_paths = .
+7 -5
View File
@@ -4,7 +4,9 @@ from setuptools import find_packages, setup
setup(
name='tablib',
use_scm_version=True,
use_scm_version={
'write_to': 'src/tablib/_version.py',
},
setup_requires=['setuptools_scm'],
description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)',
long_description=(
@@ -31,20 +33,20 @@ setup(
'Programming Language :: Python',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
],
python_requires='>=3.5',
python_requires='>=3.6',
extras_require={
'all': ['markuppy', 'odfpy', 'openpyxl>=2.4.0', 'pandas', 'pyyaml', 'tabulate', 'xlrd', 'xlwt'],
'all': ['markuppy', 'odfpy', 'openpyxl>=2.6.0', 'pandas', 'pyyaml', 'tabulate', 'xlrd', 'xlwt'],
'cli': ['tabulate'],
'html': ['markuppy'],
'ods': ['odfpy'],
'pandas': ['pandas'],
'xls': ['xlrd', 'xlwt'],
'xlsx': ['openpyxl>=2.4.0'],
'xlsx': ['openpyxl>=2.6.0'],
'yaml': ['pyyaml'],
},
)
+8 -7
View File
@@ -1,5 +1,12 @@
""" Tablib. """
from pkg_resources import DistributionNotFound, get_distribution
try:
# Generated by setuptools-scm.
from ._version import version as __version__
except ImportError:
# Some broken installation.
__version__ = None
from tablib.core import ( # noqa: F401
Databook,
Dataset,
@@ -10,9 +17,3 @@ from tablib.core import ( # noqa: F401
import_book,
import_set,
)
try:
__version__ = get_distribution(__name__).version
except DistributionNotFound:
# package is not installed
__version__ = None
+24 -32
View File
@@ -57,24 +57,16 @@ class Row:
del self._row[i]
def __getstate__(self):
slots = dict()
for slot in self.__slots__:
attribute = getattr(self, slot)
slots[slot] = attribute
return slots
return self._row, self.tags
def __setstate__(self, state):
for (k, v) in list(state.items()):
setattr(self, k, v)
self._row, self.tags = state
def rpush(self, value):
self.insert(0, value)
self.insert(len(self._row), value)
def lpush(self, value):
self.insert(len(value), value)
self.insert(0, value)
def append(self, value):
self.rpush(value)
@@ -147,9 +139,9 @@ class Dataset:
.. admonition:: Format Attributes Definition
If you look at the code, the various output/import formats are not
defined within the :class:`Dataset` object. To add support for a new format, see
:ref:`Adding New Formats <newformats>`.
If you look at the code, the various output/import formats are not
defined within the :class:`Dataset` object. To add support for a new format, see
:ref:`Adding New Formats <newformats>`.
"""
@@ -299,7 +291,7 @@ class Dataset:
def _get_headers(self):
"""An *optional* list of strings to be used for header rows and attribute names.
This must be set manually. The given list length must equal :class:`Dataset.width`.
This must be set manually. The given list length must equal :attr:`Dataset.width`.
"""
return self.__headers
@@ -335,7 +327,7 @@ class Dataset:
set, a list of Python dictionaries will be returned. If no headers have been
set, a list of tuples (rows) will be returned instead.
A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. ::
A dataset object can also be imported by setting the :attr:`Dataset.dict` attribute. ::
data = tablib.Dataset()
data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
@@ -414,10 +406,10 @@ class Dataset:
fmt = registry.get_format(format)
if not hasattr(fmt, 'import_set'):
raise UnsupportedFormat('Format {} cannot be imported.'.format(format))
raise UnsupportedFormat(f'Format {format} cannot be imported.')
if not import_set:
raise UnsupportedFormat('Format {} cannot be imported.'.format(format))
raise UnsupportedFormat(f'Format {format} cannot be imported.')
fmt.import_set(self, stream, **kwargs)
return self
@@ -430,7 +422,7 @@ class Dataset:
"""
fmt = registry.get_format(format)
if not hasattr(fmt, 'export_set'):
raise UnsupportedFormat('Format {} cannot be exported.'.format(format))
raise UnsupportedFormat(f'Format {format} cannot be exported.')
return fmt.export_set(self, **kwargs)
@@ -452,28 +444,28 @@ class Dataset:
def rpush(self, row, tags=list()):
"""Adds a row to the end of the :class:`Dataset`.
See :class:`Dataset.insert` for additional documentation.
See :meth:`Dataset.insert` for additional documentation.
"""
self.insert(self.height, row=row, tags=tags)
def lpush(self, row, tags=list()):
"""Adds a row to the top of the :class:`Dataset`.
See :class:`Dataset.insert` for additional documentation.
See :meth:`Dataset.insert` for additional documentation.
"""
self.insert(0, row=row, tags=tags)
def append(self, row, tags=list()):
"""Adds a row to the :class:`Dataset`.
See :class:`Dataset.insert` for additional documentation.
See :meth:`Dataset.insert` for additional documentation.
"""
self.rpush(row, tags)
def extend(self, rows, tags=list()):
"""Adds a list of rows to the :class:`Dataset` using
:class:`Dataset.append`
:meth:`Dataset.append`
"""
for row in rows:
@@ -515,20 +507,20 @@ class Dataset:
data.append_col(col=random.randint)
If inserting a column, and :class:`Dataset.headers` is set, the
If inserting a column, and :attr:`Dataset.headers` is set, the
header attribute must be set, and will be considered the header for
that row.
See :ref:`dyncols` for an in-depth example.
.. versionchanged:: 0.9.0
If inserting a column, and :class:`Dataset.headers` is set, the
If inserting a column, and :attr:`Dataset.headers` is set, the
header attribute must be set, and will be considered the header for
that row.
.. versionadded:: 0.9.0
If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
This gives you the ability to :class:`filter <Dataset.filter>` your
This gives you the ability to :meth:`filter <Dataset.filter>` your
:class:`Dataset` later.
"""
@@ -565,14 +557,14 @@ class Dataset:
def rpush_col(self, col, header=None):
"""Adds a column to the end of the :class:`Dataset`.
See :class:`Dataset.insert` for additional documentation.
See :meth:`Dataset.insert_col` for additional documentation.
"""
self.insert_col(self.width, col, header=header)
def lpush_col(self, col, header=None):
"""Adds a column to the top of the :class:`Dataset`.
See :class:`Dataset.insert` for additional documentation.
See :meth:`Dataset.insert_col` for additional documentation.
"""
self.insert_col(0, col, header=header)
@@ -596,7 +588,7 @@ class Dataset:
def append_col(self, col, header=None):
"""Adds a column to the :class:`Dataset`.
See :class:`Dataset.insert_col` for additional documentation.
See :meth:`Dataset.insert_col` for additional documentation.
"""
self.rpush_col(col, header)
@@ -875,7 +867,7 @@ class Databook:
fmt = registry.get_format(format)
if not hasattr(fmt, 'import_book'):
raise UnsupportedFormat('Format {} cannot be loaded.'.format(format))
raise UnsupportedFormat(f'Format {format} cannot be loaded.')
fmt.import_book(self, stream, **kwargs)
return self
@@ -888,7 +880,7 @@ class Databook:
"""
fmt = registry.get_format(format)
if not hasattr(fmt, 'export_book'):
raise UnsupportedFormat('Format {} cannot be exported.'.format(format))
raise UnsupportedFormat(f'Format {format} cannot be exported.')
return fmt.export_book(self, **kwargs)
+3 -3
View File
@@ -17,7 +17,7 @@ uninstalled_format_messages = {
"df": {"package_name": "pandas package", "extras_name": "pandas"},
"html": {"package_name": "MarkupPy package", "extras_name": "html"},
"ods": {"package_name": "odfpy package", "extras_name": "ods"},
"xls": {"package_name": "odfpy and xlwt packages", "extras_name": "ods"},
"xls": {"package_name": "xlrd and xlwt packages", "extras_name": "xls"},
"xlsx": {"package_name": "openpyxl package", "extras_name": "xlsx"},
"yaml": {"package_name": "pyyaml package", "extras_name": "yaml"},
}
@@ -28,7 +28,7 @@ def load_format_class(dotted_path):
module_path, class_name = dotted_path.rsplit('.', 1)
return getattr(import_module(module_path), class_name)
except (ValueError, AttributeError) as err:
raise ImportError("Unable to load format class '{}' ({})".format(dotted_path, err))
raise ImportError(f"Unable to load format class '{dotted_path}' ({err})")
class FormatDescriptorBase:
@@ -122,7 +122,7 @@ class Registry:
if key in uninstalled_format_messages:
raise UnsupportedFormat(
"The '{key}' format is not available. You may want to install the "
"{package_name} (or `pip install tablib[{extras_name}]`).".format(
"{package_name} (or `pip install \"tablib[{extras_name}]\"`).".format(
**uninstalled_format_messages[key], key=key
)
)
+2
View File
@@ -46,6 +46,8 @@ class CSVFormat:
if (i == 0) and (headers):
dset.headers = row
elif row:
if i > 0 and len(row) < dset.width:
row += [''] * (dset.width - len(row))
dset.append(row)
@classmethod
+1 -1
View File
@@ -30,7 +30,7 @@ class DataFrameFormat:
if DataFrame is None:
raise NotImplementedError(
'DataFrame Format requires `pandas` to be installed.'
' Try `pip install tablib[pandas]`.')
' Try `pip install "tablib[pandas]"`.')
dataframe = DataFrame(dset.dict, columns=dset.headers)
return dataframe
+1 -1
View File
@@ -55,7 +55,7 @@ class HTMLFormat:
for i, dset in enumerate(databook._datasets):
title = (dset.title if dset.title else 'Set %s' % (i))
wrapper.write('<{}>{}</{}>\n'.format(cls.BOOK_ENDINGS, title, cls.BOOK_ENDINGS))
wrapper.write(f'<{cls.BOOK_ENDINGS}>{title}</{cls.BOOK_ENDINGS}>\n')
wrapper.write(dset.html)
wrapper.write('\n')
+1 -1
View File
@@ -21,7 +21,7 @@ class JIRAFormat:
header = cls._get_header(dataset.headers) if dataset.headers else ''
body = cls._get_body(dataset)
return '{}\n{}'.format(header, body) if header else body
return f'{header}\n{body}' if header else body
@classmethod
def _get_body(cls, dataset):
+1 -1
View File
@@ -24,7 +24,7 @@ def _max_word_len(text):
>>> _max_word_len('Python Module for Tabular Datasets')
8
"""
return max(len(word) for word in text.split()) if text else 0
return max([len(word) for word in text.split()], default=0) if text else 0
class ReSTFormat:
+11 -2
View File
@@ -3,9 +3,11 @@
from io import BytesIO
import tablib
import xlrd
import xlwt
from xlrd.xldate import xldate_as_datetime
import tablib
# special styles
wrap = xlwt.easyxf("alignment: wrap on")
@@ -74,12 +76,19 @@ class XLSFormat:
dset.title = sheet.name
def cell_value(value, type_):
if type_ == xlrd.XL_CELL_ERROR:
return xlrd.error_text_from_code[value]
elif type_ == xlrd.XL_CELL_DATE:
return xldate_as_datetime(value, xls_book.datemode)
return value
for i in range(sheet.nrows):
if i == 0 and headers:
dset.headers = sheet.row_values(0)
else:
dset.append([
val if typ != xlrd.XL_CELL_ERROR else xlrd.error_text_from_code[val]
cell_value(val, typ)
for val, typ in zip(sheet.row_values(i), sheet.row_types(i))
])
+19 -13
View File
@@ -3,12 +3,13 @@
from io import BytesIO
import openpyxl
import tablib
from openpyxl.reader.excel import ExcelReader, load_workbook
from openpyxl.styles import Alignment, Font
from openpyxl.utils import get_column_letter
from openpyxl.workbook import Workbook
from openpyxl.writer.excel import ExcelWriter
Workbook = openpyxl.workbook.Workbook
ExcelWriter = openpyxl.writer.excel.ExcelWriter
get_column_letter = openpyxl.utils.get_column_letter
import tablib
class XLSXFormat:
@@ -19,7 +20,10 @@ class XLSXFormat:
def detect(cls, stream):
"""Returns True if given stream is a readable excel file."""
try:
openpyxl.reader.excel.load_workbook(stream, read_only=True)
# No need to fully load the file, it should be enough to be able to
# read the manifest.
reader = ExcelReader(stream, read_only=False)
reader.read_manifest()
return True
except Exception:
return False
@@ -55,12 +59,12 @@ class XLSXFormat:
return stream.getvalue()
@classmethod
def import_set(cls, dset, in_stream, headers=True):
def import_set(cls, dset, in_stream, headers=True, read_only=True):
"""Returns databook from XLS stream."""
dset.wipe()
xls_book = openpyxl.reader.excel.load_workbook(in_stream, read_only=True)
xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
sheet = xls_book.active
dset.title = sheet.title
@@ -73,12 +77,12 @@ class XLSXFormat:
dset.append(row_vals)
@classmethod
def import_book(cls, dbook, in_stream, headers=True):
def import_book(cls, dbook, in_stream, headers=True, read_only=True):
"""Returns databook from XLS stream."""
dbook.wipe()
xls_book = openpyxl.reader.excel.load_workbook(in_stream, read_only=True)
xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
for sheet in xls_book.worksheets:
data = tablib.Dataset()
@@ -89,6 +93,8 @@ class XLSXFormat:
if (i == 0) and (headers):
data.headers = row_vals
else:
if i > 0 and len(row_vals) < data.width:
row_vals += [''] * (data.width - len(row_vals))
data.append(row_vals)
dbook.add_sheet(data)
@@ -102,14 +108,14 @@ class XLSXFormat:
_offset = i
_package.insert((sep[0] + _offset), (sep[1],))
bold = openpyxl.styles.Font(bold=True)
wrap_text = openpyxl.styles.Alignment(wrap_text=True)
bold = Font(bold=True)
wrap_text = Alignment(wrap_text=True)
for i, row in enumerate(_package):
row_number = i + 1
for j, col in enumerate(row):
col_idx = get_column_letter(j + 1)
cell = ws['{}{}'.format(col_idx, row_number)]
cell = ws[f'{col_idx}{row_number}']
# bold headers
if (row_number == 1) and dataset.headers:
+2 -1
View File
@@ -1,9 +1,10 @@
""" Tablib - YAML Support.
"""
import tablib
import yaml
import tablib
class YAMLFormat:
title = 'yaml'
+1 -1
View File
@@ -315,7 +315,7 @@ class DbfLogicalFieldDef(DbfFieldDef):
return False
if value in "YyTt":
return True
raise ValueError("[{}] Invalid logical value {!r}".format(self.name, value))
raise ValueError(f"[{self.name}] Invalid logical value {value!r}")
def encodeValue(self, value):
"""Return a character from the "TF?" set.
+1 -1
View File
@@ -60,7 +60,7 @@ class DbfRecord:
Arguments:
dbf:
A `Dbf.Dbf` instance this record belonogs to.
A `Dbf.Dbf` instance this record belongs to.
index:
An integer record index or None. If this value is
None, record will be appended to the DBF.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+1 -1
View File
@@ -2,7 +2,7 @@ pytest
pytest-cov
MarkupPy
odfpy
openpyxl>=2.4.0
openpyxl>=2.6.0
pandas
pyyaml
tabulate
+55 -18
View File
@@ -11,8 +11,9 @@ from io import BytesIO, StringIO
from pathlib import Path
from uuid import uuid4
import tablib
from MarkupPy import markup
import tablib
from tablib.core import Row, detect_format
from tablib.exceptions import UnsupportedFormat
from tablib.formats import registry
@@ -57,7 +58,7 @@ class TablibTestCase(BaseTestCase):
# A known format but uninstalled
del registry._formats['ods']
msg = (r"The 'ods' format is not available. You may want to install the "
"odfpy package \\(or `pip install tablib\\[ods\\]`\\).")
"odfpy package \\(or `pip install \"tablib\\[ods\\]\"`\\).")
with self.assertRaisesRegex(UnsupportedFormat, msg):
data.export('ods')
@@ -556,27 +557,15 @@ class TablibTestCase(BaseTestCase):
def test_row_lpush(self):
"""Row lpush."""
# Arrange
john = Row(self.john)
george = Row(self.george)
# Act
john.lpush(george)
# Assert
self.assertEqual(john[-1], george)
john.lpush(53)
self.assertEqual(john.list, [53, 'John', 'Adams', 90])
def test_row_append(self):
"""Row append."""
# Arrange
john = Row(self.john)
george = Row(self.george)
# Act
john.append(george)
# Assert
self.assertEqual(john[0], george)
john.append('stuff')
self.assertEqual(john.list, ['John', 'Adams', 90, 'stuff'])
def test_row_contains(self):
"""Row __contains__."""
@@ -671,6 +660,7 @@ class RSTTests(BaseTestCase):
data.headers = self.headers
data.append(self.john)
data.append(('Wendy', '', 43))
data.append(('Esther', ' ', 31))
self.assertEqual(
data.export('rst'),
'========== ========= ===\n'
@@ -678,6 +668,7 @@ class RSTTests(BaseTestCase):
'========== ========= ===\n'
'John Adams 90 \n'
'Wendy 43 \n'
'Esther 31 \n'
'========== ========= ==='
)
@@ -804,6 +795,25 @@ class CSVTests(BaseTestCase):
data.csv = csv_text
self.assertEqual(data.width, 7)
def test_csv_import_set_ragged(self):
"""Import CSV set when not all rows have the same length."""
csv_text = (
"H1,H2,H3\n"
"A,B\n"
"C,D,E\n"
"\n"
"F\n"
)
dataset = tablib.import_set(csv_text, format="csv")
self.assertEqual(
str(dataset),
'H1|H2|H3\n'
'--|--|--\n'
'A |B | \n'
'C |D |E \n'
'F | | '
)
def test_csv_export(self):
"""Verify exporting dataset object as CSV."""
@@ -968,6 +978,12 @@ class XLSTests(BaseTestCase):
in_stream = self.founders.xls
self.assertEqual(detect_format(in_stream), 'xls')
def test_xls_date_import(self):
xls_source = Path(__file__).parent / 'files' / 'dates.xls'
with open(str(xls_source), mode='rb') as fh:
dset = tablib.Dataset().load(fh, 'xls')
self.assertEqual(dset.dict[0]['birth_date'], datetime.datetime(2015, 4, 12, 0, 0))
def test_xls_import_with_errors(self):
"""Errors from imported files are kept as errors."""
xls_source = Path(__file__).parent / 'files' / 'errors.xls'
@@ -1002,6 +1018,13 @@ class XLSXTests(BaseTestCase):
self.assertEqual(data.dict[0]['float'], 21.55)
self.assertEqual(data.dict[0]['date/time'], date_time)
def test_xlsx_import_set_ragged(self):
"""Import XLSX file when not all rows have the same length."""
xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx'
with open(str(xlsx_source), mode='rb') as fh:
book = tablib.Databook().load(fh, 'xlsx')
self.assertEqual(book.sheets()[0].pop(), (1.0, ''))
def test_xlsx_wrong_char(self):
"""Bad characters are not silently ignored. We let the exception bubble up."""
from openpyxl.utils.exceptions import IllegalCharacterError
@@ -1010,6 +1033,20 @@ class XLSXTests(BaseTestCase):
data.append(('string', b'\x0cf'))
data.xlsx
def test_xlsx_cell_values(self):
"""Test cell values are read and not formulas"""
xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx'
with xls_source.open('rb') as fh:
data = tablib.Dataset().load(fh)
self.assertEqual(data.headers[0], 'Hello World')
def test_xlsx_bad_dimensions(self):
"""Test loading file with bad dimension. Must be done with
read_only=False."""
xls_source = Path(__file__).parent / 'files' / 'bad_dimensions.xlsx'
with xls_source.open('rb') as fh:
data = tablib.Dataset().load(fh, read_only=False)
self.assertEqual(data.height, 3)
class JSONTests(BaseTestCase):
def test_json_format_detect(self):
+11 -6
View File
@@ -4,16 +4,22 @@ minversion = 2.4
envlist =
docs
lint
py{35,36,37,38}
py{36,37,38,39}
[testenv]
deps =
tests: -rtests/requirements.txt
docs: sphinx
-rtests/requirements.txt
extras = pandas
passenv =
FORCE_COLOR
commands =
tests: pytest {posargs:tests}
docs: sphinx-build -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html
pytest {posargs:tests}
[testenv:docs]
deps =
sphinx
commands =
sphinx-build -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html
[testenv:lint]
deps =
@@ -31,4 +37,3 @@ skip_install = true
[flake8]
exclude =
.tox
ignore=E501,E127,E128,E124