Expose 'read_only' parameter for 'import_set' and 'import_book' (#483)

Substitute tuples for dicts in __getstate__/__setstate__ to speed up the pickling
Move releases to GitHub Actions.
2026-06-05 15:00:19 +00:00 · 2020-12-04 10:10:02 +02:00 · 2020-11-29 22:11:46 +01:00 · 2020-11-23 13:14:21 +01:00 · 2020-11-23 11:01:10 +01:00 · 2020-11-14 23:51:05 +02:00
38 changed files with 348 additions and 177 deletions
@@ -2,23 +2,39 @@ name: Docs and lint

 on: [push, pull_request]

+env:
+  FORCE_COLOR: 1
+
 jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
-        python-version: [3.8]
        env:
        - TOXENV: docs
        - TOXENV: lint

    steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v2

-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v1
+      - name: Set up Python
+        uses: actions/setup-python@v2
        with:
-          python-version: ${{ matrix.python-version }}
+          python-version: 3.9
+
+      - name: Get pip cache dir
+        id: pip-cache
+        run: |
+          echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
+
+      - name: Cache
+        uses: actions/cache@v4
+        with:
+          path: ${{ steps.pip-cache.outputs.dir }}
+          # This job's matrix only defines `env`; key on runner OS and tox env.
+          key: ${{ runner.os }}-${{ matrix.env.TOXENV }}-v1-${{ hashFiles('**/setup.py') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ matrix.env.TOXENV }}-v1-

      - name: Install dependencies
        run: |
@@ -0,0 +1,60 @@
+# Build the sdist and wheel on every push to master and on every published
+# release; upload to the Jazzband index only when a release is published.
+name: Release
+
+on:
+  push:
+    branches:
+      - master
+  release:
+    types:
+      - published
+
+jobs:
+  build:
+    # Only run in the canonical repository, never in forks.
+    if: github.repository == 'jazzband/tablib'
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          # Fetch full history; presumably needed by setuptools_scm versioning.
+          fetch-depth: 0
+
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.8"
+
+      - name: Get pip cache dir
+        id: pip-cache
+        run: |
+          echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
+
+      - name: Cache
+        uses: actions/cache@v4
+        with:
+          path: ${{ steps.pip-cache.outputs.dir }}
+          key: release-${{ hashFiles('**/setup.py') }}
+          restore-keys: |
+            release-
+
+      - name: Install dependencies
+        run: |
+          python -m pip install -U pip
+          python -m pip install -U setuptools twine wheel
+
+      - name: Build package
+        run: |
+          python setup.py --version
+          python setup.py sdist --format=gztar bdist_wheel
+          twine check dist/*
+
+      - name: Upload packages to Jazzband
+        if: github.event.action == 'published'
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: jazzband
+          password: ${{ secrets.JAZZBAND_RELEASE_KEY }}
+          repository-url: https://jazzband.co/projects/tablib/upload
@@ -2,24 +2,40 @@ name: Test

 on: [push, pull_request]

+env:
+  FORCE_COLOR: 1
+
 jobs:
  build:
-
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
-        python-version: [3.5, 3.6, 3.7]
+        python-version: [3.6, 3.7, 3.8, 3.9]
        os: [ubuntu-latest, macOS-latest, windows-latest]

    steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2

    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v1
+      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}

+    - name: Get pip cache dir
+      id: pip-cache
+      # bash on every OS (Windows defaults to pwsh) so $GITHUB_OUTPUT expands.
+      shell: bash
+      run: echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
+
+    - name: Cache
+      uses: actions/cache@v4
+      with:
+        path: ${{ steps.pip-cache.outputs.dir }}
+        key: ${{ matrix.os }}-${{ matrix.python-version }}-v1-${{ hashFiles('**/setup.py') }}
+        restore-keys: |
+          ${{ matrix.os }}-${{ matrix.python-version }}-v1-
+
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
@@ -28,6 +44,10 @@ jobs:

    - name: Tox tests
      shell: bash
-      # Drop the dot: py3.7-tests -> py37-tests
      run: |
-        tox -e py`echo ${{ matrix.python-version }} | tr -d .`-tests
+        tox -e py
+
+    - name: Upload coverage
+      uses: codecov/codecov-action@v1
+      with:
+        name: ${{ matrix.os }} Python ${{ matrix.python-version }}
@@ -38,3 +38,6 @@ htmlcov
 # setuptools noise
 .eggs
 *.egg-info
+
+# generated by setuptools-scm
+/src/tablib/_version.py
@@ -1,24 +1,24 @@
 repos:
  - repo: https://github.com/asottile/pyupgrade
-    rev: v1.25.2
+    rev: v2.7.3
    hooks:
      - id: pyupgrade
-        args: ["--py3-plus"]
+        args: ["--py36-plus"]

-  - repo: https://github.com/pre-commit/mirrors-isort
-    rev: v4.3.21
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.6.4
    hooks:
      - id: isort
        additional_dependencies: [toml]

  - repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.4.2
+    rev: v1.7.0
    hooks:
      - id: python-check-blanket-noqa
      - id: rst-backticks

  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v2.4.0
+    rev: v3.3.0
    hooks:
      - id: check-merge-conflict
      - id: check-toml
@@ -1,38 +0,0 @@
-language: python
-cache:
-  pip: true
-  directories:
-    - $HOME/.cache/pre-commit
-
-matrix:
-  fast_finish: true
-  include:
-    - python: 3.8
-      env: TOXENV=docs
-    - python: 3.8
-      env: TOXENV=lint
-    - python: 3.8
-    - python: 3.7
-    - python: 3.6
-
-install: travis_retry pip install tox-travis
-
-script: tox
-
-after_success:
-  - |
-    if [[ "$TOXENV" != "docs" && "$TOXENV" != "lint" ]]; then
-      bash <(curl -s https://codecov.io/bash)
-    fi
-
-deploy:
-  provider: pypi
-  user: jazzband
-  server: https://jazzband.co/projects/tablib/upload
-  distributions: sdist bdist_wheel
-  password:
-    secure: svV4fYtodwW+iTyFOm5ISEfhVwcA+6vTskD3x6peznc40TdMV9Ek8nT3Q/NB4lCbXoUw2qR4H6uhLCjesnv/VvVk/qbitCyD8ySlgwOV5n7NzJs8lC8EYaHSjGQjatTwJAokfGVYkPawkI7HXDqtDggLUQBK+Ag8HDW+XBSbQIU=
-  on:
-    tags: true
-    repo: jazzband/tablib
-    python: 3.7
@@ -1,5 +1,52 @@
 # History

+## Unreleased
+
+### Breaking changes
+
+- Dropped Python 3.5 support
+
+### Improvements
+
+- Added Python 3.9 support
+- Added read_only option to xlsx file reader (#482).
+
+### Bugfixes
+
+- Prevented crash in rst export with only-space strings (#469).
+
+## 2.0.0 (2020-05-16)
+
+### Breaking changes
+
+- The `Row.lpush/rpush` logic was reversed. `lpush` was appending while `rpush`
+  and `append` were prepending. This was fixed (reversed behavior). If you
+  counted on the broken behavior, please update your code (#453).
+
+### Bugfixes
+
+- Fixed minimal openpyxl dependency version to 2.6.0 (#457).
+- Dates from xls files are now read as Python datetime objects (#373).
+- Allow import of "ragged" xlsx files (#465).
+
+### Improvements
+
+- When importing an xlsx file, Tablib will now read cell values instead of formulas (#462).
+
+## 1.1.0 (2020-02-13)
+
+### Deprecations
+
+- Upcoming breaking change in Tablib 2.0.0: the `Row.lpush/rpush` logic is reversed.
+  `lpush` is appending while `rpush` and `append` are prepending. The broken behavior
+  will remain in Tablib 1.x and will be fixed (reversed) in Tablib 2.0.0 (#453). If you
+  count on the broken behavior, please update your code when you upgrade to Tablib 2.x.
+
+### Improvements
+
+- Tablib is now able to import CSV content where not all rows have the same
+  length. Missing columns on any line receive the empty string (#226).
+
 ## 1.0.0 (2020-01-13)

 ### Breaking changes
@@ -4,7 +4,6 @@
 [![PyPI version](https://img.shields.io/pypi/v/tablib.svg)](https://pypi.org/project/tablib/)
 [![Supported Python versions](https://img.shields.io/pypi/pyversions/tablib.svg)](https://pypi.org/project/tablib/)
 [![PyPI downloads](https://img.shields.io/pypi/dm/tablib.svg)](https://pypistats.org/packages/tablib)
-[![Travus CI status](https://img.shields.io/travis/jazzband/tablib/master?label=Travis%20CI&logo=travis)](https://travis-ci.org/jazzband/tablib)
 [![GitHub Actions status](https://github.com/jazzband/tablib/workflows/Test/badge.svg)](https://github.com/jazzband/tablib/actions)
 [![codecov](https://codecov.io/gh/jazzband/tablib/branch/master/graph/badge.svg)](https://codecov.io/gh/jazzband/tablib)
 [![GitHub](https://img.shields.io/github/license/jazzband/tablib.svg)](LICENSE)
@@ -3,9 +3,9 @@
 Jazzband guidelines: https://jazzband.co/about/releases

 * [ ] Get master to the appropriate code release state.
-      [Travis CI](https://travis-ci.org/jazzband/tablib)
+      [GitHub Actions](https://github.com/jazzband/tablib/actions)
      should pass on master.
-      [![Build Status](https://travis-ci.org/jazzband/tablib.svg?branch=master)](https://travis-ci.org/jazzband/tablib)
+      [![GitHub Actions status](https://github.com/jazzband/tablib/workflows/Test/badge.svg)](https://github.com/jazzband/tablib/actions)

 * [ ] Check [HISTORY.md](https://github.com/jazzband/tablib/blob/master/HISTORY.md),
      update version number and release date
@@ -16,7 +16,7 @@ git tag -a v0.14.0 -m v0.14.0
 git push --tags
 ```

-* [ ] Once Travis CI has built and uploaded distributions, check files at
+* [ ] Once GitHub Actions has built and uploaded distributions, check files at
      [Jazzband](https://jazzband.co/projects/tablib) and release to
      [PyPI](https://pypi.org/pypi/tablib)

@@ -8,4 +8,5 @@
  <li><a href="https://pypi.org/project/tablib">Tablib @ PyPI</a></li>
  <li><a href="https://github.com/jazzband/tablib">Tablib @ GitHub</a></li>
  <li><a href="https://github.com/jazzband/tablib/issues">Issue Tracker</a></li>
+  <li><a href="https://github.com/jazzband/tablib/blob/master/HISTORY.md">Changelog</a></li>
 </ul>
@@ -9,7 +9,7 @@
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
-from pkg_resources import get_distribution
+import tablib

 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
@@ -49,9 +49,9 @@ copyright = '2019 Jazzband'
 # built documents.
 #
 # The full version, including alpha/beta/rc tags.
-release = get_distribution('tablib').version
+release = tablib.__version__
 # The short X.Y version.
-version = '.'.join(release.split('.')[:2])
+version = '.'.join(tablib.__version__.split('.')[:2])
 # for example take major/minor

 # The language for content autogenerated by Sphinx. Refer to documentation
@@ -163,16 +163,16 @@ the easiest way to test your changes for potential issues is to simply run the t
 Continuous Integration
 ----------------------

-Every pull request is automatically tested and inspected upon receipt with `Travis CI`_.
+Every pull request is automatically tested and inspected upon receipt with `GitHub Actions`_.
 If you broke the build, you will receive an email accordingly.

 Anyone may view the build status and history at any time.

-    https://travis-ci.org/jazzband/tablib
+    https://github.com/jazzband/tablib/actions

 Additional reports will also be included here in the future, including :pep:`8` checks and stress reports for extremely large datasets.

-.. _`Travis CI`: https://travis-ci.org/
+.. _`GitHub Actions`: https://github.com/jazzband/tablib/actions


 .. _docs:
@@ -27,7 +27,7 @@ For example::
    dataset.export("cli", tablefmt="github")
    dataset.export("cli", tablefmt="grid")

-This format is optional, install Tablib with ``pip install tablib[cli]`` to
+This format is optional, install Tablib with ``pip install "tablib[cli]"`` to
 make the format available.

 csv
@@ -83,7 +83,7 @@ df (DataFrame)
 ==============

 Import/export using the pandas_ DataFrame format. This format is optional,
-install Tablib with ``pip install tablib[pandas]`` to make the format available.
+install Tablib with ``pip install "tablib[pandas]"`` to make the format available.

 .. _pandas: https://pandas.pydata.org/

@@ -94,7 +94,7 @@ The ``html`` format is currently export-only. The exports produce an HTML page
 with the data in a ``<table>``. If headers have been set, they will be used as
 table headers.

-This format is optional, install Tablib with ``pip install tablib[html]`` to
+This format is optional, install Tablib with ``pip install "tablib[html]"`` to
 make the format available.

 jira
@@ -132,7 +132,7 @@ ods
 Export data in OpenDocument Spreadsheet format. The ``ods`` format is currently
 export-only.

-This format is optional, install Tablib with ``pip install tablib[ods]`` to
+This format is optional, install Tablib with ``pip install "tablib[ods]"`` to
 make the format available.

 .. admonition:: Binary Warning
@@ -183,7 +183,7 @@ xls

 Import/export data in Legacy Excel Spreadsheet representation.

-This format is optional, install Tablib with ``pip install tablib[xls]`` to
+This format is optional, install Tablib with ``pip install "tablib[xls]"`` to
 make the format available.

 .. note::
@@ -203,9 +203,27 @@ xlsx

 Import/export data in Excel 07+ Spreadsheet representation.

-This format is optional, install Tablib with ``pip install tablib[xlsx]`` to
+This format is optional, install Tablib with ``pip install "tablib[xlsx]"`` to
 make the format available.

+The ``import_set()`` and ``import_book()`` methods accept keyword
+argument ``read_only``.  If its value is ``True`` (the default), the
+XLSX data source is read lazily.  Lazy reading generally reduces time
+and memory consumption, especially for large spreadsheets.  However,
+it relies on the XLSX data source declaring correct dimensions.  Some
+programs generate XLSX files with incorrect dimensions.  Such files
+may need to be loaded with this optimization turned off by passing
+``read_only=False``.
+
+.. note::
+
+    When reading an ``xlsx`` file containing formulas in its cells, Tablib will
+    read the cell values, not the cell formulas.
+
+.. versionchanged:: 2.0.0
+
+    Reads cell values instead of formulas.
+
 .. admonition:: Binary Warning

    The ``xlsx`` file format is binary, so make sure to write in binary mode::
@@ -223,7 +241,7 @@ returned instead.

 Import assumes (for now) that headers exist.

-This format is optional, install Tablib with ``pip install tablib[yaml]`` to
+This format is optional, install Tablib with ``pip install "tablib[yaml]"`` to
 make the format available.

 .. _YAML: https://yaml.org
@@ -26,19 +26,19 @@ formats available:

 .. code-block:: console

-    $ pip install tablib[xlsx]
+    $ pip install "tablib[xlsx]"

 Or all possible formats:

 .. code-block:: console

-    $ pip install tablib[all]
+    $ pip install "tablib[all]"

 which is equivalent to:

 .. code-block:: console

-    $ pip install tablib[html, pandas, ods, xls, xlsx, yaml]
+    $ pip install "tablib[cli, html, pandas, ods, xls, xlsx, yaml]"

 -------------------
 Download the Source
@@ -57,7 +57,7 @@ THE SOFTWARE.
 Pythons Supported
 -----------------

-Python 3.5+ is officially supported.
+Python 3.6+ is officially supported.

 Now, go :ref:`install Tablib <install>`.

@@ -1,7 +1,2 @@
 [tool.isort]
-force_grid_wrap = 0
-include_trailing_comma = true
-known_third_party = ["MarkupPy", "odf", "openpyxl", "pkg_resources", "setuptools", "tablib", "xlrd", "xlwt", "yaml"]
-line_length = 88
-multi_line_output = 3
-use_parentheses = true
+profile = "black"
@@ -1,4 +1,3 @@
 [pytest]
 norecursedirs = .git .*
 addopts = -rsxX --showlocals --tb=native --cov=tablib --cov=tests --cov-report xml --cov-report term --cov-report html
-python_paths = .
@@ -4,7 +4,9 @@ from setuptools import find_packages, setup

 setup(
    name='tablib',
-    use_scm_version=True,
+    use_scm_version={
+        'write_to': 'src/tablib/_version.py',
+    },
    setup_requires=['setuptools_scm'],
    description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)',
    long_description=(
@@ -31,20 +33,20 @@ setup(
        'Programming Language :: Python',
        'Programming Language :: Python :: 3 :: Only',
        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
    ],
-    python_requires='>=3.5',
+    python_requires='>=3.6',
    extras_require={
-        'all': ['markuppy', 'odfpy', 'openpyxl>=2.4.0', 'pandas', 'pyyaml', 'tabulate', 'xlrd', 'xlwt'],
+        'all': ['markuppy', 'odfpy', 'openpyxl>=2.6.0', 'pandas', 'pyyaml', 'tabulate', 'xlrd', 'xlwt'],
        'cli': ['tabulate'],
        'html': ['markuppy'],
        'ods': ['odfpy'],
        'pandas': ['pandas'],
        'xls': ['xlrd', 'xlwt'],
-        'xlsx': ['openpyxl>=2.4.0'],
+        'xlsx': ['openpyxl>=2.6.0'],
        'yaml': ['pyyaml'],
    },
 )
@@ -1,5 +1,12 @@
 """ Tablib. """
-from pkg_resources import DistributionNotFound, get_distribution
+try:
+    # Generated by setuptools-scm.
+    from ._version import version as __version__
+except ImportError:
+    # Some broken installation.
+    __version__ = None
+
+
 from tablib.core import (  # noqa: F401
    Databook,
    Dataset,
@@ -10,9 +17,3 @@ from tablib.core import (  # noqa: F401
    import_book,
    import_set,
 )
-
-try:
-    __version__ = get_distribution(__name__).version
-except DistributionNotFound:
-    # package is not installed
-    __version__ = None
@@ -57,24 +57,16 @@ class Row:
        del self._row[i]

    def __getstate__(self):
-
-        slots = dict()
-
-        for slot in self.__slots__:
-            attribute = getattr(self, slot)
-            slots[slot] = attribute
-
-        return slots
+        return self._row, self.tags

    def __setstate__(self, state):
-        for (k, v) in list(state.items()):
-            setattr(self, k, v)
+        self._row, self.tags = state

    def rpush(self, value):
-        self.insert(0, value)
+        self.insert(len(self._row), value)

    def lpush(self, value):
-        self.insert(len(value), value)
+        self.insert(0, value)

    def append(self, value):
        self.rpush(value)
@@ -147,9 +139,9 @@ class Dataset:

    .. admonition:: Format Attributes Definition

-     If you look at the code, the various output/import formats are not
-     defined within the :class:`Dataset` object. To add support for a new format, see
-     :ref:`Adding New Formats <newformats>`.
+    If you look at the code, the various output/import formats are not
+    defined within the :class:`Dataset` object. To add support for a new format, see
+    :ref:`Adding New Formats <newformats>`.

    """

@@ -299,7 +291,7 @@ class Dataset:
    def _get_headers(self):
        """An *optional* list of strings to be used for header rows and attribute names.

-        This must be set manually. The given list length must equal :class:`Dataset.width`.
+        This must be set manually. The given list length must equal :attr:`Dataset.width`.

        """
        return self.__headers
@@ -335,7 +327,7 @@ class Dataset:
        set, a list of Python dictionaries will be returned. If no headers have been
        set, a list of tuples (rows) will be returned instead.

-        A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. ::
+        A dataset object can also be imported by setting the :attr:`Dataset.dict` attribute. ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
@@ -414,10 +406,10 @@ class Dataset:

        fmt = registry.get_format(format)
        if not hasattr(fmt, 'import_set'):
-            raise UnsupportedFormat('Format {} cannot be imported.'.format(format))
+            raise UnsupportedFormat(f'Format {format} cannot be imported.')

        if not import_set:
-            raise UnsupportedFormat('Format {} cannot be imported.'.format(format))
+            raise UnsupportedFormat(f'Format {format} cannot be imported.')

        fmt.import_set(self, stream, **kwargs)
        return self
@@ -430,7 +422,7 @@ class Dataset:
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_set'):
-            raise UnsupportedFormat('Format {} cannot be exported.'.format(format))
+            raise UnsupportedFormat(f'Format {format} cannot be exported.')

        return fmt.export_set(self, **kwargs)

@@ -452,28 +444,28 @@ class Dataset:

    def rpush(self, row, tags=list()):
        """Adds a row to the end of the :class:`Dataset`.
-        See :class:`Dataset.insert` for additional documentation.
+        See :meth:`Dataset.insert` for additional documentation.
        """

        self.insert(self.height, row=row, tags=tags)

    def lpush(self, row, tags=list()):
        """Adds a row to the top of the :class:`Dataset`.
-        See :class:`Dataset.insert` for additional documentation.
+        See :method:`Dataset.insert` for additional documentation.
        """

        self.insert(0, row=row, tags=tags)

    def append(self, row, tags=list()):
        """Adds a row to the :class:`Dataset`.
-        See :class:`Dataset.insert` for additional documentation.
+        See :method:`Dataset.insert` for additional documentation.
        """

        self.rpush(row, tags)

    def extend(self, rows, tags=list()):
        """Adds a list of rows to the :class:`Dataset` using
-        :class:`Dataset.append`
+        :meth:`Dataset.append`
        """

        for row in rows:
@@ -515,20 +507,20 @@ class Dataset:

            data.append_col(col=random.randint)

-        If inserting a column, and :class:`Dataset.headers` is set, the
+        If inserting a column, and :attr:`Dataset.headers` is set, the
        header attribute must be set, and will be considered the header for
        that row.

        See :ref:`dyncols` for an in-depth example.

        .. versionchanged:: 0.9.0
-           If inserting a column, and :class:`Dataset.headers` is set, the
+           If inserting a column, and :attr:`Dataset.headers` is set, the
           header attribute must be set, and will be considered the header for
           that row.

        .. versionadded:: 0.9.0
           If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
-           This gives you the ability to :class:`filter <Dataset.filter>` your
+           This gives you the ability to :meth:`filter <Dataset.filter>` your
           :class:`Dataset` later.

        """
@@ -565,14 +557,14 @@ class Dataset:

    def rpush_col(self, col, header=None):
        """Adds a column to the end of the :class:`Dataset`.
-        See :class:`Dataset.insert` for additional documentation.
+        See :meth:`Dataset.insert_col` for additional documentation.
        """

        self.insert_col(self.width, col, header=header)

    def lpush_col(self, col, header=None):
        """Adds a column to the top of the :class:`Dataset`.
-        See :class:`Dataset.insert` for additional documentation.
+        See :method:`Dataset.insert` for additional documentation.
        """

        self.insert_col(0, col, header=header)
@@ -596,7 +588,7 @@ class Dataset:

    def append_col(self, col, header=None):
        """Adds a column to the :class:`Dataset`.
-        See :class:`Dataset.insert_col` for additional documentation.
+        See :meth:`Dataset.insert_col` for additional documentation.
        """

        self.rpush_col(col, header)
@@ -875,7 +867,7 @@ class Databook:

        fmt = registry.get_format(format)
        if not hasattr(fmt, 'import_book'):
-            raise UnsupportedFormat('Format {} cannot be loaded.'.format(format))
+            raise UnsupportedFormat(f'Format {format} cannot be loaded.')

        fmt.import_book(self, stream, **kwargs)
        return self
@@ -888,7 +880,7 @@ class Databook:
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_book'):
-            raise UnsupportedFormat('Format {} cannot be exported.'.format(format))
+            raise UnsupportedFormat(f'Format {format} cannot be exported.')

        return fmt.export_book(self, **kwargs)

@@ -17,7 +17,7 @@ uninstalled_format_messages = {
    "df": {"package_name": "pandas package", "extras_name": "pandas"},
    "html": {"package_name": "MarkupPy package", "extras_name": "html"},
    "ods": {"package_name": "odfpy package", "extras_name": "ods"},
-    "xls": {"package_name": "odfpy and xlwt packages", "extras_name": "ods"},
+    "xls": {"package_name": "xlrd and xlwt packages", "extras_name": "xls"},
    "xlsx": {"package_name": "openpyxl package", "extras_name": "xlsx"},
    "yaml": {"package_name": "pyyaml package", "extras_name": "yaml"},
 }
@@ -28,7 +28,7 @@ def load_format_class(dotted_path):
        module_path, class_name = dotted_path.rsplit('.', 1)
        return getattr(import_module(module_path), class_name)
    except (ValueError, AttributeError) as err:
-        raise ImportError("Unable to load format class '{}' ({})".format(dotted_path, err))
+        raise ImportError(f"Unable to load format class '{dotted_path}' ({err})")


 class FormatDescriptorBase:
@@ -122,7 +122,7 @@ class Registry:
            if key in uninstalled_format_messages:
                raise UnsupportedFormat(
                    "The '{key}' format is not available. You may want to install the "
-                    "{package_name} (or `pip install tablib[{extras_name}]`).".format(
+                    "{package_name} (or `pip install \"tablib[{extras_name}]\"`).".format(
                        **uninstalled_format_messages[key], key=key
                    )
                )
@@ -46,6 +46,8 @@ class CSVFormat:
            if (i == 0) and (headers):
                dset.headers = row
            elif row:
+                if i > 0 and len(row) < dset.width:
+                    row += [''] * (dset.width - len(row))
                dset.append(row)

    @classmethod
@@ -30,7 +30,7 @@ class DataFrameFormat:
        if DataFrame is None:
            raise NotImplementedError(
                'DataFrame Format requires `pandas` to be installed.'
-                ' Try `pip install tablib[pandas]`.')
+                ' Try `pip install "tablib[pandas]"`.')
        dataframe = DataFrame(dset.dict, columns=dset.headers)
        return dataframe

@@ -55,7 +55,7 @@ class HTMLFormat:

        for i, dset in enumerate(databook._datasets):
            title = (dset.title if dset.title else 'Set %s' % (i))
-            wrapper.write('<{}>{}</{}>\n'.format(cls.BOOK_ENDINGS, title, cls.BOOK_ENDINGS))
+            wrapper.write(f'<{cls.BOOK_ENDINGS}>{title}</{cls.BOOK_ENDINGS}>\n')
            wrapper.write(dset.html)
            wrapper.write('\n')

@@ -21,7 +21,7 @@ class JIRAFormat:

        header = cls._get_header(dataset.headers) if dataset.headers else ''
        body = cls._get_body(dataset)
-        return '{}\n{}'.format(header, body) if header else body
+        return f'{header}\n{body}' if header else body

    @classmethod
    def _get_body(cls, dataset):
@@ -24,7 +24,7 @@ def _max_word_len(text):
    >>> _max_word_len('Python Module for Tabular Datasets')
    8
    """
-    return max(len(word) for word in text.split()) if text else 0
+    return max([len(word) for word in text.split()], default=0) if text else 0


 class ReSTFormat:
@@ -3,9 +3,11 @@

 from io import BytesIO

-import tablib
 import xlrd
 import xlwt
+from xlrd.xldate import xldate_as_datetime
+
+import tablib

 # special styles
 wrap = xlwt.easyxf("alignment: wrap on")
@@ -74,12 +76,19 @@ class XLSFormat:

        dset.title = sheet.name

+        def cell_value(value, type_):
+            if type_ == xlrd.XL_CELL_ERROR:
+                return xlrd.error_text_from_code[value]
+            elif type_ == xlrd.XL_CELL_DATE:
+                return xldate_as_datetime(value, xls_book.datemode)
+            return value
+
        for i in range(sheet.nrows):
            if i == 0 and headers:
                dset.headers = sheet.row_values(0)
            else:
                dset.append([
-                    val if typ != xlrd.XL_CELL_ERROR else xlrd.error_text_from_code[val]
+                    cell_value(val, typ)
                    for val, typ in zip(sheet.row_values(i), sheet.row_types(i))
                ])

@@ -3,12 +3,13 @@

 from io import BytesIO

-import openpyxl
-import tablib
+from openpyxl.reader.excel import ExcelReader, load_workbook
+from openpyxl.styles import Alignment, Font
+from openpyxl.utils import get_column_letter
+from openpyxl.workbook import Workbook
+from openpyxl.writer.excel import ExcelWriter

-Workbook = openpyxl.workbook.Workbook
-ExcelWriter = openpyxl.writer.excel.ExcelWriter
-get_column_letter = openpyxl.utils.get_column_letter
+import tablib


 class XLSXFormat:
@@ -19,7 +20,10 @@ class XLSXFormat:
    def detect(cls, stream):
        """Returns True if given stream is a readable excel file."""
        try:
-            openpyxl.reader.excel.load_workbook(stream, read_only=True)
+            # No need to fully load the file, it should be enough to be able to
+            # read the manifest.
+            reader = ExcelReader(stream, read_only=False)
+            reader.read_manifest()
            return True
        except Exception:
            return False
@@ -55,12 +59,12 @@ class XLSXFormat:
        return stream.getvalue()

    @classmethod
-    def import_set(cls, dset, in_stream, headers=True):
+    def import_set(cls, dset, in_stream, headers=True, read_only=True):
        """Returns databook from XLS stream."""

        dset.wipe()

-        xls_book = openpyxl.reader.excel.load_workbook(in_stream, read_only=True)
+        xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
        sheet = xls_book.active

        dset.title = sheet.title
@@ -73,12 +77,12 @@ class XLSXFormat:
                dset.append(row_vals)

    @classmethod
-    def import_book(cls, dbook, in_stream, headers=True):
+    def import_book(cls, dbook, in_stream, headers=True, read_only=True):
        """Returns databook from XLS stream."""

        dbook.wipe()

-        xls_book = openpyxl.reader.excel.load_workbook(in_stream, read_only=True)
+        xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)

        for sheet in xls_book.worksheets:
            data = tablib.Dataset()
@@ -89,6 +93,8 @@ class XLSXFormat:
                if (i == 0) and (headers):
                    data.headers = row_vals
                else:
+                    if i > 0 and len(row_vals) < data.width:
+                        row_vals += [''] * (data.width - len(row_vals))
                    data.append(row_vals)

            dbook.add_sheet(data)
@@ -102,14 +108,14 @@ class XLSXFormat:
            _offset = i
            _package.insert((sep[0] + _offset), (sep[1],))

-        bold = openpyxl.styles.Font(bold=True)
-        wrap_text = openpyxl.styles.Alignment(wrap_text=True)
+        bold = Font(bold=True)
+        wrap_text = Alignment(wrap_text=True)

        for i, row in enumerate(_package):
            row_number = i + 1
            for j, col in enumerate(row):
                col_idx = get_column_letter(j + 1)
-                cell = ws['{}{}'.format(col_idx, row_number)]
+                cell = ws[f'{col_idx}{row_number}']

                # bold headers
                if (row_number == 1) and dataset.headers:
@@ -1,9 +1,10 @@
 """ Tablib - YAML Support.
 """

-import tablib
 import yaml

+import tablib
+

 class YAMLFormat:
    title = 'yaml'
@@ -315,7 +315,7 @@ class DbfLogicalFieldDef(DbfFieldDef):
            return False
        if value in "YyTt":
            return True
-        raise ValueError("[{}] Invalid logical value {!r}".format(self.name, value))
+        raise ValueError(f"[{self.name}] Invalid logical value {value!r}")

    def encodeValue(self, value):
        """Return a character from the "TF?" set.
@@ -60,7 +60,7 @@ class DbfRecord:

        Arguments:
            dbf:
-                A `Dbf.Dbf` instance this record belonogs to.
+                A `Dbf.Dbf` instance this record belongs to.
            index:
                An integer record index or None. If this value is
                None, record will be appended to the DBF.
@@ -2,7 +2,7 @@ pytest
 pytest-cov
 MarkupPy
 odfpy
-openpyxl>=2.4.0
+openpyxl>=2.6.0
 pandas
 pyyaml
 tabulate
@@ -11,8 +11,9 @@ from io import BytesIO, StringIO
 from pathlib import Path
 from uuid import uuid4

-import tablib
 from MarkupPy import markup
+
+import tablib
 from tablib.core import Row, detect_format
 from tablib.exceptions import UnsupportedFormat
 from tablib.formats import registry
@@ -57,7 +58,7 @@ class TablibTestCase(BaseTestCase):
        # A known format but uninstalled
        del registry._formats['ods']
        msg = (r"The 'ods' format is not available. You may want to install the "
-               "odfpy package \\(or `pip install tablib\\[ods\\]`\\).")
+               "odfpy package \\(or `pip install \"tablib\\[ods\\]\"`\\).")
        with self.assertRaisesRegex(UnsupportedFormat, msg):
            data.export('ods')

@@ -556,27 +557,15 @@ class TablibTestCase(BaseTestCase):

    def test_row_lpush(self):
        """Row lpush."""
-        # Arrange
        john = Row(self.john)
-        george = Row(self.george)
-
-        # Act
-        john.lpush(george)
-
-        # Assert
-        self.assertEqual(john[-1], george)
+        john.lpush(53)
+        self.assertEqual(john.list, [53, 'John', 'Adams', 90])

    def test_row_append(self):
        """Row append."""
-        # Arrange
        john = Row(self.john)
-        george = Row(self.george)
-
-        # Act
-        john.append(george)
-
-        # Assert
-        self.assertEqual(john[0], george)
+        john.append('stuff')
+        self.assertEqual(john.list, ['John', 'Adams', 90, 'stuff'])

    def test_row_contains(self):
        """Row __contains__."""
@@ -671,6 +660,7 @@ class RSTTests(BaseTestCase):
        data.headers = self.headers
        data.append(self.john)
        data.append(('Wendy', '', 43))
+        data.append(('Esther', ' ', 31))
        self.assertEqual(
            data.export('rst'),
            '==========  =========  ===\n'
@@ -678,6 +668,7 @@ class RSTTests(BaseTestCase):
            '==========  =========  ===\n'
            'John        Adams      90 \n'
            'Wendy                  43 \n'
+            'Esther                 31 \n'
            '==========  =========  ==='
        )

@@ -804,6 +795,25 @@ class CSVTests(BaseTestCase):
        data.csv = csv_text
        self.assertEqual(data.width, 7)

+    def test_csv_import_set_ragged(self):
+        """Import CSV set when not all rows have the same length."""
+        csv_text = (
+            "H1,H2,H3\n"
+            "A,B\n"
+            "C,D,E\n"
+            "\n"
+            "F\n"
+        )
+        dataset = tablib.import_set(csv_text, format="csv")
+        self.assertEqual(
+            str(dataset),
+            'H1|H2|H3\n'
+            '--|--|--\n'
+            'A |B |  \n'
+            'C |D |E \n'
+            'F |  |  '
+        )
+
    def test_csv_export(self):
        """Verify exporting dataset object as CSV."""

@@ -968,6 +978,12 @@ class XLSTests(BaseTestCase):
        in_stream = self.founders.xls
        self.assertEqual(detect_format(in_stream), 'xls')

+    def test_xls_date_import(self):
+        xls_source = Path(__file__).parent / 'files' / 'dates.xls'
+        with open(str(xls_source), mode='rb') as fh:
+            dset = tablib.Dataset().load(fh, 'xls')
+        self.assertEqual(dset.dict[0]['birth_date'], datetime.datetime(2015, 4, 12, 0, 0))
+
    def test_xls_import_with_errors(self):
        """Errors from imported files are kept as errors."""
        xls_source = Path(__file__).parent / 'files' / 'errors.xls'
@@ -1002,6 +1018,13 @@ class XLSXTests(BaseTestCase):
        self.assertEqual(data.dict[0]['float'], 21.55)
        self.assertEqual(data.dict[0]['date/time'], date_time)

+    def test_xlsx_import_set_ragged(self):
+        """Import XLSX file when not all rows have the same length."""
+        xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx'
+        with open(str(xlsx_source), mode='rb') as fh:
+            book = tablib.Databook().load(fh, 'xlsx')
+        self.assertEqual(book.sheets()[0].pop(), (1.0, ''))
+
    def test_xlsx_wrong_char(self):
        """Bad characters are not silently ignored. We let the exception bubble up."""
        from openpyxl.utils.exceptions import IllegalCharacterError
@@ -1010,6 +1033,20 @@ class XLSXTests(BaseTestCase):
            data.append(('string', b'\x0cf'))
            data.xlsx

+    def test_xlsx_cell_values(self):
+        """Test cell values are read and not formulas"""
+        xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx'
+        with xls_source.open('rb') as fh:
+            data = tablib.Dataset().load(fh)
+        self.assertEqual(data.headers[0], 'Hello World')
+
+    def test_xlsx_bad_dimensions(self):
+        """Test loading file with bad dimension.  Must be done with
+        read_only=False."""
+        xls_source = Path(__file__).parent / 'files' / 'bad_dimensions.xlsx'
+        with xls_source.open('rb') as fh:
+            data = tablib.Dataset().load(fh, read_only=False)
+        self.assertEqual(data.height, 3)

 class JSONTests(BaseTestCase):
    def test_json_format_detect(self):
@@ -4,16 +4,22 @@ minversion = 2.4
 envlist =
    docs
    lint
-    py{35,36,37,38}
+    py{36,37,38,39}

 [testenv]
 deps =
-    tests: -rtests/requirements.txt
-    docs: sphinx
+    -rtests/requirements.txt
 extras = pandas
+passenv =
+    FORCE_COLOR
 commands =
-    tests: pytest {posargs:tests}
-    docs: sphinx-build -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html
+    pytest {posargs:tests}
+
+[testenv:docs]
+deps =
+    sphinx
+commands =
+    sphinx-build -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html

 [testenv:lint]
 deps =
@@ -31,4 +37,3 @@ skip_install = true
 [flake8]
 exclude =
    .tox
-ignore=E501,E127,E128,E124
Author	SHA1	Message	Date
dmosberger	e8f54811c7	Expose 'read_only' parameter for 'import_set' and 'import_book' (#483)	2020-12-04 10:10:02 +02:00
Nuno André	e8774043ed	Substitute tuples for dicts in __getstate__/__setstate__ to speed up the pickling	2020-11-29 22:11:46 +01:00
Jannis Leidel	dc1729fc6f	Move releases to GitHub actions.	2020-11-23 13:14:21 +01:00
Hugo van Kemenade	3dc62685f8	Reduce Travis CI testing (#479)	2020-11-23 11:01:10 +01:00
Hugo van Kemenade	22c88de90d	Upload coverage from GHA (#480) * Upload coverage from GHA * Fix PytestConfigWarning: Unknown config option: python_paths	2020-11-14 23:51:05 +02:00
Hugo van Kemenade	615e308559	Docs: Add link to changelog/history (#478)	2020-11-12 10:30:57 +02:00
Hugo van Kemenade	8c5404591b	Add support for Python 3.9, drop EOL 3.5 (#477)	2020-10-30 19:01:48 +02:00
Hugo van Kemenade	5fa4496f9d	Suggest quotes when pip installing with optional dependencies (#474)	2020-08-12 16:12:57 +03:00
Ran Benita	bc8438bda4	Stop using pkg_resources: tablib imports pkg_resources in order to find its own version. Importing pkg_resources is very slow (100ms-250ms is common). Avoid it by letting setuptools-scm generate a file with the version instead.	2020-08-10 15:49:51 +02:00
Claude Paroz	ce79e44d14	Fixes #469 - Prevented rst crash with only-space strings (#470) Thanks nexone for the report.	2020-06-15 08:42:51 +03:00
Claude Paroz	985c3d98b0	Set the release date for 2.0.0	2020-05-16 14:04:19 +02:00
Claude Paroz	6d097c0214	Fixes #465 - Allow importing 'ragged' .xlsx files (#466)	2020-05-16 09:07:32 +03:00
dragonworks	16b5565354	Fixes #462 - Update xlsx import to read cell values instead of cell formulas Co-authored-by: Claude Paroz <claude@2xlibre.net>	2020-03-11 09:05:43 +01:00
Claude Paroz	c25fe54b6f	Refs #373 - Import dates from xls files as Python datetime objects	2020-03-09 17:05:32 +01:00
Tim Gates	b39aefb8d8	Fix simple typo: belonogs -> belongs (#460) Closes #459	2020-02-21 10:26:58 +02:00
Claude Paroz	a442758729	Fixes #457 - Bumped openpyxl dependency to 2.6.0 (#458)	2020-02-16 15:05:20 +02:00
Claude Paroz	21479001a7	Fixes #453 - Reversing behavior of Row.lpush/Row.rpush (#454) Co-authored-by: chim <chenpan@xiaomai5.com>	2020-02-13 20:51:49 +02:00
Claude Paroz	f7e39c1ad5	Set the 1.1.0 release date	2020-02-13 18:56:15 +01:00
Claude Paroz	aaeb5c8360	Fixes #226 - Allow importing ragged CSV files (#456)	2020-02-12 21:12:53 +02:00
Hugo	7a6c623cca	Document upcoming breaking change in 2.0	2020-02-12 19:04:51 +01:00
Hugo	0c31fcb3e4	Test on Python 3.8	2020-02-02 16:44:26 +01:00
Hugo	fa7fdb0443	pre-commit autoupdate	2020-02-02 16:44:26 +01:00
Hugo	8e19479cea	Simplify config: uses the interpreter tox is installed to	2020-02-02 16:44:26 +01:00
Claude Paroz	8f39ac5055	Optimize xlsx detection (#448) Reading the whole file is a bit too much to detect if the file looks like an xlsx file.	2020-01-26 22:02:52 +02:00
Hugo	8d02934c53	Fix tox config	2020-01-26 20:48:20 +01:00
Claude Paroz	d0963c206f	Fix the missing xls dependencies message	2020-01-14 17:58:32 +01:00