Set the release date for 2.0.0

Fixes #465 - Allow importing 'ragged' .xlsx files (#466)
Fixes #462 - Update xlsx import to read cell values instead of cell formulas
2026-06-05 15:00:19 +00:00 · 2020-05-16 14:04:19 +02:00 · 2020-05-16 09:07:32 +03:00 · 2020-03-11 09:05:43 +01:00 · 2020-03-09 17:05:32 +01:00 · 2020-02-21 10:26:58 +02:00
17 changed files with 129 additions and 43 deletions
@@ -9,7 +9,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        python-version: [3.5, 3.6, 3.7]
+        python-version: [3.5, 3.6, 3.7, 3.8]
        os: [ubuntu-latest, macOS-latest, windows-latest]

    steps:
@@ -28,6 +28,5 @@ jobs:

    - name: Tox tests
      shell: bash
-      # Drop the dot: py3.7-tests -> py37-tests
      run: |
-        tox -e py`echo ${{ matrix.python-version }} | tr -d .`-tests
+        tox -e py
@@ -1,6 +1,6 @@
 repos:
  - repo: https://github.com/asottile/pyupgrade
-    rev: v1.25.2
+    rev: v1.26.2
    hooks:
      - id: pyupgrade
        args: ["--py3-plus"]
@@ -12,7 +12,7 @@ repos:
        additional_dependencies: [toml]

  - repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.4.2
+    rev: v1.4.4
    hooks:
      - id: python-check-blanket-noqa
      - id: rst-backticks
@@ -1,5 +1,37 @@
 # History

+## 2.0.0 (2020-05-16)
+
+### Breaking changes
+
+- The `Row.lpush/rpush` logic was fixed. Previously, `lpush` appended while
+  `rpush` and `append` prepended; each now does what its name says. If you
+  counted on the broken behavior, please update your code (#453).
+
+### Bugfixes
+
+- Fixed minimal openpyxl dependency version to 2.6.0 (#457).
+- Dates from xls files are now read as Python datetime objects (#373).
+- Allow import of "ragged" xlsx files (#465).
+
+### Improvements
+
+- When importing an xlsx file, Tablib will now read cell values instead of formulas (#462).
+
+## 1.1.0 (2020-02-13)
+
+### Deprecations
+
+- Upcoming breaking change in Tablib 2.0.0: the `Row.lpush/rpush` logic is reversed.
+  `lpush` is appending while `rpush` and `append` are prepending. The broken behavior
+  will remain in Tablib 1.x and will be fixed (reversed) in Tablib 2.0.0 (#453). If you
+  count on the broken behavior, please update your code when you upgrade to Tablib 2.x.
+
+### Improvements
+
+- Tablib is now able to import CSV content where not all rows have the same
+  length. Missing columns on any line receive the empty string (#226).
+
 ## 1.0.0 (2020-01-13)

 ### Breaking changes
@@ -206,6 +206,15 @@ Import/export data in Excel 07+ Spreadsheet representation.
 This format is optional, install Tablib with ``pip install tablib[xlsx]`` to
 make the format available.

+.. note::
+
+    When reading an ``xlsx`` file containing formulas in its cells, Tablib will
+    read the cell values, not the cell formulas.
+
+.. versionchanged:: 2.0.0
+
+    Reads cell values instead of formulas.
+
 .. admonition:: Binary Warning

    The ``xlsx`` file format is binary, so make sure to write in binary mode::
@@ -38,13 +38,13 @@ setup(
    ],
    python_requires='>=3.5',
    extras_require={
-        'all': ['markuppy', 'odfpy', 'openpyxl>=2.4.0', 'pandas', 'pyyaml', 'tabulate', 'xlrd', 'xlwt'],
+        'all': ['markuppy', 'odfpy', 'openpyxl>=2.6.0', 'pandas', 'pyyaml', 'tabulate', 'xlrd', 'xlwt'],
        'cli': ['tabulate'],
        'html': ['markuppy'],
        'ods': ['odfpy'],
        'pandas': ['pandas'],
        'xls': ['xlrd', 'xlwt'],
-        'xlsx': ['openpyxl>=2.4.0'],
+        'xlsx': ['openpyxl>=2.6.0'],
        'yaml': ['pyyaml'],
    },
 )
@@ -71,10 +71,10 @@ class Row:
            setattr(self, k, v)

    def rpush(self, value):
-        self.insert(0, value)
+        self.insert(len(self._row), value)

    def lpush(self, value):
-        self.insert(len(value), value)
+        self.insert(0, value)

    def append(self, value):
        self.rpush(value)
@@ -17,7 +17,7 @@ uninstalled_format_messages = {
    "df": {"package_name": "pandas package", "extras_name": "pandas"},
    "html": {"package_name": "MarkupPy package", "extras_name": "html"},
    "ods": {"package_name": "odfpy package", "extras_name": "ods"},
-    "xls": {"package_name": "odfpy and xlwt packages", "extras_name": "ods"},
+    "xls": {"package_name": "xlrd and xlwt packages", "extras_name": "xls"},
    "xlsx": {"package_name": "openpyxl package", "extras_name": "xlsx"},
    "yaml": {"package_name": "pyyaml package", "extras_name": "yaml"},
 }
@@ -46,6 +46,8 @@ class CSVFormat:
            if (i == 0) and (headers):
                dset.headers = row
            elif row:
+                if i > 0 and len(row) < dset.width:
+                    row += [''] * (dset.width - len(row))
                dset.append(row)

    @classmethod
@@ -6,6 +6,7 @@ from io import BytesIO
 import tablib
 import xlrd
 import xlwt
+from xlrd.xldate import xldate_as_datetime

 # special styles
 wrap = xlwt.easyxf("alignment: wrap on")
@@ -74,12 +75,19 @@ class XLSFormat:

        dset.title = sheet.name

+        def cell_value(value, type_):
+            if type_ == xlrd.XL_CELL_ERROR:
+                return xlrd.error_text_from_code[value]
+            elif type_ == xlrd.XL_CELL_DATE:
+                return xldate_as_datetime(value, xls_book.datemode)
+            return value
+
        for i in range(sheet.nrows):
            if i == 0 and headers:
                dset.headers = sheet.row_values(0)
            else:
                dset.append([
-                    val if typ != xlrd.XL_CELL_ERROR else xlrd.error_text_from_code[val]
+                    cell_value(val, typ)
                    for val, typ in zip(sheet.row_values(i), sheet.row_types(i))
                ])

@@ -3,12 +3,12 @@

 from io import BytesIO

-import openpyxl
 import tablib
-
-Workbook = openpyxl.workbook.Workbook
-ExcelWriter = openpyxl.writer.excel.ExcelWriter
-get_column_letter = openpyxl.utils.get_column_letter
+from openpyxl.reader.excel import ExcelReader, load_workbook
+from openpyxl.styles import Alignment, Font
+from openpyxl.utils import get_column_letter
+from openpyxl.workbook import Workbook
+from openpyxl.writer.excel import ExcelWriter


 class XLSXFormat:
@@ -19,7 +19,10 @@ class XLSXFormat:
    def detect(cls, stream):
        """Returns True if given stream is a readable excel file."""
        try:
-            openpyxl.reader.excel.load_workbook(stream, read_only=True)
+            # No need to fully load the file, it should be enough to be able to
+            # read the manifest.
+            reader = ExcelReader(stream, read_only=False)
+            reader.read_manifest()
            return True
        except Exception:
            return False
@@ -60,7 +63,7 @@ class XLSXFormat:

        dset.wipe()

-        xls_book = openpyxl.reader.excel.load_workbook(in_stream, read_only=True)
+        xls_book = load_workbook(in_stream, read_only=True, data_only=True)
        sheet = xls_book.active

        dset.title = sheet.title
@@ -78,7 +81,7 @@ class XLSXFormat:

        dbook.wipe()

-        xls_book = openpyxl.reader.excel.load_workbook(in_stream, read_only=True)
+        xls_book = load_workbook(in_stream, read_only=True, data_only=True)

        for sheet in xls_book.worksheets:
            data = tablib.Dataset()
@@ -89,6 +92,8 @@ class XLSXFormat:
                if (i == 0) and (headers):
                    data.headers = row_vals
                else:
+                    if i > 0 and len(row_vals) < data.width:
+                        row_vals += [''] * (data.width - len(row_vals))
                    data.append(row_vals)

            dbook.add_sheet(data)
@@ -102,8 +107,8 @@ class XLSXFormat:
            _offset = i
            _package.insert((sep[0] + _offset), (sep[1],))

-        bold = openpyxl.styles.Font(bold=True)
-        wrap_text = openpyxl.styles.Alignment(wrap_text=True)
+        bold = Font(bold=True)
+        wrap_text = Alignment(wrap_text=True)

        for i, row in enumerate(_package):
            row_number = i + 1
@@ -60,7 +60,7 @@ class DbfRecord:

        Arguments:
            dbf:
-                A `Dbf.Dbf` instance this record belonogs to.
+                A `Dbf.Dbf` instance this record belongs to.
            index:
                An integer record index or None. If this value is
                None, record will be appended to the DBF.
@@ -2,7 +2,7 @@ pytest
 pytest-cov
 MarkupPy
 odfpy
-openpyxl>=2.4.0
+openpyxl>=2.6.0
 pandas
 pyyaml
 tabulate
@@ -556,27 +556,15 @@ class TablibTestCase(BaseTestCase):

    def test_row_lpush(self):
        """Row lpush."""
-        # Arrange
        john = Row(self.john)
-        george = Row(self.george)
-
-        # Act
-        john.lpush(george)
-
-        # Assert
-        self.assertEqual(john[-1], george)
+        john.lpush(53)
+        self.assertEqual(john.list, [53, 'John', 'Adams', 90])

    def test_row_append(self):
        """Row append."""
-        # Arrange
        john = Row(self.john)
-        george = Row(self.george)
-
-        # Act
-        john.append(george)
-
-        # Assert
-        self.assertEqual(john[0], george)
+        john.append('stuff')
+        self.assertEqual(john.list, ['John', 'Adams', 90, 'stuff'])

    def test_row_contains(self):
        """Row __contains__."""
@@ -804,6 +792,25 @@ class CSVTests(BaseTestCase):
        data.csv = csv_text
        self.assertEqual(data.width, 7)

+    def test_csv_import_set_ragged(self):
+        """Import CSV set when not all rows have the same length."""
+        csv_text = (
+            "H1,H2,H3\n"
+            "A,B\n"
+            "C,D,E\n"
+            "\n"
+            "F\n"
+        )
+        dataset = tablib.import_set(csv_text, format="csv")
+        self.assertEqual(
+            str(dataset),
+            'H1|H2|H3\n'
+            '--|--|--\n'
+            'A |B |  \n'
+            'C |D |E \n'
+            'F |  |  '
+        )
+
    def test_csv_export(self):
        """Verify exporting dataset object as CSV."""

@@ -968,6 +975,12 @@ class XLSTests(BaseTestCase):
        in_stream = self.founders.xls
        self.assertEqual(detect_format(in_stream), 'xls')

+    def test_xls_date_import(self):
+        xls_source = Path(__file__).parent / 'files' / 'dates.xls'
+        with open(str(xls_source), mode='rb') as fh:
+            dset = tablib.Dataset().load(fh, 'xls')
+        self.assertEqual(dset.dict[0]['birth_date'], datetime.datetime(2015, 4, 12, 0, 0))
+
    def test_xls_import_with_errors(self):
        """Errors from imported files are kept as errors."""
        xls_source = Path(__file__).parent / 'files' / 'errors.xls'
@@ -1002,6 +1015,13 @@ class XLSXTests(BaseTestCase):
        self.assertEqual(data.dict[0]['float'], 21.55)
        self.assertEqual(data.dict[0]['date/time'], date_time)

+    def test_xlsx_import_set_ragged(self):
+        """Import XLSX file when not all rows have the same length."""
+        xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx'
+        with open(str(xlsx_source), mode='rb') as fh:
+            book = tablib.Databook().load(fh, 'xlsx')
+        self.assertEqual(book.sheets()[0].pop(), (1.0, ''))
+
    def test_xlsx_wrong_char(self):
        """Bad characters are not silently ignored. We let the exception bubble up."""
        from openpyxl.utils.exceptions import IllegalCharacterError
@@ -1010,6 +1030,13 @@ class XLSXTests(BaseTestCase):
            data.append(('string', b'\x0cf'))
            data.xlsx

+    def test_xlsx_cell_values(self):
+        """Test cell values are read and not formulas"""
+        xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx'
+        with xls_source.open('rb') as fh:
+            data = tablib.Dataset().load(fh)
+        self.assertEqual(data.headers[0], 'Hello World')
+

 class JSONTests(BaseTestCase):
    def test_json_format_detect(self):
@@ -8,12 +8,16 @@ envlist =

 [testenv]
 deps =
-    tests: -rtests/requirements.txt
-    docs: sphinx
+    -rtests/requirements.txt
 extras = pandas
 commands =
-    tests: pytest {posargs:tests}
-    docs: sphinx-build -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html
+    pytest {posargs:tests}
+
+[testenv:docs]
+deps =
+    sphinx
+commands =
+    sphinx-build -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html

 [testenv:lint]
 deps =
Author	SHA1	Message	Date
Claude Paroz	985c3d98b0	Set the release date for 2.0.0	2020-05-16 14:04:19 +02:00
Claude Paroz	6d097c0214	Fixes #465 - Allow importing 'ragged' .xlsx files (#466)	2020-05-16 09:07:32 +03:00
dragonworks	16b5565354	Fixes #462 - Update xlsx import to read cell values instead of cell formulas Co-authored-by: Claude Paroz <claude@2xlibre.net>	2020-03-11 09:05:43 +01:00
Claude Paroz	c25fe54b6f	Refs #373 - Import dates from xls files as Python datetime objects	2020-03-09 17:05:32 +01:00
Tim Gates	b39aefb8d8	Fix simple typo: belonogs -> belongs (#460) Closes #459	2020-02-21 10:26:58 +02:00
Claude Paroz	a442758729	Fixes #457 - Bumped openpyxl dependency to 2.6.0 (#458)	2020-02-16 15:05:20 +02:00
Claude Paroz	21479001a7	Fixes #453 - Reversing behavior of Row.lpush/Row.rpush (#454) Co-authored-by: chim <chenpan@xiaomai5.com>	2020-02-13 20:51:49 +02:00
Claude Paroz	f7e39c1ad5	Set the 1.1.0 release date	2020-02-13 18:56:15 +01:00
Claude Paroz	aaeb5c8360	Fixes #226 - Allow importing ragged CSV files (#456)	2020-02-12 21:12:53 +02:00
Hugo	7a6c623cca	Document upcoming breaking change in 2.0	2020-02-12 19:04:51 +01:00
Hugo	0c31fcb3e4	Test on Python 3.8	2020-02-02 16:44:26 +01:00
Hugo	fa7fdb0443	pre-commit autoupdate	2020-02-02 16:44:26 +01:00
Hugo	8e19479cea	Simplify config: uses the interpreter tox is installed to	2020-02-02 16:44:26 +01:00
Claude Paroz	8f39ac5055	Optimize xlsx detection (#448) Reading the whole file is a bit too much to detect if the file looks like an xlsx file.	2020-01-26 22:02:52 +02:00
Hugo	8d02934c53	Fix tox config	2020-01-26 20:48:20 +01:00
Claude Paroz	d0963c206f	Fix the missing xls dependencies message	2020-01-14 17:58:32 +01:00