Refs #108 - Test and improve format autodetection

Autodetection was added for the ods (OpenDocument Spreadsheet) format.
2026-06-05 06:56:13 +00:00 · 2019-10-04 23:23:43 +02:00
parent 4418535030
commit ca8dbcf9be
7 changed files with 42 additions and 18 deletions
@@ -7,6 +7,7 @@
 - Fixed a regression for xlsx exports where non-string values were forced to
  strings (#314).
 - Fixed xlsx format detection (which was often detected as `xls` format).
+- Improved format autodetection and added autodetection for the `ods` format.
 - Added search to all documentation pages
 - Open xlsx workbooks in read-only mode (#316)
 - Unpin requirements
@@ -55,5 +55,5 @@ def detect(stream, delimiter=DEFAULT_DELIMITER):
    try:
        csv.Sniffer().sniff(stream, delimiters=delimiter)
        return True
-    except (csv.Error, TypeError):
+    except Exception:
        return False
@@ -83,9 +83,5 @@ def detect(stream):
        else:
            _dbf = dbf.Dbf(StringIO(stream), readOnly=True)
        return True
-    except (ValueError, struct.error):
-        # When we try to open up a file that's not a DBF, dbfpy raises a
-        # ValueError.
-        # When unpacking a string argument with less than 8 chars, struct.error is
-        # raised.
+    except Exception:
        return False
@@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
                    cell = table.TableCell()
                    cell.addElement(text.P(text=col))
                    odf_row.addElement(cell)
+
+
+def detect(stream):
+    if isinstance(stream, bytes):
+        # load expects a file-like object.
+        stream = BytesIO(stream)
+    try:
+        opendocument.load(stream)
+        return True
+    except Exception:
+        return False
@@ -25,17 +25,17 @@ def detect(stream):
    try:
        xlrd.open_workbook(file_contents=stream)
        return True
-    except (TypeError, XLRDError):
+    except Exception:
        pass
    try:
        xlrd.open_workbook(file_contents=stream.read())
        return True
-    except (AttributeError, XLRDError):
+    except Exception:
        pass
    try:
        xlrd.open_workbook(filename=stream)
        return True
-    except:
+    except Exception:
        return False


@@ -28,8 +28,8 @@ def detect(stream):
    try:
        openpyxl.reader.excel.load_workbook(stream, read_only=True)
        return True
-    except openpyxl.shared.exc.InvalidFileException:
-        pass
+    except Exception:
+        return False

 def export_set(dataset, freeze_panes=True):
    """Returns XLSX representation of Dataset."""
@@ -288,17 +288,33 @@ class TablibTestCase(BaseTestCase):

    def test_auto_format_detect(self):
        """Test auto format detection."""
+        # html, jira, latex, rst are export only.
+
+        _xls = self.founders.export('xls')
+        self.assertEqual(tablib.detect_format(_xls), 'xls')
+
+        _xlsx = self.founders.export('xlsx')
+        self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')
+
+        _ods = self.founders.export('ods')
+        self.assertEqual(tablib.detect_format(_ods), 'ods')
+
+        _df = self.founders.export('df')
+        self.assertEqual(tablib.detect_format(_df), 'df')

        _yaml = '- {age: 90, first_name: John, last_name: Adams}'
-        _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
-        _csv = '1,2,3\n4,5,6\n7,8,9\n'
-        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
-        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
-
        self.assertEqual(tablib.detect_format(_yaml), 'yaml')
-        self.assertEqual(tablib.detect_format(_csv), 'csv')
-        self.assertEqual(tablib.detect_format(_tsv), 'tsv')
+
+        _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
        self.assertEqual(tablib.detect_format(_json), 'json')
+
+        _csv = '1,2,3\n4,5,6\n7,8,9\n'
+        self.assertEqual(tablib.detect_format(_csv), 'csv')
+
+        _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
+        self.assertEqual(tablib.detect_format(_tsv), 'tsv')
+
+        _bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
        self.assertEqual(tablib.detect_format(_bunk), None)

    def test_transpose(self):