Refs #108 - Test and improve format autodetection

Autodetection was added for the odf format.
This commit is contained in:
Claude Paroz
2019-10-04 23:23:43 +02:00
parent 4418535030
commit ca8dbcf9be
7 changed files with 42 additions and 18 deletions
+1
View File
@@ -7,6 +7,7 @@
- Fixed a regression for xlsx exports where non-string values were forced to
strings (#314).
- Fixed xlsx format detection (which was often detected as `xls` format).
- Improved format autodetection and added autodetection for the ods (OpenDocument spreadsheet) format.
- Added search to all documentation pages
- Open xlsx workbooks in read-only mode (#316)
- Unpin requirements
+1 -1
View File
@@ -55,5 +55,5 @@ def detect(stream, delimiter=DEFAULT_DELIMITER):
try:
csv.Sniffer().sniff(stream, delimiters=delimiter)
return True
except (csv.Error, TypeError):
except Exception:
return False
+1 -5
View File
@@ -83,9 +83,5 @@ def detect(stream):
else:
_dbf = dbf.Dbf(StringIO(stream), readOnly=True)
return True
except (ValueError, struct.error):
# When we try to open up a file that's not a DBF, dbfpy raises a
# ValueError.
# When unpacking a string argument with less than 8 chars, struct.error is
# raised.
except Exception:
return False
+11
View File
@@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
cell = table.TableCell()
cell.addElement(text.P(text=col))
odf_row.addElement(cell)
def detect(stream):
    """Return True if `stream` can be loaded as an OpenDocument file.

    `stream` may be raw bytes or a file-like object. Any exception raised
    while loading is treated as "not this format" and yields False.
    """
    # opendocument.load() wants a file-like object, so wrap raw bytes first.
    source = BytesIO(stream) if isinstance(stream, bytes) else stream
    try:
        opendocument.load(source)
    except Exception:
        return False
    else:
        return True
+3 -3
View File
@@ -25,17 +25,17 @@ def detect(stream):
try:
xlrd.open_workbook(file_contents=stream)
return True
except (TypeError, XLRDError):
except Exception:
pass
try:
xlrd.open_workbook(file_contents=stream.read())
return True
except (AttributeError, XLRDError):
except Exception:
pass
try:
xlrd.open_workbook(filename=stream)
return True
except:
except Exception:
return False
+2 -2
View File
@@ -28,8 +28,8 @@ def detect(stream):
try:
openpyxl.reader.excel.load_workbook(stream, read_only=True)
return True
except openpyxl.shared.exc.InvalidFileException:
pass
except Exception:
return False
def export_set(dataset, freeze_panes=True):
"""Returns XLSX representation of Dataset."""
+23 -7
View File
@@ -288,17 +288,33 @@ class TablibTestCase(BaseTestCase):
def test_auto_format_detect(self):
"""Test auto format detection."""
# html, jira, latex, rst are export only.
# Exported formats: export self.founders to each format and check that
# detect_format() reports the format the payload was exported to.
_xls = self.founders.export('xls')
self.assertEqual(tablib.detect_format(_xls), 'xls')
_xlsx = self.founders.export('xlsx')
self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')
_ods = self.founders.export('ods')
self.assertEqual(tablib.detect_format(_ods), 'ods')
_df = self.founders.export('df')
self.assertEqual(tablib.detect_format(_df), 'df')
# Text formats: hand-written literal samples.
_yaml = '- {age: 90, first_name: John, last_name: Adams}'
# NOTE(review): _json, _csv, _tsv and _bunk are assigned again further down
# with the same literals, and the csv/tsv assertions are repeated. This
# looks like removed and added diff lines rendered together — confirm
# against the actual test file before relying on this listing.
_json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
_csv = '1,2,3\n4,5,6\n7,8,9\n'
_tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
_bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
self.assertEqual(tablib.detect_format(_yaml), 'yaml')
self.assertEqual(tablib.detect_format(_csv), 'csv')
self.assertEqual(tablib.detect_format(_tsv), 'tsv')
_json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
self.assertEqual(tablib.detect_format(_json), 'json')
_csv = '1,2,3\n4,5,6\n7,8,9\n'
self.assertEqual(tablib.detect_format(_csv), 'csv')
_tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
self.assertEqual(tablib.detect_format(_tsv), 'tsv')
# Input that matches no known format must yield None, not a format name.
_bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
self.assertEqual(tablib.detect_format(_bunk), None)
def test_transpose(self):