mirror of
https://github.com/kennethreitz/tablib.git
synced 2026-06-05 06:56:13 +00:00
Refs #108 - Test and improve format autodetection
Autodetection was added for the odf format.
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
- Fixed a regression for xlsx exports where non-string values were forced to
|
||||
strings (#314).
|
||||
- Fixed xlsx format detection (which was often detected as `xls` format).
|
||||
- Improved format autodetection and added autodetection for the odf format.
|
||||
- Added search to all documentation pages
|
||||
- Open xlsx workbooks in read-only mode (#316)
|
||||
- Unpin requirements
|
||||
|
||||
@@ -55,5 +55,5 @@ def detect(stream, delimiter=DEFAULT_DELIMITER):
|
||||
try:
|
||||
csv.Sniffer().sniff(stream, delimiters=delimiter)
|
||||
return True
|
||||
except (csv.Error, TypeError):
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -83,9 +83,5 @@ def detect(stream):
|
||||
else:
|
||||
_dbf = dbf.Dbf(StringIO(stream), readOnly=True)
|
||||
return True
|
||||
except (ValueError, struct.error):
|
||||
# When we try to open up a file that's not a DBF, dbfpy raises a
|
||||
# ValueError.
|
||||
# When unpacking a string argument with less than 8 chars, struct.error is
|
||||
# raised.
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -91,3 +91,14 @@ def dset_sheet(dataset, ws):
|
||||
cell = table.TableCell()
|
||||
cell.addElement(text.P(text=col))
|
||||
odf_row.addElement(cell)
|
||||
|
||||
|
||||
def detect(stream):
|
||||
if isinstance(stream, bytes):
|
||||
# load expects a file-like object.
|
||||
stream = BytesIO(stream)
|
||||
try:
|
||||
opendocument.load(stream)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -25,17 +25,17 @@ def detect(stream):
|
||||
try:
|
||||
xlrd.open_workbook(file_contents=stream)
|
||||
return True
|
||||
except (TypeError, XLRDError):
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
xlrd.open_workbook(file_contents=stream.read())
|
||||
return True
|
||||
except (AttributeError, XLRDError):
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
xlrd.open_workbook(filename=stream)
|
||||
return True
|
||||
except:
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
|
||||
@@ -28,8 +28,8 @@ def detect(stream):
|
||||
try:
|
||||
openpyxl.reader.excel.load_workbook(stream, read_only=True)
|
||||
return True
|
||||
except openpyxl.shared.exc.InvalidFileException:
|
||||
pass
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def export_set(dataset, freeze_panes=True):
|
||||
"""Returns XLSX representation of Dataset."""
|
||||
|
||||
+23
-7
@@ -288,17 +288,33 @@ class TablibTestCase(BaseTestCase):
|
||||
|
||||
def test_auto_format_detect(self):
|
||||
"""Test auto format detection."""
|
||||
# html, jira, latex, rst are export only.
|
||||
|
||||
_xls = self.founders.export('xls')
|
||||
self.assertEqual(tablib.detect_format(_xls), 'xls')
|
||||
|
||||
_xlsx = self.founders.export('xlsx')
|
||||
self.assertEqual(tablib.detect_format(_xlsx), 'xlsx')
|
||||
|
||||
_ods = self.founders.export('ods')
|
||||
self.assertEqual(tablib.detect_format(_ods), 'ods')
|
||||
|
||||
_df = self.founders.export('df')
|
||||
self.assertEqual(tablib.detect_format(_df), 'df')
|
||||
|
||||
_yaml = '- {age: 90, first_name: John, last_name: Adams}'
|
||||
_json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
|
||||
_csv = '1,2,3\n4,5,6\n7,8,9\n'
|
||||
_tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
|
||||
_bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
|
||||
self.assertEqual(tablib.detect_format(_yaml), 'yaml')
|
||||
self.assertEqual(tablib.detect_format(_csv), 'csv')
|
||||
self.assertEqual(tablib.detect_format(_tsv), 'tsv')
|
||||
|
||||
_json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
|
||||
self.assertEqual(tablib.detect_format(_json), 'json')
|
||||
|
||||
_csv = '1,2,3\n4,5,6\n7,8,9\n'
|
||||
self.assertEqual(tablib.detect_format(_csv), 'csv')
|
||||
|
||||
_tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
|
||||
self.assertEqual(tablib.detect_format(_tsv), 'tsv')
|
||||
|
||||
_bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
self.assertEqual(tablib.detect_format(_bunk), None)
|
||||
|
||||
def test_transpose(self):
|
||||
|
||||
Reference in New Issue
Block a user