mirror of
https://github.com/kennethreitz/tablib.git
synced 2026-06-05 23:10:17 +00:00
Fixes #440 - Normalize stream inputs as IO streams
This commit is contained in:
@@ -12,6 +12,9 @@
|
||||
|
||||
- Formats can now be dynamically registered through the
|
||||
`tablib.formats.registry.register` API (#256).
|
||||
- Tablib methods expecting data input (`detect_format`, `import_set`,
|
||||
`Dataset.load`, `Databook.load`) now accept file-like objects in addition
|
||||
to raw strings and bytestrings (#440).
|
||||
|
||||
### Bugfixes
|
||||
|
||||
|
||||
+6
-3
@@ -106,7 +106,8 @@ Importing Data
|
||||
--------------
|
||||
Creating a :class:`tablib.Dataset` object by importing a pre-existing file is simple. ::
|
||||
|
||||
imported_data = Dataset().load(open('data.csv').read())
|
||||
with open('data.csv', 'r') as fh:
|
||||
imported_data = Dataset().load(fh)
|
||||
|
||||
This detects what sort of data is being passed in, and uses an appropriate formatter to do the import. So you can import from a variety of different file types.
|
||||
|
||||
@@ -114,7 +115,8 @@ This detects what sort of data is being passed in, and uses an appropriate forma
|
||||
|
||||
When the format is :class:`csv <Dataset.csv>`, :class:`tsv <Dataset.tsv>`, :class:`dbf <Dataset.dbf>`, :class:`xls <Dataset.xls>` or :class:`xlsx <Dataset.xlsx>`, and the data source does not have headers, the import should be done as follows ::
|
||||
|
||||
imported_data = Dataset().load(open('data.csv').read(), headers=False)
|
||||
with open('data.csv', 'r') as fh:
|
||||
imported_data = Dataset().load(fh, headers=False)
|
||||
|
||||
--------------
|
||||
Exporting Data
|
||||
@@ -320,7 +322,8 @@ Open an Excel Workbook and read first sheet
|
||||
Open an Excel 2007 or later workbook with a single sheet (or a workbook with multiple sheets when you only want the first sheet). ::
|
||||
|
||||
data = tablib.Dataset()
|
||||
data.xlsx = open('my_excel_file.xlsx', 'rb').read()
|
||||
with open('my_excel_file.xlsx', 'rb') as fh:
|
||||
data.load(fh, 'xlsx')
|
||||
print(data)
|
||||
|
||||
Excel Workbook With Multiple Sheets
|
||||
|
||||
+25
-12
@@ -21,6 +21,7 @@ from tablib.exceptions import (
|
||||
UnsupportedFormat,
|
||||
)
|
||||
from tablib.formats import registry
|
||||
from tablib.utils import normalize_input
|
||||
|
||||
__title__ = 'tablib'
|
||||
__author__ = 'Kenneth Reitz'
|
||||
@@ -239,8 +240,9 @@ class Dataset:
|
||||
def _get_in_format(self, fmt_key, **kwargs):
|
||||
return registry.get_format(fmt_key).export_set(self, **kwargs)
|
||||
|
||||
def _set_in_format(self, fmt_key, *args, **kwargs):
|
||||
return registry.get_format(fmt_key).import_set(self, *args, **kwargs)
|
||||
def _set_in_format(self, fmt_key, in_stream, **kwargs):
|
||||
in_stream = normalize_input(in_stream)
|
||||
return registry.get_format(fmt_key).import_set(self, in_stream, **kwargs)
|
||||
|
||||
def _validate(self, row=None, col=None, safety=False):
|
||||
"""Assures size of every row in dataset is of proper proportions."""
|
||||
@@ -402,12 +404,14 @@ class Dataset:
|
||||
def load(self, in_stream, format=None, **kwargs):
|
||||
"""
|
||||
Import `in_stream` to the :class:`Dataset` object using the `format`.
|
||||
`in_stream` can be a file-like object, a string, or a bytestring.
|
||||
|
||||
:param \\*\\*kwargs: (optional) custom configuration to the format `import_set`.
|
||||
"""
|
||||
|
||||
stream = normalize_input(in_stream)
|
||||
if not format:
|
||||
format = detect_format(in_stream)
|
||||
format = detect_format(stream)
|
||||
|
||||
fmt = registry.get_format(format)
|
||||
if not hasattr(fmt, 'import_set'):
|
||||
@@ -416,7 +420,7 @@ class Dataset:
|
||||
if not import_set:
|
||||
raise UnsupportedFormat('Format {} cannot be imported.'.format(format))
|
||||
|
||||
fmt.import_set(self, in_stream, **kwargs)
|
||||
fmt.import_set(self, stream, **kwargs)
|
||||
return self
|
||||
|
||||
def export(self, format, **kwargs):
|
||||
@@ -861,18 +865,20 @@ class Databook:
|
||||
def load(self, in_stream, format, **kwargs):
|
||||
"""
|
||||
Import `in_stream` to the :class:`Databook` object using the `format`.
|
||||
`in_stream` can be a file-like object, a string, or a bytestring.
|
||||
|
||||
:param \\*\\*kwargs: (optional) custom configuration to the format `import_book`.
|
||||
"""
|
||||
|
||||
stream = normalize_input(in_stream)
|
||||
if not format:
|
||||
format = detect_format(in_stream)
|
||||
format = detect_format(stream)
|
||||
|
||||
fmt = registry.get_format(format)
|
||||
if not hasattr(fmt, 'import_book'):
|
||||
raise UnsupportedFormat('Format {} cannot be loaded.'.format(format))
|
||||
|
||||
fmt.import_book(self, in_stream, **kwargs)
|
||||
fmt.import_book(self, stream, **kwargs)
|
||||
return self
|
||||
|
||||
def export(self, format, **kwargs):
|
||||
@@ -889,25 +895,32 @@ class Databook:
|
||||
|
||||
|
||||
def detect_format(stream):
|
||||
"""Return format name of given stream."""
|
||||
"""Return format name of given stream (file-like object, string, or bytestring)."""
|
||||
stream = normalize_input(stream)
|
||||
fmt_title = None
|
||||
for fmt in registry.formats():
|
||||
try:
|
||||
if fmt.detect(stream):
|
||||
return fmt.title
|
||||
fmt_title = fmt.title
|
||||
break
|
||||
except AttributeError:
|
||||
pass
|
||||
finally:
|
||||
if hasattr(stream, 'seek'):
|
||||
stream.seek(0)
|
||||
return fmt_title
|
||||
|
||||
|
||||
def import_set(stream, format=None, **kwargs):
|
||||
"""Return dataset of given stream."""
|
||||
"""Return dataset of given stream (file-like object, string, or bytestring)."""
|
||||
|
||||
return Dataset().load(stream, format, **kwargs)
|
||||
return Dataset().load(normalize_input(stream), format, **kwargs)
|
||||
|
||||
|
||||
def import_book(stream, format=None, **kwargs):
|
||||
"""Return dataset of given stream."""
|
||||
"""Return dataset of given stream (file-like object, string, or bytestring)."""
|
||||
|
||||
return Databook().load(stream, format, **kwargs)
|
||||
return Databook().load(normalize_input(stream), format, **kwargs)
|
||||
|
||||
|
||||
registry.register_builtins()
|
||||
|
||||
@@ -6,6 +6,7 @@ from importlib import import_module
|
||||
from importlib.util import find_spec
|
||||
|
||||
from tablib.exceptions import UnsupportedFormat
|
||||
from tablib.utils import normalize_input
|
||||
|
||||
from ._csv import CSVFormat
|
||||
from ._json import JSONFormat
|
||||
@@ -52,7 +53,7 @@ class ImportExportBookDescriptor(FormatDescriptorBase):
|
||||
|
||||
def __set__(self, obj, val):
|
||||
self.ensure_format_loaded()
|
||||
return self._format.import_book(obj, val)
|
||||
return self._format.import_book(obj, normalize_input(val))
|
||||
|
||||
|
||||
class ImportExportSetDescriptor(FormatDescriptorBase):
|
||||
@@ -62,7 +63,7 @@ class ImportExportSetDescriptor(FormatDescriptorBase):
|
||||
|
||||
def __set__(self, obj, val):
|
||||
self.ensure_format_loaded()
|
||||
return self._format.import_set(obj, val)
|
||||
return self._format.import_set(obj, normalize_input(val))
|
||||
|
||||
|
||||
class Registry:
|
||||
|
||||
@@ -40,7 +40,7 @@ class CSVFormat:
|
||||
|
||||
kwargs.setdefault('delimiter', cls.DEFAULT_DELIMITER)
|
||||
|
||||
rows = csv.reader(StringIO(in_stream), **kwargs)
|
||||
rows = csv.reader(in_stream, **kwargs)
|
||||
for i, row in enumerate(rows):
|
||||
|
||||
if (i == 0) and (headers):
|
||||
@@ -52,7 +52,7 @@ class CSVFormat:
|
||||
def detect(cls, stream, delimiter=None):
|
||||
"""Returns True if given stream is valid CSV."""
|
||||
try:
|
||||
csv.Sniffer().sniff(stream[:1024], delimiters=delimiter or cls.DEFAULT_DELIMITER)
|
||||
csv.Sniffer().sniff(stream.read(1024), delimiters=delimiter or cls.DEFAULT_DELIMITER)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -50,7 +50,7 @@ class DBFFormat:
|
||||
"""Returns a dataset from a DBF stream."""
|
||||
|
||||
dset.wipe()
|
||||
_dbf = dbf.Dbf(io.BytesIO(in_stream))
|
||||
_dbf = dbf.Dbf(in_stream)
|
||||
dset.headers = _dbf.fieldNames
|
||||
for record in range(_dbf.recordCount):
|
||||
row = [_dbf[record][f] for f in _dbf.fieldNames]
|
||||
@@ -59,11 +59,8 @@ class DBFFormat:
|
||||
@classmethod
|
||||
def detect(cls, stream):
|
||||
"""Returns True if the given stream is valid DBF"""
|
||||
#_dbf = dbf.Table(StringIO(stream))
|
||||
try:
|
||||
if type(stream) is not bytes:
|
||||
stream = bytes(stream, 'utf-8')
|
||||
_dbf = dbf.Dbf(io.BytesIO(stream), readOnly=True)
|
||||
_dbf = dbf.Dbf(stream, readOnly=True)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -16,8 +16,10 @@ class DataFrameFormat:
|
||||
"""Returns True if given stream is a DataFrame."""
|
||||
if DataFrame is None:
|
||||
return False
|
||||
elif isinstance(stream, DataFrame):
|
||||
return True
|
||||
try:
|
||||
DataFrame(stream)
|
||||
DataFrame(stream.read())
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
@@ -35,14 +35,14 @@ class JSONFormat:
|
||||
"""Returns dataset from JSON stream."""
|
||||
|
||||
dset.wipe()
|
||||
dset.dict = json.loads(in_stream)
|
||||
dset.dict = json.load(in_stream)
|
||||
|
||||
@classmethod
|
||||
def import_book(cls, dbook, in_stream):
|
||||
"""Returns databook from JSON stream."""
|
||||
|
||||
dbook.wipe()
|
||||
for sheet in json.loads(in_stream):
|
||||
for sheet in json.load(in_stream):
|
||||
data = tablib.Dataset()
|
||||
data.title = sheet['title']
|
||||
data.dict = sheet['data']
|
||||
@@ -52,7 +52,7 @@ class JSONFormat:
|
||||
def detect(cls, stream):
|
||||
"""Returns True if given stream is valid JSON."""
|
||||
try:
|
||||
json.loads(stream)
|
||||
json.load(stream)
|
||||
return True
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
|
||||
@@ -70,7 +70,7 @@ class XLSFormat:
|
||||
|
||||
dset.wipe()
|
||||
|
||||
xls_book = xlrd.open_workbook(file_contents=in_stream)
|
||||
xls_book = xlrd.open_workbook(file_contents=in_stream.read())
|
||||
sheet = xls_book.sheet_by_index(0)
|
||||
|
||||
dset.title = sheet.name
|
||||
|
||||
@@ -18,9 +18,6 @@ class XLSXFormat:
|
||||
@classmethod
|
||||
def detect(cls, stream):
|
||||
"""Returns True if given stream is a readable excel file."""
|
||||
if isinstance(stream, bytes):
|
||||
# load_workbook expects a file-like object.
|
||||
stream = BytesIO(stream)
|
||||
try:
|
||||
openpyxl.reader.excel.load_workbook(stream, read_only=True)
|
||||
return True
|
||||
@@ -63,7 +60,7 @@ class XLSXFormat:
|
||||
|
||||
dset.wipe()
|
||||
|
||||
xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream), read_only=True)
|
||||
xls_book = openpyxl.reader.excel.load_workbook(in_stream, read_only=True)
|
||||
sheet = xls_book.active
|
||||
|
||||
dset.title = sheet.title
|
||||
@@ -81,7 +78,7 @@ class XLSXFormat:
|
||||
|
||||
dbook.wipe()
|
||||
|
||||
xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream), read_only=True)
|
||||
xls_book = openpyxl.reader.excel.load_workbook(in_stream, read_only=True)
|
||||
|
||||
for sheet in xls_book.worksheets:
|
||||
data = tablib.Dataset()
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
|
||||
def normalize_input(stream):
    """
    Return a file-like object for `stream`.

    :param stream: a text string, a bytes-like object (``bytes``,
        ``bytearray`` or ``memoryview``), or an object that is already
        file-like.
    :returns: a ``StringIO`` wrapping a ``str``, a ``BytesIO`` wrapping
        bytes-like data, or `stream` itself, unchanged, for any other type.
    """
    if isinstance(stream, str):
        return StringIO(stream)
    # bytearray and memoryview carry raw binary payloads just like bytes; wrap
    # them as well so callers always get an object with ``read``/``seek``
    # instead of a raw buffer.
    if isinstance(stream, (bytes, bytearray, memoryview)):
        return BytesIO(stream)
    # Anything else is assumed to already be file-like and is passed through.
    return stream
|
||||
Binary file not shown.
+23
-7
@@ -7,6 +7,7 @@ import json
|
||||
import pickle
|
||||
import unittest
|
||||
from collections import OrderedDict
|
||||
from io import BytesIO, StringIO
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
@@ -302,6 +303,18 @@ class TablibTestCase(BaseTestCase):
|
||||
with self.assertRaises(UnsupportedFormat):
|
||||
book.export('csv')
|
||||
|
||||
def test_book_import_from_file(self):
|
||||
xlsx_source = Path(__file__).parent / 'files' / 'founders.xlsx'
|
||||
with open(str(xlsx_source), mode='rb') as fh:
|
||||
book = tablib.Databook().load(fh, 'xlsx')
|
||||
self.assertEqual(eval(book.json)[0]['title'], 'Feuille1')
|
||||
|
||||
def test_dataset_import_from_file(self):
|
||||
xlsx_source = Path(__file__).parent / 'files' / 'founders.xlsx'
|
||||
with open(str(xlsx_source), mode='rb') as fh:
|
||||
dset = tablib.Dataset().load(fh, 'xlsx')
|
||||
self.assertEqual(eval(dset.json)[0]['last_name'], 'Adams')
|
||||
|
||||
def test_auto_format_detect(self):
|
||||
"""Test auto format detection."""
|
||||
# html, jira, latex, rst are export only.
|
||||
@@ -330,7 +343,9 @@ class TablibTestCase(BaseTestCase):
|
||||
_tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n'
|
||||
self.assertEqual(tablib.detect_format(_tsv), 'tsv')
|
||||
|
||||
_bunk = '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
_bunk = StringIO(
|
||||
'¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
)
|
||||
self.assertEqual(tablib.detect_format(_bunk), None)
|
||||
|
||||
def test_transpose(self):
|
||||
@@ -692,12 +707,12 @@ class CSVTests(BaseTestCase):
|
||||
def test_csv_format_detect(self):
|
||||
"""Test CSV format detection."""
|
||||
|
||||
_csv = (
|
||||
_csv = StringIO(
|
||||
'1,2,3\n'
|
||||
'4,5,6\n'
|
||||
'7,8,9\n'
|
||||
)
|
||||
_bunk = (
|
||||
_bunk = StringIO(
|
||||
'¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
)
|
||||
|
||||
@@ -915,12 +930,12 @@ class TSVTests(BaseTestCase):
|
||||
def test_tsv_format_detect(self):
|
||||
"""Test TSV format detection."""
|
||||
|
||||
_tsv = (
|
||||
_tsv = StringIO(
|
||||
'1\t2\t3\n'
|
||||
'4\t5\t6\n'
|
||||
'7\t8\t9\n'
|
||||
)
|
||||
_bunk = (
|
||||
_bunk = StringIO(
|
||||
'¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
)
|
||||
|
||||
@@ -999,8 +1014,8 @@ class JSONTests(BaseTestCase):
|
||||
def test_json_format_detect(self):
|
||||
"""Test JSON format detection."""
|
||||
|
||||
_json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
|
||||
_bunk = (
|
||||
_json = StringIO('[{"last_name": "Adams","age": 90,"first_name": "John"}]')
|
||||
_bunk = StringIO(
|
||||
'¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶'
|
||||
)
|
||||
|
||||
@@ -1251,6 +1266,7 @@ class DBFTests(BaseTestCase):
|
||||
_dbf += b' Jefferson' + (b' ' * 70)
|
||||
_dbf += b' 50.0000000'
|
||||
_dbf += b'\x1a'
|
||||
_dbf = BytesIO(_dbf)
|
||||
|
||||
_yaml = '- {age: 90, first_name: John, last_name: Adams}'
|
||||
_tsv = 'foo\tbar'
|
||||
|
||||
Reference in New Issue
Block a user