Merge branch 'master' into bugfix/invalid-ascii-csv

# Conflicts:
#	setup.py
#	tablib/compat.py
#	test_tablib.py
This commit is contained in:
Bruno Alla
2019-03-02 10:34:19 -03:00
25 changed files with 599 additions and 247 deletions
+2 -3
View File
@@ -1,11 +1,10 @@
language: python
cache: pip
python:
- 2.6
- 2.7
- 3.3
- 3.4
- 3.5
- 3.6
install:
- python setup.py install
- pip install -r requirements.txt
script: python test_tablib.py
+2
View File
@@ -34,3 +34,5 @@ Patches and Suggestions
- Mathias Loesch
- Tushar Makkar
- Andrii Soldatenko
- Bruno Soares
- Tsuyoshi Hombashi
+5
View File
@@ -1,6 +1,11 @@
History
-------
0.11.5 (2017-06-13)
+++++++++++++++++++
- Use ``yaml.safe_load`` for importing yaml.
0.11.4 (2017-01-23)
+++++++++++++++++++
+1 -27
View File
@@ -1,32 +1,6 @@
Tablib includes some vendorized python libraries: ordereddict, markup.
Tablib includes some vendorized Python libraries: markup.
Markup License
==============
Markup is in the public domain.
OrderedDict License
===================
Copyright (c) 2009 Raymond Hettinger
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
+22 -9
View File
@@ -21,14 +21,18 @@ Output formats supported:
- Excel (Sets + Books)
- JSON (Sets + Books)
- YAML (Sets + Books)
- Pandas DataFrames (Sets)
- HTML (Sets)
- Jira (Sets)
- TSV (Sets)
- OSD (Sets)
- ODS (Sets)
- CSV (Sets)
- DBF (Sets)
Note that tablib *purposefully* excludes XML support. It always will. (Note: This is a joke. Pull requests are welcome.)
If you're interested in financially supporting Kenneth Reitz open source, consider `visiting this link <https://cash.me/$KennethReitz>`_. Your support helps tremendously with sustainability of motivation, as Open Source is no longer part of my day job.
Overview
--------
@@ -64,13 +68,13 @@ Intelligently add new columns: ::
Slice rows: ::
>>> print data[:2]
>>> print(data[:2])
[('John', 'Adams', 90), ('George', 'Washington', 67)]
Slice columns by header: ::
>>> print data['first_name']
>>> print(data['first_name'])
['John', 'George', 'Henry']
Easily delete rows: ::
@@ -86,7 +90,7 @@ JSON!
+++++
::
>>> print data.json
>>> print(data.export('json'))
[
{
"last_name": "Adams",
@@ -105,7 +109,7 @@ YAML!
+++++
::
>>> print data.yaml
>>> print(data.export('yaml'))
- {age: 90, first_name: John, last_name: Adams}
- {age: 83, first_name: Henry, last_name: Ford}
@@ -113,7 +117,7 @@ CSV...
++++++
::
>>> print data.csv
>>> print(data.export('csv'))
first_name,last_name,age
John,Adams,90
Henry,Ford,83
@@ -123,14 +127,23 @@ EXCEL!
::
>>> with open('people.xls', 'wb') as f:
... f.write(data.xls)
... f.write(data.export('xls'))
DBF!
++++
::
>>> with open('people.dbf', 'wb') as f:
... f.write(data.dbf)
... f.write(data.export('dbf'))
Pandas DataFrame!
+++++++++++++++++
::
>>> print(data.export('df'))
first_name last_name age
0 John Adams 90
1 Henry Ford 83
It's that easy.
@@ -140,7 +153,7 @@ Installation
To install tablib, simply: ::
$ pip install tablib
$ pip install tablib[pandas]
Make sure to check out `Tablib on PyPi <https://pypi.python.org/pypi/tablib/>`_!
+9 -4
View File
@@ -29,18 +29,23 @@ Tablib is an :ref:`MIT Licensed <mit>` format-agnostic tabular dataset library,
>>> data = tablib.Dataset(headers=['First Name', 'Last Name', 'Age'])
>>> for i in [('Kenneth', 'Reitz', 22), ('Bessie', 'Monke', 21)]:
... data.append(i)
>>> print data.json
>>> print(data.export('json'))
[{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 21}]
>>> print data.yaml
>>> print(data.export('yaml'))
- {Age: 22, First Name: Kenneth, Last Name: Reitz}
- {Age: 21, First Name: Bessie, Last Name: Monke}
>>> data.xlsx
>>> data.export('xlsx')
<censored binary data>
>>> data.export('df')
First Name Last Name Age
0 Kenneth Reitz 22
1 Bessie Monke 21
Testimonials
------------
+1 -11
View File
@@ -16,7 +16,7 @@ Distribute & Pip
Of course, the recommended way to install Tablib is with `pip <http://www.pip-installer.org/>`_::
$ pip install tablib
$ pip install tablib[pandas]
-------------------
@@ -40,16 +40,6 @@ To download the full source history from Git, see :ref:`Source Control <scm>`.
.. _zipball: http://github.com/kennethreitz/tablib/zipball/master
.. _speed-extensions:
Speed Extensions
----------------
You can gain some speed improvement by optionally installing the ujson_ library.
Tablib will fallback to the standard `json` module if it doesn't find ``ujson``.
.. _ujson: https://pypi.python.org/pypi/ujson
.. _updates:
Staying Updated
---------------
+1 -2
View File
@@ -49,7 +49,7 @@ Tablib is released under terms of `The MIT License`_.
Tablib License
--------------
Copyright 2016 Kenneth Reitz
Copyright 2017 Kenneth Reitz
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -77,7 +77,6 @@ Pythons Supported
At this time, the following Python platforms are officially supported:
* cPython 2.6
* cPython 2.7
* cPython 3.3
* cPython 3.4
+23 -7
View File
@@ -115,30 +115,38 @@ Tablib's killer feature is the ability to export your :class:`Dataset` objects i
**Comma-Separated Values** ::
>>> data.csv
>>> data.export('csv')
Last Name,First Name,Age
Reitz,Kenneth,22
Monke,Bessie,20
**JavaScript Object Notation** ::
>>> data.json
>>> data.export('json')
[{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 20}]
**YAML Ain't Markup Language** ::
>>> data.yaml
>>> data.export('yaml')
- {Age: 22, First Name: Kenneth, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Last Name: Monke}
**Microsoft Excel** ::
>>> data.xls
>>> data.export('xls')
<censored binary data>
**Pandas DataFrame** ::
>>> data.export('df')
First Name Last Name Age
0 Kenneth Reitz 22
1 Bessie Monke 21
------------------------
Selecting Rows & Columns
------------------------
@@ -216,7 +224,7 @@ Let's add a dynamic column to our :class:`Dataset` object. In this example, we h
Let's have a look at our data. ::
>>> data.yaml
>>> data.export('yaml')
- {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Grade: 0.75, Last Name: Monke}
@@ -246,7 +254,7 @@ For example, we can use the data available in the row to guess the gender of a s
Adding this function to our dataset as a dynamic column would result in: ::
>>> data.yaml
>>> data.export('yaml')
- {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke}
@@ -281,6 +289,14 @@ Now that we have extra meta-data on our rows, we can easily filter our :class:`D
It's that simple. The original :class:`Dataset` is untouched.
Open an Excel Workbook and read first sheet
-------------------------------------------
To open an Excel 2007 and later workbook with a single sheet (or a workbook with multiple sheets but you just want the first sheet), use the following: ::
data = tablib.Dataset()
data.xlsx = open('my_excel_file.xlsx', 'rb').read()
print(data)
Excel Workbook With Multiple Sheets
------------------------------------
@@ -346,7 +362,7 @@ When, it's often useful to create a blank row containing information on the upco
# Write spreadsheet to disk
with open('grades.xls', 'wb') as f:
f.write(tests.xls)
f.write(tests.export('xls'))
The resulting **tests.xls** will have the following layout:
+21
View File
@@ -0,0 +1,21 @@
certifi==2017.7.27.1
chardet==3.0.4
et-xmlfile==1.0.1
idna==2.6
jdcal==1.3
numpy==1.13.1
odfpy==1.3.5
openpyxl==2.4.8
pandas==0.20.3
pkginfo==1.4.1
python-dateutil==2.6.1
pytz==2017.2
PyYAML==3.12
requests==2.18.4
requests-toolbelt==0.8.0
six==1.10.0
tqdm==4.15.0
unicodecsv==0.14.1
urllib3==1.22
xlrd==1.1.0
xlwt==1.3.0
+5 -12
View File
@@ -14,15 +14,6 @@ if sys.argv[-1] == 'publish':
os.system("python setup.py sdist upload")
sys.exit()
if sys.argv[-1] == 'speedups':
try:
__import__('pip')
except ImportError:
print('Pip required.')
sys.exit(1)
os.system('pip install ujson')
sys.exit()
if sys.argv[-1] == 'test':
try:
@@ -43,13 +34,14 @@ packages = [
install = [
'odfpy',
'openpyxl',
'openpyxl>=2.4.0',
'backports.csv',
'xlrd',
'xlwt',
'pyyaml',
]
with open('tablib/core.py', 'r') as fd:
version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
fd.read(), re.MULTILINE).group(1)
@@ -71,13 +63,14 @@ setup(
'Natural Language :: English',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
],
tests_require=['pytest'],
install_requires=install,
extras_require={
'pandas': ['pandas'],
},
)
+6 -16
View File
@@ -13,34 +13,24 @@ import sys
is_py3 = (sys.version_info[0] > 2)
try:
from collections import OrderedDict
except ImportError:
from tablib.packages.ordereddict import OrderedDict
if is_py3:
from io import BytesIO
from io import StringIO
from tablib.packages import markup3 as markup
from statistics import median
from itertools import zip_longest as izip_longest
import csv
import tablib.packages.dbfpy3 as dbfpy
import csv
from io import StringIO
# py3 mappings
ifilter = filter
unicode = str
bytes = bytes
basestring = str
xrange = range
else:
from cStringIO import StringIO as BytesIO
from StringIO import StringIO
from tablib.packages import markup
from itertools import ifilter
from tablib.packages.statistics import median
from itertools import izip_longest
from backports import csv
import tablib.packages.dbfpy as dbfpy
+38 -7
View File
@@ -9,20 +9,21 @@
:license: MIT, see LICENSE for more details.
"""
from collections import OrderedDict
from copy import copy
from operator import itemgetter
from tablib import formats
from tablib.compat import OrderedDict, unicode
from tablib.compat import unicode
__title__ = 'tablib'
__version__ = '0.11.4'
__build__ = 0x001104
__version__ = '0.12.1'
__build__ = 0x001201
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2016 Kenneth Reitz'
__copyright__ = 'Copyright 2017 Kenneth Reitz'
__docformat__ = 'restructuredtext'
@@ -526,9 +527,9 @@ class Dataset(object):
Import assumes (for now) that headers exist.
.. admonition:: Binary Warning
.. admonition:: Binary Warning for Python 2
:class:`Dataset.csv` uses \\r\\n line endings by default, so make
:class:`Dataset.csv` uses \\r\\n line endings by default so, in Python 2, make
sure to write in binary mode::
with open('output.csv', 'wb') as f:
@@ -536,6 +537,18 @@ class Dataset(object):
If you do not do this, and you export the file on Windows, your
CSV file will open in Excel with a blank line between each row.
.. admonition:: Line endings for Python 3
:class:`Dataset.csv` uses \\r\\n line endings by default so, in Python 3, make
sure to include newline='' otherwise you will get a blank line between each row
when you open the file in Excel::
with open('output.csv', 'w', newline='') as f:
f.write(data.csv)
If you do not do this, and you export the file on Windows, your
CSV file will open in Excel with a blank line between each row.
"""
pass
@@ -570,6 +583,18 @@ class Dataset(object):
"""
pass
@property
def df():
    """A DataFrame representation of the :class:`Dataset` object.

    A dataset object can also be imported by setting the :class:`Dataset.df` attribute: ::

        data = tablib.Dataset()
        data.df = DataFrame(np.random.randn(6,4))

    Import assumes (for now) that headers exist.

    .. note:: The ``df`` format relies on the optional ``pandas`` package
              (``pip install tablib[pandas]``).
    """
    # Documentation stub only: the body is intentionally empty.
    pass
@property
def json():
@@ -619,7 +644,6 @@ class Dataset(object):
"""
pass
@property
def latex():
"""A LaTeX booktabs representation of the :class:`Dataset` object. If a
@@ -629,6 +653,13 @@ class Dataset(object):
"""
pass
@property
def jira():
    """A Jira table representation of the :class:`Dataset` object.

    Header cells are delimited with ``||`` and data cells with ``|``.

    .. note:: This method can be used for export only.
    """
    # Documentation stub only: the body is intentionally empty.
    pass
# ----
# Rows
+4 -1
View File
@@ -13,5 +13,8 @@ from . import _xlsx as xlsx
from . import _ods as ods
from . import _dbf as dbf
from . import _latex as latex
from . import _df as df
from . import _rst as rst
from . import _jira as jira
available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods)
available = (json, xls, yaml, csv, dbf, tsv, html, jira, latex, xlsx, ods, df, rst)
+1 -1
View File
@@ -39,7 +39,7 @@ def import_set(dset, in_stream, headers=True, **kwargs):
if (i == 0) and (headers):
dset.headers = row
else:
elif row:
dset.append(row)
+49
View File
@@ -0,0 +1,49 @@
""" Tablib - DataFrame Support.
"""
import sys
if sys.version_info[0] > 2:
from io import BytesIO
else:
from cStringIO import StringIO as BytesIO
try:
from pandas import DataFrame
except ImportError:
DataFrame = None
import tablib
from tablib.compat import unicode
title = 'df'
extensions = ('df', )
def detect(stream):
    """Report whether ``stream`` can be loaded as a DataFrame.

    Returns ``False`` when pandas is not installed (``DataFrame`` is
    ``None``) or when the ``DataFrame`` constructor rejects ``stream`` with a
    ``ValueError``. Any other exception raised by the constructor propagates.
    """
    if DataFrame is not None:
        try:
            DataFrame(stream)
        except ValueError:
            pass
        else:
            return True
    return False
def export_set(dset, index=None):
    """Returns DataFrame representation of Dataset.

    :param dset: dataset to convert; its ``dict`` and ``headers`` attributes
        are read.
    :param index: optional row labels forwarded to the ``DataFrame``
        constructor. ``None`` (the default) keeps pandas' default index.
    :raises NotImplementedError: if pandas is not installed.
    """
    if DataFrame is None:
        raise NotImplementedError(
            'DataFrame Format requires `pandas` to be installed.'
            ' Try `pip install tablib[pandas]`.')
    # ``index`` used to be accepted but silently dropped; pass it through so
    # callers that supply row labels actually get them.
    return DataFrame(dset.dict, columns=dset.headers, index=index)
def import_set(dset, in_stream):
    """Load a DataFrame into ``dset`` in place.

    ``dset`` is wiped first, then its ``dict`` attribute is assigned the
    frame's rows as produced by ``in_stream.to_dict(orient='records')``.
    Nothing is returned.
    """
    dset.wipe()
    records = in_stream.to_dict(orient='records')
    dset.dict = records
+39
View File
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
"""Tablib - Jira table export support.
Generates a Jira table from the dataset.
"""
from tablib.compat import unicode
title = 'jira'
def export_set(dataset):
    """Formats the dataset according to the Jira table syntax:

    ||heading 1||heading 2||heading 3||
    |col A1|col A2|col A3|
    |col B1|col B2|col B3|

    When the dataset has no headers only the body rows are returned.

    :param dataset: dataset to serialize
    :type dataset: tablib.core.Dataset
    """
    heading_line = ''
    if dataset.headers:
        heading_line = _get_header(dataset.headers)
    row_lines = _get_body(dataset)
    if not heading_line:
        return row_lines
    return '%s\n%s' % (heading_line, row_lines)
def _get_body(dataset):
    """Serialize each row of ``dataset`` as a Jira row, one row per line."""
    serialized_rows = [_serialize_row(record) for record in dataset]
    return '\n'.join(serialized_rows)
def _get_header(headers):
    """Serialize ``headers`` as a Jira heading row, delimited by ``||``."""
    heading_delimiter = '||'
    return _serialize_row(headers, delimiter=heading_delimiter)
def _serialize_row(row, delimiter='|'):
return '%s%s%s' % (delimiter,
delimiter.join([unicode(item) if item else ' ' for item in row]),
delimiter)
+6 -9
View File
@@ -3,36 +3,33 @@
""" Tablib - JSON Support
"""
import decimal
import json
from uuid import UUID
import tablib
try:
import ujson as json
except ImportError:
import json
title = 'json'
extensions = ('json', 'jsn')
def date_handler(obj):
if isinstance(obj, decimal.Decimal):
def serialize_objects_handler(obj):
if isinstance(obj, decimal.Decimal) or isinstance(obj, UUID):
return str(obj)
elif hasattr(obj, 'isoformat'):
return obj.isoformat()
else:
return obj
# return obj.isoformat() if hasattr(obj, 'isoformat') else obj
def export_set(dataset):
"""Returns JSON representation of Dataset."""
return json.dumps(dataset.dict, default=date_handler)
return json.dumps(dataset.dict, default=serialize_objects_handler)
def export_book(databook):
"""Returns JSON representation of Databook."""
return json.dumps(databook._package(), default=date_handler)
return json.dumps(databook._package(), default=serialize_objects_handler)
def import_set(dset, in_stream):
+273
View File
@@ -0,0 +1,273 @@
# -*- coding: utf-8 -*-
""" Tablib - reStructuredText Support
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from textwrap import TextWrapper
from tablib.compat import (
median,
unicode,
izip_longest,
)
title = 'rst'
extensions = ('rst',)
MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words.
JUSTIFY_LEFT = 'left'
JUSTIFY_CENTER = 'center'
JUSTIFY_RIGHT = 'right'
JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT)
def to_unicode(value):
    """Return ``value`` as text.

    Byte strings are decoded as UTF-8; anything else is passed through
    ``unicode()``.
    """
    return value.decode('utf-8') if isinstance(value, bytes) else unicode(value)
def _max_word_len(text):
"""
Return the length of the longest word in `text`.
>>> _max_word_len('Python Module for Tabular Datasets')
8
"""
return max((len(word) for word in text.split()))
def _get_column_string_lengths(dataset):
    """
    Measure the text in every column of `dataset`.

    Returns a pair of lists indexed by column: the lengths of every string in
    the column (header first, when there are headers), and the length of the
    longest single word seen in the column.
    """
    if dataset.headers:
        lengths_by_column = [[len(name)] for name in dataset.headers]
        longest_words = [_max_word_len(name) for name in dataset.headers]
    else:
        lengths_by_column = [[] for _ in range(dataset.width)]
        longest_words = [0] * dataset.width

    for record in dataset.dict:
        # Rows are mappings when the dataset has headers, plain sequences
        # otherwise.
        cells = record.values() if hasattr(record, 'values') else record
        for column, cell in enumerate(cells):
            cell_text = to_unicode(cell)
            lengths_by_column[column].append(len(cell_text))
            longest = _max_word_len(cell_text)
            if longest > longest_words[column]:
                longest_words[column] = longest
    return lengths_by_column, longest_words
def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT):
    """
    Render one table row as a list of text lines.

    Each value is wrapped to its column width with `wrapper` (whose ``width``
    attribute is reassigned for every column), so a row occupies as many lines
    as its tallest cell. Cells are justified according to `justify` and joined
    with `sep` surrounded by single spaces.

    Raises ValueError if `justify` is not one of JUSTIFY_VALUES.
    """
    if justify not in JUSTIFY_VALUES:
        raise ValueError('Value of "justify" must be one of "{}"'.format(
            '", "'.join(JUSTIFY_VALUES)
        ))

    # Name of the string method that pads a cell to its column width.
    pad_method = {
        JUSTIFY_LEFT: 'ljust',
        JUSTIFY_CENTER: 'center',
    }.get(justify, 'rjust')

    if sep:
        left_edge, right_edge = sep + ' ', ' ' + sep
    else:
        left_edge = right_edge = ''
    joiner = ' ' + sep + ' '

    wrapped_cells = []
    for value, width in zip(values, widths):
        wrapper.width = width
        wrapped_cells.append(wrapper.wrap(to_unicode(value)))

    rendered = []
    # Shorter cells are filled with empty fragments so every column is
    # present on every physical line of the row.
    for fragments in izip_longest(*wrapped_cells, fillvalue=''):
        padded = [
            getattr(fragment, pad_method)(widths[column])
            for column, fragment in enumerate(fragments)
        ]
        rendered.append(''.join((left_edge, joiner.join(padded), right_edge)))
    return rendered
def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3):
    """
    Returns a list of column widths proportional to the median length
    of the text in their cells.

    Widths are scaled down when the medians would not fit into
    `max_table_width`, reduced by `pad_len` to leave room for separators and
    padding, and finally never made narrower than the longest word in the
    column.
    """
    lengths_by_column, longest_words = _get_column_string_lengths(dataset)
    typical = [int(median(lengths)) for lengths in lengths_by_column]
    combined = sum(typical)
    must_scale = combined > max_table_width - (pad_len * len(typical))

    widths = []
    for typical_len, longest_word in zip(typical, longest_words):
        if must_scale:
            width = max_table_width * typical_len // combined
        else:
            width = typical_len
        # Allow for separator and padding:
        if width > pad_len:
            width -= pad_len
        # Rather widen table than break words:
        widths.append(max(width, longest_word))
    return widths
def export_set_as_simple_table(dataset, column_widths=None):
    """
    Returns reStructuredText simple table representation of dataset.

    `column_widths` may be given to reuse widths computed by the caller;
    otherwise they are derived from the dataset. The table is a border of
    ``=`` runs, the centred headers (if any) followed by another border, the
    data rows, and a closing border.
    """
    lines = []
    wrapper = TextWrapper()
    if column_widths is None:
        column_widths = _get_column_widths(dataset, pad_len=2)
    # Rows are rendered with sep='', which puts two spaces between cells;
    # the border must use the same gap or its column boundaries drift away
    # from the cell text.
    border = '  '.join(['=' * w for w in column_widths])

    lines.append(border)

    if dataset.headers:
        lines.extend(_row_to_lines(
            dataset.headers,
            column_widths,
            wrapper,
            sep='',
            justify=JUSTIFY_CENTER,
        ))
        lines.append(border)
    for row in dataset.dict:
        # Rows are mappings when the dataset has headers, sequences otherwise.
        values = iter(row.values() if hasattr(row, 'values') else row)
        lines.extend(_row_to_lines(values, column_widths, wrapper, ''))
    lines.append(border)
    return '\n'.join(lines)
def export_set_as_grid_table(dataset, column_widths=None):
    """
    Returns reStructuredText grid table representation of dataset.

    `column_widths` may be given to reuse widths computed by the caller;
    otherwise they are derived from the dataset.

    >>> from tablib import Dataset
    >>> from tablib.formats import rst
    >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
    >>> data = Dataset()
    >>> data.headers = ['A', 'B', 'A and B']
    >>> for a, b in bits:
    ...     data.append([bool(a), bool(b), bool(a * b)])
    >>> print(rst.export_set(data, force_grid=True))
    +-------+-------+-------+
    |   A   |   B   | A and |
    |       |       |   B   |
    +=======+=======+=======+
    | False | False | False |
    +-------+-------+-------+
    | True  | False | False |
    +-------+-------+-------+
    | False | True  | False |
    +-------+-------+-------+
    | True  | True  | True  |
    +-------+-------+-------+

    """
    wrapper = TextWrapper()
    if column_widths is None:
        column_widths = _get_column_widths(dataset)

    def rule(fill):
        # A horizontal rule: '+' at every column boundary, `fill` elsewhere.
        segments = [fill * w for w in column_widths]
        return '+' + fill + (fill + '+' + fill).join(segments) + fill + '+'

    header_rule = rule('=')
    row_rule = rule('-')

    output = [row_rule]
    if dataset.headers:
        output.extend(_row_to_lines(
            dataset.headers,
            column_widths,
            wrapper,
            justify=JUSTIFY_CENTER,
        ))
        output.append(header_rule)
    for record in dataset.dict:
        # Rows are mappings when the dataset has headers, sequences otherwise.
        cells = iter(record.values() if hasattr(record, 'values') else record)
        output.extend(_row_to_lines(cells, column_widths, wrapper))
        output.append(row_rule)
    return '\n'.join(output)
def _use_simple_table(head0, col0, width0):
    """
    Use a simple table if the text in the first column is never wrapped

    `head0` is the first header (or None), `col0` the cells of the first
    column and `width0` the width allotted to that column.

    >>> _use_simple_table('menu', ['egg', 'bacon'], 10)
    True
    >>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10)
    False
    """
    def wraps(cell):
        # Text longer than the column width would be wrapped.
        return len(to_unicode(cell)) > width0

    if head0 is not None and wraps(head0):
        return False
    return not any(wraps(cell) for cell in col0)
def export_set(dataset, **kwargs):
    """
    Returns reStructuredText table representation of dataset.

    Returns a simple table if the text in the first column is never
    wrapped, otherwise returns a grid table. An empty dataset yields an
    empty string.

    Recognised keyword arguments: ``force_grid`` (default False) always
    selects the grid format, and ``max_table_width`` (default
    MAX_TABLE_WIDTH) is the approximate width limit.

    The example compares cell contents only, ignoring the exact amount of
    padding between columns:

    >>> from tablib import Dataset
    >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
    >>> data = Dataset()
    >>> data.headers = ['A', 'B', 'A and B']
    >>> for a, b in bits:
    ...     data.append([bool(a), bool(b), bool(a * b)])
    >>> table = data.rst
    >>> [line.split() for line in table.split('\\n')] == [
    ...     ['=====', '=====', '====='],
    ...     ['A', 'B', 'A', 'and'],
    ...     ['B'],
    ...     ['=====', '=====', '====='],
    ...     ['False', 'False', 'False'],
    ...     ['True', 'False', 'False'],
    ...     ['False', 'True', 'False'],
    ...     ['True', 'True', 'True'],
    ...     ['=====', '=====', '====='],
    ... ]
    True

    """
    if not dataset.dict:
        return ''
    grid_forced = kwargs.get('force_grid', False)
    width_limit = kwargs.get('max_table_width', MAX_TABLE_WIDTH)
    column_widths = _get_column_widths(dataset, width_limit)

    first_header = dataset.headers[0] if dataset.headers else None
    simple_fits = _use_simple_table(
        first_header,
        dataset.get_col(0),
        column_widths[0],
    )

    renderer = export_set_as_grid_table
    if simple_fits and not grid_forced:
        renderer = export_set_as_simple_table
    return renderer(dataset, column_widths)
def export_book(databook):
    """
    reStructuredText representation of a Databook.

    Every dataset in the book is exported as a grid table, and consecutive
    tables are separated by a blank line.
    """
    tables = [
        export_set(sheet, force_grid=True)
        for sheet in databook._datasets
    ]
    return '\n\n'.join(tables)
+3 -3
View File
@@ -52,7 +52,7 @@ def export_book(databook, freeze_panes=True):
wb = Workbook()
for sheet in wb.worksheets:
wb.remove_sheet(sheet)
wb.remove(sheet)
for i, dset in enumerate(databook._datasets):
ws = wb.create_sheet()
ws.title = dset.title if dset.title else 'Sheet%s' % (i)
@@ -71,7 +71,7 @@ def import_set(dset, in_stream, headers=True):
dset.wipe()
xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream))
sheet = xls_book.get_active_sheet()
sheet = xls_book.active
dset.title = sheet.title
@@ -119,7 +119,7 @@ def dset_sheet(dataset, ws, freeze_panes=True):
row_number = i + 1
for j, col in enumerate(row):
col_idx = get_column_letter(j + 1)
cell = ws.cell('%s%s' % (col_idx, row_number))
cell = ws['%s%s' % (col_idx, row_number)]
# bold headers
if (row_number == 1) and dataset.headers:
+1 -1
View File
@@ -33,7 +33,7 @@ def import_book(dbook, in_stream):
dbook.wipe()
for sheet in yaml.load(in_stream):
for sheet in yaml.safe_load(in_stream):
data = tablib.Dataset()
data.title = sheet['title']
data.dict = sheet['data']
-127
View File
@@ -1,127 +0,0 @@
# Copyright (c) 2009 Raymond Hettinger
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
from UserDict import DictMixin
class OrderedDict(dict, DictMixin):
    """Dictionary that iterates over its keys in insertion order.

    Values live in the underlying ``dict``. Ordering is tracked separately in
    a circular doubly linked list of ``[key, prev, next]`` nodes anchored by
    a sentinel (``self.__end``), with ``self.__map`` mapping each key to its
    node.
    """

    def __init__(self, *args, **kwds):
        # Accepts the same arguments as ``dict``: at most one positional
        # mapping/iterable plus keyword items.
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # Only build the linked list if it does not exist yet, so calling
            # __init__ again does not discard existing ordering state.
            self.__end
        except AttributeError:
            self.clear()
        self.update(*args, **kwds)

    def clear(self):
        # Reset both the ordering structures and the stored items.
        self.__end = end = []
        end += [None, end, end]         # sentinel node for doubly linked list
        self.__map = {}                 # key --> [key, prev, next]
        dict.clear(self)

    def __setitem__(self, key, value):
        if key not in self:
            # New key: link a node in just before the sentinel, i.e. at the
            # end of the iteration order. Existing keys keep their position.
            end = self.__end
            curr = end[1]
            curr[2] = end[1] = self.__map[key] = [key, curr, end]
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        # Remove the item first (raises KeyError if absent), then unlink its
        # node by joining its neighbours.
        dict.__delitem__(self, key)
        key, prev, next = self.__map.pop(key)
        prev[2] = next
        next[1] = prev

    def __iter__(self):
        # Walk forward from the sentinel until it is reached again.
        end = self.__end
        curr = end[2]
        while curr is not end:
            yield curr[0]
            curr = curr[2]

    def __reversed__(self):
        # Walk backward from the sentinel until it is reached again.
        end = self.__end
        curr = end[1]
        while curr is not end:
            yield curr[0]
            curr = curr[1]

    def popitem(self, last=True):
        # Remove and return a (key, value) pair: the newest when ``last`` is
        # true, the oldest otherwise.
        if not self:
            raise KeyError('dictionary is empty')
        if last:
            key = next(reversed(self))
        else:
            key = next(iter(self))
        value = self.pop(key)
        return key, value

    def __reduce__(self):
        # Pickle support: serialise as an ordered list of [key, value] pairs.
        items = [[k, self[k]] for k in self]
        # Temporarily remove the linked-list attributes so they are left out
        # of the copied instance dict, then restore them.
        tmp = self.__map, self.__end
        del self.__map, self.__end
        inst_dict = vars(self).copy()
        self.__map, self.__end = tmp
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def keys(self):
        # Keys in iteration (insertion) order.
        return list(self)

    # Reuse the DictMixin implementations, which are written in terms of the
    # methods defined on this class rather than dict's own versions.
    setdefault = DictMixin.setdefault
    update = DictMixin.update
    pop = DictMixin.pop
    values = DictMixin.values
    items = DictMixin.items
    iterkeys = DictMixin.iterkeys
    itervalues = DictMixin.itervalues
    iteritems = DictMixin.iteritems

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self.items()))

    def copy(self):
        # New instance of the same class built from this one.
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        # Build an instance whose keys come from ``iterable``, all mapped to
        # ``value``.
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        if isinstance(other, OrderedDict):
            # Against another OrderedDict the comparison is order-sensitive:
            # items are compared pairwise in iteration order.
            if len(self) != len(other):
                return False
            for p, q in zip(list(self.items()), list(other.items())):
                if p != q:
                    return False
            return True
        # Against anything else, fall back to plain dict equality.
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other
+24
View File
@@ -0,0 +1,24 @@
from __future__ import division
def median(data):
    """
    Return the median (middle value) of numeric data, using the common
    "mean of middle two" method. If data is empty, ValueError is raised.

    Mimics the behaviour of Python3's statistics.median

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0

    """
    ordered = sorted(data)
    count = len(ordered)
    if count == 0:
        raise ValueError("No median for empty data")
    middle, is_odd = divmod(count, 2)
    if is_odd:
        return ordered[middle]
    # Even count: average the two central values.
    return (ordered[middle - 1] + ordered[middle]) / 2
+59
View File
@@ -4,9 +4,11 @@
from __future__ import unicode_literals
import datetime
import doctest
import json
import sys
import unittest
from uuid import uuid4
import tablib
from tablib.compat import markup, unicode, is_py3
@@ -227,6 +229,22 @@ class TablibTestCase(unittest.TestCase):
# Delete from invalid index
self.assertRaises(IndexError, self.founders.__delitem__, 3)
def test_json_export(self):
    """JSON export serializes a UUID cell as its string form."""
    address_id = uuid4()
    expected_json = (
        '[{"first_name": "John", "last_name": "Adams", "gpa": 90, '
        '"address_id": "%s"}]' % str(address_id)
    )

    founders = tablib.Dataset(
        headers=self.headers + ('address_id',),
        title='Founders',
    )
    founders.append(('John', 'Adams', 90, address_id))

    self.assertEqual(founders.export('json'), expected_json)
def test_csv_export(self):
"""Verify exporting dataset object as CSV."""
@@ -299,6 +317,23 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(html, d.html)
def test_jira_export(self):
    """Jira export emits a ``||`` heading row followed by ``|`` data rows."""
    expected = '\n'.join((
        '||first_name||last_name||gpa||',
        '|John|Adams|90|',
        '|George|Washington|67|',
        '|Thomas|Jefferson|50|',
    ))
    exported = self.founders.jira
    self.assertEqual(expected, exported)
def test_jira_export_no_headers(self):
    """Without headers, Jira export contains only the data rows."""
    headerless = tablib.Dataset(['a', 'b', 'c'])
    self.assertEqual('|a|b|c|', headerless.jira)
def test_jira_export_none_and_empty_values(self):
    """Empty strings and ``None`` are exported as a single-space cell."""
    sparse_row = ['', None, 'c']
    exported = tablib.Dataset(sparse_row).jira
    self.assertEqual('| | |c|', exported)
def test_jira_export_empty_dataset(self):
    """Jira export of an empty dataset still produces a value."""
    exported = tablib.Dataset().jira
    self.assertTrue(exported is not None)
def test_latex_export(self):
"""LaTeX export"""
@@ -382,7 +417,10 @@ class TablibTestCase(unittest.TestCase):
data.xlsx
data.ods
data.html
data.jira
data.latex
data.df
data.rst
def test_datetime_append(self):
"""Passes in a single datetime and a single date and exports."""
@@ -402,7 +440,9 @@ class TablibTestCase(unittest.TestCase):
data.xlsx
data.ods
data.html
data.jira
data.latex
data.rst
def test_book_export_no_exceptions(self):
"""Test that various exports don't error out."""
@@ -416,6 +456,7 @@ class TablibTestCase(unittest.TestCase):
book.xlsx
book.ods
book.html
data.rst
def test_json_import_set(self):
"""Generate and import JSON set serialization."""
@@ -970,6 +1011,24 @@ class TablibTestCase(unittest.TestCase):
self.founders.append(('First\nSecond', 'Name', 42))
self.founders.export('xlsx')
def test_rst_force_grid(self):
    """``force_grid=True`` switches rst export from simple to grid format."""
    # NOTE(review): ``data`` is not defined in this method; presumably a
    # module-level fixture set up elsewhere in the test module - confirm.
    for founder in (self.john, self.george):
        data.append(founder)
    data.headers = self.headers

    rst_format = tablib.formats._rst
    simple = rst_format.export_set(data)
    grid = rst_format.export_set(data, force_grid=True)

    self.assertNotEqual(simple, grid)
    # Only the grid format draws '+' corner markers.
    self.assertNotIn('+', simple)
    self.assertIn('+', grid)
class DocTests(unittest.TestCase):
    """Runs doctests embedded in tablib modules as part of the unit tests."""

    def test_rst_formatter_doctests(self):
        """Every doctest in the reStructuredText formatter must pass."""
        failed, _attempted = doctest.testmod(tablib.formats._rst)
        self.assertEqual(failed, 0)
if __name__ == '__main__':
unittest.main()
+4 -7
View File
@@ -1,11 +1,8 @@
# Tox (http://tox.testrun.org/) is a tool for running tests
# in multiple virtualenvs. This configuration file will run the
# test suite on all supported python versions. To use it, "pip install tox"
# and then run "tox" from this directory.
[tox]
envlist = py26, py27, py33, py34, py35, py36, pypy
minversion = 2.4
envlist = py27, py34, py35, py36, pypy
[testenv]
commands = python setup.py test
deps = pytest
extras = pandas
commands = python setup.py test