mirror of
https://github.com/kennethreitz/tablib.git
synced 2026-06-05 06:56:13 +00:00
reStructuredText (#336)
* median for Python 2 * More compat * Support reStructuredText * Tests
This commit is contained in:
committed by
Iuri de Silvio
parent
75f1bafd69
commit
38486231cc
+4
-1
@@ -22,6 +22,8 @@ except ImportError:
|
|||||||
|
|
||||||
if is_py3:
|
if is_py3:
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from itertools import zip_longest as izip_longest
|
||||||
|
from statistics import median
|
||||||
from tablib.packages import markup3 as markup
|
from tablib.packages import markup3 as markup
|
||||||
import tablib.packages.dbfpy3 as dbfpy
|
import tablib.packages.dbfpy3 as dbfpy
|
||||||
|
|
||||||
@@ -39,7 +41,8 @@ else:
|
|||||||
from cStringIO import StringIO as BytesIO
|
from cStringIO import StringIO as BytesIO
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from tablib.packages import markup
|
from tablib.packages import markup
|
||||||
from itertools import ifilter
|
from tablib.packages.statistics import median
|
||||||
|
from itertools import ifilter, izip_longest
|
||||||
|
|
||||||
import unicodecsv as csv
|
import unicodecsv as csv
|
||||||
import tablib.packages.dbfpy as dbfpy
|
import tablib.packages.dbfpy as dbfpy
|
||||||
|
|||||||
@@ -14,5 +14,6 @@ from . import _ods as ods
|
|||||||
from . import _dbf as dbf
|
from . import _dbf as dbf
|
||||||
from . import _latex as latex
|
from . import _latex as latex
|
||||||
from . import _df as df
|
from . import _df as df
|
||||||
|
from . import _rst as rst
|
||||||
|
|
||||||
available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, df)
|
available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, df, rst)
|
||||||
|
|||||||
@@ -0,0 +1,273 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
""" Tablib - reStructuredText Support
|
||||||
|
"""
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from textwrap import TextWrapper
|
||||||
|
|
||||||
|
from tablib.compat import (
|
||||||
|
median,
|
||||||
|
unicode,
|
||||||
|
izip_longest,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
title = 'rst'
|
||||||
|
extensions = ('rst',)
|
||||||
|
|
||||||
|
|
||||||
|
MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words.
|
||||||
|
|
||||||
|
|
||||||
|
JUSTIFY_LEFT = 'left'
|
||||||
|
JUSTIFY_CENTER = 'center'
|
||||||
|
JUSTIFY_RIGHT = 'right'
|
||||||
|
JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT)
|
||||||
|
|
||||||
|
|
||||||
|
def to_unicode(value):
|
||||||
|
if isinstance(value, bytes):
|
||||||
|
return value.decode('utf-8')
|
||||||
|
return unicode(value)
|
||||||
|
|
||||||
|
|
||||||
|
def _max_word_len(text):
|
||||||
|
"""
|
||||||
|
Return the length of the longest word in `text`.
|
||||||
|
|
||||||
|
|
||||||
|
>>> _max_word_len('Python Module for Tabular Datasets')
|
||||||
|
8
|
||||||
|
|
||||||
|
"""
|
||||||
|
return max((len(word) for word in text.split()))
|
||||||
|
|
||||||
|
|
||||||
|
def _get_column_string_lengths(dataset):
|
||||||
|
"""
|
||||||
|
Returns a list of string lengths of each column, and a list of
|
||||||
|
maximum word lengths.
|
||||||
|
"""
|
||||||
|
if dataset.headers:
|
||||||
|
column_lengths = [[len(h)] for h in dataset.headers]
|
||||||
|
word_lens = [_max_word_len(h) for h in dataset.headers]
|
||||||
|
else:
|
||||||
|
column_lengths = [[] for _ in range(dataset.width)]
|
||||||
|
word_lens = [0 for _ in range(dataset.width)]
|
||||||
|
for row in dataset.dict:
|
||||||
|
values = iter(row.values() if hasattr(row, 'values') else row)
|
||||||
|
for i, val in enumerate(values):
|
||||||
|
text = to_unicode(val)
|
||||||
|
column_lengths[i].append(len(text))
|
||||||
|
word_lens[i] = max(word_lens[i], _max_word_len(text))
|
||||||
|
return column_lengths, word_lens
|
||||||
|
|
||||||
|
|
||||||
|
def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT):
|
||||||
|
"""
|
||||||
|
Returns a table row of wrapped values as a list of lines
|
||||||
|
"""
|
||||||
|
if justify not in JUSTIFY_VALUES:
|
||||||
|
raise ValueError('Value of "justify" must be one of "{}"'.format(
|
||||||
|
'", "'.join(JUSTIFY_VALUES)
|
||||||
|
))
|
||||||
|
if justify == JUSTIFY_LEFT:
|
||||||
|
just = lambda text, width: text.ljust(width)
|
||||||
|
elif justify == JUSTIFY_CENTER:
|
||||||
|
just = lambda text, width: text.center(width)
|
||||||
|
else:
|
||||||
|
just = lambda text, width: text.rjust(width)
|
||||||
|
lpad = sep + ' ' if sep else ''
|
||||||
|
rpad = ' ' + sep if sep else ''
|
||||||
|
pad = ' ' + sep + ' '
|
||||||
|
cells = []
|
||||||
|
for value, width in zip(values, widths):
|
||||||
|
wrapper.width = width
|
||||||
|
text = to_unicode(value)
|
||||||
|
cell = wrapper.wrap(text)
|
||||||
|
cells.append(cell)
|
||||||
|
lines = izip_longest(*cells, fillvalue='')
|
||||||
|
lines = (
|
||||||
|
(just(cell_line, widths[i]) for i, cell_line in enumerate(line))
|
||||||
|
for line in lines
|
||||||
|
)
|
||||||
|
lines = [''.join((lpad, pad.join(line), rpad)) for line in lines]
|
||||||
|
return lines
|
||||||
|
|
||||||
|
|
||||||
|
def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3):
|
||||||
|
"""
|
||||||
|
Returns a list of column widths proportional to the median length
|
||||||
|
of the text in their cells.
|
||||||
|
"""
|
||||||
|
str_lens, word_lens = _get_column_string_lengths(dataset)
|
||||||
|
median_lens = [int(median(lens)) for lens in str_lens]
|
||||||
|
total = sum(median_lens)
|
||||||
|
if total > max_table_width - (pad_len * len(median_lens)):
|
||||||
|
column_widths = (max_table_width * l // total for l in median_lens)
|
||||||
|
else:
|
||||||
|
column_widths = (l for l in median_lens)
|
||||||
|
# Allow for separator and padding:
|
||||||
|
column_widths = (w - pad_len if w > pad_len else w for w in column_widths)
|
||||||
|
# Rather widen table than break words:
|
||||||
|
column_widths = [max(w, l) for w, l in zip(column_widths, word_lens)]
|
||||||
|
return column_widths
|
||||||
|
|
||||||
|
|
||||||
|
def export_set_as_simple_table(dataset, column_widths=None):
|
||||||
|
"""
|
||||||
|
Returns reStructuredText grid table representation of dataset.
|
||||||
|
"""
|
||||||
|
lines = []
|
||||||
|
wrapper = TextWrapper()
|
||||||
|
if column_widths is None:
|
||||||
|
column_widths = _get_column_widths(dataset, pad_len=2)
|
||||||
|
border = ' '.join(['=' * w for w in column_widths])
|
||||||
|
|
||||||
|
lines.append(border)
|
||||||
|
if dataset.headers:
|
||||||
|
lines.extend(_row_to_lines(
|
||||||
|
dataset.headers,
|
||||||
|
column_widths,
|
||||||
|
wrapper,
|
||||||
|
sep='',
|
||||||
|
justify=JUSTIFY_CENTER,
|
||||||
|
))
|
||||||
|
lines.append(border)
|
||||||
|
for row in dataset.dict:
|
||||||
|
values = iter(row.values() if hasattr(row, 'values') else row)
|
||||||
|
lines.extend(_row_to_lines(values, column_widths, wrapper, ''))
|
||||||
|
lines.append(border)
|
||||||
|
return '\n'.join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def export_set_as_grid_table(dataset, column_widths=None):
|
||||||
|
"""
|
||||||
|
Returns reStructuredText grid table representation of dataset.
|
||||||
|
|
||||||
|
|
||||||
|
>>> from tablib import Dataset
|
||||||
|
>>> from tablib.formats import rst
|
||||||
|
>>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
|
||||||
|
>>> data = Dataset()
|
||||||
|
>>> data.headers = ['A', 'B', 'A and B']
|
||||||
|
>>> for a, b in bits:
|
||||||
|
... data.append([bool(a), bool(b), bool(a * b)])
|
||||||
|
>>> print(rst.export_set(data, force_grid=True))
|
||||||
|
+-------+-------+-------+
|
||||||
|
| A | B | A and |
|
||||||
|
| | | B |
|
||||||
|
+=======+=======+=======+
|
||||||
|
| False | False | False |
|
||||||
|
+-------+-------+-------+
|
||||||
|
| True | False | False |
|
||||||
|
+-------+-------+-------+
|
||||||
|
| False | True | False |
|
||||||
|
+-------+-------+-------+
|
||||||
|
| True | True | True |
|
||||||
|
+-------+-------+-------+
|
||||||
|
|
||||||
|
"""
|
||||||
|
lines = []
|
||||||
|
wrapper = TextWrapper()
|
||||||
|
if column_widths is None:
|
||||||
|
column_widths = _get_column_widths(dataset)
|
||||||
|
header_sep = '+=' + '=+='.join(['=' * w for w in column_widths]) + '=+'
|
||||||
|
row_sep = '+-' + '-+-'.join(['-' * w for w in column_widths]) + '-+'
|
||||||
|
|
||||||
|
lines.append(row_sep)
|
||||||
|
if dataset.headers:
|
||||||
|
lines.extend(_row_to_lines(
|
||||||
|
dataset.headers,
|
||||||
|
column_widths,
|
||||||
|
wrapper,
|
||||||
|
justify=JUSTIFY_CENTER,
|
||||||
|
))
|
||||||
|
lines.append(header_sep)
|
||||||
|
for row in dataset.dict:
|
||||||
|
values = iter(row.values() if hasattr(row, 'values') else row)
|
||||||
|
lines.extend(_row_to_lines(values, column_widths, wrapper))
|
||||||
|
lines.append(row_sep)
|
||||||
|
return '\n'.join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _use_simple_table(head0, col0, width0):
|
||||||
|
"""
|
||||||
|
Use a simple table if the text in the first column is never wrapped
|
||||||
|
|
||||||
|
|
||||||
|
>>> _use_simple_table('menu', ['egg', 'bacon'], 10)
|
||||||
|
True
|
||||||
|
>>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10)
|
||||||
|
False
|
||||||
|
|
||||||
|
"""
|
||||||
|
if head0 is not None:
|
||||||
|
head0 = to_unicode(head0)
|
||||||
|
if len(head0) > width0:
|
||||||
|
return False
|
||||||
|
for cell in col0:
|
||||||
|
cell = to_unicode(cell)
|
||||||
|
if len(cell) > width0:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def export_set(dataset, **kwargs):
|
||||||
|
"""
|
||||||
|
Returns reStructuredText table representation of dataset.
|
||||||
|
|
||||||
|
Returns a simple table if the text in the first column is never
|
||||||
|
wrapped, otherwise returns a grid table.
|
||||||
|
|
||||||
|
|
||||||
|
>>> from tablib import Dataset
|
||||||
|
>>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
|
||||||
|
>>> data = Dataset()
|
||||||
|
>>> data.headers = ['A', 'B', 'A and B']
|
||||||
|
>>> for a, b in bits:
|
||||||
|
... data.append([bool(a), bool(b), bool(a * b)])
|
||||||
|
>>> table = data.rst
|
||||||
|
>>> table.split('\\n') == [
|
||||||
|
... '===== ===== =====',
|
||||||
|
... ' A B A and',
|
||||||
|
... ' B ',
|
||||||
|
... '===== ===== =====',
|
||||||
|
... 'False False False',
|
||||||
|
... 'True False False',
|
||||||
|
... 'False True False',
|
||||||
|
... 'True True True ',
|
||||||
|
... '===== ===== =====',
|
||||||
|
... ]
|
||||||
|
True
|
||||||
|
|
||||||
|
"""
|
||||||
|
if not dataset.dict:
|
||||||
|
return ''
|
||||||
|
force_grid = kwargs.get('force_grid', False)
|
||||||
|
max_table_width = kwargs.get('max_table_width', MAX_TABLE_WIDTH)
|
||||||
|
column_widths = _get_column_widths(dataset, max_table_width)
|
||||||
|
|
||||||
|
use_simple_table = _use_simple_table(
|
||||||
|
dataset.headers[0] if dataset.headers else None,
|
||||||
|
dataset.get_col(0),
|
||||||
|
column_widths[0],
|
||||||
|
)
|
||||||
|
if use_simple_table and not force_grid:
|
||||||
|
return export_set_as_simple_table(dataset, column_widths)
|
||||||
|
else:
|
||||||
|
return export_set_as_grid_table(dataset, column_widths)
|
||||||
|
|
||||||
|
|
||||||
|
def export_book(databook):
|
||||||
|
"""
|
||||||
|
reStructuredText representation of a Databook.
|
||||||
|
|
||||||
|
Tables are separated by a blank line. All tables use the grid
|
||||||
|
format.
|
||||||
|
"""
|
||||||
|
return '\n\n'.join(export_set(dataset, force_grid=True)
|
||||||
|
for dataset in databook._datasets)
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
from __future__ import division
|
||||||
|
|
||||||
|
|
||||||
|
def median(data):
|
||||||
|
"""
|
||||||
|
Return the median (middle value) of numeric data, using the common
|
||||||
|
"mean of middle two" method. If data is empty, ValueError is raised.
|
||||||
|
|
||||||
|
Mimics the behaviour of Python3's statistics.median
|
||||||
|
|
||||||
|
>>> median([1, 3, 5])
|
||||||
|
3
|
||||||
|
>>> median([1, 3, 5, 7])
|
||||||
|
4.0
|
||||||
|
|
||||||
|
"""
|
||||||
|
data = sorted(data)
|
||||||
|
n = len(data)
|
||||||
|
if not n:
|
||||||
|
raise ValueError("No median for empty data")
|
||||||
|
i = n // 2
|
||||||
|
if n % 2:
|
||||||
|
return data[i]
|
||||||
|
return (data[i - 1] + data[i]) / 2
|
||||||
@@ -2,6 +2,7 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""Tests for Tablib."""
|
"""Tests for Tablib."""
|
||||||
|
|
||||||
|
import doctest
|
||||||
import json
|
import json
|
||||||
import unittest
|
import unittest
|
||||||
import sys
|
import sys
|
||||||
@@ -383,6 +384,7 @@ class TablibTestCase(unittest.TestCase):
|
|||||||
data.html
|
data.html
|
||||||
data.latex
|
data.latex
|
||||||
data.df
|
data.df
|
||||||
|
data.rst
|
||||||
|
|
||||||
def test_datetime_append(self):
|
def test_datetime_append(self):
|
||||||
"""Passes in a single datetime and a single date and exports."""
|
"""Passes in a single datetime and a single date and exports."""
|
||||||
@@ -403,6 +405,7 @@ class TablibTestCase(unittest.TestCase):
|
|||||||
data.ods
|
data.ods
|
||||||
data.html
|
data.html
|
||||||
data.latex
|
data.latex
|
||||||
|
data.rst
|
||||||
|
|
||||||
def test_book_export_no_exceptions(self):
|
def test_book_export_no_exceptions(self):
|
||||||
"""Test that various exports don't error out."""
|
"""Test that various exports don't error out."""
|
||||||
@@ -416,6 +419,7 @@ class TablibTestCase(unittest.TestCase):
|
|||||||
book.xlsx
|
book.xlsx
|
||||||
book.ods
|
book.ods
|
||||||
book.html
|
book.html
|
||||||
|
data.rst
|
||||||
|
|
||||||
def test_json_import_set(self):
|
def test_json_import_set(self):
|
||||||
"""Generate and import JSON set serialization."""
|
"""Generate and import JSON set serialization."""
|
||||||
@@ -961,6 +965,24 @@ class TablibTestCase(unittest.TestCase):
|
|||||||
self.founders.append(('First\nSecond', 'Name', 42))
|
self.founders.append(('First\nSecond', 'Name', 42))
|
||||||
self.founders.export('xlsx')
|
self.founders.export('xlsx')
|
||||||
|
|
||||||
|
def test_rst_force_grid(self):
|
||||||
|
data.append(self.john)
|
||||||
|
data.append(self.george)
|
||||||
|
data.headers = self.headers
|
||||||
|
|
||||||
|
simple = tablib.formats._rst.export_set(data)
|
||||||
|
grid = tablib.formats._rst.export_set(data, force_grid=True)
|
||||||
|
self.assertNotEqual(simple, grid)
|
||||||
|
self.assertNotIn('+', simple)
|
||||||
|
self.assertIn('+', grid)
|
||||||
|
|
||||||
|
|
||||||
|
class DocTests(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_rst_formatter_doctests(self):
|
||||||
|
results = doctest.testmod(tablib.formats._rst)
|
||||||
|
self.assertEqual(results.failed, 0)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user