reStructuredText (#336)

* median for Python 2

* More compat

* Support reStructuredText

* Tests
This commit is contained in:
Norman Hooper
2018-09-12 20:27:10 +02:00
committed by Iuri de Silvio
parent 75f1bafd69
commit 38486231cc
5 changed files with 325 additions and 2 deletions
+4 -1
View File
@@ -22,6 +22,8 @@ except ImportError:
if is_py3: if is_py3:
from io import BytesIO from io import BytesIO
from itertools import zip_longest as izip_longest
from statistics import median
from tablib.packages import markup3 as markup from tablib.packages import markup3 as markup
import tablib.packages.dbfpy3 as dbfpy import tablib.packages.dbfpy3 as dbfpy
@@ -39,7 +41,8 @@ else:
from cStringIO import StringIO as BytesIO from cStringIO import StringIO as BytesIO
from cStringIO import StringIO from cStringIO import StringIO
from tablib.packages import markup from tablib.packages import markup
from itertools import ifilter from tablib.packages.statistics import median
from itertools import ifilter, izip_longest
import unicodecsv as csv import unicodecsv as csv
import tablib.packages.dbfpy as dbfpy import tablib.packages.dbfpy as dbfpy
+2 -1
View File
@@ -14,5 +14,6 @@ from . import _ods as ods
from . import _dbf as dbf from . import _dbf as dbf
from . import _latex as latex from . import _latex as latex
from . import _df as df from . import _df as df
from . import _rst as rst
available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, df) available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, df, rst)
+273
View File
@@ -0,0 +1,273 @@
# -*- coding: utf-8 -*-
""" Tablib - reStructuredText Support
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from textwrap import TextWrapper
from tablib.compat import (
median,
unicode,
izip_longest,
)
# Name and file extension(s) identifying this format module.
title = 'rst'
extensions = ('rst',)

# Default for the `max_table_width` argument of `_get_column_widths` and for
# the `max_table_width` keyword read by `export_set`.
MAX_TABLE_WIDTH = 80  # Roughly. It may be wider to avoid breaking words.

# Values accepted by the `justify` argument of `_row_to_lines`; any other
# value makes that function raise ValueError.
JUSTIFY_LEFT = 'left'
JUSTIFY_CENTER = 'center'
JUSTIFY_RIGHT = 'right'
JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT)
def to_unicode(value):
    """
    Coerce `value` to a text string.

    Byte strings are decoded as UTF-8; any other object is converted with
    `unicode` (imported from `tablib.compat`).
    """
    is_encoded = isinstance(value, bytes)
    return value.decode('utf-8') if is_encoded else unicode(value)
def _max_word_len(text):
"""
Return the length of the longest word in `text`.
>>> _max_word_len('Python Module for Tabular Datasets')
8
"""
return max((len(word) for word in text.split()))
def _get_column_string_lengths(dataset):
    """
    Measure the text of every column of `dataset`.

    Returns a pair ``(column_lengths, word_lens)``:

    * ``column_lengths`` holds, per column, the list of string lengths of
      the header (when the dataset has headers) and of every cell;
    * ``word_lens`` holds, per column, the length of the longest single
      word found in the header or in any cell.
    """
    headers = dataset.headers
    if headers:
        # Seed each column with the measurements of its header.
        lengths_by_column = [[size] for size in map(len, headers)]
        longest_words = list(map(_max_word_len, headers))
    else:
        column_count = dataset.width
        lengths_by_column = [[] for _ in range(column_count)]
        longest_words = [0] * column_count

    for row in dataset.dict:
        # Rows are mappings or plain sequences; only the values are needed.
        cells = row.values() if hasattr(row, 'values') else row
        for index, cell in enumerate(cells):
            text = to_unicode(cell)
            lengths_by_column[index].append(len(text))
            word_len = _max_word_len(text)
            if word_len > longest_words[index]:
                longest_words[index] = word_len

    return lengths_by_column, longest_words
def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT):
    """
    Render one table row as a list of text lines.

    Each value is converted to text and wrapped to the width of its column
    with `wrapper` (whose ``width`` attribute is reassigned for every
    cell). Cells that wrap onto fewer lines than their neighbours are
    filled with blanks. Every fragment is aligned within its column
    according to `justify`, and the fragments of a line are joined with
    `sep` surrounded by single spaces. When `sep` is non-empty it is also
    placed at both ends of each line.

    Raises ValueError when `justify` is not one of `JUSTIFY_VALUES`.
    """
    if justify not in JUSTIFY_VALUES:
        raise ValueError('Value of "justify" must be one of "{}"'.format(
            '", "'.join(JUSTIFY_VALUES)
        ))

    # Name of the string method that performs the requested alignment.
    if justify == JUSTIFY_LEFT:
        align_method = 'ljust'
    elif justify == JUSTIFY_CENTER:
        align_method = 'center'
    else:
        align_method = 'rjust'

    left_edge = sep + ' ' if sep else ''
    right_edge = ' ' + sep if sep else ''
    joiner = ' ' + sep + ' '

    wrapped_cells = []
    for value, width in zip(values, widths):
        wrapper.width = width
        wrapped_cells.append(wrapper.wrap(to_unicode(value)))

    rendered = []
    for fragments in izip_longest(*wrapped_cells, fillvalue=''):
        aligned = [
            getattr(fragment, align_method)(width)
            for fragment, width in zip(fragments, widths)
        ]
        rendered.append(left_edge + joiner.join(aligned) + right_edge)
    return rendered
def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3):
    """
    Return a list with one width per column of `dataset`.

    The starting point for each column is the median length of its text.
    When the medians do not fit in `max_table_width` once `pad_len`
    characters per column are reserved, they are scaled down
    proportionally. `pad_len` is then subtracted from every width larger
    than it, and finally no column is made narrower than its longest
    word, so the table may end up wider than `max_table_width`.
    """
    str_lens, word_lens = _get_column_string_lengths(dataset)
    typical_lens = [int(median(lens)) for lens in str_lens]
    total = sum(typical_lens)
    must_shrink = total > max_table_width - (pad_len * len(typical_lens))

    widths = []
    for typical, longest_word in zip(typical_lens, word_lens):
        if must_shrink:
            width = max_table_width * typical // total
        else:
            width = typical
        # Allow for separator and padding:
        if width > pad_len:
            width -= pad_len
        # Rather widen table than break words:
        widths.append(max(width, longest_word))
    return widths
def export_set_as_simple_table(dataset, column_widths=None):
    """
    Returns reStructuredText simple table representation of dataset.

    The output consists of a border made of one run of ``=`` per column,
    the wrapped and centred header row followed by a second border (only
    when the dataset has headers), the wrapped data rows, and a closing
    border.

    :param dataset: object providing ``headers`` and ``dict``.
    :param column_widths: list of column widths; when None it is computed
        with ``_get_column_widths(dataset, pad_len=2)``.
    :return: the table as one newline-joined string.
    """
    lines = []
    wrapper = TextWrapper()
    if column_widths is None:
        column_widths = _get_column_widths(dataset, pad_len=2)

    # Rows are produced by `_row_to_lines` with sep='', which joins cells
    # with ' ' + '' + ' ', i.e. two spaces. The border must use the same
    # gap or its `=` runs drift out of line with the columns below them.
    border = '  '.join(['=' * w for w in column_widths])

    lines.append(border)

    if dataset.headers:
        lines.extend(_row_to_lines(
            dataset.headers,
            column_widths,
            wrapper,
            sep='',
            justify=JUSTIFY_CENTER,
        ))
        lines.append(border)

    for row in dataset.dict:
        # Rows are mappings or plain sequences; only the values are needed.
        values = iter(row.values() if hasattr(row, 'values') else row)
        lines.extend(_row_to_lines(values, column_widths, wrapper, ''))

    lines.append(border)
    return '\n'.join(lines)
def export_set_as_grid_table(dataset, column_widths=None):
    """
    Returns reStructuredText grid table representation of dataset.

    Every row is enclosed in ``|`` separators and followed by a ``+---+``
    rule; the header row, when present, is centred and followed by a
    ``+===+`` rule instead. When `column_widths` is None the widths are
    computed with ``_get_column_widths(dataset)``.

    >>> from tablib import Dataset
    >>> from tablib.formats import rst
    >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
    >>> data = Dataset()
    >>> data.headers = ['A', 'B', 'A and B']
    >>> for a, b in bits:
    ...     data.append([bool(a), bool(b), bool(a * b)])
    >>> print(rst.export_set(data, force_grid=True))
    +-------+-------+-------+
    |   A   |   B   | A and |
    |       |       |   B   |
    +=======+=======+=======+
    | False | False | False |
    +-------+-------+-------+
    | True  | False | False |
    +-------+-------+-------+
    | False | True  | False |
    +-------+-------+-------+
    | True  | True  | True  |
    +-------+-------+-------+
    """
    if column_widths is None:
        column_widths = _get_column_widths(dataset)
    wrapper = TextWrapper()

    def rule(char):
        # One run of `char` per column, each padded by one `char` on both
        # sides, with '+' at every column boundary.
        runs = [char * width for width in column_widths]
        return '+' + char + (char + '+' + char).join(runs) + char + '+'

    header_rule = rule('=')
    row_rule = rule('-')

    output = [row_rule]

    if dataset.headers:
        output += _row_to_lines(
            dataset.headers,
            column_widths,
            wrapper,
            justify=JUSTIFY_CENTER,
        )
        output.append(header_rule)

    for row in dataset.dict:
        # Rows are mappings or plain sequences; only the values are needed.
        cells = row.values() if hasattr(row, 'values') else row
        output += _row_to_lines(iter(cells), column_widths, wrapper)
        output.append(row_rule)

    return '\n'.join(output)
def _use_simple_table(head0, col0, width0):
    """
    Tell whether the first column fits in `width0` without wrapping.

    Returns True when neither the first header `head0` (ignored when it is
    None) nor any cell of the first column `col0` is longer than `width0`
    once converted to text; returns False otherwise.

    >>> _use_simple_table('menu', ['egg', 'bacon'], 10)
    True
    >>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10)
    False
    """
    def first_column_values():
        # Header first (if any), then the cells, in their original order.
        if head0 is not None:
            yield head0
        for cell in col0:
            yield cell

    return not any(
        len(to_unicode(value)) > width0 for value in first_column_values()
    )
def export_set(dataset, **kwargs):
    """
    Returns reStructuredText table representation of dataset.

    An empty string is returned when ``dataset.dict`` is empty. Otherwise
    a simple table is returned if the text in the first column is never
    wrapped, and a grid table if it is.

    Recognised keyword arguments:

    * ``force_grid`` (default False): always return a grid table;
    * ``max_table_width`` (default `MAX_TABLE_WIDTH`): target width used
      when computing the column widths.

    The example compares whitespace-separated tokens, so it shows the
    content of each line rather than the exact column padding.

    >>> from tablib import Dataset
    >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
    >>> data = Dataset()
    >>> data.headers = ['A', 'B', 'A and B']
    >>> for a, b in bits:
    ...     data.append([bool(a), bool(b), bool(a * b)])
    >>> [line.split() for line in data.rst.splitlines()] == [
    ...     ['=====', '=====', '====='],
    ...     ['A', 'B', 'A', 'and'],
    ...     ['B'],
    ...     ['=====', '=====', '====='],
    ...     ['False', 'False', 'False'],
    ...     ['True', 'False', 'False'],
    ...     ['False', 'True', 'False'],
    ...     ['True', 'True', 'True'],
    ...     ['=====', '=====', '====='],
    ... ]
    True
    """
    if not dataset.dict:
        return ''

    force_grid = kwargs.get('force_grid', False)
    column_widths = _get_column_widths(
        dataset,
        kwargs.get('max_table_width', MAX_TABLE_WIDTH),
    )

    first_header = dataset.headers[0] if dataset.headers else None
    first_column_fits = _use_simple_table(
        first_header,
        dataset.get_col(0),
        column_widths[0],
    )

    if first_column_fits and not force_grid:
        render = export_set_as_simple_table
    else:
        render = export_set_as_grid_table
    return render(dataset, column_widths)
def export_book(databook):
    """
    reStructuredText representation of a Databook.

    Each dataset in ``databook._datasets`` is exported with
    ``export_set(..., force_grid=True)``, so all tables use the grid
    format, and consecutive tables are separated by a blank line.
    """
    tables = [
        export_set(dataset, force_grid=True)
        for dataset in databook._datasets
    ]
    return '\n\n'.join(tables)
+24
View File
@@ -0,0 +1,24 @@
from __future__ import division
def median(data):
    """
    Return the median (middle value) of numeric data.

    The values are sorted first. With an odd number of values the middle
    one is returned unchanged; with an even number the mean of the two
    middle values is returned. ValueError is raised when `data` is empty.

    Mimics the behaviour of Python3's statistics.median

    >>> median([1, 3, 5])
    3
    >>> median([1, 3, 5, 7])
    4.0
    """
    ordered = sorted(data)
    if not ordered:
        raise ValueError("No median for empty data")
    middle, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[middle]
    lower, upper = ordered[middle - 1:middle + 1]
    return (lower + upper) / 2
+22
View File
@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""Tests for Tablib.""" """Tests for Tablib."""
import doctest
import json import json
import unittest import unittest
import sys import sys
@@ -383,6 +384,7 @@ class TablibTestCase(unittest.TestCase):
data.html data.html
data.latex data.latex
data.df data.df
data.rst
def test_datetime_append(self): def test_datetime_append(self):
"""Passes in a single datetime and a single date and exports.""" """Passes in a single datetime and a single date and exports."""
@@ -403,6 +405,7 @@ class TablibTestCase(unittest.TestCase):
data.ods data.ods
data.html data.html
data.latex data.latex
data.rst
def test_book_export_no_exceptions(self): def test_book_export_no_exceptions(self):
"""Test that various exports don't error out.""" """Test that various exports don't error out."""
@@ -416,6 +419,7 @@ class TablibTestCase(unittest.TestCase):
book.xlsx book.xlsx
book.ods book.ods
book.html book.html
data.rst
def test_json_import_set(self): def test_json_import_set(self):
"""Generate and import JSON set serialization.""" """Generate and import JSON set serialization."""
@@ -961,6 +965,24 @@ class TablibTestCase(unittest.TestCase):
self.founders.append(('First\nSecond', 'Name', 42)) self.founders.append(('First\nSecond', 'Name', 42))
self.founders.export('xlsx') self.founders.export('xlsx')
def test_rst_force_grid(self):
    """The `force_grid` option switches the output to a grid table."""
    # NOTE(review): `data` is not defined in this method; presumably it is
    # the module-level Dataset prepared by the test fixture -- confirm.
    for row in (self.john, self.george):
        data.append(row)
    data.headers = self.headers

    export = tablib.formats._rst.export_set
    simple_table = export(data)
    grid_table = export(data, force_grid=True)

    self.assertNotEqual(simple_table, grid_table)
    # Only the grid output contains '+' characters.
    self.assertNotIn('+', simple_table)
    self.assertIn('+', grid_table)
class DocTests(unittest.TestCase):
    """Runs the doctests embedded in the library's docstrings."""

    def test_rst_formatter_doctests(self):
        """No doctest in `tablib.formats._rst` may fail."""
        failed, _attempted = doctest.testmod(tablib.formats._rst)
        self.assertEqual(failed, 0)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()