reStructuredText (#336)

* median for Python 2 * More compat * Support reStructuredText * Tests
2026-06-05 06:56:13 +00:00 · 2018-09-12 20:27:10 +02:00
parent 75f1bafd69
commit 38486231cc
5 changed files with 325 additions and 2 deletions
@@ -22,6 +22,8 @@ except ImportError:

 if is_py3:
    from io import BytesIO
+    from itertools import zip_longest as izip_longest
+    from statistics import median
    from tablib.packages import markup3 as markup
    import tablib.packages.dbfpy3 as dbfpy

@@ -39,7 +41,8 @@ else:
    from cStringIO import StringIO as BytesIO
    from cStringIO import StringIO
    from tablib.packages import markup
-    from itertools import ifilter
+    from tablib.packages.statistics import median
+    from itertools import ifilter, izip_longest

    import unicodecsv as csv
    import tablib.packages.dbfpy as dbfpy
@@ -14,5 +14,6 @@ from . import _ods as ods
 from . import _dbf as dbf
 from . import _latex as latex
 from . import _df as df
+from . import _rst as rst

-available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, df)
+available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, df, rst)
@@ -0,0 +1,273 @@
+# -*- coding: utf-8 -*-
+
+""" Tablib - reStructuredText Support
+"""
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from textwrap import TextWrapper
+
+from tablib.compat import (
+    median,
+    unicode,
+    izip_longest,
+)
+
+
+# Name and file extensions of this format module.
+# NOTE(review): presumably read by tablib's format registry -- confirm.
+title = 'rst'
+extensions = ('rst',)
+
+
+# Default table width, in characters, used by `_get_column_widths` and
+# `export_set` when the caller does not supply one.
+MAX_TABLE_WIDTH = 80  # Roughly. It may be wider to avoid breaking words.
+
+
+# Cell justification modes; `_row_to_lines` rejects any value that is not
+# listed in JUSTIFY_VALUES.
+JUSTIFY_LEFT = 'left'
+JUSTIFY_CENTER = 'center'
+JUSTIFY_RIGHT = 'right'
+JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT)
+
+
+def to_unicode(value):
+    """
+    Return `value` as text.
+
+    Byte strings are decoded as UTF-8; any other object is passed to
+    `unicode()`.
+    """
+    if not isinstance(value, bytes):
+        return unicode(value)
+    return value.decode('utf-8')
+
+
+def _max_word_len(text):
+    """
+    Return the length of the longest word in `text`.
+
+    Words are the whitespace-separated chunks produced by `text.split()`.
+    Returns 0 when `text` is empty or contains only whitespace.
+
+    >>> _max_word_len('Python Module for Tabular Datasets')
+    8
+    >>> _max_word_len('')
+    0
+
+    """
+    # `max()` raises ValueError on an empty sequence and its `default`
+    # keyword does not exist on Python 2, so seed the candidates with 0.
+    return max([0] + [len(word) for word in text.split()])
+
+
+def _get_column_string_lengths(dataset):
+    """
+    Returns a list of string lengths of each column, and a list of
+    maximum word lengths.
+
+    The first list holds, per column, the text length of the header (when
+    the dataset has headers) followed by the text length of every cell.
+    The second list holds, per column, the length of the longest single
+    word found in the header or in any cell.
+    """
+    if dataset.headers:
+        # Measure headers as text, the same way cells are measured below
+        # and the same way `_row_to_lines` renders them. A bytes or
+        # non-string header would otherwise be measured wrongly or fail
+        # on `len()` / `.split()`.
+        headers = [to_unicode(h) for h in dataset.headers]
+        column_lengths = [[len(h)] for h in headers]
+        word_lens = [_max_word_len(h) for h in headers]
+    else:
+        column_lengths = [[] for _ in range(dataset.width)]
+        word_lens = [0 for _ in range(dataset.width)]
+    for row in dataset.dict:
+        # A row is either a mapping (use its values) or a plain sequence.
+        values = iter(row.values() if hasattr(row, 'values') else row)
+        for i, val in enumerate(values):
+            text = to_unicode(val)
+            column_lengths[i].append(len(text))
+            word_lens[i] = max(word_lens[i], _max_word_len(text))
+    return column_lengths, word_lens
+
+
+def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT):
+    """
+    Render one table row as a list of text lines.
+
+    Each value is converted to text and wrapped by `wrapper` to the width
+    of its column, so a row spans as many lines as its tallest cell.
+    Shorter cells are filled with blanks. Cell text is justified according
+    to `justify`; `sep` is drawn between cells and, when not empty, at both
+    ends of every line.
+
+    Raises ValueError if `justify` is not one of JUSTIFY_VALUES.
+    """
+    if justify not in JUSTIFY_VALUES:
+        raise ValueError('Value of "justify" must be one of "{}"'.format(
+            '", "'.join(JUSTIFY_VALUES)
+        ))
+    # Name of the string method that applies the requested justification.
+    if justify == JUSTIFY_LEFT:
+        align = 'ljust'
+    elif justify == JUSTIFY_CENTER:
+        align = 'center'
+    else:
+        align = 'rjust'
+
+    left_edge = sep + ' ' if sep else ''
+    right_edge = ' ' + sep if sep else ''
+    joiner = ' ' + sep + ' '
+
+    wrapped_cells = []
+    for value, width in zip(values, widths):
+        wrapper.width = width
+        wrapped_cells.append(wrapper.wrap(to_unicode(value)))
+
+    rendered = []
+    # Walk the wrapped cells line by line; exhausted cells contribute ''.
+    for fragments in izip_longest(*wrapped_cells, fillvalue=''):
+        padded = [
+            getattr(fragment, align)(widths[i])
+            for i, fragment in enumerate(fragments)
+        ]
+        rendered.append(''.join((left_edge, joiner.join(padded), right_edge)))
+    return rendered
+
+
+def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3):
+    """
+    Work out a width for every column of `dataset`.
+
+    Widths start from the median text length of each column. If the
+    medians add up to more than `max_table_width` less `pad_len` per
+    column, they are scaled in proportion to `max_table_width`. `pad_len`
+    is then taken off every width larger than it, and finally no column is
+    made narrower than its longest word.
+    """
+    str_lens, word_lens = _get_column_string_lengths(dataset)
+    medians = [int(median(lens)) for lens in str_lens]
+    total = sum(medians)
+    budget = max_table_width - (pad_len * len(medians))
+    if total > budget:
+        scaled = [max_table_width * m // total for m in medians]
+    else:
+        scaled = list(medians)
+    widths = []
+    for width, longest_word in zip(scaled, word_lens):
+        # Allow for separator and padding:
+        if width > pad_len:
+            width -= pad_len
+        # Rather widen table than break words:
+        widths.append(max(width, longest_word))
+    return widths
+
+
+def export_set_as_simple_table(dataset, column_widths=None):
+    """
+    Returns reStructuredText simple table representation of dataset.
+
+    Column widths are computed from the dataset (with a padding of 2)
+    unless `column_widths` is given. The table is framed by rows of `=`;
+    headers, when present, are centred and closed by another such row.
+    """
+    if column_widths is None:
+        column_widths = _get_column_widths(dataset, pad_len=2)
+    wrapper = TextWrapper()
+    border = '  '.join('=' * width for width in column_widths)
+
+    table = [border]
+    if dataset.headers:
+        table += _row_to_lines(
+            dataset.headers,
+            column_widths,
+            wrapper,
+            sep='',
+            justify=JUSTIFY_CENTER,
+        )
+        table.append(border)
+    for row in dataset.dict:
+        # A row is either a mapping (use its values) or a plain sequence.
+        cells = row.values() if hasattr(row, 'values') else row
+        table += _row_to_lines(iter(cells), column_widths, wrapper, sep='')
+    table.append(border)
+    return '\n'.join(table)
+
+
+def export_set_as_grid_table(dataset, column_widths=None):
+    """
+    Returns reStructuredText grid table representation of dataset.
+
+    Column widths are computed from the dataset unless `column_widths`
+    is given. Headers, when present, are centred and closed by a rule of
+    `=`; every data row is followed by a rule of `-`.
+
+    >>> from tablib import Dataset
+    >>> from tablib.formats import rst
+    >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
+    >>> data = Dataset()
+    >>> data.headers = ['A', 'B', 'A and B']
+    >>> for a, b in bits:
+    ...     data.append([bool(a), bool(b), bool(a * b)])
+    >>> print(rst.export_set(data, force_grid=True))
+    +-------+-------+-------+
+    |   A   |   B   | A and |
+    |       |       |   B   |
+    +=======+=======+=======+
+    | False | False | False |
+    +-------+-------+-------+
+    | True  | False | False |
+    +-------+-------+-------+
+    | False | True  | False |
+    +-------+-------+-------+
+    | True  | True  | True  |
+    +-------+-------+-------+
+
+    """
+    if column_widths is None:
+        column_widths = _get_column_widths(dataset)
+    wrapper = TextWrapper()
+
+    def rule(char):
+        # Horizontal rule drawn with `char`, with `+` at column borders.
+        inner = (char + '+' + char).join([char * w for w in column_widths])
+        return '+' + char + inner + char + '+'
+
+    header_rule = rule('=')
+    row_rule = rule('-')
+
+    table = [row_rule]
+    if dataset.headers:
+        table.extend(_row_to_lines(
+            dataset.headers,
+            column_widths,
+            wrapper,
+            justify=JUSTIFY_CENTER,
+        ))
+        table.append(header_rule)
+    for row in dataset.dict:
+        # A row is either a mapping (use its values) or a plain sequence.
+        cells = row.values() if hasattr(row, 'values') else row
+        table.extend(_row_to_lines(iter(cells), column_widths, wrapper))
+        table.append(row_rule)
+    return '\n'.join(table)
+
+
+def _use_simple_table(head0, col0, width0):
+    """
+    Tell whether the first column fits in `width0` without wrapping.
+
+    Returns True when neither `head0` (ignored if None) nor any cell of
+    `col0` is longer than `width0` once converted to text.
+
+    >>> _use_simple_table('menu', ['egg', 'bacon'], 10)
+    True
+    >>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10)
+    False
+
+    """
+    if head0 is not None and len(to_unicode(head0)) > width0:
+        return False
+    return all(len(to_unicode(cell)) <= width0 for cell in col0)
+
+
+def export_set(dataset, **kwargs):
+    """
+    Returns reStructuredText table representation of dataset.
+
+    A simple table is produced when nothing in the first column needs
+    wrapping; otherwise, or when the keyword `force_grid` is true, a grid
+    table is produced. The keyword `max_table_width` overrides
+    MAX_TABLE_WIDTH. An empty dataset gives an empty string.
+
+    >>> from tablib import Dataset
+    >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
+    >>> data = Dataset()
+    >>> data.headers = ['A', 'B', 'A and B']
+    >>> for a, b in bits:
+    ...     data.append([bool(a), bool(b), bool(a * b)])
+    >>> table = data.rst
+    >>> table.split('\\n') == [
+    ...     '=====  =====  =====',
+    ...     '  A      B    A and',
+    ...     '                B  ',
+    ...     '=====  =====  =====',
+    ...     'False  False  False',
+    ...     'True   False  False',
+    ...     'False  True   False',
+    ...     'True   True   True ',
+    ...     '=====  =====  =====',
+    ... ]
+    True
+
+    """
+    if not dataset.dict:
+        return ''
+    force_grid = kwargs.get('force_grid', False)
+    max_table_width = kwargs.get('max_table_width', MAX_TABLE_WIDTH)
+    column_widths = _get_column_widths(dataset, max_table_width)
+
+    first_header = dataset.headers[0] if dataset.headers else None
+    fits_simple = _use_simple_table(
+        first_header,
+        dataset.get_col(0),
+        column_widths[0],
+    )
+    if force_grid or not fits_simple:
+        return export_set_as_grid_table(dataset, column_widths)
+    return export_set_as_simple_table(dataset, column_widths)
+
+
+def export_book(databook):
+    """
+    reStructuredText representation of a Databook.
+
+    Every dataset is rendered as a grid table, and the tables are joined
+    with a blank line between them.
+    """
+    tables = [
+        export_set(dataset, force_grid=True)
+        for dataset in databook._datasets
+    ]
+    return '\n\n'.join(tables)
@@ -0,0 +1,24 @@
+from __future__ import division
+
+
+def median(data):
+    """
+    Return the median (middle value) of numeric data.
+
+    For an even number of values the mean of the two middle values is
+    returned. If data is empty, ValueError is raised.
+
+    Mimics the behaviour of Python3's statistics.median
+
+    >>> median([1, 3, 5])
+    3
+    >>> median([1, 3, 5, 7])
+    4.0
+
+    """
+    ordered = sorted(data)
+    if not ordered:
+        raise ValueError("No median for empty data")
+    middle, is_odd = divmod(len(ordered), 2)
+    if is_odd:
+        return ordered[middle]
+    return (ordered[middle - 1] + ordered[middle]) / 2
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 """Tests for Tablib."""

+import doctest
 import json
 import unittest
 import sys
@@ -383,6 +384,7 @@ class TablibTestCase(unittest.TestCase):
        data.html
        data.latex
        data.df
+        data.rst

    def test_datetime_append(self):
        """Passes in a single datetime and a single date and exports."""
@@ -403,6 +405,7 @@ class TablibTestCase(unittest.TestCase):
        data.ods
        data.html
        data.latex
+        data.rst

    def test_book_export_no_exceptions(self):
        """Test that various exports don't error out."""
@@ -416,6 +419,7 @@ class TablibTestCase(unittest.TestCase):
        book.xlsx
        book.ods
        book.html
+        data.rst

    def test_json_import_set(self):
        """Generate and import JSON set serialization."""
@@ -961,6 +965,24 @@ class TablibTestCase(unittest.TestCase):
        self.founders.append(('First\nSecond', 'Name', 42))
        self.founders.export('xlsx')

+    def test_rst_force_grid(self):
+        """`force_grid=True` makes the rst export use a grid table."""
+        data.append(self.john)
+        data.append(self.george)
+        data.headers = self.headers
+
+        rst = tablib.formats._rst
+        simple_table = rst.export_set(data)
+        grid_table = rst.export_set(data, force_grid=True)
+
+        self.assertNotEqual(simple_table, grid_table)
+        # Only grid tables draw `+` at the cell corners.
+        self.assertNotIn('+', simple_table)
+        self.assertIn('+', grid_table)
+
+
+class DocTests(unittest.TestCase):
+    """Runs the doctests embedded in the rst formatter."""
+
+    def test_rst_formatter_doctests(self):
+        """No docstring example in `tablib.formats._rst` may fail."""
+        outcome = doctest.testmod(tablib.formats._rst)
+        self.assertEqual(outcome.failed, 0)
+

 if __name__ == '__main__':
    unittest.main()