Merge branch 'latex-export' of https://github.com/mloesch/tablib into develop

2026-06-05 15:00:19 +00:00 · 2016-02-07 06:08:23 -05:00
parent 8bded88559 79dc4524a0
commit 32cbc36fc1
5 changed files with 209 additions and 1 deletions
@@ -30,3 +30,4 @@ Patches and Suggestions
 - James Douglass
 - Tommy Anthony
 - Marco Dallagiacoma
+- Mathias Loesch
@@ -584,6 +584,16 @@ class Dataset(object):
        pass


+    @property
+    def latex():
+        """A LaTeX ``booktabs``-style table representation of the
+        :class:`Dataset` object. If a title has been set, it is exported as
+        the table caption.
+
+        .. note:: This property can be used for export only.
+        """
+        # NOTE(review): declared without ``self`` and with an empty body, so it
+        # looks like a documentation placeholder that is presumably replaced by
+        # the latex format's exporter when formats are registered -- confirm.
+        pass
+
+
    # ----
    # Rows
    # ----
@@ -12,5 +12,6 @@ from . import _html as html
 from . import _xlsx as xlsx
 from . import _ods as ods
 from . import _dbf as dbf
+from . import _latex as latex

-available = (json, xls, yaml, csv, dbf, tsv, html, xlsx, ods)
+available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods)
@@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+
+"""Tablib - LaTeX table export support.
+
+   Generates a LaTeX booktabs-style table from the dataset.
+"""
+import re
+
+from tablib.compat import unicode
+
+title = 'latex'
+extensions = ('tex',)
+
+# Skeleton of the exported table, filled in with ``%``-style dict formatting
+# (see ``export_set``). Because of that, the literal percent signs that start
+# the LaTeX comments on the first two lines are written as ``%%``. The HEADER
+# and BODY placeholders sit at column 0 because the serialized rows carry
+# their own indentation.
+TABLE_TEMPLATE = """\
+%% Note: add \\usepackage{booktabs} to your preamble
+%%
+\\begin{table}[!htbp]
+  \\centering
+  %(CAPTION)s
+  \\begin{tabular}{%(COLSPEC)s}
+    \\toprule
+%(HEADER)s
+    %(MIDRULE)s
+%(BODY)s
+    \\bottomrule
+  \\end{tabular}
+\\end{table}
+"""
+
+# Maps every character with a special meaning in TeX to the markup that
+# typesets it literally.
+TEX_RESERVED_SYMBOLS_MAP = {
+    '\\': '\\textbackslash{}',
+    '{': '\\{',
+    '}': '\\}',
+    '$': '\\$',
+    '&': '\\&',
+    '#': '\\#',
+    '^': '\\textasciicircum{}',
+    '_': '\\_',
+    '~': '\\textasciitilde{}',
+    '%': '\\%',
+}
+
+# Alternation matching any single reserved character; each key goes through
+# ``re.escape`` because several of them are regex metacharacters as well.
+TEX_RESERVED_SYMBOLS_RE = re.compile('(%s)' % '|'.join(
+    re.escape(symbol) for symbol in TEX_RESERVED_SYMBOLS_MAP))
+
+
+def export_set(dataset):
+    """Returns LaTeX representation of dataset
+
+    The dataset title, if set, becomes the table caption; the headers, if
+    set, become the first row of the table. TeX reserved symbols are escaped
+    in the caption just as they are in the cells, so a title such as
+    ``50% off`` cannot comment out the rest of the caption line.
+
+    :param dataset: dataset to serialize
+    :type dataset: tablib.core.Dataset
+    :returns: the complete ``table`` environment as a string
+    """
+
+    if dataset.title:
+        caption = '\\caption{%s}' % _escape_tex_reserved_symbols(
+            unicode(dataset.title))
+    else:
+        # A lone comment sign keeps the caption line of the template inert.
+        caption = '%'
+
+    header = _serialize_row(dataset.headers) if dataset.headers else ''
+    body = '\n'.join(_serialize_row(row) for row in dataset)
+    return TABLE_TEMPLATE % dict(CAPTION=caption,
+                                 COLSPEC=_colspec(dataset.width),
+                                 HEADER=header,
+                                 MIDRULE=_midrule(dataset.width),
+                                 BODY=body)
+
+
+def _colspec(dataset_width):
+    """Builds the column specification of the LaTeX `tabular` environment.
+
+    The first column is left-aligned and every following column is
+    right-aligned.
+
+    .. note:: This is only a heuristic and will most probably need
+       fine-tuning after export: alignment should follow the data type of a
+       column (text is usually left-aligned, numbers almost always
+       right-aligned).
+
+    :param dataset_width: width of the dataset
+    """
+
+    # One 'l', then an 'r' per remaining column; a width of 0 still yields 'l'
+    # because repeating a string a negative number of times gives ''.
+    return 'l' + 'r' * (dataset_width - 1)
+
+
+def _midrule(dataset_width):
+    """Builds the rule that separates the header from the body.
+
+    Empty and single-column tables get a plain `midrule`; wider tables get
+    one `cmidrule` per column.
+
+    :param dataset_width: width of the dataset to serialize
+    """
+
+    if dataset_width == 1 or not dataset_width:
+        return '\\midrule'
+
+    column_rules = []
+    for position in range(1, dataset_width + 1):
+        column_rules.append(_cmidrule(position, dataset_width))
+    return ' '.join(column_rules)
+
+
+def _cmidrule(colindex, dataset_width):
+    """Builds the `cmidrule` for one column, trimmed according to the
+    position of that column.
+
+    :param colindex: Column index (1-based)
+    :param dataset_width: width of the dataset
+    """
+
+    if colindex == 1:
+        # First column: trim the rule on the right only
+        trim = 'r'
+    elif colindex == dataset_width:
+        # Last column: trim the rule on the left only
+        trim = 'l'
+    else:
+        # Inner columns: trim on both sides
+        trim = 'lr'
+    return '\\cmidrule(%s){%d-%d}' % (trim, colindex, colindex)
+
+
+def _serialize_row(row):
+    """Returns string representation of a single row.
+
+    Only ``None`` is rendered as an empty cell. Other falsy values such as
+    ``0``, ``0.0`` or ``False`` are real data and are written out like any
+    other value.
+
+    :param row: single dataset row
+    :returns: the escaped cells joined by ``&``, indented by six spaces and
+        terminated by the LaTeX row separator
+    """
+
+    cells = []
+    for item in row:
+        if item is None:
+            cells.append('')
+        else:
+            cells.append(_escape_tex_reserved_symbols(unicode(item)))
+    return 6 * ' ' + ' & '.join(cells) + ' \\\\'
+
+
+def _escape_tex_reserved_symbols(input):
+    """Replaces every TeX reserved symbol ('_', '~', etc.) in a string with
+    its escaped form.
+
+    :param input: String to escape
+    """
+    return TEX_RESERVED_SYMBOLS_RE.sub(
+        lambda found: TEX_RESERVED_SYMBOLS_MAP[found.group()], input)
@@ -319,6 +319,67 @@ class TablibTestCase(unittest.TestCase):
        self.assertEqual(html, d.html)


+    def test_latex_export(self):
+        """LaTeX export yields the full booktabs table, caption included."""
+
+        # The backslash right after the opening quotes suppresses the first
+        # newline, so the expected text starts directly with the comment line.
+        expected = """\
+% Note: add \\usepackage{booktabs} to your preamble
+%
+\\begin{table}[!htbp]
+  \\centering
+  \\caption{Founders}
+  \\begin{tabular}{lrr}
+    \\toprule
+      first\\_name & last\\_name & gpa \\\\
+    \\cmidrule(r){1-1} \\cmidrule(lr){2-2} \\cmidrule(l){3-3}
+      John & Adams & 90 \\\\
+      George & Washington & 67 \\\\
+      Thomas & Jefferson & 50 \\\\
+    \\bottomrule
+  \\end{tabular}
+\\end{table}
+"""
+        output = self.founders.latex
+        self.assertEqual(output, expected)
+
+
+    def test_latex_export_empty_dataset(self):
+        """LaTeX export of an empty dataset produces a result"""
+        empty = tablib.Dataset()
+        self.assertTrue(empty.latex is not None)
+
+
+    def test_latex_export_no_headers(self):
+        """LaTeX export works for a dataset without headers"""
+        data = tablib.Dataset()
+        data.append(('one', 'two', 'three'))
+        exported = data.latex
+        self.assertTrue('one' in exported)
+
+
+    def test_latex_export_caption(self):
+        """LaTeX export emits a caption only when a title is set"""
+        data = tablib.Dataset()
+        data.append(('one', 'two', 'three'))
+        untitled = data.latex
+        self.assertFalse('caption' in untitled)
+
+        data.title = 'Title'
+        titled = data.latex
+        self.assertTrue('\\caption{Title}' in titled)
+
+
+    def test_latex_export_none_values(self):
+        """LaTeX export does not print ``None`` cells or headers"""
+        row = ['foo', None, 'bar']
+        headers = ['foo', None, 'bar']
+        data = tablib.Dataset(row, headers=headers)
+        exported = data.latex
+        self.assertTrue('foo' in exported)
+        self.assertFalse('None' in exported)
+
+
+    def test_latex_escaping(self):
+        """LaTeX export escapes TeX reserved symbols"""
+        exported = tablib.Dataset(['~', '^']).latex
+
+        for raw, escaped in (('~', 'textasciitilde'),
+                             ('^', 'textasciicircum')):
+            self.assertFalse(raw in exported)
+            self.assertTrue(escaped in exported)
+
+
    def test_unicode_append(self):
        """Passes in a single unicode character and exports."""

@@ -338,6 +399,7 @@ class TablibTestCase(unittest.TestCase):
        data.xlsx
        data.ods
        data.html
+        data.latex


    def test_book_export_no_exceptions(self):