From f187cef5f45e8e4e59d15b732c6c8307142bb634 Mon Sep 17 00:00:00 2001
From: rabinnankhwa
Date: Sat, 30 Aug 2014 23:52:35 +0545
Subject: [PATCH] adding support for creating subset of a dataset.

---
 AUTHORS        |  1 +
 tablib/core.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 test_tablib.py | 20 ++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/AUTHORS b/AUTHORS
index bd0e0c4..309c1f5 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -28,3 +28,4 @@ Patches and Suggestions
 - Marc Abramowitz
 - Alex Gaynor
 - James Douglass
+- Rabin Nankhwa
diff --git a/tablib/core.py b/tablib/core.py
index 76f4569..3fc55cb 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -933,5 +933,53 @@ class Dataset(object):
         self.__headers = None
 
 
+    def subset(self, rows=None, cols=None):
+        """Return a new :class:`Dataset` with only the chosen rows and columns.
+
+        :param rows: iterable of row indexes to keep; ``None`` keeps all rows.
+        :param cols: iterable of header names to keep, in output order;
+            ``None`` keeps all columns.
+        :returns: the new :class:`Dataset`, or ``None`` if there is no data.
+        :raises KeyError: if a name in `cols` is not one of the headers.
+
+        Rows come out in their original order, each at most once.
+        """
+
+        # Nothing to subset without data.
+        if not self:
+            return
+
+        headers = list(self.headers or [])
+        if cols is None:
+            # Keep rows whole; this also covers data that has no headers.
+            cols, positions = headers, None
+        else:
+            cols = list(cols)
+            # Resolve each column once, up front, so a bad name is reported
+            # by name rather than rediscovered for every row.
+            for key in cols:
+                if key not in headers:
+                    raise KeyError(key)
+            positions = [headers.index(key) for key in cols]
+
+        # A set keeps the per-row membership test cheap and lets `rows` be
+        # any iterable, including a one-shot generator.
+        wanted = None if rows is None else set(rows)
+
+        _dset = Dataset()
+        _dset.headers = cols
+
+        for row_no, row in enumerate(self._data):
+            if wanted is not None and row_no not in wanted:
+                continue
+            values = row.list
+            if positions is not None:
+                values = [values[pos] for pos in positions]
+            # Carry the tags over so they are not lost in the copy.
+            _dset.append(row=values, tags=list(row.tags))
+
+        return _dset
+
+
 class Databook(object):
     """A book of :class:`Dataset` objects.
diff --git a/test_tablib.py b/test_tablib.py
index ba57170..54049b1 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -713,5 +713,25 @@ class TablibTestCase(unittest.TestCase):
         self.assertTrue(data[0] == new_row)
 
 
+    def test_subset(self):
+        """Create a subset of a dataset."""
+
+        columns = ('first_name', 'gpa')
+
+        data.headers = self.headers
+        for person in (self.john, self.george, self.tom):
+            data.append(person)
+
+        # Only the requested rows and columns survive, in column order.
+        subset = data.subset(rows=(0, 2), cols=columns)
+        self.assertTrue(isinstance(subset, tablib.Dataset))
+        self.assertEqual(subset.headers, list(columns))
+        self.assertEqual(list(subset[0]), ['John', 90])
+        self.assertEqual(list(subset[1]), ['Thomas', 50])
+
+        # An unknown column name is an error, not a silent omission.
+        self.assertRaises(KeyError, data.subset, cols=('nickname',))
+
+
     def test_formatters(self):
         """Confirm formatters are being triggered."""