diff --git a/AUTHORS b/AUTHORS
index 7d150db..7f14eb1 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -29,5 +29,6 @@ Patches and Suggestions
 - Alex Gaynor
 - James Douglass
 - Tommy Anthony
+- Rabin Nankhwa
 - Marco Dallagiacoma
 - Mathias Loesch
diff --git a/tablib/core.py b/tablib/core.py
index a744a70..8c4cc51 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -163,15 +163,9 @@ class Dataset(object):
         # (column, callback) tuples
         self._formatters = []
 
-        try:
-            self.headers = kwargs['headers']
-        except KeyError:
-            self.headers = None
+        self.headers = kwargs.get('headers')
 
-        try:
-            self.title = kwargs['title']
-        except KeyError:
-            self.title = None
+        self.title = kwargs.get('title')
 
         self._register_formats()
 
@@ -952,6 +946,58 @@ class Dataset(object):
         self.__headers = None
 
 
+    def subset(self, rows=None, cols=None):
+        """Returns a new instance of the :class:`Dataset`,
+        including only specified rows and columns.
+
+        :param rows: iterable of row indexes to keep; ``None`` keeps
+            every row.
+        :param cols: iterable of header names to keep; ``None`` keeps
+            every column.
+
+        Row indexes and header names that do not exist in this dataset
+        are silently dropped. Selected rows keep the order they have in
+        this dataset, while columns follow the order given in ``cols``.
+        Returns ``None`` when this dataset is empty.
+        """
+
+        # Nothing to take a subset of
+        if not self:
+            return
+
+        # NOTE(review): assumes self.headers is set -- confirm how a
+        # dataset without headers should be handled.
+        headers = list(self.headers)
+
+        if rows is None:
+            rows = range(self.height)
+
+        if cols is None:
+            cols = headers
+
+        # Filter out impossible rows and columns. Rows end up in a set so
+        # the per-row membership test below does not rescan a list.
+        valid_rows = range(self.height)
+        rows = set(row for row in rows if row in valid_rows)
+        cols = [header for header in cols if header in headers]
+
+        # Resolve every kept header to its column position once, rather
+        # than searching the header list again for each row.
+        positions = [headers.index(header) for header in cols]
+
+        _dset = Dataset()
+        _dset.headers = list(cols)
+
+        _dset._data = []
+        for row_no, row in enumerate(self._data):
+            # Skip unselected rows before building any row data
+            if row_no not in rows:
+                continue
+            _dset.append(row=Row([row[pos] for pos in positions]))
+
+        return _dset
+
+
 class Databook(object):
     """A book of :class:`Dataset` objects.
 
diff --git a/test_tablib.py b/test_tablib.py
index 2a4f687..87f35f9 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -811,6 +811,30 @@ class TablibTestCase(unittest.TestCase):
         self.assertTrue(data[0] == new_row)
 
 
+    def test_subset(self):
+        """Create a subset of a dataset"""
+
+        rows = (0, 2)
+        columns = ('first_name', 'gpa')
+
+        data.headers = self.headers
+
+        data.append(self.john)
+        data.append(self.george)
+        data.append(self.tom)
+
+        subset = data.subset(rows=rows, cols=columns)
+
+        # The result is a new Dataset limited to the requested columns
+        self.assertTrue(isinstance(subset, tablib.Dataset))
+        self.assertEqual(subset.headers, list(columns))
+
+        # Only the two requested rows survive, in their original order
+        self.assertEqual(subset.height, len(rows))
+        self.assertEqual(subset._data[0].list, ['John', 90])
+        self.assertEqual(subset._data[1].list, ['Thomas', 50])
+
+
     def test_formatters(self):
         """Confirm formatters are being triggered."""
 