From 864f29cc4b833fc3ac8cdeaa695d5894901d35b5 Mon Sep 17 00:00:00 2001 From: Mark Walling Date: Thu, 30 Jun 2011 22:38:57 -0400 Subject: [PATCH 01/96] Updated some docstrings in core.py * Binary warning for CSV output, because if you don't, Excel gets upset when Python translates \r\n to \r\n\r\n * Cleaned up what looked like a couple of copy paste errors --- tablib/core.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 53f3767..1025b61 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -433,7 +433,7 @@ class Dataset(object): .. admonition:: Binary Warning - :class:`Dataset.xlsx` contains binary data, so make sure to write in binary mode:: + :class:`Dataset.ods` contains binary data, so make sure to write in binary mode:: with open('output.ods', 'wb') as f: f.write(data.ods)' @@ -452,6 +452,17 @@ class Dataset(object): data.csv = 'age, first_name, last_name\\n90, John, Adams' Import assumes (for now) that headers exist. + + .. admonition:: Binary Warning + + :class:`Dataset.csv` uses \\r\\n line endings by default, so make + sure to write in binary mode:: + + with open('output.csv', 'wb') as f: + f.write(data.csv)' + + If you do not do this, and you export the file on Windows, your + CSV file will open in Excel with a blank line between each row. """ pass @@ -477,7 +488,7 @@ class Dataset(object): set, a YAML list of objects will be returned. If no headers have been set, a YAML list of lists (rows) will be returned instead. 
- A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: + A dataset object can also be imported by setting the :class:`Dataset.yaml` attribute: :: data = tablib.Dataset() data.yaml = '- {age: 90, first_name: John, last_name: Adams}' From 23a5bb14432eda6aa38f47c1bb7e4f6a4b4aa21a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 30 Jun 2011 23:00:26 -0400 Subject: [PATCH 02/96] yay --- test_tablib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_tablib.py b/test_tablib.py index 252dde4..8245944 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -95,7 +95,7 @@ class TablibTestCase(unittest.TestCase): def test_add_callable_column(self): """Verify adding column with values specified as callable.""" - new_col = [lambda x: x[0]] + new_col = lambda x: x[0] self.founders.append_col(new_col, header='first_again') From 6313437a27d68e42247802f7f68ec3270cb9a2da Mon Sep 17 00:00:00 2001 From: Mark Walling Date: Fri, 1 Jul 2011 17:51:43 -0400 Subject: [PATCH 03/96] Added support for detecting unicode column headers Also added tests! Fix for kennethreitz#26 --- tablib/core.py | 6 +++--- test_tablib.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 53f3767..00e55af 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -162,7 +162,7 @@ class Dataset(object): def __getitem__(self, key): - if isinstance(key, str): + if isinstance(key, str) or isinstance(key, unicode): if key in self.headers: pos = self.headers.index(key) # get 'key' index from each data return [row[pos] for row in self._data] @@ -182,7 +182,7 @@ class Dataset(object): def __delitem__(self, key): - if isinstance(key, str): + if isinstance(key, str) or isinstance(key, unicode): if key in self.headers: @@ -730,7 +730,7 @@ class Dataset(object): sorted. 
""" - if isinstance(col, str): + if isinstance(col, str) or isinstance(col, unicode): if not self.headers: raise HeadersNeeded diff --git a/test_tablib.py b/test_tablib.py index 252dde4..6695f5e 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -511,6 +511,48 @@ class TablibTestCase(unittest.TestCase): data.csv + + def test_csv_column_select(self): + """Build up a CSV and test selecting a column""" + + data = tablib.Dataset() + data.csv = self.founders.csv + + headers = data.headers + self.assertTrue(isinstance(headers[0], unicode)) + + orig_first_name = self.founders[self.headers[0]] + csv_first_name = data[headers[0]] + self.assertEquals(orig_first_name, csv_first_name) + + + def test_csv_column_delete(self): + """Build up a CSV and test deleting a column""" + + data = tablib.Dataset() + data.csv = self.founders.csv + + target_header = data.headers[0] + self.assertTrue(isinstance(target_header, unicode)) + + del data[target_header] + + self.assertTrue(target_header not in data.headers) + + def test_csv_column_sort(self): + """Build up a CSV and test sorting a column by name""" + + data = tablib.Dataset() + data.csv = self.founders.csv + + orig_target_header = self.founders.headers[1] + target_header = data.headers[1] + + self.founders.sort(orig_target_header) + data.sort(target_header) + + self.assertEquals(self.founders[orig_target_header], data[target_header]) + if __name__ == '__main__': unittest.main() From 62ad123ad83553585007fa105c1679db35016142 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 4 Jul 2011 05:49:41 -0400 Subject: [PATCH 04/96] updated history --- HISTORY.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 096d1c5..0fa8a61 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,10 @@ History ------- +++++ + +* Unicode Column Headers + 0.9.11 (2011-06-30) +++++++++++++++++++ From 4a3fde37a3e4d2822407fcf5526ebf072041d2e0 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 4 Jul 2011 14:05:48 -0400 
Subject: [PATCH 05/96] tox cleanups --- tox.ini | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tox.ini b/tox.ini index da39016..c034d2a 100644 --- a/tox.ini +++ b/tox.ini @@ -4,8 +4,3 @@ envlist = py25,py26,py27,py3 [testenv] commands=py.test --junitxml=junit-{envname}.xml deps = pytest - - -[testenv:py3] -basepython=/usr/bin/python3 -simplejson = pytest \ No newline at end of file From 1d460bac401edc450f999fda72cac2d29f9b7efe Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 4 Jul 2011 14:27:42 -0400 Subject: [PATCH 06/96] setup.py changes --- setup.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 6c12da2..63b1d76 100755 --- a/setup.py +++ b/setup.py @@ -7,18 +7,11 @@ import sys import tablib try: - from setuptools import setup, find_packages + from setuptools import setup except ImportError: - from distutils.core import setup, find_packages + from distutils.core import setup -packages = find_packages(exclude=('docs',)) - -if sys.version_info[:2] < (3,0): - packages = [p for p in packages if '3' not in p] -else: - packages = [p for p in packages if '2' not in p] - if sys.argv[-1] == 'publish': os.system("python setup.py sdist upload") sys.exit() @@ -52,7 +45,17 @@ setup( author='Kenneth Reitz', author_email='me@kennethreitz.com', url='http://tablib.org', - packages=packages, + packages=[ + 'tablib', 'tablib.formats', + 'tablib.packages', + 'tablib.packages.xlwt', + 'tablib.packages.openpyxl', + 'tablib.packages.openpyxl.shared', + 'tablib.packages.openpyxl.reader', + 'tablib.packages.openpyxl.writer', + 'tablib.packages.yaml', + 'tablib.packages.unicodecsv' + ], license='MIT', classifiers=( 'Development Status :: 5 - Production/Stable', From cd5aa4fc06d9af90b076c02a9f596bb84db1d09f Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 4 Jul 2011 14:36:08 -0400 Subject: [PATCH 07/96] toxless --- tox.ini | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 tox.ini diff 
--git a/tox.ini b/tox.ini deleted file mode 100644 index c034d2a..0000000 --- a/tox.ini +++ /dev/null @@ -1,6 +0,0 @@ -[tox] -envlist = py25,py26,py27,py3 - -[testenv] -commands=py.test --junitxml=junit-{envname}.xml -deps = pytest From e74a8f41cc0bdfe1cb9aae90b9240fd15208e9cb Mon Sep 17 00:00:00 2001 From: Mark Walling Date: Mon, 11 Jul 2011 17:13:47 -0400 Subject: [PATCH 08/96] Created get_col method with tests and tutorial.rst update Useful when you have multiple columns with the same header --- docs/tutorial.rst | 7 +++++++ tablib/core.py | 7 ++++++- test_tablib.py | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 561b24c..117196d 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -146,6 +146,13 @@ To do so, we access the :class:`Dataset` as if it were a standard Python diction >>> data['First Name'] ['Kenneth', 'Bessie'] +You can also access the column using its index. :: + + >>> d.headers + ['Last Name', 'First Name', 'Age'] + >>> d.get_col(1) + ['Kenneth', 'Bessie'] + Let's find the average age. 
:: >>> ages = data['Age'] diff --git a/tablib/core.py b/tablib/core.py index 7e78b56..0429136 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -175,7 +175,6 @@ class Dataset(object): else: return [result.tuple for result in _results] - def __setitem__(self, key, value): self._validate(value) self._data[key] = Row(value) @@ -695,6 +694,12 @@ class Dataset(object): self.rpush_col(col, header) + def get_col(self, index): + """Returns the column from the :class:`Dataset` at the given index.""" + + return [row[index] for row in self._data] + + # ---- # Misc # ---- diff --git a/test_tablib.py b/test_tablib.py index 5fdf65a..48990a7 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -113,6 +113,22 @@ class TablibTestCase(unittest.TestCase): [self.john[2], self.george[2], self.tom[2]]) + def test_get_col(self): + """Verify getting columns by index""" + + self.assertEqual( + self.founders.get_col(self.headers.index('first_name')), + [self.john[0], self.george[0], self.tom[0]]) + + self.assertEqual( + self.founders.get_col(self.headers.index('last_name')), + [self.john[1], self.george[1], self.tom[1]]) + + self.assertEqual( + self.founders.get_col(self.headers.index('gpa')), + [self.john[2], self.george[2], self.tom[2]]) + + def test_data_slicing(self): """Verify slicing by data.""" From 2c5a9af76efd93f49eb0757f8c262dbb87d5be43 Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Thu, 14 Jul 2011 09:36:35 +0200 Subject: [PATCH 09/96] Fix pickling (again). Unit tests still pass. 
--- tablib/core.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index 7e78b56..eeb319d 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -57,7 +57,14 @@ class Row(object): del self._row[i] def __getstate__(self): - return {'slot': [getattr(self, slot) for slot in self.__slots__]} + + slots = dict() + + for slot in self.__slots__: + attribute = getattr(self, slot) + slots[slot] = attribute + + return slots def __setstate__(self, state): for (k, v) in list(state.items()): setattr(self, k, v) From 2f8083bda6d78246d41d4cf26f19c5bb447de30f Mon Sep 17 00:00:00 2001 From: Luca Beltrame Date: Thu, 14 Jul 2011 10:28:12 +0200 Subject: [PATCH 10/96] Fix also __slots__ to ensure proper unpickling --- tablib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index eeb319d..4e2d5d6 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -29,7 +29,7 @@ __docformat__ = 'restructuredtext' class Row(object): """Internal Row object. 
Mainly used for filtering.""" - __slots__ = ['tuple', '_row', 'tags'] + __slots__ = ['_row', 'tags'] def __init__(self, row=list(), tags=list()): self._row = list(row) From eaa2b9b8ea778e7fbe1fe5c659fe1b267d52d4b1 Mon Sep 17 00:00:00 2001 From: Greg Thornton Date: Thu, 14 Jul 2011 13:08:06 -0500 Subject: [PATCH 11/96] Added XLS import support --- tablib/compat.py | 1 + tablib/formats/_xls.py | 36 +- tablib/packages/xlrd/__init__.py | 1720 ++++++++++++++ tablib/packages/xlrd/biffh.py | 639 +++++ tablib/packages/xlrd/compdoc.py | 358 +++ tablib/packages/xlrd/doc/compdoc.html | 69 + tablib/packages/xlrd/doc/xlrd.html | 1845 +++++++++++++++ tablib/packages/xlrd/examples/namesdemo.xls | Bin 0 -> 22528 bytes .../packages/xlrd/examples/xlrdnameAPIdemo.py | 178 ++ tablib/packages/xlrd/formatting.py | 1256 ++++++++++ tablib/packages/xlrd/formula.py | 2092 +++++++++++++++++ tablib/packages/xlrd/licences.py | 77 + tablib/packages/xlrd/sheet.py | 1768 ++++++++++++++ tablib/packages/xlrd/timemachine.py | 44 + tablib/packages/xlrd/xldate.py | 171 ++ test_tablib.py | 25 + 16 files changed, 10277 insertions(+), 2 deletions(-) create mode 100644 tablib/packages/xlrd/__init__.py create mode 100644 tablib/packages/xlrd/biffh.py create mode 100644 tablib/packages/xlrd/compdoc.py create mode 100644 tablib/packages/xlrd/doc/compdoc.html create mode 100644 tablib/packages/xlrd/doc/xlrd.html create mode 100644 tablib/packages/xlrd/examples/namesdemo.xls create mode 100644 tablib/packages/xlrd/examples/xlrdnameAPIdemo.py create mode 100644 tablib/packages/xlrd/formatting.py create mode 100644 tablib/packages/xlrd/formula.py create mode 100644 tablib/packages/xlrd/licences.py create mode 100644 tablib/packages/xlrd/sheet.py create mode 100644 tablib/packages/xlrd/timemachine.py create mode 100644 tablib/packages/xlrd/xldate.py diff --git a/tablib/compat.py b/tablib/compat.py index 0881369..f6bcf8d 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -39,6 +39,7 @@ else: from cStringIO 
import StringIO as BytesIO from cStringIO import StringIO import tablib.packages.xlwt as xlwt + import tablib.packages.xlrd as xlrd from tablib.packages import markup from itertools import ifilter from tablib.packages import openpyxl diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 48dcc0b..1282a43 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -5,8 +5,8 @@ import sys -from tablib.compat import BytesIO, xlwt - +from tablib.compat import BytesIO, xlwt, xlrd +import tablib title = 'xls' extentions = ('xls',) @@ -16,6 +16,38 @@ wrap = xlwt.easyxf("alignment: wrap on") bold = xlwt.easyxf("font: bold on") +def import_set(dset, in_stream, headers=True): + """Returns dataset from XLS stream.""" + + dset.wipe() + + wb = xlrd.open_workbook(file_contents=in_stream) + ws = wb.sheet_by_index(0) + + for i in range(ws.nrows): + if (i == 0) and (headers): + dset.headers = ws.row_values(i) + else: + dset.append(ws.row_values(i)) + + +def import_book(dbook, in_stream, headers=True): + """Returns databook from XLS stream.""" + + dbook.wipe() + + wb = xlrd.open_workbook(file_contents=in_stream) + for ws in wb.sheets(): + data = tablib.Dataset() + data.title = ws.name + for i in range(ws.nrows): + if (i == 0) and (headers): + data.headers = ws.row_values(i) + else: + data.append(ws.row_values(i)) + dbook.add_sheet(data) + + def export_set(dataset): """Returns XLS representation of Dataset.""" diff --git a/tablib/packages/xlrd/__init__.py b/tablib/packages/xlrd/__init__.py new file mode 100644 index 0000000..9097d9d --- /dev/null +++ b/tablib/packages/xlrd/__init__.py @@ -0,0 +1,1720 @@ +# -*- coding: cp1252 -*- + +__VERSION__ = "0.7.1" # 2009-05-31 + +#

Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under a +# BSD-style licence.

+ +import licences + +## +#

A Python module for extracting data from MS Excel ™ spreadsheet files. +#

+# Version 0.7.1 -- 2009-05-31 +#

+# +#

General information

+# +#

Acknowledgements

+# +#

+# Development of this module would not have been possible without the document +# "OpenOffice.org's Documentation of the Microsoft Excel File Format" +# ("OOo docs" for short). +# The latest version is available from OpenOffice.org in +# PDF format +# and +# ODT format. +# Small portions of the OOo docs are reproduced in this +# document. A study of the OOo docs is recommended for those who wish a +# deeper understanding of the Excel file layout than the xlrd docs can provide. +#

+# +#

Backporting to Python 2.1 was partially funded by +# +# Journyx - provider of timesheet and project accounting solutions. +# +#

+# +#

Provision of formatting information in version 0.6.1 was funded by +# +# Simplistix Ltd. +# +#

+# +#

Unicode

+# +#

This module presents all text strings as Python unicode objects. +# From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode. +# Older files (Excel 95 and earlier) don't keep strings in Unicode; +# a CODEPAGE record provides a codepage number (for example, 1252) which is +# used by xlrd to derive the encoding (for the same example: "cp1252") which is +# used to translate to Unicode.

+# +#

If the CODEPAGE record is missing (possible if the file was created +# by third-party software), xlrd will assume that the encoding is ascii, and keep going. +# If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and +# you will need to determine the encoding yourself, and tell xlrd: +#

+#     book = xlrd.open_workbook(..., encoding_override="cp1252")
+# 

+#

If the CODEPAGE record exists but is wrong (for example, the codepage +# number is 1251, but the strings are actually encoded in koi8_r), +# it can be overridden using the same mechanism. +# The supplied runxlrd.py has a corresponding command-line argument, which +# may be used for experimentation: +#

+#     runxlrd.py -e koi8_r 3rows myfile.xls
+# 

+#

The first place to look for an encoding ("codec name") is +# +# the Python documentation. +#

+#
+# +#

Dates in Excel spreadsheets

+# +#

In reality, there are no such things. What you have are floating point +# numbers and pious hope. +# There are several problems with Excel dates:

+# +#

(1) Dates are not stored as a separate data type; they are stored as +# floating point numbers and you have to rely on +# (a) the "number format" applied to them in Excel and/or +# (b) knowing which cells are supposed to have dates in them. +# This module helps with (a) by inspecting the +# format that has been applied to each number cell; +# if it appears to be a date format, the cell +# is classified as a date rather than a number. Feedback on this feature, +# especially from non-English-speaking locales, would be appreciated.

+# +#

(2) Excel for Windows stores dates by default as the number of +# days (or fraction thereof) since 1899-12-31T00:00:00. Excel for +# Macintosh uses a default start date of 1904-01-01T00:00:00. The date +# system can be changed in Excel on a per-workbook basis (for example: +# Tools -> Options -> Calculation, tick the "1904 date system" box). +# This is of course a bad idea if there are already dates in the +# workbook. There is no good reason to change it even if there are no +# dates in the workbook. Which date system is in use is recorded in the +# workbook. A workbook transported from Windows to Macintosh (or vice +# versa) will work correctly with the host Excel. When using this +# module's xldate_as_tuple function to convert numbers from a workbook, +# you must use the datemode attribute of the Book object. If you guess, +# or make a judgement depending on where you believe the workbook was +# created, you run the risk of being 1462 days out of kilter.

+# +#

Reference: +# http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162

+# +# +#

(3) The Excel implementation of the Windows-default 1900-based date system works on the +# incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29, +# which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the +# result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc +# program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored. +# Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.

+# +#

Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326

+# +#

(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero. +# Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00), +# independent of the calendar, +# or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)? +# The xldate_* functions in this module +# take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both +# date systems. This is consistent with more recent Microsoft documentation +# (for example, the help file for Excel 2002 which says that the first day +# in the 1904 date system is 1904-01-02). +# +#

(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file, +# in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive), +# Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)." +# This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era, +# means that DATE(1899, 12, 31) is interpreted as 3799-12-31.

+# +#

For further information, please refer to the documentation for the xldate_* functions.

+# +#

Named references, constants, formulas, and macros

+# +#

+# A name is used to refer to a cell, a group of cells, a constant +# value, a formula, or a macro. Usually the scope of a name is global +# across the whole workbook. However it can be local to a worksheet. +# For example, if the sales figures are in different cells in +# different sheets, the user may define the name "Sales" in each +# sheet. There are built-in names, like "Print_Area" and +# "Print_Titles"; these two are naturally local to a sheet. +#

+# To inspect the names with a user interface like MS Excel, OOo Calc, +# or Gnumeric, click on Insert/Names/Define. This will show the global +# names, plus those local to the currently selected sheet. +#

+# A Book object provides two dictionaries (name_map and +# name_and_scope_map) and a list (name_obj_list) which allow various +# ways of accessing the Name objects. There is one Name object for +# each NAME record found in the workbook. Name objects have many +# attributes, several of which are relevant only when obj.macro is 1. +#

+# In the examples directory you will find namesdemo.xls which +# showcases the many different ways that names can be used, and +# xlrdnameAPIdemo.py which offers 3 different queries for inspecting +# the names in your files, and shows how to extract whatever a name is +# referring to. There is currently one "convenience method", +# Name.cell(), which extracts the value in the case where the name +# refers to a single cell. More convenience methods are planned. The +# source code for Name.cell (in __init__.py) is an extra source of +# information on how the Name attributes hang together. +#

+# +#

Name information is not extracted from files older than +# Excel 5.0 (Book.biff_version < 50)

+# +#

Formatting

+# +#

Introduction

+# +#

This collection of features, new in xlrd version 0.6.1, is intended +# to provide the information needed to (1) display/render spreadsheet contents +# (say) on a screen or in a PDF file, and (2) copy spreadsheet data to another +# file without losing the ability to display/render it.

+# +#

The Palette; Colour Indexes

+# +#

A colour is represented in Excel as a (red, green, blue) ("RGB") tuple +# with each component in range(256). However it is not possible to access an +# unlimited number of colours; each spreadsheet is limited to a palette of 64 different +# colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index +# ("colour index") into this palette. +# +# Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue, +# yellow, magenta, and cyan.

+# +# The remaining colours in the palette (8 to 63 in Excel 5.0 and later) +# can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette +# of 7 rows of 8 colours. The last two rows are reserved for use in charts.
+# The correspondence between this grid and the assigned +# colour indexes is NOT left-to-right top-to-bottom.
+# Indexes 8 to 15 correspond to changeable +# parallels of the 8 fixed colours -- for example, index 7 is forever cyan; +# index 15 starts off being cyan but can be changed by the user.
+# +# The default colour for each index depends on the file version; tables of the defaults +# are available in the source code. If the user changes one or more colours, +# a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable +# indexes.
+# Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer +# to colour index 7; "[COLOR16]...." will produce cyan +# unless the user changes colour index 15 to something else.
+# +#

In addition, there are several "magic" colour indexes used by Excel:
+# 0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines +# (used in XF, CF, and WINDOW2 records)
+# 0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background +# (used in XF and CF records)
+# 0x43: System face colour (dialogue background colour)
+# 0x4D: System window text colour for chart border lines
+# 0x4E: System window background colour for chart areas
+# 0x4F: Automatic colour for chart border lines (seems to be always Black)
+# 0x50: System ToolTip background colour (used in note objects)
+# 0x51: System ToolTip text colour (used in note objects)
+# 0x7FFF: System window text colour for fonts (used in FONT and CF records)
+# Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT +# records.
+# +#

Default Formatting

+# +# Default formatting is applied to all empty cells (those not described by a cell record). +# Firstly row default information (ROW record, Rowinfo class) is used if available. +# Failing that, column default information (COLINFO record, Colinfo class) is used if available. +# As a last resort the worksheet/workbook default cell format will be used; this +# should always be present in an Excel file, +# described by the XF record with the fixed index 15 (0-based). By default, it uses the +# worksheet/workbook default cell style, described by the very first XF record (index 0). +# +#

Formatting features not included in xlrd version 0.6.1

+#
    +#
  • Rich text i.e. strings containing partial bold italic +# and underlined text, change of font inside a string, etc. +# See OOo docs s3.4 and s3.2
  • +#
  • Asian phonetic text (known as "ruby"), used for Japanese furigana. See OOo docs +# s3.4.2 (p15)
  • +#
  • Conditional formatting. See OOo docs +# s5.12, s6.21 (CONDFMT record), s6.16 (CF record)
  • +#
  • Miscellaneous sheet-level and book-level items e.g. printing layout, screen panes.
  • +#
  • Modern Excel file versions don't keep most of the built-in +# "number formats" in the file; Excel loads formats according to the +# user's locale. Currently xlrd's emulation of this is limited to +# a hard-wired table that applies to the US English locale. This may mean +# that currency symbols, date order, thousands separator, decimals separator, etc +# are inappropriate. Note that this does not affect users who are copying XLS +# files, only those who are visually rendering cells.
  • +#
+# +#

Loading worksheets on demand

+# +#

This feature, new in version 0.7.1, is governed by the on_demand argument +# to the open_workbook() function and allows saving memory and time by loading +# only those sheets that the caller is interested in, and releasing sheets +# when no longer required.

+# +#

on_demand=False (default): No change. open_workbook() loads global data +# and all sheets, releases resources no longer required (principally the +# str or mmap object containing the Workbook stream), and returns.

+# +#

on_demand=True and BIFF version < 5.0: A warning message is emitted, +# on_demand is recorded as False, and the old process is followed.

+# +#

on_demand=True and BIFF version >= 5.0: open_workbook() loads global +# data and returns without releasing resources. At this stage, the only +# information available about sheets is Book.nsheets and Book.sheet_names().

+# +#

Book.sheet_by_name() and Book.sheet_by_index() will load the requested +# sheet if it is not already loaded.

+# +#

Book.sheets() will load all/any unloaded sheets.

+# +#

The caller may save memory by calling +# Book.unload_sheet(sheet_name_or_index) when finished with the sheet. +# This applies irrespective of the state of on_demand.

+# +#

The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx() +# -- except if those required resources have been released (which will +# have happened automatically when on_demand is false). This is the only +# case where an exception will be raised.

+# +#

The caller may query the state of a sheet: +# Book.sheet_loaded(sheet_name_or_index) -> a bool

+# +## + +# 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo +# 2008-11-23 SJM Support dumping FILEPASS and EXTERNNAME records; extra info from SUPBOOK records +# 2008-11-23 SJM colname utility function now supports more than 256 columns +# 2008-04-24 SJM Recovery code for file with out-of-order/missing/wrong CODEPAGE record needed to be called for EXTERNSHEET/BOUNDSHEET/NAME/SHEETHDR records. +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-03 SJM Minor tweaks for IronPython support +# 2008-02-02 SJM Previous change stopped dump() and count_records() ... fixed +# 2007-12-25 SJM Decouple Book initialisation & loading -- to allow for multiple loaders. +# 2007-12-20 SJM Better error message for unsupported file format. +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-11-20 SJM Wasn't handling EXTERNSHEET record that needed CONTINUE record(s) +# 2007-07-07 SJM Version changed to 0.7.0 (alpha 1) +# 2007-07-07 SJM Logfile arg wasn't being passed from open_workbook to compdoc.CompDoc +# 2007-05-21 SJM If no CODEPAGE record in pre-8.0 file, assume ascii and keep going. +# 2007-04-22 SJM Removed antique undocumented Book.get_name_dict method. + +from timemachine import * +from biffh import * +from struct import unpack +import sys +import time +import sheet +import compdoc +from xldate import xldate_as_tuple, XLDateError +from formula import * +import formatting +if sys.version.startswith("IronPython"): + # print >> sys.stderr, "...importing encodings" + import encodings + +empty_cell = sheet.empty_cell # for exposure to the world ... 
DEBUG = 0

USE_FANCY_CD = 1

# When true, open_workbook() switches the cyclic garbage collector off while
# a workbook is being loaded and restores it afterwards.
TOGGLE_GC = 0
import gc
# gc.set_debug(gc.DEBUG_STATS)

try:
    import mmap
    MMAP_AVAILABLE = 1
except ImportError:
    MMAP_AVAILABLE = 0
USE_MMAP = MMAP_AVAILABLE

MY_EOF = 0xF00BAAA # not a 16-bit number

SUPBOOK_UNK, SUPBOOK_INTERNAL, SUPBOOK_EXTERNAL, SUPBOOK_ADDIN, SUPBOOK_DDEOLE = range(5)

SUPPORTED_VERSIONS = (80, 70, 50, 45, 40, 30, 21, 20)

# Maps the name of each built-in defined name to its one-character code.
code_from_builtin_name = {
    u"Consolidate_Area": u"\x00",
    u"Auto_Open":        u"\x01",
    u"Auto_Close":       u"\x02",
    u"Extract":          u"\x03",
    u"Database":         u"\x04",
    u"Criteria":         u"\x05",
    u"Print_Area":       u"\x06",
    u"Print_Titles":     u"\x07",
    u"Recorder":         u"\x08",
    u"Data_Form":        u"\x09",
    u"Auto_Activate":    u"\x0A",
    u"Auto_Deactivate":  u"\x0B",
    u"Sheet_Title":      u"\x0C",
    u"_FilterDatabase":  u"\x0D",
    }
# Reverse lookup: one-character code -> built-in name.
builtin_name_from_code = {}
for _bin, _bic in code_from_builtin_name.items():
    builtin_name_from_code[_bic] = _bin
del _bin, _bic

##
#
# Open a spreadsheet file for data extraction.
#
# @param filename The path to the spreadsheet file to be opened.
#
# @param logfile An open file to which messages and diagnostics are written.
#
# @param verbosity Increases the volume of trace material written to the logfile.
#
# @param pickleable Default is true. In Python 2.4 or earlier, setting to false
# will cause use of array.array objects which save some memory but can't be pickled.
# In Python 2.5, array.arrays are used unconditionally. Note: if you have large files that
# you need to read multiple times, it can be much faster to cPickle.dump() the xlrd.Book object
# once, and use cPickle.load() multiple times.
#
# @param use_mmap Whether to use the mmap module is determined heuristically.
# Use this arg to override the result. Current heuristic: mmap is used if it exists.
#
# @param file_contents ... as a string or an mmap.mmap object or some other behave-alike object.
# If file_contents is supplied, filename will not be used, except (possibly) in messages.
#
# @param encoding_override Used to overcome missing or bad codepage information
# in older-version files. Refer to discussion in the Unicode section above.
# -- New in version 0.6.0
#
# @param formatting_info Governs provision of a reference to an XF (eXtended Format) object
# for each cell in the worksheet.
# Default is False. This is backwards compatible and saves memory.
# "Blank" cells (those with their own formatting information but no data) are treated as empty
# (by ignoring the file's BLANK and MULBLANK records).
# It cuts off any bottom "margin" of rows of empty (and blank) cells and
# any right "margin" of columns of empty (and blank) cells.
# Only cell_value and cell_type are available.
# True provides all cells, including empty and blank cells.
# XF information is available for each cell.
# -- New in version 0.6.1
#
# @param on_demand Governs whether sheets are all loaded initially or when demanded
# by the caller. Please refer back to the section "Loading worksheets on demand" for details.
# For BIFF versions 4.5 and earlier a warning is logged and on_demand is forced to False.
# -- New in version 0.7.1
#
# @return An instance of the Book class.
#
# @throws XLRDError The BIFF version cannot be determined or is not one of
# SUPPORTED_VERSIONS.
#
# If TOGGLE_GC is set, garbage collection is disabled while loading and is
# re-enabled on every exit path, including when an exception propagates.

def open_workbook(filename=None,
    logfile=sys.stdout, verbosity=0, pickleable=True, use_mmap=USE_MMAP,
    file_contents=None,
    encoding_override=None,
    formatting_info=False, on_demand=False,
    ):
    t0 = time.clock()
    # Record whether *we* turned the collector off, so that it is switched
    # back on even if loading fails part-way through.
    orig_gc_enabled = False
    if TOGGLE_GC:
        orig_gc_enabled = gc.isenabled()
        if orig_gc_enabled:
            gc.disable()
    try:
        bk = Book()
        # Stage 1: get the workbook stream into memory.
        bk.biff2_8_load(
            filename=filename, file_contents=file_contents,
            logfile=logfile, verbosity=verbosity, pickleable=pickleable, use_mmap=use_mmap,
            encoding_override=encoding_override,
            formatting_info=formatting_info,
            on_demand=on_demand,
            )
        t1 = time.clock()
        bk.load_time_stage_1 = t1 - t0
        # Stage 2: parse the stream.
        biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
        if not biff_version:
            raise XLRDError("Can't determine file's BIFF version")
        if biff_version not in SUPPORTED_VERSIONS:
            raise XLRDError(
                "BIFF version %s is not supported"
                % biff_text_from_num[biff_version]
                )
        bk.biff_version = biff_version
        if biff_version <= 40:
            # no workbook globals, only 1 worksheet
            if on_demand:
                fprintf(bk.logfile,
                    "*** WARNING: on_demand is not supported for this Excel version.\n"
                    "*** Setting on_demand to False.\n")
                bk.on_demand = on_demand = False
            bk.fake_globals_get_sheet()
        elif biff_version == 45:
            # worksheet(s) embedded in global stream
            bk.parse_globals()
            if on_demand:
                fprintf(bk.logfile, "*** WARNING: on_demand is not supported for this Excel version.\n"
                                    "*** Setting on_demand to False.\n")
                bk.on_demand = on_demand = False
        else:
            bk.parse_globals()
            # One placeholder per sheet; filled in by get_sheets()/get_sheet().
            bk._sheet_list = [None for sh in bk._sheet_names]
            if not on_demand:
                bk.get_sheets()
        bk.nsheets = len(bk._sheet_list)
        if biff_version == 45 and bk.nsheets > 1:
            fprintf(bk.logfile,
                "*** WARNING: Excel 4.0 workbook (.XLW) file contains %d worksheets.\n"
                "*** Book-level data will be that of the last worksheet.\n",
                bk.nsheets
                )
        if not on_demand:
            bk.release_resources()
    finally:
        if orig_gc_enabled:
            gc.enable()
    t2 = time.clock()
    bk.load_time_stage_2 = t2 - t1
    return bk

##
# For debugging: dump the file's BIFF records in char & hex.
# @param filename The path to the file to be dumped.
# @param outfile An open file, to which the dump is written.
# @param unnumbered If true, omit offsets (for meaningful diffs).

def dump(filename, outfile=sys.stdout, unnumbered=False):
    bk = Book()
    bk.biff2_8_load(filename=filename, logfile=outfile, )
    biff_dump(bk.mem, bk.base, bk.stream_len, 0, outfile, unnumbered)

##
# For debugging and analysis: summarise the file's BIFF records.
# I.e. produce a sorted file of (record_name, count).
# @param filename The path to the file to be summarised.
# @param outfile An open file, to which the summary is written.

def count_records(filename, outfile=sys.stdout):
    bk = Book()
    bk.biff2_8_load(filename=filename, logfile=outfile, )
    biff_count_records(bk.mem, bk.base, bk.stream_len, outfile)

##
# Information relating to a named reference, formula, macro, etc.
#
-- New in version 0.6.0 +#
# -- Name information is not extracted from files older than
# Excel 5.0 (Book.biff_version < 50)

class Name(BaseObject):

    _repr_these = ['stack']
    book = None # parent

    ##
    # Visibility flag: 0 = Visible; 1 = Hidden
    hidden = 0

    ##
    # Macro kind: 0 = Command macro; 1 = Function macro.
    # Relevant only if macro == 1
    func = 0

    ##
    # Macro language: 0 = Sheet macro; 1 = VisualBasic macro.
    # Relevant only if macro == 1
    vbasic = 0

    ##
    # 0 = Standard name; 1 = Macro name
    macro = 0

    ##
    # 0 = Simple formula; 1 = Complex formula (array formula or user defined)
    # No examples have been sighted.
    complex = 0

    ##
    # 0 = User-defined name; 1 = Built-in name
    # (common examples: Print_Area, Print_Titles; see OOo docs for full list)
    builtin = 0

    ##
    # Function group. Relevant only if macro == 1; see OOo docs for values.
    funcgroup = 0

    ##
    # 0 = Formula definition; 1 = Binary data
    # No examples have been sighted.
    binary = 0

    ##
    # The index of this object in book.name_obj_list
    name_index = 0

    ##
    # A Unicode string. If builtin, decoded as per OOo docs.
    name = u""

    ##
    # An 8-bit string.
    raw_formula = ""

    ##
    # -1: The name is global (visible in all calculation sheets).
    # -2: The name belongs to a macro sheet or VBA sheet.
    # -3: The name is invalid.
    # 0 <= scope < book.nsheets: The name is local to the sheet whose index is scope.
    scope = -1

    ##
    # The result of evaluating the formula, if any.
    # If no formula, or evaluation of the formula encountered problems,
    # the result is None. Otherwise the result is a single instance of the
    # Operand class.
    #
    result = None

    ##
    # Convenience method for the common case in which the name refers to
    # exactly one cell.
    # @return An instance of the Cell class.
    # @throws XLRDError The name is not a constant absolute reference
    # to a single cell.
    def cell(self):
        operand = self.result
        if operand:
            # result should be an instance of the Operand class
            kind, refs = operand.kind, operand.value
            if kind == oREF and len(refs) == 1:
                ref = refs[0]
                # Exactly one sheet, one row and one column are spanned.
                if (0 <= ref.shtxlo == ref.shtxhi - 1
                    and ref.rowxlo == ref.rowxhi - 1
                    and ref.colxlo == ref.colxhi - 1):
                    sheet = self.book.sheet_by_index(ref.shtxlo)
                    return sheet.cell(ref.rowxlo, ref.colxlo)
        # Anything else: log the object for diagnosis, then complain.
        self.dump(self.book.logfile,
            header="=== Dump of Name object ===",
            footer="======= End of dump =======",
            )
        raise XLRDError("Not a constant absolute reference to a single cell")

    ##
    # Convenience method for the case in which the name refers to one
    # rectangular area in one worksheet.
    # @param clipped If true (the default), the returned rectangle is clipped
    # to fit in (0, sheet.nrows, 0, sheet.ncols) -- it is guaranteed that
    # 0 <= rowxlo <= rowxhi <= sheet.nrows and that the number of usable rows
    # in the area (which may be zero) is rowxhi - rowxlo; likewise for columns.
    # @return a tuple (sheet_object, rowxlo, rowxhi, colxlo, colxhi).
    # @throws XLRDError The name is not a constant absolute reference
    # to a single area in a single sheet.
    def area2d(self, clipped=True):
        operand = self.result
        if operand:
            # result should be an instance of the Operand class
            kind, refs = operand.kind, operand.value
            if kind == oREF and len(refs) == 1: # only 1 reference
                ref = refs[0]
                if 0 <= ref.shtxlo == ref.shtxhi - 1: # only 1 usable sheet
                    sheet = self.book.sheet_by_index(ref.shtxlo)
                    if clipped:
                        # Pull each bound back inside the sheet's used area.
                        rowxlo = min(ref.rowxlo, sheet.nrows)
                        rowxhi = max(rowxlo, min(ref.rowxhi, sheet.nrows))
                        colxlo = min(ref.colxlo, sheet.ncols)
                        colxhi = max(colxlo, min(ref.colxhi, sheet.ncols))
                        assert 0 <= rowxlo <= rowxhi <= sheet.nrows
                        assert 0 <= colxlo <= colxhi <= sheet.ncols
                        return sheet, rowxlo, rowxhi, colxlo, colxhi
                    return sheet, ref.rowxlo, ref.rowxhi, ref.colxlo, ref.colxhi
        # Anything else: log the object for diagnosis, then complain.
        self.dump(self.book.logfile,
            header="=== Dump of Name object ===",
            footer="======= End of dump =======",
            )
        raise XLRDError("Not a constant absolute reference to a single area in a single sheet")

##
# Contents of a "workbook".
#

WARNING: You don't call this class yourself. You use the Book object that +# was returned when you called xlrd.open_workbook("myfile.xls").

class Book(BaseObject):

    ##
    # The number of worksheets present in the workbook file.
    # This information is available even when no sheets have yet been loaded.
    nsheets = 0

    ##
    # Which date system was in force when this file was last saved.
    # 0 => 1900 system (the Excel for Windows default).
    # 1 => 1904 system (the Excel for Macintosh default).
    datemode = 0 # In case it's not specified in the file.

    ##
    # Version of BIFF (Binary Interchange File Format) used to create the file.
    # Latest is 8.0 (represented here as 80), introduced with Excel 97.
    # Earliest supported by this module: 2.0 (represented as 20).
    biff_version = 0

    ##
    # List containing a Name object for each NAME record in the workbook.
    # -- New in version 0.6.0
    name_obj_list = []

    ##
    # An integer denoting the character set used for strings in this file.
    # For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE.
    # For earlier versions, this is used to derive the appropriate Python encoding
    # to be used to convert to Unicode.
    # Examples: 1252 -> 'cp1252', 10000 -> 'mac_roman'
    codepage = None

    ##
    # The encoding that was derived from the codepage.
    encoding = None

    ##
    # A tuple containing the (telephone system) country code for:
    # [0]: the user-interface setting when the file was created.
    # [1]: the regional settings.
    # Example: (1, 61) meaning (USA, Australia).
    # This information may give a clue to the correct encoding for an unknown codepage.
    # For a long list of observed values, refer to the OpenOffice.org documentation for
    # the COUNTRY record.
    countries = (0, 0)

    ##
    # What (if anything) is recorded as the name of the last user to save the file.
    user_name = u''

    ##
    # A list of Font class instances, each corresponding to a FONT record.
    # -- New in version 0.6.1
    font_list = []

    ##
    # A list of XF class instances, each corresponding to an XF record.
    # -- New in version 0.6.1
    xf_list = []

    ##
    # A list of Format objects, each corresponding to a FORMAT record, in
    # the order that they appear in the input file.
    # It does not contain builtin formats.
    # If you are creating an output file using (for example) pyExcelerator,
    # use this list.
    # The collection to be used for all visual rendering purposes is format_map.
    # -- New in version 0.6.1
    format_list = []

    ##
    # The mapping from XF.format_key to Format object.
    # -- New in version 0.6.1
    format_map = {}

    ##
    # This provides access via name to the extended format information for
    # both built-in styles and user-defined styles.
    # It maps name to (built_in, xf_index), where:
    # name is either the name of a user-defined style,
    # or the name of one of the built-in styles. Known built-in names are
    # Normal, RowLevel_1 to RowLevel_7,
    # ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]",
    # "Currency [0]", Hyperlink, and "Followed Hyperlink".
    # built_in 1 = built-in style, 0 = user-defined
    # xf_index is an index into Book.xf_list.
    # References: OOo docs s6.99 (STYLE record); Excel UI Format/Style
    # -- New in version 0.6.1
    style_name_map = {}

    ##
    # This provides definitions for colour indexes. Please refer to the
    # above section "The Palette; Colour Indexes" for an explanation
    # of how colours are represented in Excel.
    # Colour indexes into the palette map into (red, green, blue) tuples.
    # "Magic" indexes e.g. 0x7FFF map to None.
    # colour_map is what you need if you want to render cells on screen or in a PDF
    # file. If you are writing an output XLS file, use palette_record.
    # -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
    colour_map = {}

    ##
    # If the user has changed any of the colours in the standard palette, the XLS
    # file will contain a PALETTE record with 56 (16 for Excel 4.0 and earlier)
    # RGB values in it, and this list will be e.g. [(r0, g0, b0), ..., (r55, g55, b55)].
    # Otherwise this list will be empty. This is what you need if you are
    # writing an output XLS file. If you want to render cells on screen or in a PDF
    # file, use colour_map.
    # -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
    palette_record = []

    ##
    # Time in seconds to extract the XLS image as a contiguous string (or mmap equivalent).
    load_time_stage_1 = -1.0

    ##
    # Time in seconds to parse the data from the contiguous string (or mmap equivalent).
    load_time_stage_2 = -1.0

    ##
    # @return A list of all sheets in the book.
    # All sheets not already loaded will be loaded.
    def sheets(self):
        for sheetx in xrange(self.nsheets):
            if self._sheet_list[sheetx]:
                continue # already loaded
            self.get_sheet(sheetx)
        # Hand back a copy so that callers cannot disturb the internal list.
        return self._sheet_list[:]

    ##
    # @param sheetx Sheet index in range(nsheets)
    # @return An object of the Sheet class
    def sheet_by_index(self, sheetx):
        sheet = self._sheet_list[sheetx]
        if sheet:
            return sheet
        # Not loaded yet: load it now.
        return self.get_sheet(sheetx)

    ##
    # @param sheet_name Name of sheet required
    # @return An object of the Sheet class
    # @throws XLRDError There is no sheet with that name.
    def sheet_by_name(self, sheet_name):
        try:
            position = self._sheet_names.index(sheet_name)
        except ValueError:
            raise XLRDError('No sheet named <%r>' % sheet_name)
        else:
            return self.sheet_by_index(position)

    ##
    # @return A list of the names of all the worksheets in the workbook file.
    # This information is available even when no sheets have yet been loaded.
    def sheet_names(self):
        # A copy, so that callers cannot disturb the internal list.
        return self._sheet_names[:]

    ##
    # @param sheet_name_or_index Name or index of sheet enquired upon
    # @return true if sheet is loaded, false otherwise
    # @throws XLRDError A name was given and there is no sheet with that name.
    # -- New in version 0.7.1
    def sheet_loaded(self, sheet_name_or_index):
        position = sheet_name_or_index
        # using type(1) because int won't work with Python 2.1
        if not isinstance(sheet_name_or_index, type(1)):
            try:
                position = self._sheet_names.index(sheet_name_or_index)
            except ValueError:
                raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
        if self._sheet_list[position]:
            return True
        return False

    ##
    # @param sheet_name_or_index Name or index of sheet to be unloaded.
    # @throws XLRDError A name was given and there is no sheet with that name.
    # -- New in version 0.7.1
    def unload_sheet(self, sheet_name_or_index):
        position = sheet_name_or_index
        # using type(1) because int won't work with Python 2.1
        if not isinstance(sheet_name_or_index, type(1)):
            try:
                position = self._sheet_names.index(sheet_name_or_index)
            except ValueError:
                raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
        # Drop the reference; the slot is kept and holds None.
        self._sheet_list[position] = None

    ##
    # A mapping from (lower_case_name, scope) to a single Name object.
    # -- New in version 0.6.0
    name_and_scope_map = {}

    ##
    # A mapping from lower_case_name to a list of Name objects. The list is
    # sorted in scope order. Typically there will be one item (of global scope)
    # in the list.
    #
-- New in version 0.6.0 + name_map = {} + + def __init__(self): + self._sheet_list = [] + self._sheet_names = [] + self._sheet_visibility = [] # from BOUNDSHEET record + self.nsheets = 0 + self._sh_abs_posn = [] # sheet's absolute position in the stream + self._sharedstrings = [] + self.raw_user_name = False + self._sheethdr_count = 0 # BIFF 4W only + self.builtinfmtcount = -1 # unknown as yet. BIFF 3, 4S, 4W + self.initialise_format_info() + self._all_sheets_count = 0 # includes macro & VBA sheets + self._supbook_count = 0 + self._supbook_locals_inx = None + self._supbook_addins_inx = None + self._all_sheets_map = [] # maps an all_sheets index to a calc-sheets index (or -1) + self._externsheet_info = [] + self._externsheet_type_b57 = [] + self._extnsht_name_from_num = {} + self._sheet_num_from_name = {} + self._extnsht_count = 0 + self._supbook_types = [] + self._resources_released = 0 + self.addin_func_names = [] + self.name_obj_list = [] + self.colour_map = {} + self.palette_record = [] + self.xf_list = [] + self.style_name_map = {} + + def biff2_8_load(self, filename=None, file_contents=None, + logfile=sys.stdout, verbosity=0, pickleable=True, use_mmap=USE_MMAP, + encoding_override=None, + formatting_info=False, + on_demand=False, + ): + # DEBUG = 0 + self.logfile = logfile + self.verbosity = verbosity + self.pickleable = pickleable + self.use_mmap = use_mmap and MMAP_AVAILABLE + self.encoding_override = encoding_override + self.formatting_info = formatting_info + self.on_demand = on_demand + + need_close_filestr = 0 + if not file_contents: + if python_version < (2, 2) and self.use_mmap: + # need to open for update + open_mode = "r+b" + else: + open_mode = "rb" + retry = False + try: + f = open(filename, open_mode) + except IOError: + e, v = sys.exc_info()[:2] + if open_mode == "r+b" \ + and (v.errno == 13 or v.strerror == "Permission denied"): + # Maybe the file is read-only + retry = True + self.use_mmap = False + else: + raise + if retry: + f = 
open(filename, "rb") + if self.use_mmap: + f.seek(0, 2) # EOF + size = f.tell() + f.seek(0, 0) # BOF + if python_version < (2, 2): + filestr = mmap.mmap(f.fileno(), size) + else: + filestr = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ) + need_close_filestr = 1 + self.stream_len = size + else: + filestr = f.read() + self.stream_len = len(filestr) + f.close() + else: + filestr = file_contents + self.stream_len = len(file_contents) + + self.base = 0 + if filestr[:8] != compdoc.SIGNATURE: + # got this one at the antique store + self.mem = filestr + else: + cd = compdoc.CompDoc(filestr, logfile=self.logfile) + if USE_FANCY_CD: + for qname in [u'Workbook', u'Book']: + self.mem, self.base, self.stream_len = cd.locate_named_stream(qname) + if self.mem: break + else: + raise XLRDError("Can't find workbook in OLE2 compound document") + else: + for qname in [u'Workbook', u'Book']: + self.mem = cd.get_named_stream(qname) + if self.mem: break + else: + raise XLRDError("Can't find workbook in OLE2 compound document") + self.stream_len = len(self.mem) + del cd + if self.mem is not filestr: + if need_close_filestr: + filestr.close() + del filestr + self._position = self.base + if DEBUG: + print >> self.logfile, "mem: %s, base: %d, len: %d" % (type(self.mem), self.base, self.stream_len) + + def initialise_format_info(self): + # needs to be done once per sheet for BIFF 4W :-( + self.format_map = {} + self.format_list = [] + self.xfcount = 0 + self.actualfmtcount = 0 # number of FORMAT records seen so far + self._xf_index_to_xl_type_map = {} + self._xf_epilogue_done = 0 + self.xf_list = [] + self.font_list = [] + + def release_resources(self): + self._resources_released = 1 + del self.mem + del self._sharedstrings + + def get2bytes(self): + pos = self._position + buff_two = self.mem[pos:pos+2] + lenbuff = len(buff_two) + self._position += lenbuff + if lenbuff < 2: + return MY_EOF + lo, hi = buff_two + return (ord(hi) << 8) | ord(lo) + + def get_record_parts(self): + pos = 
self._position + mem = self.mem + code, length = unpack('> self.logfile, "GET_SHEETS:", self._sheet_names, self._sh_abs_posn + for sheetno in xrange(len(self._sheet_names)): + if DEBUG: print >> self.logfile, "GET_SHEETS: sheetno =", sheetno, self._sheet_names, self._sh_abs_posn + self.get_sheet(sheetno) + + def fake_globals_get_sheet(self): # for BIFF 4.0 and earlier + formatting.initialise_book(self) + fake_sheet_name = u'Sheet 1' + self._sheet_names = [fake_sheet_name] + self._sh_abs_posn = [0] + self._sheet_visibility = [0] # one sheet, visible + self._sheet_list.append(None) # get_sheet updates _sheet_list but needs a None beforehand + self.get_sheets() + + def handle_boundsheet(self, data): + # DEBUG = 1 + bv = self.biff_version + self.derive_encoding() + if DEBUG: + fprintf(self.logfile, "BOUNDSHEET: bv=%d data %r\n", bv, data); + if bv == 45: # BIFF4W + #### Not documented in OOo docs ... + # In fact, the *only* data is the name of the sheet. + sheet_name = unpack_string(data, 0, self.encoding, lenlen=1) + visibility = 0 + sheet_type = XL_BOUNDSHEET_WORKSHEET # guess, patch later + if len(self._sh_abs_posn) == 0: + abs_posn = self._sheetsoffset + self.base + # Note (a) this won't be used + # (b) it's the position of the SHEETHDR record + # (c) add 11 to get to the worksheet BOF record + else: + abs_posn = -1 # unknown + else: + offset, visibility, sheet_type = unpack('= 2: + fprintf(self.logfile, + "BOUNDSHEET: inx=%d vis=%r sheet_name=%r abs_posn=%d sheet_type=0x%02x\n", + self._all_sheets_count, visibility, sheet_name, abs_posn, sheet_type) + self._all_sheets_count += 1 + if sheet_type != XL_BOUNDSHEET_WORKSHEET: + self._all_sheets_map.append(-1) + descr = { + 1: 'Macro sheet', + 2: 'Chart', + 6: 'Visual Basic module', + }.get(sheet_type, 'UNKNOWN') + + fprintf(self.logfile, + "NOTE *** Ignoring non-worksheet data named %r (type 0x%02x = %s)\n", + sheet_name, sheet_type, descr) + else: + snum = len(self._sheet_names) + self._all_sheets_map.append(snum) + 
self._sheet_names.append(sheet_name) + self._sh_abs_posn.append(abs_posn) + self._sheet_visibility.append(visibility) + self._sheet_num_from_name[sheet_name] = snum + + def handle_builtinfmtcount(self, data): + ### N.B. This count appears to be utterly useless. + # DEBUG = 1 + builtinfmtcount = unpack('= 2: + fprintf(self.logfile, "*** No CODEPAGE record; assuming 1200 (utf_16_le)\n") + else: + codepage = self.codepage + if encoding_from_codepage.has_key(codepage): + encoding = encoding_from_codepage[codepage] + elif 300 <= codepage <= 1999: + encoding = 'cp' + str(codepage) + else: + encoding = 'unknown_codepage_' + str(codepage) + if DEBUG or (self.verbosity and encoding != self.encoding) : + fprintf(self.logfile, "CODEPAGE: codepage %r -> encoding %r\n", codepage, encoding) + self.encoding = encoding + if self.codepage != 1200: # utf_16_le + # If we don't have a codec that can decode ASCII into Unicode, + # we're well & truly stuffed -- let the punter know ASAP. + try: + _unused = unicode('trial', self.encoding) + except: + ei = sys.exc_info()[:2] + fprintf(self.logfile, + "ERROR *** codepage %r -> encoding %r -> %s: %s\n", + self.codepage, self.encoding, ei[0].__name__.split(".")[-1], ei[1]) + raise + if self.raw_user_name: + strg = unpack_string(self.user_name, 0, self.encoding, lenlen=1) + strg = strg.rstrip() + # if DEBUG: + # print "CODEPAGE: user name decoded from %r to %r" % (self.user_name, strg) + self.user_name = strg + self.raw_user_name = False + return self.encoding + + def handle_codepage(self, data): + # DEBUG = 0 + codepage = unpack('> self.logfile, "Countries:", countries + # Note: in BIFF7 and earlier, country record was put (redundantly?) in each worksheet. 
+ assert self.countries == (0, 0) or self.countries == countries + self.countries = countries + + def handle_datemode(self, data): + datemode = unpack('= 2 + if self.biff_version >= 80: + option_flags, other_info =unpack("= 1 + blah2 = DEBUG or self.verbosity >= 2 + if self.biff_version >= 80: + num_refs = unpack("= 2: + logf = self.logfile + fprintf(logf, "FILEPASS:\n") + hex_char_dump(data, 0, len(data), base=0, fout=logf) + if self.biff_version >= 80: + kind1, = unpack('= 2 + bv = self.biff_version + if bv < 50: + return + self.derive_encoding() + # print + # hex_char_dump(data, 0, len(data)) + ( + option_flags, kb_shortcut, name_len, fmla_len, extsht_index, sheet_index, + menu_text_len, description_text_len, help_topic_text_len, status_bar_text_len, + ) = unpack("> nshift) + + macro_flag = " M"[nobj.macro] + if bv < 80: + internal_name, pos = unpack_string_update_pos(data, 14, self.encoding, known_len=name_len) + else: + internal_name, pos = unpack_unicode_update_pos(data, 14, known_len=name_len) + nobj.extn_sheet_num = extsht_index + nobj.excel_sheet_index = sheet_index + nobj.scope = None # patched up in the names_epilogue() method + if blah: + print "NAME[%d]:%s oflags=%d, name_len=%d, fmla_len=%d, extsht_index=%d, sheet_index=%d, name=%r" \ + % (name_index, macro_flag, option_flags, name_len, + fmla_len, extsht_index, sheet_index, internal_name) + name = internal_name + if nobj.builtin: + name = builtin_name_from_code.get(name, "??Unknown??") + if blah: print " builtin: %s" % name + nobj.name = name + nobj.raw_formula = data[pos:] + nobj.basic_formula_len = fmla_len + nobj.evaluated = 0 + if blah: + nobj.dump( + self.logfile, + header="--- handle_name: name[%d] ---" % name_index, + footer="-------------------", + ) + + def names_epilogue(self): + blah = self.verbosity >= 2 + f = self.logfile + if blah: + print >> f, "+++++ names_epilogue +++++" + print >> f, "_all_sheets_map", self._all_sheets_map + print >> f, "_extnsht_name_from_num", 
self._extnsht_name_from_num + print >> f, "_sheet_num_from_name", self._sheet_num_from_name + num_names = len(self.name_obj_list) + for namex in range(num_names): + nobj = self.name_obj_list[namex] + # Convert from excel_sheet_index to scope. + # This is done here because in BIFF7 and earlier, the + # BOUNDSHEET records (from which _all_sheets_map is derived) + # come after the NAME records. + if self.biff_version >= 80: + sheet_index = nobj.excel_sheet_index + if sheet_index == 0: + intl_sheet_index = -1 # global + elif 1 <= sheet_index <= len(self._all_sheets_map): + intl_sheet_index = self._all_sheets_map[sheet_index-1] + if intl_sheet_index == -1: # maps to a macro or VBA sheet + intl_sheet_index = -2 # valid sheet reference but not useful + else: + # huh? + intl_sheet_index = -3 # invalid + elif 50 <= self.biff_version <= 70: + sheet_index = nobj.extn_sheet_num + if sheet_index == 0: + intl_sheet_index = -1 # global + else: + sheet_name = self._extnsht_name_from_num[sheet_index] + intl_sheet_index = self._sheet_num_from_name.get(sheet_name, -2) + nobj.scope = intl_sheet_index + + for namex in range(num_names): + nobj = self.name_obj_list[namex] + # Parse the formula ... 
+ if nobj.macro or nobj.binary: continue + if nobj.evaluated: continue + evaluate_name_formula(self, nobj, namex, blah=blah) + + if self.verbosity >= 2: + print >> f, "---------- name object dump ----------" + for namex in range(num_names): + nobj = self.name_obj_list[namex] + nobj.dump(f, header="--- name[%d] ---" % namex) + print >> f, "--------------------------------------" + # + # Build some dicts for access to the name objects + # + name_and_scope_map = {} # (name.lower(), scope): Name_object + name_map = {} # name.lower() : list of Name_objects (sorted in scope order) + for namex in range(num_names): + nobj = self.name_obj_list[namex] + name_lcase = nobj.name.lower() + key = (name_lcase, nobj.scope) + if name_and_scope_map.has_key(key): + msg = 'Duplicate entry %r in name_and_scope_map' % (key, ) + if 0: + raise XLRDError(msg) + else: + if self.verbosity: + print >> f, msg + name_and_scope_map[key] = nobj + if name_map.has_key(name_lcase): + name_map[name_lcase].append((nobj.scope, nobj)) + else: + name_map[name_lcase] = [(nobj.scope, nobj)] + for key in name_map.keys(): + alist = name_map[key] + alist.sort() + name_map[key] = [x[1] for x in alist] + self.name_and_scope_map = name_and_scope_map + self.name_map = name_map + + def handle_obj(self, data): + # Not doing much handling at all. + # Worrying about embedded (BOF ... EOF) substreams is done elsewhere. 
+ # DEBUG = 1 + obj_type, obj_id = unpack(' handle_obj type=%d id=0x%08x" % (obj_type, obj_id) + + def handle_supbook(self, data): + self._supbook_types.append(None) + blah = DEBUG or self.verbosity >= 2 + if 0: + print "SUPBOOK:" + hex_char_dump(data, 0, len(data)) + num_sheets = unpack("> self.logfile, 'SHEETHDR %d at posn %d: len=%d name=%r' % (sheetno, posn, sheet_len, sheet_name) + self.initialise_format_info() + if DEBUG: print >> self.logfile, 'SHEETHDR: xf epilogue flag is %d' % self._xf_epilogue_done + self._sheet_list.append(None) # get_sheet updates _sheet_list but needs a None beforehand + self.get_sheet(sheetno, update_pos=False) + if DEBUG: print >> self.logfile, 'SHEETHDR: posn after get_sheet() =', self._position + self._position = BOF_posn + sheet_len + + def handle_sheetsoffset(self, data): + # DEBUG = 0 + posn = unpack('> self.logfile, 'SHEETSOFFSET:', posn + self._sheetsoffset = posn + + def handle_sst(self, data): + # DEBUG = 1 + if DEBUG: + print >> self.logfile, "SST Processing" + t0 = time.time() + nbt = len(data) + strlist = [data] + uniquestrings = unpack('= 2: + fprintf(self.logfile, "SST: unique strings: %d\n", uniquestrings) + while 1: + code, nb, data = self.get_record_parts_conditional(XL_CONTINUE) + if code is None: + break + nbt += nb + if DEBUG >= 2: + fprintf(self.logfile, "CONTINUE: adding %d bytes to SST -> %d\n", nb, nbt) + strlist.append(data) + self._sharedstrings = unpack_SST_table(strlist, uniquestrings) + if DEBUG: + t1 = time.time() + print >> self.logfile, "SST processing took %.2f seconds" % (t1 - t0, ) + + def handle_writeaccess(self, data): + # DEBUG = 0 + if self.biff_version < 80: + if not self.encoding: + self.raw_user_name = True + self.user_name = data + return + strg = unpack_string(data, 0, self.encoding, lenlen=1) + else: + strg = unpack_unicode(data, 0, lenlen=2) + if DEBUG: print >> self.logfile, "WRITEACCESS: %d bytes; raw=%d %r" % (len(data), self.raw_user_name, strg) + strg = strg.rstrip() + 
self.user_name = strg + + def parse_globals(self): + # DEBUG = 0 + # no need to position, just start reading (after the BOF) + formatting.initialise_book(self) + while 1: + rc, length, data = self.get_record_parts() + if DEBUG: print "parse_globals: record code is 0x%04x" % rc + if rc == XL_SST: + self.handle_sst(data) + elif rc == XL_FONT or rc == XL_FONT_B3B4: + self.handle_font(data) + elif rc == XL_FORMAT: # XL_FORMAT2 is BIFF <= 3.0, can't appear in globals + self.handle_format(data) + elif rc == XL_XF: + self.handle_xf(data) + elif rc == XL_BOUNDSHEET: + self.handle_boundsheet(data) + elif rc == XL_DATEMODE: + self.handle_datemode(data) + elif rc == XL_CODEPAGE: + self.handle_codepage(data) + elif rc == XL_COUNTRY: + self.handle_country(data) + elif rc == XL_EXTERNNAME: + self.handle_externname(data) + elif rc == XL_EXTERNSHEET: + self.handle_externsheet(data) + elif rc == XL_FILEPASS: + self.handle_filepass(data) + elif rc == XL_WRITEACCESS: + self.handle_writeaccess(data) + elif rc == XL_SHEETSOFFSET: + self.handle_sheetsoffset(data) + elif rc == XL_SHEETHDR: + self.handle_sheethdr(data) + elif rc == XL_SUPBOOK: + self.handle_supbook(data) + elif rc == XL_NAME: + self.handle_name(data) + elif rc == XL_PALETTE: + self.handle_palette(data) + elif rc == XL_STYLE: + self.handle_style(data) + elif rc & 0xff == 9: + print >> self.logfile, "*** Unexpected BOF at posn %d: 0x%04x len=%d data=%r" \ + % (self._position - length - 4, rc, length, data) + elif rc == XL_EOF: + self.xf_epilogue() + self.names_epilogue() + self.palette_epilogue() + if not self.encoding: + self.derive_encoding() + if self.biff_version == 45: + # DEBUG = 0 + if DEBUG: print "global EOF: position", self._position + # if DEBUG: + # pos = self._position - 4 + # print repr(self.mem[pos:pos+40]) + return + else: + # if DEBUG: + # print "parse_globals: ignoring record code 0x%04x" % rc + pass + + def read(self, pos, length): + data = self.mem[pos:pos+length] + self._position = pos + len(data) + 
return data + + def getbof(self, rqd_stream): + # DEBUG = 1 + # if DEBUG: print >> self.logfile, "getbof(): position", self._position + if DEBUG: print >> self.logfile, "reqd: 0x%04x" % rqd_stream + def bof_error(msg): + raise XLRDError('Unsupported format, or corrupt file: ' + msg) + savpos = self._position + opcode = self.get2bytes() + if opcode == MY_EOF: + bof_error('Expected BOF record; met end of file') + if opcode not in bofcodes: + bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8]) + length = self.get2bytes() + if length == MY_EOF: + bof_error('Incomplete BOF record[1]; met end of file') + if length < boflen[opcode] or length > 20: + bof_error( + 'Invalid length (%d) for BOF record type 0x%04x' + % (length, opcode)) + data = self.read(self._position, length); + if DEBUG: print >> self.logfile, "\ngetbof(): data=%r" % data + if len(data) < length: + bof_error('Incomplete BOF record[2]; met end of file') + version1 = opcode >> 8 + version2, streamtype = unpack('> self.logfile, "getbof(): op=0x%04x version2=0x%04x streamtype=0x%04x" \ + % (opcode, version2, streamtype) + bof_offset = self._position - 4 - length + if DEBUG: + print >> self.logfile, "getbof(): BOF found at offset %d; savpos=%d" \ + % (bof_offset, savpos) + version = build = year = 0 + if version1 == 0x08: + build, year = unpack('= 2: + print >> self.logfile, \ + "BOF: op=0x%04x vers=0x%04x stream=0x%04x buildid=%d buildyr=%d -> BIFF%d" \ + % (opcode, version2, streamtype, build, year, version) + got_globals = streamtype == XL_WORKBOOK_GLOBALS or ( + version == 45 and streamtype == XL_WORKBOOK_GLOBALS_4W) + if (rqd_stream == XL_WORKBOOK_GLOBALS and got_globals) or streamtype == rqd_stream: + return version + if version < 50 and streamtype == XL_WORKSHEET: + return version + if version >= 50 and streamtype == 0x0100: + bof_error("Workspace file -- no spreadsheet data") + bof_error( + 'BOF not workbook/worksheet: op=0x%04x vers=0x%04x strm=0x%04x build=%d year=%d -> BIFF%d' \ + 
% (opcode, version2, streamtype, build, year, version) + ) + +# === helper functions + +def expand_cell_address(inrow, incol): + # Ref : OOo docs, "4.3.4 Cell Addresses in BIFF8" + outrow = inrow + if incol & 0x8000: + if outrow >= 32768: + outrow -= 65536 + relrow = 1 + else: + relrow = 0 + outcol = incol & 0xFF + if incol & 0x4000: + if outcol >= 128: + outcol -= 256 + relcol = 1 + else: + relcol = 0 + return outrow, outcol, relrow, relcol + +def colname(colx, _A2Z="ABCDEFGHIJKLMNOPQRSTUVWXYZ"): + assert colx >= 0 + name = '' + while 1: + quot, rem = divmod(colx, 26) + name = _A2Z[rem] + name + if not quot: + return name + colx = quot - 1 + +def display_cell_address(rowx, colx, relrow, relcol): + if relrow: + rowpart = "(*%s%d)" % ("+-"[rowx < 0], abs(rowx)) + else: + rowpart = "$%d" % (rowx+1,) + if relcol: + colpart = "(*%s%d)" % ("+-"[colx < 0], abs(colx)) + else: + colpart = "$" + colname(colx) + return colpart + rowpart + +def unpack_SST_table(datatab, nstrings): + "Return list of strings" + datainx = 0 + ndatas = len(datatab) + data = datatab[0] + datalen = len(data) + pos = 8 + strings = [] + strappend = strings.append + local_unpack = unpack + local_min = min + local_ord = ord + latin_1 = "latin_1" + for _unused_i in xrange(nstrings): + nchars = local_unpack('> 1, charsneed) + rawstrg = data[pos:pos+2*charsavail] + # if DEBUG: print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg) + try: + accstrg += unicode(rawstrg, "utf_16_le") + except: + # print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg) + # Probable cause: dodgy data e.g. unfinished surrogate pair. + # E.g. file unicode2.xls in pyExcelerator's examples has cells containing + # unichr(i) for i in range(0x100000) + # so this will include 0xD800 etc + raise + pos += 2*charsavail + else: + # Note: this is COMPRESSED (not ASCII!) encoding!!! 
+ charsavail = local_min(datalen - pos, charsneed) + rawstrg = data[pos:pos+charsavail] + # if DEBUG: print "SST CMPRSD: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg) + accstrg += unicode(rawstrg, latin_1) + pos += charsavail + charsgot += charsavail + if charsgot == nchars: + break + datainx += 1 + data = datatab[datainx] + datalen = len(data) + options = local_ord(data[0]) + pos = 1 + pos += rtsz # size of richtext & phonetic stuff to skip + # also allow for the rich text etc being split ... + if pos >= datalen: + # adjust to correct position in next record + pos = pos - datalen + datainx += 1 + if datainx < ndatas: + data = datatab[datainx] + datalen = len(data) + else: + assert _unused_i == nstrings - 1 + strappend(accstrg) + return strings diff --git a/tablib/packages/xlrd/biffh.py b/tablib/packages/xlrd/biffh.py new file mode 100644 index 0000000..ba3b26c --- /dev/null +++ b/tablib/packages/xlrd/biffh.py @@ -0,0 +1,639 @@ +# -*- coding: cp1252 -*- + +## +# Support module for the xlrd package. +# +#

Portions copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under a BSD-style licence.

+## + +# 2008-02-10 SJM BIFF2 BLANK record +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte. +# 2007-04-22 SJM Remove experimental "trimming" facility. + +DEBUG = 0 + +from struct import unpack +import sys +from timemachine import * + +class XLRDError(Exception): + pass + +## +# Parent of almost all other classes in the package. Defines a common "dump" method +# for debugging. + +class BaseObject(object): + + _repr_these = [] + + ## + # @param f open file object, to which the dump is written + # @param header text to write before the dump + # @param footer text to write after the dump + # @param indent number of leading spaces (for recursive calls) + + def dump(self, f=None, header=None, footer=None, indent=0): + if f is None: + f = sys.stderr + alist = self.__dict__.items() + alist.sort() + pad = " " * indent + if header is not None: print >> f, header + list_type = type([]) + dict_type = type({}) + for attr, value in alist: + if getattr(value, 'dump', None) and attr != 'book': + value.dump(f, + header="%s%s (%s object):" % (pad, attr, value.__class__.__name__), + indent=indent+4) + elif attr not in self._repr_these and ( + isinstance(value, list_type) or isinstance(value, dict_type) + ): + print >> f, "%s%s: %s, len = %d" % (pad, attr, type(value), len(value)) + else: + print >> f, "%s%s: %r" % (pad, attr, value) + if footer is not None: print >> f, footer + +FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text +DATEFORMAT = FDT +NUMBERFORMAT = FNU + +( + XL_CELL_EMPTY, + XL_CELL_TEXT, + XL_CELL_NUMBER, + XL_CELL_DATE, + XL_CELL_BOOLEAN, + XL_CELL_ERROR, + XL_CELL_BLANK, # for use in debugging, gathering stats, etc +) = range(7) + +biff_text_from_num = { + 0: "(not BIFF)", + 20: "2.0", + 21: "2.1", + 
30: "3", + 40: "4S", + 45: "4W", + 50: "5", + 70: "7", + 80: "8", + 85: "8X", + } + +## +#

This dictionary can be used to produce a text version of the internal codes +# that Excel uses for error cells. Here are its contents: +#

+# 0x00: '#NULL!',  # Intersection of two cell ranges is empty
+# 0x07: '#DIV/0!', # Division by zero
+# 0x0F: '#VALUE!', # Wrong type of operand
+# 0x17: '#REF!',   # Illegal or deleted cell reference
+# 0x1D: '#NAME?',  # Wrong function or range name
+# 0x24: '#NUM!',   # Value range overflow
+# 0x2A: '#N/A!',   # Argument or function not available
+# 

+ +error_text_from_code = { + 0x00: '#NULL!', # Intersection of two cell ranges is empty + 0x07: '#DIV/0!', # Division by zero + 0x0F: '#VALUE!', # Wrong type of operand + 0x17: '#REF!', # Illegal or deleted cell reference + 0x1D: '#NAME?', # Wrong function or range name + 0x24: '#NUM!', # Value range overflow + 0x2A: '#N/A!', # Argument or function not available +} + +BIFF_FIRST_UNICODE = 80 + +XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5 +XL_WORKBOOK_GLOBALS_4W = 0x100 +XL_WORKSHEET = WRKSHEET = 0x10 + +XL_BOUNDSHEET_WORKSHEET = 0x00 +XL_BOUNDSHEET_CHART = 0x02 +XL_BOUNDSHEET_VB_MODULE = 0x06 + +# XL_RK2 = 0x7e +XL_ARRAY = 0x0221 +XL_ARRAY2 = 0x0021 +XL_BLANK = 0x0201 +XL_BLANK_B2 = 0x01 +XL_BOF = 0x809 +XL_BOOLERR = 0x205 +XL_BOOLERR_B2 = 0x5 +XL_BOUNDSHEET = 0x85 +XL_BUILTINFMTCOUNT = 0x56 +XL_CF = 0x01B1 +XL_CODEPAGE = 0x42 +XL_COLINFO = 0x7D +XL_COLUMNDEFAULT = 0x20 # BIFF2 only +XL_COLWIDTH = 0x24 # BIFF2 only +XL_CONDFMT = 0x01B0 +XL_CONTINUE = 0x3c +XL_COUNTRY = 0x8C +XL_DATEMODE = 0x22 +XL_DEFAULTROWHEIGHT = 0x0225 +XL_DEFCOLWIDTH = 0x55 +XL_DIMENSION = 0x200 +XL_DIMENSION2 = 0x0 +XL_EFONT = 0x45 +XL_EOF = 0x0a +XL_EXTERNNAME = 0x23 +XL_EXTERNSHEET = 0x17 +XL_EXTSST = 0xff +XL_FEAT11 = 0x872 +XL_FILEPASS = 0x2f +XL_FONT = 0x31 +XL_FONT_B3B4 = 0x231 +XL_FORMAT = 0x41e +XL_FORMAT2 = 0x1E # BIFF2, BIFF3 +XL_FORMULA = 0x6 +XL_FORMULA3 = 0x206 +XL_FORMULA4 = 0x406 +XL_GCW = 0xab +XL_INDEX = 0x20b +XL_INTEGER = 0x2 # BIFF2 only +XL_IXFE = 0x44 # BIFF2 only +XL_LABEL = 0x204 +XL_LABEL_B2 = 0x04 +XL_LABELRANGES = 0x15f +XL_LABELSST = 0xfd +XL_MERGEDCELLS = 0xE5 +XL_MSO_DRAWING = 0x00EC +XL_MSO_DRAWING_GROUP = 0x00EB +XL_MSO_DRAWING_SELECTION = 0x00ED +XL_MULRK = 0xbd +XL_MULBLANK = 0xbe +XL_NAME = 0x18 +XL_NOTE = 0x1c +XL_NUMBER = 0x203 +XL_NUMBER_B2 = 0x3 +XL_OBJ = 0x5D +XL_PALETTE = 0x92 +XL_RK = 0x27e +XL_ROW = 0x208 +XL_ROW_B2 = 0x08 +XL_RSTRING = 0xd6 +XL_SHEETHDR = 0x8F # BIFF4W only +XL_SHEETSOFFSET = 0x8E # BIFF4W only +XL_SHRFMLA = 0x04bc +XL_SST = 0xfc 
+XL_STANDARDWIDTH = 0x99 +XL_STRING = 0x207 +XL_STRING_B2 = 0x7 +XL_STYLE = 0x293 +XL_SUPBOOK = 0x1AE +XL_TABLEOP = 0x236 +XL_TABLEOP2 = 0x37 +XL_TABLEOP_B2 = 0x36 +XL_TXO = 0x1b6 +XL_UNCALCED = 0x5e +XL_UNKNOWN = 0xffff +XL_WINDOW2 = 0x023E +XL_WRITEACCESS = 0x5C +XL_XF = 0xe0 +XL_XF2 = 0x0043 # BIFF2 version of XF record +XL_XF3 = 0x0243 # BIFF3 version of XF record +XL_XF4 = 0x0443 # BIFF4 version of XF record + +boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4} +bofcodes = (0x0809, 0x0409, 0x0209, 0x0009) + +XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206) + +_cell_opcode_list = [ + XL_BOOLERR, + XL_FORMULA, + XL_FORMULA3, + XL_FORMULA4, + XL_LABEL, + XL_LABELSST, + XL_MULRK, + XL_NUMBER, + XL_RK, + XL_RSTRING, + ] +_cell_opcode_dict = {} +for _cell_opcode in _cell_opcode_list: + _cell_opcode_dict[_cell_opcode] = 1 +is_cell_opcode = _cell_opcode_dict.has_key + +# def fprintf(f, fmt, *vargs): f.write(fmt % vargs) + +def fprintf(f, fmt, *vargs): + if fmt.endswith('\n'): + print >> f, fmt[:-1] % vargs + else: + print >> f, fmt % vargs, + +def upkbits(tgt_obj, src, manifest, local_setattr=setattr): + for n, mask, attr in manifest: + local_setattr(tgt_obj, attr, (src & mask) >> n) + +def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int): + for n, mask, attr in manifest: + local_setattr(tgt_obj, attr, local_int((src & mask) >> n)) + +def unpack_string(data, pos, encoding, lenlen=1): + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += lenlen + return unicode(data[pos:pos+nchars], encoding) + +def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None): + if known_len is not None: + # On a NAME record, the length byte is detached from the front of the string. 
+ nchars = known_len + else: + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += lenlen + newpos = pos + nchars + return (unicode(data[pos:newpos], encoding), newpos) + +def unpack_unicode(data, pos, lenlen=2): + "Return unicode_strg" + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + if not nchars: + # Ambiguous whether 0-length string should have an "options" byte. + # Avoid crash if missing. + return u"" + pos += lenlen + options = ord(data[pos]) + pos += 1 + # phonetic = options & 0x04 + # richtext = options & 0x08 + if options & 0x08: + # rt = unpack(' endpos=%d pos=%d endsub=%d substrg=%r\n', + ofs, dlen, base, endpos, pos, endsub, substrg) + break + hexd = ''.join(["%02x " % ord(c) for c in substrg]) + chard = '' + for c in substrg: + if c == '\0': + c = '~' + elif not (' ' <= c <= '~'): + c = '?' + chard += c + if numbered: + num_prefix = "%5d: " % (base+pos-ofs) + fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard) + pos = endsub + +def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False): + pos = stream_offset + stream_end = stream_offset + stream_len + adj = base - stream_offset + dummies = 0 + numbered = not unnumbered + num_prefix = '' + while stream_end - pos >= 4: + rc, length = unpack('') + if numbered: + num_prefix = "%5d: " % (adj + pos) + fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length) + pos += 4 + hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered) + pos += length + if dummies: + if numbered: + num_prefix = "%5d: " % (adj + savpos) + fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies) + if pos < stream_end: + if numbered: + num_prefix = "%5d: " % (adj + pos) + fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix) + hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered) + elif pos > stream_end: + fprintf(fout, "Last dumped record has length (%d) that is too large\n", length) + +def 
biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout): + pos = stream_offset + stream_end = stream_offset + stream_len + tally = {} + while stream_end - pos >= 4: + rc, length = unpack('> fout, "%8d %s" % (count, recname) + +encoding_from_codepage = { + 1200 : 'utf_16_le', + 10000: 'mac_roman', + 10006: 'mac_greek', # guess + 10007: 'mac_cyrillic', # guess + 10029: 'mac_latin2', # guess + 10079: 'mac_iceland', # guess + 10081: 'mac_turkish', # guess + 32768: 'mac_roman', + 32769: 'cp1252', + } +# some more guessing, for Indic scripts +# codepage 57000 range: +# 2 Devanagari [0] +# 3 Bengali [1] +# 4 Tamil [5] +# 5 Telegu [6] +# 6 Assamese [1] c.f. Bengali +# 7 Oriya [4] +# 8 Kannada [7] +# 9 Malayalam [8] +# 10 Gujarati [3] +# 11 Gurmukhi [2] diff --git a/tablib/packages/xlrd/compdoc.py b/tablib/packages/xlrd/compdoc.py new file mode 100644 index 0000000..3abb7a6 --- /dev/null +++ b/tablib/packages/xlrd/compdoc.py @@ -0,0 +1,358 @@ +# -*- coding: cp1252 -*- + +## +# Implements the minimal functionality required +# to extract a "Workbook" or "Book" stream (as one big string) +# from an OLE2 Compound Document file. +#

Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under a BSD-style licence.

+## + +# No part of the content of this file was derived from the works of David Giffin. + +# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SCSS [Frank Hoffsuemmer] +# 2007-09-08 SJM Warning message if sector sizes are extremely large. +# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted. +# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms. + + +import sys +from struct import unpack +from timemachine import * + +## +# Magic cookie that should appear in the first 8 bytes of the file. +SIGNATURE = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" + +EOCSID = -2 +FREESID = -1 +SATSID = -3 +MSATSID = -4 + +class CompDocError(Exception): + pass + +class DirNode(object): + + def __init__(self, DID, dent, DEBUG=0): + # dent is the 128-byte directory entry + self.DID = DID + # (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID, + # self.root_DID, + # self.first_SID, + # self.tot_size) = \ + # unpack('> logfile, "\nCompDoc format: version=0x%04x revision=0x%04x" % (version, revision) + self.mem = mem + ssz, sssz = unpack(' 20: # allows for 2**20 bytes i.e. 1MB + print >> logfile, \ + "WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..." \ + % ssz + ssz = 9 + if sssz > ssz: + print >> logfile, \ + "WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..." 
\ + % sssz + sssz = 6 + self.sec_size = sec_size = 1 << ssz + self.short_sec_size = 1 << sssz + ( + SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream, + SSAT_first_sec_sid, SSAT_tot_secs, + MSAT_first_sec_sid, MSAT_tot_secs, + # ) = unpack('> logfile, \ + "WARNING *** file size (%d) not 512 + multiple of sector size (%d)" \ + % (len(mem), sec_size) + if DEBUG: + print >> logfile, 'sec sizes', ssz, sssz, sec_size, self.short_sec_size + print >> logfile, "mem data: %d bytes == %d sectors" % (mem_data_len, mem_data_secs) + print >> logfile, "SAT_tot_secs=%d, dir_first_sec_sid=%d, min_size_std_stream=%d" \ + % (SAT_tot_secs, self.dir_first_sec_sid, self.min_size_std_stream,) + print >> logfile, "SSAT_first_sec_sid=%d, SSAT_tot_secs=%d" % (SSAT_first_sec_sid, SSAT_tot_secs,) + print >> logfile, "MSAT_first_sec_sid=%d, MSAT_tot_secs=%d" % (MSAT_first_sec_sid, MSAT_tot_secs,) + nent = int_floor_div(sec_size, 4) # number of SID entries in a sector + fmt = "<%di" % nent + trunc_warned = 0 + # + # === build the MSAT === + # + MSAT = list(unpack('<109i', mem[76:512])) + sid = MSAT_first_sec_sid + while sid >= 0: + if sid >= mem_data_secs: + raise CompDocError( + "MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs) + ) + offset = 512 + sec_size * sid + news = list(unpack(fmt, mem[offset:offset+sec_size])) + sid = news.pop() + MSAT.extend(news) + if DEBUG: + print >> logfile, "MSAT: len =", len(MSAT) + print >> logfile, MSAT + # + # === build the SAT === + # + self.SAT = [] + for msid in MSAT: + if msid == FREESID: continue + if msid >= mem_data_secs: + if not trunc_warned: + print >> logfile, "WARNING *** File is truncated, or OLE2 MSAT is corrupt!!" 
+ print >> logfile, \ + "INFO: Trying to access sector %d but only %d available" \ + % (msid, mem_data_secs) + trunc_warned = 1 + continue + offset = 512 + sec_size * msid + news = list(unpack(fmt, mem[offset:offset+sec_size])) + self.SAT.extend(news) + if DEBUG: + print >> logfile, "SAT: len =", len(self.SAT) + print >> logfile, self.SAT + # print >> logfile, "SAT ", + # for i, s in enumerate(self.SAT): + # print >> logfile, "entry: %4d offset: %6d, next entry: %4d" % (i, 512 + sec_size * i, s) + # print >> logfile, "%d:%d " % (i, s), + print + + # === build the directory === + # + dbytes = self._get_stream( + self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid, + name="directory") + dirlist = [] + did = -1 + for pos in xrange(0, len(dbytes), 128): + did += 1 + dirlist.append(DirNode(did, dbytes[pos:pos+128], 0)) + self.dirlist = dirlist + _build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ... + if DEBUG: + for d in dirlist: + d.dump(DEBUG) + # + # === get the SSCS === + # + sscs_dir = self.dirlist[0] + assert sscs_dir.etype == 5 # root entry + if sscs_dir.first_SID < 0 and sscs_dir.tot_size == 0: + # Problem reported by Frank Hoffsuemmer: some software was + # writing -1 instead of -2 (EOCSID) for the first_SID + # when the SCCS was empty. Not having EOCSID caused assertion + # failure in _get_stream. + # Solution: avoid calling _get_stream in any case when the + # SCSS appears to be empty. 
+ self.SSCS = "" + else: + self.SSCS = self._get_stream( + self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID, + sscs_dir.tot_size, name="SSCS") + # if DEBUG: print >> logfile, "SSCS", repr(self.SSCS) + # + # === build the SSAT === + # + self.SSAT = [] + if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0: + print >> logfile, \ + "WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero" + if sscs_dir.tot_size > 0: + sid = SSAT_first_sec_sid + nsecs = SSAT_tot_secs + while sid >= 0 and nsecs > 0: + nsecs -= 1 + start_pos = 512 + sid * sec_size + news = list(unpack(fmt, mem[start_pos:start_pos+sec_size])) + self.SSAT.extend(news) + sid = self.SAT[sid] + # assert SSAT_tot_secs == 0 or sid == EOCSID + if DEBUG: print >> logfile, "SSAT last sid %d; remaining sectors %d" % (sid, nsecs) + assert nsecs == 0 and sid == EOCSID + if DEBUG: print >> logfile, "SSAT", self.SSAT + + def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name=''): + # print >> self.logfile, "_get_stream", base, sec_size, start_sid, size + sectors = [] + s = start_sid + if size is None: + # nothing to check against + while s >= 0: + start_pos = base + s * sec_size + sectors.append(mem[start_pos:start_pos+sec_size]) + try: + s = sat[s] + except IndexError: + raise CompDocError( + "OLE2 stream %r: sector allocation table invalid entry (%d)" % + (name, s) + ) + assert s == EOCSID + else: + todo = size + while s >= 0: + start_pos = base + s * sec_size + grab = sec_size + if grab > todo: + grab = todo + todo -= grab + sectors.append(mem[start_pos:start_pos+grab]) + try: + s = sat[s] + except IndexError: + raise CompDocError( + "OLE2 stream %r: sector allocation table invalid entry (%d)" % + (name, s) + ) + assert s == EOCSID + if todo != 0: + print >> self.logfile, \ + "WARNING *** OLE2 stream %r: expected size %d, actual size %d" \ + % (name, size, size - todo) + return ''.join(sectors) + + def _dir_search(self, path, storage_DID=0): + # Return matching DirNode instance, 
or None + head = path[0] + tail = path[1:] + dl = self.dirlist + for child in dl[storage_DID].children: + if dl[child].name.lower() == head.lower(): + et = dl[child].etype + if et == 2: + return dl[child] + if et == 1: + if not tail: + raise CompDocError("Requested component is a 'storage'") + return self._dir_search(tail, child) + dl[child].dump(1) + raise CompDocError("Requested stream is not a 'user stream'") + return None + + ## + # Interrogate the compound document's directory; return the stream as a string if found, otherwise + # return None. + # @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto. + + def get_named_stream(self, qname): + d = self._dir_search(qname.split("/")) + if d is None: + return None + if d.tot_size >= self.min_size_std_stream: + return self._get_stream( + self.mem, 512, self.SAT, self.sec_size, d.first_SID, + d.tot_size, name=qname) + else: + return self._get_stream( + self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID, + d.tot_size, name=qname + " (from SSCS)") + + ## + # Interrogate the compound document's directory. + # If the named stream is not found, (None, 0, 0) will be returned. + # If the named stream is found and is contiguous within the original byte sequence ("mem") + # used when the document was opened, + # then (mem, offset_to_start_of_stream, length_of_stream) is returned. + # Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned. + # @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto. 
+ + def locate_named_stream(self, qname): + d = self._dir_search(qname.split("/")) + if d is None: + return (None, 0, 0) + if d.tot_size >= self.min_size_std_stream: + return self._locate_stream(self.mem, 512, self.SAT, self.sec_size, d.first_SID, d.tot_size) + else: + return ( + self._get_stream( + self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID, + d.tot_size, qname + " (from SSCS)"), + 0, + d.tot_size + ) + return (None, 0, 0) # not found + + def _locate_stream(self, mem, base, sat, sec_size, start_sid, size): + # print >> self.logfile, "_locate_stream", base, sec_size, start_sid, size + s = start_sid + if s < 0: + raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid) + p = -99 # dummy previous SID + start_pos = -9999 + end_pos = -8888 + slices = [] + while s >= 0: + if s == p+1: + # contiguous sectors + end_pos += sec_size + else: + # start new slice + if p >= 0: + # not first time + slices.append((start_pos, end_pos)) + start_pos = base + s * sec_size + end_pos = start_pos + sec_size + p = s + s = sat[s] + assert s == EOCSID + # print >> self.logfile, len(slices) + 1, "slices" + if not slices: + # The stream is contiguous ... just what we like! + return (mem, start_pos, size) + slices.append((start_pos, end_pos)) + return (''.join([mem[start_pos:end_pos] for start_pos, end_pos in slices]), 0, size) + +# ========================================================================================== diff --git a/tablib/packages/xlrd/doc/compdoc.html b/tablib/packages/xlrd/doc/compdoc.html new file mode 100644 index 0000000..c55a194 --- /dev/null +++ b/tablib/packages/xlrd/doc/compdoc.html @@ -0,0 +1,69 @@ + + + + +The compdoc Module + + +

The compdoc Module

+

Implements the minimal functionality required +to extract a "Workbook" or "Book" stream (as one big string) +from an OLE2 Compound Document file. +

Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd

+

This module is part of the xlrd package, which is released under a BSD-style licence.

+

Module Contents

+
+
CompDoc(mem, logfile=sys.stdout, DEBUG=0) (class) [#]
+
+

Compound document handler.

+
+
mem
+
+The raw contents of the file, as a string, or as an mmap.mmap() object. The +only operation it needs to support is slicing.
+

+

For more information about this class, see The CompDoc Class.

+
+
SIGNATURE (variable) [#]
+
+

Magic cookie that should appear in the first 8 bytes of the file.

+
+
+

The CompDoc Class

+
+
CompDoc(mem, logfile=sys.stdout, DEBUG=0) (class) [#]
+
+

Compound document handler.

+
+
mem
+
+The raw contents of the file, as a string, or as an mmap.mmap() object. The +only operation it needs to support is slicing.
+

+
+
get_named_stream(qname) [#]
+
+

Interrogate the compound document's directory; return the stream as a string if found, otherwise +return None.

+
+
qname
+
+Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
+

+
+
locate_named_stream(qname) [#]
+
+

Interrogate the compound document's directory. +If the named stream is not found, (None, 0, 0) will be returned. +If the named stream is found and is contiguous within the original byte sequence ("mem") +used when the document was opened, +then (mem, offset_to_start_of_stream, length_of_stream) is returned. +Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.

+
+
qname
+
+Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
+

+
+
+ diff --git a/tablib/packages/xlrd/doc/xlrd.html b/tablib/packages/xlrd/doc/xlrd.html new file mode 100644 index 0000000..f982e0e --- /dev/null +++ b/tablib/packages/xlrd/doc/xlrd.html @@ -0,0 +1,1845 @@ + + + + +The xlrd Module + + +

The xlrd Module

+

A Python module for extracting data from MS Excel ™ spreadsheet files. +

+Version 0.7.1 -- 2009-05-31 +

+ +

General information

+ +

Acknowledgements

+ +

+Development of this module would not have been possible without the document +"OpenOffice.org's Documentation of the Microsoft Excel File Format" +("OOo docs" for short). +The latest version is available from OpenOffice.org in + PDF format +and + ODT format. +Small portions of the OOo docs are reproduced in this +document. A study of the OOo docs is recommended for those who wish a +deeper understanding of the Excel file layout than the xlrd docs can provide. +

+ +

Backporting to Python 2.1 was partially funded by + + Journyx - provider of timesheet and project accounting solutions. + +

+ +

Provision of formatting information in version 0.6.1 was funded by + + Simplistix Ltd. + +

+ +

Unicode

+ +

This module presents all text strings as Python unicode objects. +From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode. +Older files (Excel 95 and earlier) don't keep strings in Unicode; +a CODEPAGE record provides a codepage number (for example, 1252) which is +used by xlrd to derive the encoding (for the same example: "cp1252") which is +used to translate to Unicode.

+ +

If the CODEPAGE record is missing (possible if the file was created +by third-party software), xlrd will assume that the encoding is ascii, and keep going. +If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and +you will need to determine the encoding yourself, and tell xlrd: +

+    book = xlrd.open_workbook(..., encoding_override="cp1252")
+

+

If the CODEPAGE record exists but is wrong (for example, the codepage +number is 1251, but the strings are actually encoded in koi8_r), +it can be overridden using the same mechanism. +The supplied runxlrd.py has a corresponding command-line argument, which +may be used for experimentation: +

+    runxlrd.py -e koi8_r 3rows myfile.xls
+

+

The first place to look for an encoding ("codec name") is + +the Python documentation. +

+
+ +

Dates in Excel spreadsheets

+ +

In reality, there are no such things. What you have are floating point +numbers and pious hope. +There are several problems with Excel dates:

+ +

(1) Dates are not stored as a separate data type; they are stored as +floating point numbers and you have to rely on +(a) the "number format" applied to them in Excel and/or +(b) knowing which cells are supposed to have dates in them. +This module helps with (a) by inspecting the +format that has been applied to each number cell; +if it appears to be a date format, the cell +is classified as a date rather than a number. Feedback on this feature, +especially from non-English-speaking locales, would be appreciated.

+ +

(2) Excel for Windows stores dates by default as the number of +days (or fraction thereof) since 1899-12-31T00:00:00. Excel for +Macintosh uses a default start date of 1904-01-01T00:00:00. The date +system can be changed in Excel on a per-workbook basis (for example: +Tools -> Options -> Calculation, tick the "1904 date system" box). +This is of course a bad idea if there are already dates in the +workbook. There is no good reason to change it even if there are no +dates in the workbook. Which date system is in use is recorded in the +workbook. A workbook transported from Windows to Macintosh (or vice +versa) will work correctly with the host Excel. When using this +module's xldate_as_tuple function to convert numbers from a workbook, +you must use the datemode attribute of the Book object. If you guess, +or make a judgement depending on where you believe the workbook was +created, you run the risk of being 1462 days out of kilter.

+ +

Reference: +http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162

+ + +

(3) The Excel implementation of the Windows-default 1900-based date system works on the +incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29, +which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the +result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc +program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored. +Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.

+ +

Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326

+ +

(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero. +Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00), +independent of the calendar, +or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)? +The xldate_* functions in this module +take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both +date systems. This is consistent with more recent Microsoft documentation +(for example, the help file for Excel 2002 which says that the first day +in the 1904 date system is 1904-01-02). + +

(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file, +in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive), +Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)." +This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era, +means that DATE(1899, 12, 31) is interpreted as 3799-12-31.

+ +

For further information, please refer to the documentation for the xldate_* functions.

+ +

Named references, constants, formulas, and macros

+ +

+A name is used to refer to a cell, a group of cells, a constant +value, a formula, or a macro. Usually the scope of a name is global +across the whole workbook. However it can be local to a worksheet. +For example, if the sales figures are in different cells in +different sheets, the user may define the name "Sales" in each +sheet. There are built-in names, like "Print_Area" and +"Print_Titles"; these two are naturally local to a sheet. +

+To inspect the names with a user interface like MS Excel, OOo Calc, +or Gnumeric, click on Insert/Names/Define. This will show the global +names, plus those local to the currently selected sheet. +

+A Book object provides two dictionaries (name_map and +name_and_scope_map) and a list (name_obj_list) which allow various +ways of accessing the Name objects. There is one Name object for +each NAME record found in the workbook. Name objects have many +attributes, several of which are relevant only when obj.macro is 1. +

+In the examples directory you will find namesdemo.xls which +showcases the many different ways that names can be used, and +xlrdnamesAPIdemo.py which offers 3 different queries for inspecting +the names in your files, and shows how to extract whatever a name is +referring to. There is currently one "convenience method", +Name.cell(), which extracts the value in the case where the name +refers to a single cell. More convenience methods are planned. The +source code for Name.cell (in __init__.py) is an extra source of +information on how the Name attributes hang together. +

+ +

Name information is not extracted from files older than +Excel 5.0 (Book.biff_version < 50)

+ +

Formatting

+ +

Introduction

+ +

This collection of features, new in xlrd version 0.6.1, is intended +to provide the information needed to (1) display/render spreadsheet contents +(say) on a screen or in a PDF file, and (2) copy spreadsheet data to another +file without losing the ability to display/render it.

+ +

The Palette; Colour Indexes

+ +

A colour is represented in Excel as a (red, green, blue) ("RGB") tuple +with each component in range(256). However it is not possible to access an +unlimited number of colours; each spreadsheet is limited to a palette of 64 different +colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index +("colour index") into this palette. + +Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue, +yellow, magenta, and cyan.

+ +The remaining colours in the palette (8 to 63 in Excel 5.0 and later) +can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette +of 7 rows of 8 colours. The last two rows are reserved for use in charts.
+The correspondence between this grid and the assigned +colour indexes is NOT left-to-right top-to-bottom.
+Indexes 8 to 15 correspond to changeable +parallels of the 8 fixed colours -- for example, index 7 is forever cyan; +index 15 starts off being cyan but can be changed by the user.
+ +The default colour for each index depends on the file version; tables of the defaults +are available in the source code. If the user changes one or more colours, +a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable +indexes.
+Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer +to colour index 7; "[COLOR16]...." will produce cyan +unless the user changes colour index 15 to something else.
+ +

In addition, there are several "magic" colour indexes used by Excel:
+0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines +(used in XF, CF, and WINDOW2 records)
+0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background +(used in XF and CF records)
+0x43: System face colour (dialogue background colour)
+0x4D: System window text colour for chart border lines
+0x4E: System window background colour for chart areas
+0x4F: Automatic colour for chart border lines (seems to be always Black)
+0x50: System ToolTip background colour (used in note objects)
+0x51: System ToolTip text colour (used in note objects)
+0x7FFF: System window text colour for fonts (used in FONT and CF records)
+Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT +records.
+ +

Default Formatting

+ +Default formatting is applied to all empty cells (those not described by a cell record). +Firstly row default information (ROW record, Rowinfo class) is used if available. +Failing that, column default information (COLINFO record, Colinfo class) is used if available. +As a last resort the worksheet/workbook default cell format will be used; this +should always be present in an Excel file, +described by the XF record with the fixed index 15 (0-based). By default, it uses the +worksheet/workbook default cell style, described by the very first XF record (index 0). + +

Formatting features not included in xlrd version 0.6.1

+
    +
  • Rich text i.e. strings containing partial bold italic + and underlined text, change of font inside a string, etc. + See OOo docs s3.4 and s3.2
  • +
  • Asian phonetic text (known as "ruby"), used for Japanese furigana. See OOo docs + s3.4.2 (p15)
  • +
  • Conditional formatting. See OOo docs + s5.12, s6.21 (CONDFMT record), s6.16 (CF record)
  • +
  • Miscellaneous sheet-level and book-level items e.g. printing layout, screen panes.
  • +
  • Modern Excel file versions don't keep most of the built-in + "number formats" in the file; Excel loads formats according to the + user's locale. Currently xlrd's emulation of this is limited to + a hard-wired table that applies to the US English locale. This may mean + that currency symbols, date order, thousands separator, decimals separator, etc + are inappropriate. Note that this does not affect users who are copying XLS + files, only those who are visually rendering cells.
  • +
+ +

Loading worksheets on demand

+ +

This feature, new in version 0.7.1, is governed by the on_demand argument +to the open_workbook() function and allows saving memory and time by loading +only those sheets that the caller is interested in, and releasing sheets +when no longer required.

+ +

on_demand=False (default): No change. open_workbook() loads global data +and all sheets, releases resources no longer required (principally the +str or mmap object containing the Workbook stream), and returns.

+ +

on_demand=True and BIFF version < 5.0: A warning message is emitted, +on_demand is recorded as False, and the old process is followed.

+ +

on_demand=True and BIFF version >= 5.0: open_workbook() loads global +data and returns without releasing resources. At this stage, the only +information available about sheets is Book.nsheets and Book.sheet_names().

+ +

Book.sheet_by_name() and Book.sheet_by_index() will load the requested +sheet if it is not already loaded.

+ +

Book.sheets() will load all/any unloaded sheets.

+ +

The caller may save memory by calling +Book.unload_sheet(sheet_name_or_index) when finished with the sheet. +This applies irrespective of the state of on_demand.

+ +

The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx() + -- except if the resources required to load it have been released (which will +have happened automatically when on_demand is false). This is the only +case where an exception will be raised.

+ +

The caller may query the state of a sheet: +Book.sheet_loaded(sheet_name_or_index) -> a bool

+ +

Module Contents

+
+
BaseObject (class) [#]
+
+

Parent of almost all other classes in the package.

+

For more information about this class, see The BaseObject Class.

+
+
Book() (class) [#]
+
+

Contents of a "workbook".

+

For more information about this class, see The Book Class.

+
+
Cell(ctype, value, xf_index=None) (class) [#]
+
+

Contains the data for one cell.

+

For more information about this class, see The Cell Class.

+
+
cellname(rowx, colx) [#]
+
+

Utility function: (5, 7) => 'H6'

+
+
cellnameabs(rowx, colx) [#]
+
+

Utility function: (5, 7) => '$H$6'

+
+
Colinfo (class) [#]
+
+

Width and default formatting information that applies to one or +more columns in a sheet.

+

For more information about this class, see The Colinfo Class.

+
+
colname(colx) [#]
+
+

Utility function: 7 => 'H', 27 => 'AB'

+
+
count_records(filename, outfile=sys.stdout) [#]
+
+

For debugging and analysis: summarise the file's BIFF records. +I.e. produce a sorted file of (record_name, count).

+
+
filename
+
+The path to the file to be summarised.
+
outfile
+
+An open file, to which the summary is written.
+

+
+
dump(filename, outfile=sys.stdout, unnumbered=False) [#]
+
+

For debugging: dump the file's BIFF records in char & hex. +

+
filename
+
+The path to the file to be dumped.
+
outfile
+
+An open file, to which the dump is written.
+
unnumbered
+
+If true, omit offsets (for meaningful diffs).
+

+
+
empty_cell (variable) [#]
+
+

There is one and only one instance of an empty cell -- it's a singleton. This is it. +You may use a test like "acell is empty_cell".

+
+
EqNeAttrs (class) [#]
+
+

This mixin class exists solely so that Format, Font, and XF....

+

For more information about this class, see The EqNeAttrs Class.

+
+
error_text_from_code (variable) [#]
+
+

This dictionary can be used to produce a text version of the internal codes +that Excel uses for error cells. Here are its contents: +

+0x00: '#NULL!',  # Intersection of two cell ranges is empty
+0x07: '#DIV/0!', # Division by zero
+0x0F: '#VALUE!', # Wrong type of operand
+0x17: '#REF!',   # Illegal or deleted cell reference
+0x1D: '#NAME?',  # Wrong function or range name
+0x24: '#NUM!',   # Value range overflow
+0x2A: '#N/A!',   # Argument or function not available
+

+
+
Font (class) [#]
+
+

An Excel "font" contains the details of not only what is normally +considered a font, but also several other display attributes.

+

For more information about this class, see The Font Class.

+
+
Format(format_key, ty, format_str) (class) [#]
+
+

"Number format" information from a FORMAT record.

+

For more information about this class, see The Format Class.

+
+
Name (class) [#]
+
+

Information relating to a named reference, formula, macro, etc.

+

For more information about this class, see The Name Class.

+
+
open_workbook(filename=None, +logfile=sys.stdout, verbosity=0, pickleable=True, use_mmap=USE_MMAP, +file_contents=None, +encoding_override=None, +formatting_info=False, on_demand=False, +) [#]
+
+

Open a spreadsheet file for data extraction.

+
+
filename
+
+The path to the spreadsheet file to be opened.
+
logfile
+
+An open file to which messages and diagnostics are written.
+
verbosity
+
+Increases the volume of trace material written to the logfile.
+
pickleable
+
+Default is true. In Python 2.4 or earlier, setting to false +will cause use of array.array objects which save some memory but can't be pickled. +In Python 2.5, array.arrays are used unconditionally. Note: if you have large files that +you need to read multiple times, it can be much faster to cPickle.dump() the xlrd.Book object +once, and use cPickle.load() multiple times.
+
use_mmap
+
+Whether to use the mmap module is determined heuristically. +Use this arg to override the result. Current heuristic: mmap is used if it exists.
+
file_contents
+
+... as a string or an mmap.mmap object or some other behave-alike object. +If file_contents is supplied, filename will not be used, except (possibly) in messages.
+
encoding_override
+
+Used to overcome missing or bad codepage information +in older-version files. Refer to discussion in the Unicode section above. +
-- New in version 0.6.0 + +
+
formatting_info
+
+Governs provision of a reference to an XF (eXtended Format) object +for each cell in the worksheet. +
Default is False. This is backwards compatible and saves memory. +"Blank" cells (those with their own formatting information but no data) are treated as empty +(by ignoring the file's BLANK and MULBLANK records). +It cuts off any bottom "margin" of rows of empty (and blank) cells and +any right "margin" of columns of empty (and blank) cells. +Only cell_value and cell_type are available. +
True provides all cells, including empty and blank cells. +XF information is available for each cell. +
-- New in version 0.6.1 + +
+
on_demand
+
+Governs whether sheets are all loaded initially or when demanded +by the caller. Please refer back to the section "Loading worksheets on demand" for details. +-- New in version 0.7.1
+
Returns:
+
+An instance of the Book class.
+

+
+
Operand(akind=None, avalue=None, arank=0, atext='?') (class) [#]
+
+

Used in evaluating formulas.

+

For more information about this class, see The Operand Class.

+
+
rangename3d(book, ref3d) [#]
+
+

Utility function: +
Ref3D((1, 4, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20' +

+
rangename3drel(book, ref3d) [#]
+
+

Utility function: +
Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1)) +=> 'Sheet1![@-13,#-32]:[@+12,#-23]' +where '@' refers to the current or base column and '#' +refers to the current or base row. +

+
Ref3D(atuple) (class) [#]
+
+

Represents an absolute or relative 3-dimensional reference to a box +of one or more cells.

+

For more information about this class, see The Ref3D Class.

+
+
Rowinfo (class) [#]
+
+

Height and default formatting information that applies to a row in a sheet.

+

For more information about this class, see The Rowinfo Class.

+
+
Sheet(book, position, name, number) (class) [#]
+
+

Contains the data for one worksheet.

+

For more information about this class, see The Sheet Class.

+
+
XF (class) [#]
+
+

eXtended Formatting information for cells, rows, columns and styles.

+

For more information about this class, see The XF Class.

+
+
XFAlignment (class) [#]
+
+

A collection of the alignment and similar attributes of an XF record.

+

For more information about this class, see The XFAlignment Class.

+
+
XFBackground (class) [#]
+
+

A collection of the background-related attributes of an XF record.

+

For more information about this class, see The XFBackground Class.

+
+
XFBorder (class) [#]
+
+

A collection of the border-related attributes of an XF record.

+

For more information about this class, see The XFBorder Class.

+
+
XFProtection (class) [#]
+
+

A collection of the protection-related attributes of an XF record.

+

For more information about this class, see The XFProtection Class.

+
+
xldate_as_tuple(xldate, datemode) [#]
+
+

Convert an Excel number (presumed to represent a date, a datetime or a time) into +a tuple suitable for feeding to datetime or mx.DateTime constructors.

+
+
xldate
+
+The Excel number
+
datemode
+
+0: 1900-based, 1: 1904-based. +
WARNING: when using this function to +interpret the contents of a workbook, you should pass in the Book.datemode +attribute of that workbook. Whether +the workbook has ever been anywhere near a Macintosh is irrelevant. +
+
Returns:
+
+Gregorian (year, month, day, hour, minute, nearest_second). +
Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a time; +(0, 0, 0, hour, minute, second) will be returned. +
Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number" +is zero. +
+
Raises XLDateNegative:
+xldate < 0.00 +
+
Raises XLDateAmbiguous:
+The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0) +
+
Raises XLDateTooLarge:
+Gregorian year 10000 or later
+
Raises XLDateBadDatemode:
+datemode arg is neither 0 nor 1
+
Raises XLDateError:
+Covers the 4 specific errors
+

+
+
xldate_from_date_tuple((year, month, day), datemode) [#]
+
+

Convert a date tuple (year, month, day) to an Excel date.

+
+
year
+
+Gregorian year.
+
month
+
+1 <= month <= 12 +
+
day
+
+1 <= day <= last day of that (year, month) +
+
datemode
+
+0: 1900-based, 1: 1904-based.
+
Raises XLDateAmbiguous:
+The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0) +
+
Raises XLDateBadDatemode:
+datemode arg is neither 0 nor 1
+
Raises XLDateBadTuple:
+(year, month, day) is too early/late or has invalid component(s)
+
Raises XLDateError:
+Covers the specific errors
+

+
+
xldate_from_datetime_tuple(datetime_tuple, datemode) [#]
+
+

Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value. +For more details, refer to other xldate_from_*_tuple functions.

+
+
datetime_tuple
+
+(year, month, day, hour, minute, second)
+
datemode
+
+0: 1900-based, 1: 1904-based.
+

+
+
xldate_from_time_tuple((hour, minute, second)) [#]
+
+

Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day).

+
+
hour
+
+0 <= hour < 24 +
+
minute
+
+0 <= minute < 60 +
+
second
+
+0 <= second < 60 +
+
Raises XLDateBadTuple:
+Out-of-range hour, minute, or second
+

+
+
+

The BaseObject Class

+
+
BaseObject (class) [#]
+
+

Parent of almost all other classes in the package. Defines a common "dump" method +for debugging.

+
+
dump(f=None, header=None, footer=None, indent=0) [#]
+
+
+
f
+
+open file object, to which the dump is written
+
header
+
+text to write before the dump
+
footer
+
+text to write after the dump
+
indent
+
+number of leading spaces (for recursive calls)
+

+
+
+

The Book Class

+
+
Book() (class) [#]
+
+

Contents of a "workbook". +

WARNING: You don't call this class yourself. You use the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").

+
+
biff_version [#]
+
+

Version of BIFF (Binary Interchange File Format) used to create the file. +Latest is 8.0 (represented here as 80), introduced with Excel 97. +Earliest supported by this module: 2.0 (represented as 20).

+
+
codepage [#]
+
+

An integer denoting the character set used for strings in this file. +For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE. +For earlier versions, this is used to derive the appropriate Python encoding +to be used to convert to Unicode. +Examples: 1252 -> 'cp1252', 10000 -> 'mac_roman'

+
+
colour_map [#]
+
+

This provides definitions for colour indexes. Please refer to the +above section "The Palette; Colour Indexes" for an explanation +of how colours are represented in Excel.
+Colour indexes into the palette map into (red, green, blue) tuples. +"Magic" indexes e.g. 0x7FFF map to None. +colour_map is what you need if you want to render cells on screen or in a PDF +file. If you are writing an output XLS file, use palette_record. +
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True) +

+
countries [#]
+
+

A tuple containing the (telephone system) country code for:
+ [0]: the user-interface setting when the file was created.
+ [1]: the regional settings.
+Example: (1, 61) meaning (USA, Australia). +This information may give a clue to the correct encoding for an unknown codepage. +For a long list of observed values, refer to the OpenOffice.org documentation for +the COUNTRY record. +

+
datemode [#]
+
+

Which date system was in force when this file was last saved.
+ 0 => 1900 system (the Excel for Windows default).
+ 1 => 1904 system (the Excel for Macintosh default).
+

+
encoding [#]
+
+

The encoding that was derived from the codepage.

+
+
font_list [#]
+
+

A list of Font class instances, each corresponding to a FONT record. +
-- New in version 0.6.1 +

+
format_list [#]
+
+

A list of Format objects, each corresponding to a FORMAT record, in +the order that they appear in the input file. +It does not contain builtin formats. +If you are creating an output file using (for example) pyExcelerator, +use this list. +The collection to be used for all visual rendering purposes is format_map. +
-- New in version 0.6.1 +

+
format_map [#]
+
+

The mapping from XF.format_key to Format object. +
-- New in version 0.6.1 +

+
load_time_stage_1 [#]
+
+

Time in seconds to extract the XLS image as a contiguous string (or mmap equivalent).

+
+
load_time_stage_2 [#]
+
+

Time in seconds to parse the data from the contiguous string (or mmap equivalent).

+
+
name_and_scope_map [#]
+
+

A mapping from (lower_case_name, scope) to a single Name object. +
-- New in version 0.6.0 +

+
name_map [#]
+
+

A mapping from lower_case_name to a list of Name objects. The list is +sorted in scope order. Typically there will be one item (of global scope) +in the list. +
-- New in version 0.6.0 +

+
name_obj_list [#]
+
+

List containing a Name object for each NAME record in the workbook. +
-- New in version 0.6.0 +

+
nsheets [#]
+
+

The number of worksheets present in the workbook file. +This information is available even when no sheets have yet been loaded.

+
+
palette_record [#]
+
+

If the user has changed any of the colours in the standard palette, the XLS +file will contain a PALETTE record with 56 (16 for Excel 4.0 and earlier) +RGB values in it, and this list will be e.g. [(r0, g0, b0), ..., (r55, g55, b55)]. +Otherwise this list will be empty. This is what you need if you are +writing an output XLS file. If you want to render cells on screen or in a PDF +file, use colour_map. +
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True) +

+
sheet_by_index(sheetx) [#]
+
+
+
sheetx
+
+Sheet index in range(nsheets)
+
Returns:
+
+An object of the Sheet class
+

+
+
sheet_by_name(sheet_name) [#]
+
+
+
sheet_name
+
+Name of sheet required
+
Returns:
+
+An object of the Sheet class
+

+
+
sheet_loaded(sheet_name_or_index) [#]
+
+
+
sheet_name_or_index
+
+Name or index of sheet enquired upon
+
Returns:
+
+true if sheet is loaded, false otherwise +
-- New in version 0.7.1 +
+

+
+
sheet_names() [#]
+
+
+
Returns:
+
+A list of the names of all the worksheets in the workbook file. +This information is available even when no sheets have yet been loaded.
+

+
+
sheets() [#]
+
+
+
Returns:
+
+A list of all sheets in the book. +All sheets not already loaded will be loaded.
+

+
+
style_name_map [#]
+
+

This provides access via name to the extended format information for +both built-in styles and user-defined styles.
+It maps name to (built_in, xf_index), where:
+name is either the name of a user-defined style, +or the name of one of the built-in styles. Known built-in names are +Normal, RowLevel_1 to RowLevel_7, +ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]", +"Currency [0]", Hyperlink, and "Followed Hyperlink".
+built_in 1 = built-in style, 0 = user-defined
+xf_index is an index into Book.xf_list.
+References: OOo docs s6.99 (STYLE record); Excel UI Format/Style +
-- New in version 0.6.1 +

+
unload_sheet(sheet_name_or_index) [#]
+
+
+
sheet_name_or_index
+
+Name or index of sheet to be unloaded. +
-- New in version 0.7.1 +
+

+
+
user_name [#]
+
+

What (if anything) is recorded as the name of the last user to save the file.

+
+
xf_list [#]
+
+

A list of XF class instances, each corresponding to an XF record. +
-- New in version 0.6.1 +

+
+

The Cell Class

+
+
Cell(ctype, value, xf_index=None) (class) [#]
+
+

Contains the data for one cell.

+ +

WARNING: You don't call this class yourself. You access Cell objects +via methods of the Sheet object(s) that you found in the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").

+

Cell objects have three attributes: ctype (an int), value +(whose type depends on ctype), and xf_index. +If "formatting_info" is not enabled when the workbook is opened, xf_index will be None. +The following table describes the types of cells and how their values +are represented in Python.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Type symbol | Type number | Python value
XL_CELL_EMPTY | 0 | empty string u''
XL_CELL_TEXT | 1 | a Unicode string
XL_CELL_NUMBER | 2 | float
XL_CELL_DATE | 3 | float
XL_CELL_BOOLEAN | 4 | int; 1 means TRUE, 0 means FALSE
XL_CELL_ERROR | 5 | int representing internal Excel codes; for a text representation, +refer to the supplied dictionary error_text_from_code
XL_CELL_BLANK | 6 | empty string u''. Note: this type will appear only when +open_workbook(..., formatting_info=True) is used.
+

+

+
+

The Colinfo Class

+
+
Colinfo (class) [#]
+
+

Width and default formatting information that applies to one or +more columns in a sheet. Derived from COLINFO records. + +

Here is the default hierarchy for width, according to the OOo docs: + +
"""In BIFF3, if a COLINFO record is missing for a column, +the width specified in the record DEFCOLWIDTH is used instead. + +
In BIFF4-BIFF7, the width set in this [COLINFO] record is only used, +if the corresponding bit for this column is cleared in the GCW +record, otherwise the column width set in the DEFCOLWIDTH record +is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]). + +
In BIFF8, if a COLINFO record is missing for a column, +the width specified in the record STANDARDWIDTH is used. +If this [STANDARDWIDTH] record is also missing, +the column width of the record DEFCOLWIDTH is used instead.""" +
+ +Footnote: The docs on the GCW record say this: +"""
+If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH +record. If a bit is cleared, the corresponding column uses the width set in the +COLINFO record for this column. +
If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if +the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH +record of the worksheet will be used instead. +
"""
+At the moment (2007-01-17) xlrd is going with the GCW version of the story. +Reference to the source may be useful: see the computed_column_width(colx) method +of the Sheet class. +
-- New in version 0.6.1 +

+
+
bit1_flag [#]
+
+

Value of a 1-bit flag whose purpose is unknown +but is often seen set to 1

+
+
collapsed [#]
+
+

1 = column is collapsed

+
+
hidden [#]
+
+

1 = column is hidden

+
+
outline_level [#]
+
+

Outline level of the column, in range(7). +(0 = no outline)

+
+
width [#]
+
+

Width of the column in 1/256 of the width of the zero character, +using default font (first FONT record in the file).

+
+
xf_index [#]
+
+

XF index to be used for formatting empty cells.

+
+
+

The EqNeAttrs Class

+
+
EqNeAttrs (class) [#]
+
+

This mixin class exists solely so that Format, Font, and XF.... objects +can be compared by value of their attributes.

+
+
+

The Font Class

+
+
Font (class) [#]
+
+

An Excel "font" contains the details of not only what is normally +considered a font, but also several other display attributes. +Items correspond to those in the Excel UI's Format/Cells/Font tab. +
-- New in version 0.6.1 +

+
bold [#]
+
+

1 = Characters are bold. Redundant; see "weight" attribute.

+
+
character_set [#]
+
+

Values: 0 = ANSI Latin, 1 = System default, 2 = Symbol, +77 = Apple Roman, +128 = ANSI Japanese Shift-JIS, +129 = ANSI Korean (Hangul), +130 = ANSI Korean (Johab), +134 = ANSI Chinese Simplified GBK, +136 = ANSI Chinese Traditional BIG5, +161 = ANSI Greek, +162 = ANSI Turkish, +163 = ANSI Vietnamese, +177 = ANSI Hebrew, +178 = ANSI Arabic, +186 = ANSI Baltic, +204 = ANSI Cyrillic, +222 = ANSI Thai, +238 = ANSI Latin II (Central European), +255 = OEM Latin I

+
+
colour_index [#]
+
+

An explanation of "colour index" is given in the Formatting +section at the start of this document.

+
+
escapement [#]
+
+

1 = Superscript, 2 = Subscript.

+
+
family [#]
+
+

0 = None (unknown or don't care)
+1 = Roman (variable width, serifed)
+2 = Swiss (variable width, sans-serifed)
+3 = Modern (fixed width, serifed or sans-serifed)
+4 = Script (cursive)
+5 = Decorative (specialised, for example Old English, Fraktur) +

+
font_index [#]
+
+

The 0-based index used to refer to this Font() instance. +Note that index 4 is never used; xlrd supplies a dummy place-holder.

+
+
height [#]
+
+

Height of the font (in twips). A twip = 1/20 of a point.

+
+
italic [#]
+
+

1 = Characters are italic.

+
+
name [#]
+
+

The name of the font. Example: u"Arial"

+
+
outline [#]
+
+

1 = Font is outline style (Macintosh only)

+
+
shadow [#]
+
+

1 = Font is shadow style (Macintosh only)

+
+
struck_out [#]
+
+

1 = Characters are struck out.

+
+
underline_type [#]
+
+

0 = None
+1 = Single; 0x21 (33) = Single accounting
+2 = Double; 0x22 (34) = Double accounting +

+
underlined [#]
+
+

1 = Characters are underlined. Redundant; see "underline_type" attribute.

+
+
weight [#]
+
+

Font weight (100-1000). Standard values are 400 for normal text +and 700 for bold text.

+
+
+

The Format Class

+
+
Format(format_key, ty, format_str) (class) [#]
+
+

"Number format" information from a FORMAT record. +
-- New in version 0.6.1 +

+
format_key [#]
+
+

The key into Book.format_map

+
+
format_str [#]
+
+

The format string

+
+
type [#]
+
+

A classification that has been inferred from the format string. +Currently, this is used only to distinguish between numbers and dates. +
Values: +
FUN = 0 # unknown +
FDT = 1 # date +
FNU = 2 # number +
FGE = 3 # general +
FTX = 4 # text +

+
+

The Name Class

+
+
Name (class) [#]
+
+

Information relating to a named reference, formula, macro, etc. +
-- New in version 0.6.0 +
-- Name information is not extracted from files older than +Excel 5.0 (Book.biff_version < 50) +

+
area2d(clipped=True) [#]
+
+

This is a convenience method for the use case where the name +refers to one rectangular area in one worksheet.

+
+
clipped
+
+If true (the default), the returned rectangle is clipped +to fit in (0, sheet.nrows, 0, sheet.ncols) -- it is guaranteed that +0 <= rowxlo <= rowxhi <= sheet.nrows and that the number of usable rows +in the area (which may be zero) is rowxhi - rowxlo; likewise for columns. +
+
Returns:
+
+a tuple (sheet_object, rowxlo, rowxhi, colxlo, colxhi).
+
Raises XLRDError:
+The name is not a constant absolute reference +to a single area in a single sheet.
+

+
+
binary [#]
+
+

0 = Formula definition; 1 = Binary data
No examples have been sighted. +

+
builtin [#]
+
+

0 = User-defined name; 1 = Built-in name +(common examples: Print_Area, Print_Titles; see OOo docs for full list)

+
+
cell() [#]
+
+

This is a convenience method for the frequent use case where the name +refers to a single cell.

+
+
Returns:
+
+An instance of the Cell class.
+
Raises XLRDError:
+The name is not a constant absolute reference +to a single cell.
+

+
+
complex [#]
+
+

0 = Simple formula; 1 = Complex formula (array formula or user defined)
+No examples have been sighted. +

+
func [#]
+
+

0 = Command macro; 1 = Function macro. Relevant only if macro == 1

+
+
funcgroup [#]
+
+

Function group. Relevant only if macro == 1; see OOo docs for values.

+
+
hidden [#]
+
+

0 = Visible; 1 = Hidden

+
+
macro [#]
+
+

0 = Standard name; 1 = Macro name

+
+
name [#]
+
+

A Unicode string. If builtin, decoded as per OOo docs.

+
+
name_index [#]
+
+

The index of this object in book.name_obj_list

+
+
raw_formula [#]
+
+

An 8-bit string.

+
+
result [#]
+
+

The result of evaluating the formula, if any. +If no formula, or evaluation of the formula encountered problems, +the result is None. Otherwise the result is a single instance of the +Operand class.

+
+
scope [#]
+
+

-1: The name is global (visible in all calculation sheets).
+-2: The name belongs to a macro sheet or VBA sheet.
+-3: The name is invalid.
+0 <= scope < book.nsheets: The name is local to the sheet whose index is scope. +

+
vbasic [#]
+
+

0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1

+
+
+

The Operand Class

+
+
Operand(akind=None, avalue=None, arank=0, atext='?') (class) [#]
+
+

Used in evaluating formulas. +The following table describes the kinds and how their values +are represented.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Kind symbol | Kind number | Value representation
oBOOL | 3 | integer: 0 => False; 1 => True
oERR | 4 | None, or an int error code (same as XL_CELL_ERROR in the Cell class). +
oMSNG | 5 | Used by Excel as a placeholder for a missing (not supplied) function +argument. Should *not* appear as a final formula result. Value is None.
oNUM | 2 | A float. Note that there is no way of distinguishing dates.
oREF | -1 | The value is either None or a non-empty list of +absolute Ref3D instances.
+
oREL | -2 | The value is None or a non-empty list of +fully or partially relative Ref3D instances. +
oSTRG | 1 | A Unicode string.
oUNK | 0 | The kind is unknown or ambiguous. The value is None
+

+

+
kind [#]
+
+

oUNK means that the kind of operand is not known unambiguously.

+
+
text [#]
+
+

The reconstituted text of the original formula. Function names will be +in English irrespective of the original language, which doesn't seem +to be recorded anywhere. The separator is ",", not ";" or whatever else +might be more appropriate for the end-user's locale; patches welcome.

+
+
value [#]
+
+

None means that the actual value of the operand is a variable +(depends on cell data), not a constant.

+
+
+

The Ref3D Class

+
+
Ref3D(atuple) (class) [#]
+
+

Represents an absolute or relative 3-dimensional reference to a box +of one or more cells.
+-- New in version 0.6.0 +

+ +

The coords attribute is a tuple of the form:
+(shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)
+where 0 <= thingxlo <= thingx < thingxhi.
+Note that it is quite possible to have thingx > nthings; for example +Print_Titles could have colxhi == 256 and/or rowxhi == 65536 +irrespective of how many columns/rows are actually used in the worksheet. +The caller will need to decide how to handle this situation. +Keyword: IndexError :-) +

+ +

The components of the coords attribute are also available as individual +attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.

+ +

The relflags attribute is a 6-tuple of flags which indicate whether +the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0).
+Note that there is necessarily no information available as to what cell(s) +the reference could possibly be relative to. The caller must decide what if +any use to make of oREL operands. Note also that a partially relative +reference may well be a typo. +For example, define name A1Z10 as $a$1:$z10 (missing $ after z) +while the cursor is on cell Sheet3!A27.
+The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26) +and relflags = (0, 0, 0, 1, 0, 0).
+So far, only one possibility of a sheet-relative component in +a reference has been noticed: a 2D reference located in the "current sheet". +
This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...). +

+
+

The Rowinfo Class

+
+
Rowinfo (class) [#]
+
+

Height and default formatting information that applies to a row in a sheet. +Derived from ROW records. +
-- New in version 0.6.1 +

+
additional_space_above [#]
+
+

This flag is set if the upper border of at least one cell in this row, +or the lower border of at least one cell in the row above, is +formatted with a thick line style. Thin and medium line styles are not +taken into account.

+
+
additional_space_below [#]
+
+

This flag is set if the lower border of at least one cell in this row, +or the upper border of at least one cell in the row below, is +formatted with a medium or thick line style. Thin line styles are not +taken into account.

+
+
has_default_height [#]
+
+

0 = Row has custom height; 1 = Row has default height

+
+
has_default_xf_index [#]
+
+

1 = the xf_index attribute is usable; 0 = ignore it

+
+
height [#]
+
+

Height of the row, in twips. One twip == 1/20 of a point

+
+
height_mismatch [#]
+
+

1 = Row height and default font height do not match

+
+
hidden [#]
+
+

1 = Row is hidden (manually, or by a filter or outline group)

+
+
outline_group_starts_ends [#]
+
+

1 = Outline group starts or ends here (depending on where the +outline buttons are located, see WSBOOL record [TODO ??]), +and is collapsed +

+
outline_level [#]
+
+

Outline level of the row

+
+
xf_index [#]
+
+

Index to default XF record for empty cells in this row. +Don't use this if has_default_xf_index == 0.

+
+
+

The Sheet Class

+
+
Sheet(book, position, name, number) (class) [#]
+
+

Contains the data for one worksheet.

+ +

In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a +column index, counting from zero. +Negative values for row/column indexes and slice positions are supported in the expected fashion.

+ +

For information about cell types and cell values, refer to the documentation of the Cell class.

+ +

WARNING: You don't call this class yourself. You access Sheet objects via the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").

+
+
cell(rowx, colx) [#]
+
+

Cell object in the given row and column.

+
+
cell_type(rowx, colx) [#]
+
+

Type of the cell in the given row and column. +Refer to the documentation of the Cell class.

+
+
cell_value(rowx, colx) [#]
+
+

Value of the cell in the given row and column.

+
+
cell_xf_index(rowx, colx) [#]
+
+

XF index of the cell in the given row and column. +This is an index into Book.xf_list. +
-- New in version 0.6.1 +

+
col(colx) [#]
+
+

Returns a sequence of the Cell objects in the given column.

+
+
col_label_ranges [#]
+
+

List of address ranges of cells containing column labels. +These are set up in Excel by Insert > Name > Labels > Columns. +
-- New in version 0.6.0 +
How to deconstruct the list: +

+for crange in thesheet.col_label_ranges:
+    rlo, rhi, clo, chi = crange
+    for rx in xrange(rlo, rhi):
+        for cx in xrange(clo, chi):
+            print "Column label at (rowx=%d, colx=%d) is %r" \
+                % (rx, cx, thesheet.cell_value(rx, cx))
+
+

+
col_slice(colx, start_rowx=0, end_rowx=None) [#]
+
+

Returns a slice of the Cell objects in the given column.

+
+
col_types(colx, start_rowx=0, end_rowx=None) [#]
+
+

Returns a slice of the types of the cells in the given column.

+
+
col_values(colx, start_rowx=0, end_rowx=None) [#]
+
+

Returns a slice of the values of the cells in the given column.

+
+
colinfo_map [#]
+
+

The map from a column index to a Colinfo object. Often there is an entry +in COLINFO records for all column indexes in range(257). +Note that xlrd ignores the entry for the non-existent +257th column. On the other hand, there may be no entry for unused columns. +
-- New in version 0.6.1 +

+
computed_column_width(colx) [#]
+
+

Determine column display width. +
-- New in version 0.6.1 +
+

+
colx
+
+Index of the queried column, range 0 to 255. +Note that it is possible to find out the width that will be used to display +columns with no cell information e.g. column IV (colx=255).
+
Returns:
+
+The column width that will be used for displaying +the given column by Excel, in units of 1/256th of the width of a +standard character (the digit zero in the first font).
+

+
+
default_additional_space_above [#]
+
+

Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +

+
default_additional_space_below [#]
+
+

Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +

+
default_row_height [#]
+
+

Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +

+
default_row_height_mismatch [#]
+
+

Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +

+
default_row_hidden [#]
+
+

Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +

+
defcolwidth [#]
+
+

Default column width from DEFCOLWIDTH record, else None. +From the OOo docs:
+"""Column width in characters, using the width of the zero character +from default font (first FONT record in the file). Excel adds some +extra space to the default width, depending on the default font and +default font size. The algorithm how to exactly calculate the resulting +column width is not known.
+Example: The default width of 8 set in this record results in a column +width of 8.43 using Arial font with a size of 10 points."""
+For the default hierarchy, refer to the Colinfo class above. +
-- New in version 0.6.1 +

+
gcw [#]
+
+

A 256-element tuple corresponding to the contents of the GCW record for this sheet. +If no such record, treat as all bits zero. +Applies to BIFF4-7 only. See docs of Colinfo class for discussion.

+
+
merged_cells [#]
+
+

List of address ranges of cells which have been merged. +These are set up in Excel by Format > Cells > Alignment, then ticking +the "Merge cells" box. +
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True) +
How to deconstruct the list: +

+for crange in thesheet.merged_cells:
+    rlo, rhi, clo, chi = crange
+    for rowx in xrange(rlo, rhi):
+        for colx in xrange(clo, chi):
+            # cell (rlo, clo) (the top left one) will carry the data
+            # and formatting info; the remainder will be recorded as
+            # blank cells, but a renderer will apply the formatting info
+            # for the top left cell (e.g. border, pattern) to all cells in
+            # the range.
+
+

+
name [#]
+
+

Name of sheet.

+
+
ncols [#]
+
+

Number of columns in sheet. A column index is in range(thesheet.ncols).

+
+
nrows [#]
+
+

Number of rows in sheet. A row index is in range(thesheet.nrows).

+
+
row(rowx) [#]
+
+

Returns a sequence of the Cell objects in the given row.

+
+
row_label_ranges [#]
+
+

List of address ranges of cells containing row labels. +For more details, see col_label_ranges above. +
-- New in version 0.6.0 +

+
row_slice(rowx, start_colx=0, end_colx=None) [#]
+
+

Returns a slice of the Cell objects in the given row.

+
+
row_types(rowx, start_colx=0, end_colx=None) [#]
+
+

Returns a slice of the types +of the cells in the given row.

+
+
row_values(rowx, start_colx=0, end_colx=None) [#]
+
+

Returns a slice of the values +of the cells in the given row.

+
+
rowinfo_map [#]
+
+

The map from a row index to a Rowinfo object. Note that it is possible +to have missing entries -- at least one source of XLS files doesn't +bother writing ROW records. +
-- New in version 0.6.1 +

+
standardwidth [#]
+
+

Default column width from STANDARDWIDTH record, else None. +From the OOo docs:
+"""Default width of the columns in 1/256 of the width of the zero +character, using default font (first FONT record in the file)."""
+For the default hierarchy, refer to the Colinfo class above. +
-- New in version 0.6.1 +

+
visibility [#]
+
+

Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden +by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden +only by VBA macro).

+
+
+

The XF Class

+
+
XF (class) [#]
+
+

eXtended Formatting information for cells, rows, columns and styles. +
-- New in version 0.6.1 + +

Each of the 6 flags below describes the validity of +a specific group of attributes. +
+In cell XFs, flag==0 means the attributes of the parent style XF are used, +(but only if the attributes are valid there); flag==1 means the attributes +of this XF are used.
+In style XFs, flag==0 means the attribute setting is valid; flag==1 means +the attribute should be ignored.
+Note that the API +provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs +have had the above inheritance mechanism applied. +

+
+
_alignment_flag [#]
+
+
+
_background_flag [#]
+
+
+
_border_flag [#]
+
+
+
_font_flag [#]
+
+
+
_format_flag [#]
+
+
+
_protection_flag [#]
+
+

  +

+
alignment [#]
+
+

An instance of an XFAlignment object.

+
+
background [#]
+
+

An instance of an XFBackground object.

+
+
border [#]
+
+

An instance of an XFBorder object.

+
+
font_index [#]
+
+

Index into Book.font_list

+
+
format_key [#]
+
+

Key into Book.format_map +

+Warning: OOo docs on the XF record call this "Index to FORMAT record". +It is not an index in the Python sense. It is a key to a map. +It is true only for Excel 4.0 and earlier files +that the key into format_map from an XF instance +is the same as the index into format_list, and only +if the index is less than 164. +

+
+
is_style [#]
+
+

0 = cell XF, 1 = style XF

+
+
parent_style_index [#]
+
+

cell XF: Index into Book.xf_list +of this XF's style XF
+style XF: 0xFFF +

+
protection [#]
+
+

An instance of an XFProtection object.

+
+
xf_index [#]
+
+

Index into Book.xf_list

+
+
+

The XFAlignment Class

+
+
XFAlignment (class) [#]
+
+

A collection of the alignment and similar attributes of an XF record. +Items correspond to those in the Excel UI's Format/Cells/Alignment tab. +
-- New in version 0.6.1 +

+
hor_align [#]
+
+

Values: section 6.115 (p 214) of OOo docs

+
+
indent_level [#]
+
+

A number in range(15).

+
+
rotation [#]
+
+

Values: section 6.115 (p 215) of OOo docs.
+Note: file versions BIFF7 and earlier use the documented +"orientation" attribute; this will be mapped (without loss) +into "rotation". +

+
shrink_to_fit [#]
+
+

1 = shrink font size to fit text into cell.

+
+
text_direction [#]
+
+

0 = according to context; 1 = left-to-right; 2 = right-to-left

+
+
text_wrapped [#]
+
+

1 = text is wrapped at right margin

+
+
vert_align [#]
+
+

Values: section 6.115 (p 215) of OOo docs

+
+
+

The XFBackground Class

+
+
XFBackground (class) [#]
+
+

A collection of the background-related attributes of an XF record. +Items correspond to those in the Excel UI's Format/Cells/Patterns tab. +An explanation of "colour index" is given in the Formatting +section at the start of this document. +
-- New in version 0.6.1 +

+
background_colour_index [#]
+
+

See section 3.11 of the OOo docs.

+
+
fill_pattern [#]
+
+

See section 3.11 of the OOo docs.

+
+
pattern_colour_index [#]
+
+

See section 3.11 of the OOo docs.

+
+
+

The XFBorder Class

+
+
XFBorder (class) [#]
+
+

A collection of the border-related attributes of an XF record. +Items correspond to those in the Excel UI's Format/Cells/Border tab.

+

An explanation of "colour index" is given in the Formatting +section at the start of this document. +There are five line style attributes; possible values and the +associated meanings are: +0 = No line, +1 = Thin, +2 = Medium, +3 = Dashed, +4 = Dotted, +5 = Thick, +6 = Double, +7 = Hair, +8 = Medium dashed, +9 = Thin dash-dotted, +10 = Medium dash-dotted, +11 = Thin dash-dot-dotted, +12 = Medium dash-dot-dotted, +13 = Slanted medium dash-dotted. +The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only. +For pictures of the line styles, refer to OOo docs s3.10 (p22) +"Line Styles for Cell Borders (BIFF3-BIFF8)".

+
-- New in version 0.6.1 +
+
bottom_colour_index [#]
+
+

The colour index for the cell's bottom line

+
+
bottom_line_style [#]
+
+

The line style for the cell's bottom line

+
+
diag_colour_index [#]
+
+

The colour index for the cell's diagonal lines, if any

+
+
diag_down [#]
+
+

1 = draw a diagonal from top left to bottom right

+
+
diag_line_style [#]
+
+

The line style for the cell's diagonal lines, if any

+
+
diag_up [#]
+
+

1 = draw a diagonal from bottom left to top right

+
+
left_colour_index [#]
+
+

The colour index for the cell's left line

+
+
left_line_style [#]
+
+

The line style for the cell's left line

+
+
right_colour_index [#]
+
+

The colour index for the cell's right line

+
+
right_line_style [#]
+
+

The line style for the cell's right line

+
+
top_colour_index [#]
+
+

The colour index for the cell's top line

+
+
top_line_style [#]
+
+

The line style for the cell's top line

+
+
+

The XFProtection Class

+
+
XFProtection (class) [#]
+
+

A collection of the protection-related attributes of an XF record. +Items correspond to those in the Excel UI's Format/Cells/Protection tab. +Note the OOo docs include the "cell or style" bit +in this bundle of attributes. +This is incorrect; the bit is used in determining which bundles to use. +
-- New in version 0.6.1 +

+
cell_locked [#]
+
+

1 = Cell is prevented from being changed, moved, resized, or deleted +(only if the sheet is protected).

+
+
formula_hidden [#]
+
+

1 = Hide formula so that it doesn't appear in the formula bar when +the cell is selected (only if the sheet is protected).

+
+
+ diff --git a/tablib/packages/xlrd/examples/namesdemo.xls b/tablib/packages/xlrd/examples/namesdemo.xls new file mode 100644 index 0000000000000000000000000000000000000000..8a1686537b578e1a170339f5af03bff459fd9ce7 GIT binary patch literal 22528 zcmeHPYiwLc6+U+#-t{ZqI1jIr*xuMl6X#XiN$Dd^vTo{72RpIimIji_-P$+tsBY!zr*Ih*IV}NsB6Jd|$m@vvWn31B^ylpHVS6mG zGc!Dv=uHg{r^gbPE!?Csq%6uX1;g87Ma2RvC?R-O>14+I5hNjJ$&O>U+&-I=^DWqMN$P1{o@Wo>^c#(U0 zN2xK?B2j6aNm0T@$*#c+&?Yj#%GV@Rl7aO`mSWMtHm`36ZeV?KV0~j~owUj--dH5t z@3|&?2K^RUC)dra2m+MUWgx}swrwj`xJlN^)t(C7yPf*oTeaHaeWzq0`&|B0NN8@7 zsRZFP2ZjIJMV%i`+ntSmE;zZ)M*mTUTasY&oZ#wnt9Dj_rJeq^}#ifbpuu5YHJKI0o-nN-Hz6qlGd|aA|D6cSRrm3N!vB}Yi zT(*L)%vmri10O3N!yjf@P8WCl|s1KHHrL3^ULNv;n`6)^Qlv2e)-$DC_6 zH5Qi@$fUw6&3Fd&WbkO&90sZ!u^v*lkgh7q)P&Cd?cLo2{kd#<>|kA3QWJyebR3fk zLIs{-Zn!Cv%VkD;(qndOHKnNoqGdUfnz%HOs1i>&H*}4T=Z-im1(j;sq8{P84v*Vo z6ZQnO#h-;1#bpt14^e+k*pZq%+yI0MQ>A@sFx_K0J3CA}p9?T%FI$>2|dPX&J({28VY9bjS42mx`)(3n5V z06UaTr5mhS#^-13RCeIj3{`JI_0`&$S5Xd6_gKq9TSJ|p8$unSwvfk^PvV{OBs9|e zOH1CDdxWzHBaC8aY7FTP`!1xtsVvgYai+bgBS?2n61#J9gz(9ONc-(^q`L=mNOxrp zA-&li#NzUbI_Y3=f0MJc zd25f&cx$f=m^T-i2pMIOoJ6eFd8-QP5S}a4(|n|CX;j{S?Cn4F-qp8lK;df?zE;!w z6p{yx8&={8Nc;a1FegK-iz0nlslTG>Big+MwH^zPsbV;_&0I6T4Yc4Im?y5JW@3v5 zVhXWPhX$(#kGLj<)i|uWl5E?q_sgY)?CWFiRJ;nJYZd(~s|rt!N;LxlwgzhDgkqAK znxp|r2vR?+wMsShCRl`($=tg2JYjI>?_lm9F1^OyDe6@lH=UeE4dz|J40p@A5H%PR zBk!e%53LfZ!wbs?r;1*A4zv6%(uX;?S2A)JmSrPiqtvHMv@yv6GA0KV(m$<)nHN?l z5dO3Lt**b253=?DsK`Wzkk?>|$N{o9z*)Qpd~^8O;Qapw%rhF8>oeK~IB+-X+tdB& z*W29ErOWl${~Kj~WK{J|2k}Q^mlt8)!=$e|&_K)*T}kG6G5K-K2DR zUaCAH`!Jtm5N)~RQZe=UTrnE(`~QmRs>>p1aqIW8u=kmM$ThGD^7T0~y3envsVRlH zxr5e>G>k*jz`V2gnFc7nSmJh{iC!45-TGX3zwE?Y?|^i;06+hjY2Zd|!cJmMl95py zM{%t)C|f^fI`Er&F&c2^5O-cGz1M}ixo)*MH+iqU_qlL4_u~BPkGBQ_upMlw_`%nJ zuYt>`0lr`Q!|XCD*k^skXn=c$^ttQWyS=MtL)YFeM{X8rW8PleQ8QT zl-P@=6hevpXG&p|cuHeRWhk*9O(}vBd(f1kC`G1AjAX?8BHMRx4C zdW;paV`mt4Q&+zXc3=;7>XuZW6ytGp6VCZ59ev?u(keUlype9HI!n+l+3)m|!7bE& z0!4NX2ct1tgN6`9)2@i-xOCh0R#He#G>0b9%nkZf;Z2}HPBcd;(afjsepX0MG$#e3 
z7dbtJV*I`&eY+G*n=FIx+#-w-ujGp*P>4yLB^}fQd|Q}RlMzQd>q$tD#Wtc|*26&+ z2}Egd>WNkc=Q1!+(sd3gWGF<$ae0aa)S14$V>)w5c}fpqGFV(A0iOrHx=^+U-^fAqr7#H`-iDZk)B)-adyLDknEOeMJJ* zXs^egrwkr%ai`Jyr|AQQ1u}~dSHyUHw0CTv1~y!K-SyN$yrd%_pwU9Rc0{?`5e;-- zHlBBqtbHSCp&U31r)Jn*=U-G0bO@*9IY$ymM#*Vrv(>#6e%@4u3FArHh$@a=g0Eta zwGRsZg>?c}Jr7=59T*bLn4_9yxBi+?HotB-!HmjxWsevSlzMbkIStKtpl~jdBoim7 z2MQ;BPV!6egOjpF?_y4G2Fl}7dSG3gO9qGGaGS`Zh##Cw#<^ZUrWGQ^7a6f;dZ$oQ zw!Zk{;`RFZLV$CYF~hDT9ekR#d>&7h2>PB3BlraZGu5W|`pN7Xpqp^mQfr+-z`i+k z^r3g&+dUk6{Qjt{UG>y!Fe~{XA}1c=lZbrPxCN0<+Wm+eZR3c1CHN{LA7I}?rD@4GZUEuxkT6DL3>2&&fkAD_1ufkSxMmK=`G*evg~=w zwLOCT;A_CwfUf~x1HJ})4fq=HHQ;N&*MP49Ujx1dd<_^4oNxUfc;&@c9^9}r_Q1EX z{$KarajyTXuz=?VF4zBkh}=Khi^x4nuKT$?d>D~y_BF3+dd|ENH?uD8O_(pPik~)uwGvL0=Ee5SYG_*wg}y5X!R>wFSHMzG3N$4vvx{% zvX$?~ZnwktJi4<+<-VuN(IO@Z@+uK`~Jz6N{^_!{sv;A_CwfUf~x z1HJ})4O|Ht;5wdbaGslU#Yev;E{C~}=9xa%`t-)&0+DNWt_EE%GOpL@WyCXouD$7! zMTbeQ^|>EFS0es5C_P-3A}&Lua}5`qO^69Z;+v7TAg(}MiFg&_D#X=@#GgTA?uSdS zfj#(?FawvVgmmFA4s!UblOxdjg@?uP9&x3j9VMbnMW!^B9VTMJKU&Hej`_hHq zj@q+wH~z?la@_5LFF42le@@p=IWF|ZH`)wkSo-ln<*0s6$+mYx3kOst_XA7*^~2m@ pHR|UW;`q7H`svB0`%uWjax-chR2gjTfs2y||808F?O*Px|9_m~J%<1Q literal 0 HcmV?d00001 diff --git a/tablib/packages/xlrd/examples/xlrdnameAPIdemo.py b/tablib/packages/xlrd/examples/xlrdnameAPIdemo.py new file mode 100644 index 0000000..6cd60df --- /dev/null +++ b/tablib/packages/xlrd/examples/xlrdnameAPIdemo.py @@ -0,0 +1,178 @@ +# -*- coding: cp1252 -*- + +## +# Module/script example of the xlrd API for extracting information +# about named references, named constants, etc. +# +#

Copyright © 2006 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under a BSD-style licence.

+## + +import xlrd +import sys +import glob + +def scope_as_string(book, scope): + if 0 <= scope < book.nsheets: + return "sheet #%d (%r)" % (scope, book.sheet_names()[scope]) + if scope == -1: + return "Global" + if scope == -2: + return "Macro/VBA" + return "Unknown scope value (%r)" % scope + +def do_scope_query(book, scope_strg, show_contents=0, f=sys.stdout): + try: + qscope = int(scope_strg) + except ValueError: + if scope_strg == "*": + qscope = None # means "all' + else: + # so assume it's a sheet name ... + qscope = book.sheet_names().index(scope_strg) + print >> f, "%r => %d" % (scope_strg, qscope) + for nobj in book.name_obj_list: + if qscope is None or nobj.scope == qscope: + show_name_object(book, nobj, show_contents, f) + +def show_name_details(book, name, show_contents=0, f=sys.stdout): + """ + book -- Book object obtained from xlrd.open_workbook(). + name -- The name that's being investigated. + show_contents -- 0: Don't; 1: Non-empty cells only; 2: All cells + f -- Open output file handle. + """ + name_lcase = name.lower() # Excel names are case-insensitive. + nobj_list = book.name_map.get(name_lcase) + if not nobj_list: + print >> f, "%r: unknown name" % name + return + for nobj in nobj_list: + show_name_object(book, nobj, show_contents, f) + +def show_name_details_in_scope( + book, name, scope_strg, show_contents=0, f=sys.stdout, + ): + try: + scope = int(scope_strg) + except ValueError: + # so assume it's a sheet name ... + scope = book.sheet_names().index(scope_strg) + print >> f, "%r => %d" % (scope_strg, scope) + name_lcase = name.lower() # Excel names are case-insensitive. 
+ while 1: + nobj = book.name_and_scope_map.get((name_lcase, scope)) + if nobj: + break + print >> f, "Name %r not found in scope %d" % (name, scope) + if scope == -1: + return + scope = -1 # Try again with global scope + print >> f, "Name %r found in scope %d" % (name, scope) + show_name_object(book, nobj, show_contents, f) + +def showable_cell_value(celltype, cellvalue, datemode): + if celltype == xlrd.XL_CELL_DATE: + try: + showval = xlrd.xldate_as_tuple(cellvalue, datemode) + except xlrd.XLDateError: + e1, e2 = sys.exc_info()[:2] + showval = "%s:%s" % (e1.__name__, e2) + elif celltype == xlrd.XL_CELL_ERROR: + showval = xlrd.error_text_from_code.get( + cellvalue, '' % cellvalue) + else: + showval = cellvalue + return showval + +def show_name_object(book, nobj, show_contents=0, f=sys.stdout): + print >> f, "\nName: %r, scope: %r (%s)" \ + % (nobj.name, nobj.scope, scope_as_string(book, nobj.scope)) + res = nobj.result + print >> f, "Formula eval result: %r" % res + if res is None: + return + # result should be an instance of the Operand class + kind = res.kind + value = res.value + if kind >= 0: + # A scalar, or unknown ... you've seen all there is to see. 
+ pass + elif kind == xlrd.oREL: + # A list of Ref3D objects representing *relative* ranges + for i in xrange(len(value)): + ref3d = value[i] + print >> f, "Range %d: %r ==> %s"% (i, ref3d.coords, xlrd.rangename3drel(book, ref3d)) + elif kind == xlrd.oREF: + # A list of Ref3D objects + for i in xrange(len(value)): + ref3d = value[i] + print >> f, "Range %d: %r ==> %s"% (i, ref3d.coords, xlrd.rangename3d(book, ref3d)) + if not show_contents: + continue + datemode = book.datemode + for shx in xrange(ref3d.shtxlo, ref3d.shtxhi): + sh = book.sheet_by_index(shx) + print >> f, " Sheet #%d (%s)" % (shx, sh.name) + rowlim = min(ref3d.rowxhi, sh.nrows) + collim = min(ref3d.colxhi, sh.ncols) + for rowx in xrange(ref3d.rowxlo, rowlim): + for colx in xrange(ref3d.colxlo, collim): + cty = sh.cell_type(rowx, colx) + if cty == xlrd.XL_CELL_EMPTY and show_contents == 1: + continue + cval = sh.cell_value(rowx, colx) + sval = showable_cell_value(cty, cval, datemode) + print >> f, " (%3d,%3d) %-5s: %r" \ + % (rowx, colx, xlrd.cellname(rowx, colx), sval) + +if __name__ == "__main__": + def usage(): + text = """ +usage: xlrdnameAIPdemo.py glob_pattern name scope show_contents + +where: + "glob_pattern" designates a set of files + "name" is a name or '*' (all names) + "scope" is -1 (global) or a sheet number + or a sheet name or * (all scopes) + "show_contents" is one of 0 (no show), + 1 (only non-empty cells), or 2 (all cells) + +Examples (script name and glob_pattern arg omitted for brevity) + [Searching through book.name_obj_list] + * * 0 lists all names + * * 1 lists all names, showing referenced non-empty cells + * 1 0 lists all names local to the 2nd sheet + * Northern 0 lists all names local to the 'Northern' sheet + * -1 0 lists all names with global scope + [Initial direct access through book.name_map] + Sales * 0 lists all occurrences of "Sales" in any scope + [Direct access through book.name_and_scope_map] + Revenue -1 0 checks if "Revenue" exists in global scope + +""" + 
sys.stdout.write(text) + + if len(sys.argv) != 5: + usage() + sys.exit(0) + arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls" + arg_name = sys.argv[2] # see below + arg_scope = sys.argv[3] # see below + arg_show_contents = int(sys.argv[4]) # 0: no show, 1: only non-empty cells, + # 2: all cells + for fname in glob.glob(arg_pattern): + book = xlrd.open_workbook(fname) + if arg_name == "*": + # Examine book.name_obj_list to find all names + # in a given scope ("*" => all scopes) + do_scope_query(book, arg_scope, arg_show_contents) + elif arg_scope == "*": + # Using book.name_map to find all usage of a name. + show_name_details(book, arg_name, arg_show_contents) + else: + # Using book.name_and_scope_map to find which if any instances + # of a name are visible in the given scope, which can be supplied + # as -1 (global) or a sheet number or a sheet name. + show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents) diff --git a/tablib/packages/xlrd/formatting.py b/tablib/packages/xlrd/formatting.py new file mode 100644 index 0000000..302764b --- /dev/null +++ b/tablib/packages/xlrd/formatting.py @@ -0,0 +1,1256 @@ +# -*- coding:cp1252 -*- + +## +# Module for formatting information. +# +#

Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd

+#

Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under +# a BSD-style licence.

+## + +# No part of the content of this file was derived from the works of David Giffin. + +# 2009-05-31 SJM Fixed problem with non-zero reserved bits in some STYLE records in Mac Excel files +# 2008-08-03 SJM Ignore PALETTE record when Book.formatting_info is false +# 2008-08-03 SJM Tolerate up to 4 bytes trailing junk on PALETTE record +# 2008-05-10 SJM Do some XF checks only when Book.formatting_info is true +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-03 SJM Another tweak to is_date_format_string() +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-10-13 SJM Warning: style XF whose parent XF index != 0xFFF +# 2007-09-08 SJM Work around corrupt STYLE record +# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file + +DEBUG = 0 +import copy, re +from timemachine import * +from biffh import BaseObject, unpack_unicode, unpack_string, \ + upkbits, upkbitsL, fprintf, \ + FUN, FDT, FNU, FGE, FTX, XL_CELL_NUMBER, XL_CELL_DATE, \ + XL_FORMAT, XL_FORMAT2, \ + XLRDError +from struct import unpack + +excel_default_palette_b5 = ( + ( 0, 0, 0), (255, 255, 255), (255, 0, 0), ( 0, 255, 0), + ( 0, 0, 255), (255, 255, 0), (255, 0, 255), ( 0, 255, 255), + (128, 0, 0), ( 0, 128, 0), ( 0, 0, 128), (128, 128, 0), + (128, 0, 128), ( 0, 128, 128), (192, 192, 192), (128, 128, 128), + (153, 153, 255), (153, 51, 102), (255, 255, 204), (204, 255, 255), + (102, 0, 102), (255, 128, 128), ( 0, 102, 204), (204, 204, 255), + ( 0, 0, 128), (255, 0, 255), (255, 255, 0), ( 0, 255, 255), + (128, 0, 128), (128, 0, 0), ( 0, 128, 128), ( 0, 0, 255), + ( 0, 204, 255), (204, 255, 255), (204, 255, 204), (255, 255, 153), + (153, 204, 255), (255, 153, 204), (204, 153, 255), (227, 227, 227), + ( 51, 102, 255), ( 51, 204, 204), (153, 204, 0), (255, 204, 0), + (255, 153, 0), (255, 102, 0), (102, 102, 153), (150, 150, 150), + ( 0, 51, 102), ( 51, 153, 102), ( 0, 51, 0), ( 51, 51, 0), + (153, 51, 0), (153, 51, 102), ( 51, 51, 153), ( 51, 51, 51), + ) + 
+excel_default_palette_b2 = excel_default_palette_b5[:16] + +# Following two tables borrowed from Gnumeric 1.4 source. +excel_default_palette_b5_gnumeric_14 = ( + #### dodgy; didn't match Excel results + ( 0, 0, 0), (255,255,255), (255, 0, 0), ( 0,255, 0), + ( 0, 0,255), (255,255, 0), (255, 0,255), ( 0,255,255), + (128, 0, 0), ( 0,128, 0), ( 0, 0,128), (128,128, 0), + (128, 0,128), ( 0,128,128), (192,192,192), (128,128,128), + (128,128,255), (128, 32, 96), (255,255,192), (160,224,224), + ( 96, 0,128), (255,128,128), ( 0,128,192), (192,192,255), + ( 0, 0,128), (255, 0,255), (255,255, 0), ( 0,255,255), + (128, 0,128), (128, 0, 0), ( 0,128,128), ( 0, 0,255), + ( 0,204,255), (105,255,255), (204,255,204), (255,255,153), + (166,202,240), (204,156,204), (204,153,255), (227,227,227), + ( 51,102,255), ( 51,204,204), ( 51,153, 51), (153,153, 51), + (153,102, 51), (153,102,102), (102,102,153), (150,150,150), + ( 51, 51,204), ( 51,102,102), ( 0, 51, 0), ( 51, 51, 0), + (102, 51, 0), (153, 51,102), ( 51, 51,153), ( 66, 66, 66), + ) +excel_default_palette_b8 = ( # (red, green, blue) + ( 0, 0, 0), (255,255,255), (255, 0, 0), ( 0,255, 0), + ( 0, 0,255), (255,255, 0), (255, 0,255), ( 0,255,255), + (128, 0, 0), ( 0,128, 0), ( 0, 0,128), (128,128, 0), + (128, 0,128), ( 0,128,128), (192,192,192), (128,128,128), + (153,153,255), (153, 51,102), (255,255,204), (204,255,255), + (102, 0,102), (255,128,128), ( 0,102,204), (204,204,255), + ( 0, 0,128), (255, 0,255), (255,255, 0), ( 0,255,255), + (128, 0,128), (128, 0, 0), ( 0,128,128), ( 0, 0,255), + ( 0,204,255), (204,255,255), (204,255,204), (255,255,153), + (153,204,255), (255,153,204), (204,153,255), (255,204,153), + ( 51,102,255), ( 51,204,204), (153,204, 0), (255,204, 0), + (255,153, 0), (255,102, 0), (102,102,153), (150,150,150), + ( 0, 51,102), ( 51,153,102), ( 0, 51, 0), ( 51, 51, 0), + (153, 51, 0), (153, 51,102), ( 51, 51,153), ( 51, 51, 51), + ) + +default_palette = { + 80: excel_default_palette_b8, + 70: 
excel_default_palette_b5, + 50: excel_default_palette_b5, + 45: excel_default_palette_b2, + 40: excel_default_palette_b2, + 30: excel_default_palette_b2, + 21: excel_default_palette_b2, + 20: excel_default_palette_b2, + } + +""" +00H = Normal +01H = RowLevel_lv (see next field) +02H = ColLevel_lv (see next field) +03H = Comma +04H = Currency +05H = Percent +06H = Comma [0] (BIFF4-BIFF8) +07H = Currency [0] (BIFF4-BIFF8) +08H = Hyperlink (BIFF8) +09H = Followed Hyperlink (BIFF8) +""" +built_in_style_names = [ + "Normal", + "RowLevel_", + "ColLevel_", + "Comma", + "Currency", + "Percent", + "Comma [0]", + "Currency [0]", + "Hyperlink", + "Followed Hyperlink", + ] + +def initialise_colour_map(book): + book.colour_map = {} + book.colour_indexes_used = {} + if not book.formatting_info: + return + # Add the 8 invariant colours + for i in xrange(8): + book.colour_map[i] = excel_default_palette_b8[i] + # Add the default palette depending on the version + dpal = default_palette[book.biff_version] + ndpal = len(dpal) + for i in xrange(ndpal): + book.colour_map[i+8] = dpal[i] + # Add the specials -- None means the RGB value is not known + # System window text colour for border lines + book.colour_map[ndpal+8] = None + # System window background colour for pattern background + book.colour_map[ndpal+8+1] = None # + for ci in ( + 0x51, # System ToolTip text colour (used in note objects) + 0x7FFF, # 32767, system window text colour for fonts + ): + book.colour_map[ci] = None + +def nearest_colour_index(colour_map, rgb, debug=0): + # General purpose function. Uses Euclidean distance. + # So far used only for pre-BIFF8 WINDOW2 record. + # Doesn't have to be fast. + # Doesn't have to be fancy. 
+ best_metric = 3 * 256 * 256 + best_colourx = 0 + for colourx, cand_rgb in colour_map.items(): + if cand_rgb is None: + continue + metric = 0 + for v1, v2 in zip(rgb, cand_rgb): + metric += (v1 - v2) * (v1 - v2) + if metric < best_metric: + best_metric = metric + best_colourx = colourx + if metric == 0: + break + if debug: + print "nearest_colour_index for %r is %r -> %r; best_metric is %d" \ + % (rgb, best_colourx, colour_map[best_colourx], best_metric) + return best_colourx + +## +# This mixin class exists solely so that Format, Font, and XF.... objects +# can be compared by value of their attributes. +class EqNeAttrs(object): + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + def __ne__(self, other): + return self.__dict__ != other.__dict__ + +## +# An Excel "font" contains the details of not only what is normally +# considered a font, but also several other display attributes. +# Items correspond to those in the Excel UI's Format/Cells/Font tab. +#
-- New in version 0.6.1 +class Font(BaseObject, EqNeAttrs): + ## + # 1 = Characters are bold. Redundant; see "weight" attribute. + bold = 0 + ## + # Values: 0 = ANSI Latin, 1 = System default, 2 = Symbol, + # 77 = Apple Roman, + # 128 = ANSI Japanese Shift-JIS, + # 129 = ANSI Korean (Hangul), + # 130 = ANSI Korean (Johab), + # 134 = ANSI Chinese Simplified GBK, + # 136 = ANSI Chinese Traditional BIG5, + # 161 = ANSI Greek, + # 162 = ANSI Turkish, + # 163 = ANSI Vietnamese, + # 177 = ANSI Hebrew, + # 178 = ANSI Arabic, + # 186 = ANSI Baltic, + # 204 = ANSI Cyrillic, + # 222 = ANSI Thai, + # 238 = ANSI Latin II (Central European), + # 255 = OEM Latin I + character_set = 0 + ## + # An explanation of "colour index" is given in the Formatting + # section at the start of this document. + colour_index = 0 + ## + # 1 = Superscript, 2 = Subscript. + escapement = 0 + ## + # 0 = None (unknown or don't care)
+ # 1 = Roman (variable width, serifed)
+ # 2 = Swiss (variable width, sans-serifed)
+ # 3 = Modern (fixed width, serifed or sans-serifed)
+ # 4 = Script (cursive)
+ # 5 = Decorative (specialised, for example Old English, Fraktur) + family = 0 + ## + # The 0-based index used to refer to this Font() instance. + # Note that index 4 is never used; xlrd supplies a dummy place-holder. + font_index = 0 + ## + # Height of the font (in twips). A twip = 1/20 of a point. + height = 0 + ## + # 1 = Characters are italic. + italic = 0 + ## + # The name of the font. Example: u"Arial" + name = u"" + ## + # 1 = Characters are struck out. + struck_out = 0 + ## + # 0 = None
+ # 1 = Single; 0x21 (33) = Single accounting
+ # 2 = Double; 0x22 (34) = Double accounting + underline_type = 0 + ## + # 1 = Characters are underlined. Redundant; see "underline_type" attribute. + underlined = 0 + ## + # Font weight (100-1000). Standard values are 400 for normal text + # and 700 for bold text. + weight = 400 + ## + # 1 = Font is outline style (Macintosh only) + outline = 0 + ## + # 1 = Font is shadow style (Macintosh only) + shadow = 0 + + # No methods ... + +def handle_efont(book, data): # BIFF2 only + if not book.formatting_info: + return + book.font_list[-1].colour_index = unpack('= 2 + bv = book.biff_version + k = len(book.font_list) + if k == 4: + f = Font() + f.name = u'Dummy Font' + f.font_index = k + book.font_list.append(f) + k += 1 + f = Font() + f.font_index = k + book.font_list.append(f) + if bv >= 50: + ( + f.height, option_flags, f.colour_index, f.weight, + f.escapement_type, f.underline_type, f.family, + f.character_set, + ) = unpack('> 1 + f.underlined = (option_flags & 4) >> 2 + f.struck_out = (option_flags & 8) >> 3 + f.outline = (option_flags & 16) >> 4 + f.shadow = (option_flags & 32) >> 5 + if bv >= 80: + f.name = unpack_unicode(data, 14, lenlen=1) + else: + f.name = unpack_string(data, 14, book.encoding, lenlen=1) + elif bv >= 30: + f.height, option_flags, f.colour_index = unpack('> 1 + f.underlined = (option_flags & 4) >> 2 + f.struck_out = (option_flags & 8) >> 3 + f.outline = (option_flags & 16) >> 4 + f.shadow = (option_flags & 32) >> 5 + f.name = unpack_string(data, 6, book.encoding, lenlen=1) + # Now cook up the remaining attributes ... 
+ f.weight = [400, 700][f.bold] + f.escapement_type = 0 # None + f.underline_type = f.underlined # None or Single + f.family = 0 # Unknown / don't care + f.character_set = 1 # System default (0 means "ANSI Latin") + else: # BIFF2 + f.height, option_flags = unpack('> 1 + f.underlined = (option_flags & 4) >> 2 + f.struck_out = (option_flags & 8) >> 3 + f.outline = 0 + f.shadow = 0 + f.name = unpack_string(data, 4, book.encoding, lenlen=1) + # Now cook up the remaining attributes ... + f.weight = [400, 700][f.bold] + f.escapement_type = 0 # None + f.underline_type = f.underlined # None or Single + f.family = 0 # Unknown / don't care + f.character_set = 1 # System default (0 means "ANSI Latin") + if blah: + f.dump( + book.logfile, + header="--- handle_font: font[%d] ---" % f.font_index, + footer="-------------------", + ) + +# === "Number formats" === + +## +# "Number format" information from a FORMAT record. +#
-- New in version 0.6.1 +class Format(BaseObject, EqNeAttrs): + ## + # The key into Book.format_map + format_key = 0 + ## + # A classification that has been inferred from the format string. + # Currently, this is used only to distinguish between numbers and dates. + #
Values: + #
FUN = 0 # unknown + #
FDT = 1 # date + #
FNU = 2 # number + #
FGE = 3 # general + #
FTX = 4 # text + type = FUN + ## + # The format string + format_str = u'' + + def __init__(self, format_key, ty, format_str): + self.format_key = format_key + self.type = ty + self.format_str = format_str + +std_format_strings = { + # "std" == "standard for US English locale" + # #### TODO ... a lot of work to tailor these to the user's locale. + # See e.g. gnumeric-1.x.y/src/formats.c + 0x00: "General", + 0x01: "0", + 0x02: "0.00", + 0x03: "#,##0", + 0x04: "#,##0.00", + 0x05: "$#,##0_);($#,##0)", + 0x06: "$#,##0_);[Red]($#,##0)", + 0x07: "$#,##0.00_);($#,##0.00)", + 0x08: "$#,##0.00_);[Red]($#,##0.00)", + 0x09: "0%", + 0x0a: "0.00%", + 0x0b: "0.00E+00", + 0x0c: "# ?/?", + 0x0d: "# ??/??", + 0x0e: "m/d/yy", + 0x0f: "d-mmm-yy", + 0x10: "d-mmm", + 0x11: "mmm-yy", + 0x12: "h:mm AM/PM", + 0x13: "h:mm:ss AM/PM", + 0x14: "h:mm", + 0x15: "h:mm:ss", + 0x16: "m/d/yy h:mm", + 0x25: "#,##0_);(#,##0)", + 0x26: "#,##0_);[Red](#,##0)", + 0x27: "#,##0.00_);(#,##0.00)", + 0x28: "#,##0.00_);[Red](#,##0.00)", + 0x29: "_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)", + 0x2a: "_($* #,##0_);_($* (#,##0);_($* \"-\"_);_(@_)", + 0x2b: "_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)", + 0x2c: "_($* #,##0.00_);_($* (#,##0.00);_($* \"-\"??_);_(@_)", + 0x2d: "mm:ss", + 0x2e: "[h]:mm:ss", + 0x2f: "mm:ss.0", + 0x30: "##0.0E+0", + 0x31: "@", + } + +fmt_code_ranges = [ # both-inclusive ranges of "standard" format codes + # Source: the openoffice.org doc't + ( 0, 0, FGE), + ( 1, 13, FNU), + (14, 22, FDT), + #### (27, 36, FDT), # Japanese dates -- not sure of reliability of this + (37, 44, FNU), + (45, 47, FDT), + (48, 48, FNU), + (49, 49, FTX), + ####(50, 58, FDT), # Japanese dates -- but Gnumeric assumes + # built-in formats finish at 49, not at 163 + ] + +std_format_code_types = {} +for lo, hi, ty in fmt_code_ranges: + for x in xrange(lo, hi+1): + std_format_code_types[x] = ty +del lo, hi, ty, x + +date_chars = u'ymdhs' # year, month/minute, day, hour, second +date_char_dict = {} +for _c in 
date_chars + date_chars.upper(): + date_char_dict[_c] = 5 +del _c, date_chars + +skip_char_dict = {} +for _c in u'$-+/(): ': + skip_char_dict[_c] = 1 + +num_char_dict = { + u'0': 5, + u'#': 5, + u'?': 5, + } + +non_date_formats = { + u'0.00E+00':1, + u'##0.0E+0':1, + u'General' :1, + u'GENERAL' :1, # OOo Calc 1.1.4 does this. + u'general' :1, # pyExcelerator 0.6.3 does this. + u'@' :1, + } + +fmt_bracketed_sub = re.compile(r'\[[^]]*\]').sub + +# Boolean format strings (actual cases) +# u'"Yes";"Yes";"No"' +# u'"True";"True";"False"' +# u'"On";"On";"Off"' + +def is_date_format_string(book, fmt): + # Heuristics: + # Ignore "text" and [stuff in square brackets (aarrgghh -- see below)]. + # Handle backslashed-escaped chars properly. + # E.g. hh\hmm\mss\s should produce a display like 23h59m59s + # Date formats have one or more of ymdhs (caseless) in them. + # Numeric formats have # and 0. + # N.B. u'General"."' hence get rid of "text" first. + # TODO: Find where formats are interpreted in Gnumeric + # TODO: u'[h]\\ \\h\\o\\u\\r\\s' ([h] means don't care about hours > 23) + state = 0 + s = '' + ignorable = skip_char_dict.has_key + for c in fmt: + if state == 0: + if c == u'"': + state = 1 + elif c in ur"\_*": + state = 2 + elif ignorable(c): + pass + else: + s += c + elif state == 1: + if c == u'"': + state = 0 + elif state == 2: + # Ignore char after backslash, underscore or asterisk + state = 0 + assert 0 <= state <= 2 + if book.verbosity >= 4: + print "is_date_format_string: reduced format is %r" % s + s = fmt_bracketed_sub('', s) + if non_date_formats.has_key(s): + return False + state = 0 + separator = ";" + got_sep = 0 + date_count = num_count = 0 + for c in s: + if date_char_dict.has_key(c): + date_count += date_char_dict[c] + elif num_char_dict.has_key(c): + num_count += num_char_dict[c] + elif c == separator: + got_sep = 1 + # print num_count, date_count, repr(fmt) + if date_count and not num_count: + return True + if num_count and not date_count: + return 
False + if date_count: + fprintf(book.logfile, + 'WARNING *** is_date_format: ambiguous d=%d n=%d fmt=%r\n', + date_count, num_count, fmt) + elif not got_sep: + fprintf(book.logfile, + "WARNING *** format %r produces constant result\n", + fmt) + return date_count > num_count + +def handle_format(self, data, rectype=XL_FORMAT): + DEBUG = 0 + bv = self.biff_version + if rectype == XL_FORMAT2: + bv = min(bv, 30) + if not self.encoding: + self.derive_encoding() + strpos = 2 + if bv >= 50: + fmtkey = unpack('= 80: + unistrg = unpack_unicode(data, 2) + else: + unistrg = unpack_string(data, strpos, self.encoding, lenlen=1) + blah = DEBUG or self.verbosity >= 3 + if blah: + fprintf(self.logfile, + "FORMAT: count=%d fmtkey=0x%04x (%d) s=%r\n", + self.actualfmtcount, fmtkey, fmtkey, unistrg) + is_date_s = self.is_date_format_string(unistrg) + ty = [FGE, FDT][is_date_s] + if not(fmtkey > 163 or bv < 50): + # user_defined if fmtkey > 163 + # N.B. Gnumeric incorrectly starts these at 50 instead of 164 :-( + # if earlier than BIFF 5, standard info is useless + std_ty = std_format_code_types.get(fmtkey, FUN) + # print "std ty", std_ty + is_date_c = std_ty == FDT + if 0 < fmtkey < 50 and (is_date_c ^ is_date_s): + DEBUG = 2 + fprintf(self.logfile, + "WARNING *** Conflict between " + "std format key %d and its format string %r\n", + fmtkey, unistrg) + if DEBUG == 2: + fprintf(self.logfile, + "ty: %d; is_date_c: %r; is_date_s: %r; fmt_strg: %r", + ty, is_date_c, is_date_s, unistrg) + fmtobj = Format(fmtkey, ty, unistrg) + if blah: + fmtobj.dump(self.logfile, + header="--- handle_format [%d] ---" % (self.actualfmtcount-1, )) + self.format_map[fmtkey] = fmtobj + self.format_list.append(fmtobj) + +# ============================================================================= + +def handle_palette(book, data): + if not book.formatting_info: + return + blah = DEBUG or book.verbosity >= 2 + n_colours, = unpack('= 50] + if ((DEBUG or book.verbosity >= 1) + and n_colours != 
expected_n_colours): + fprintf(book.logfile, + "NOTE *** Expected %d colours in PALETTE record, found %d\n", + expected_n_colours, n_colours) + elif blah: + fprintf(book.logfile, + "PALETTE record with %d colours\n", n_colours) + fmt = '> 8) & 0xff + blue = (c >> 16) & 0xff + old_rgb = book.colour_map[8+i] + new_rgb = (red, green, blue) + book.palette_record.append(new_rgb) + book.colour_map[8+i] = new_rgb + if blah: + if new_rgb != old_rgb: + print >> book.logfile, "%2d: %r -> %r" % (i, old_rgb, new_rgb) + +def palette_epilogue(book): + # Check colour indexes in fonts etc. + # This must be done here as FONT records + # come *before* the PALETTE record :-( + for font in book.font_list: + if font.font_index == 4: # the missing font record + continue + cx = font.colour_index + if cx == 0x7fff: # system window text colour + continue + if book.colour_map.has_key(cx): + book.colour_indexes_used[cx] = 1 + else: + print "Size of colour table:", len(book.colour_map) + print >> book.logfile, \ + "*** Font #%d (%r): colour index 0x%04x is unknown" \ + % (font.font_index, font.name, cx) + if book.verbosity >= 1: + used = book.colour_indexes_used.keys() + used.sort() + print >> book.logfile, "\nColour indexes used:\n%r\n" % used + +def handle_style(book, data): + blah = DEBUG or book.verbosity >= 2 + bv = book.biff_version + flag_and_xfx, built_in_id, level = unpack('= 80: + name = unpack_unicode(data, 2, lenlen=2) + else: + name = unpack_string(data, 2, book.encoding, lenlen=1) + if blah and not name: + print >> book.logfile, \ + "WARNING *** A user-defined style has a zero-length name" + built_in = 0 + built_in_id = 0 + level = 0 + book.style_name_map[name] = (built_in, xf_index) + if blah: + print >> book.logfile, \ + "STYLE: built_in=%d xf_index=%d built_in_id=%d level=%d name=%r" \ + % (built_in, xf_index, built_in_id, level, name) + +def check_colour_indexes_in_obj(book, obj, orig_index): + alist = obj.__dict__.items() + alist.sort() + for attr, nobj in alist: + if 
hasattr(nobj, 'dump'): + check_colour_indexes_in_obj(book, nobj, orig_index) + elif attr.find('colour_index') >= 0: + if book.colour_map.has_key(nobj): + book.colour_indexes_used[nobj] = 1 + continue + oname = obj.__class__.__name__ + print >> book.logfile, \ + "*** xf #%d : %s.%s = 0x%04x (unknown)" \ + % (orig_index, oname, attr, nobj) + +def handle_xf(self, data): + ### self is a Book instance + # DEBUG = 0 + blah = DEBUG or self.verbosity >= 3 + bv = self.biff_version + xf = XF() + xf.alignment = XFAlignment() + xf.alignment.indent_level = 0 + xf.alignment.shrink_to_fit = 0 + xf.alignment.text_direction = 0 + xf.border = XFBorder() + xf.border.diag_up = 0 + xf.border.diag_down = 0 + xf.border.diag_colour_index = 0 + xf.border.diag_line_style = 0 # no line + xf.background = XFBackground() + xf.protection = XFProtection() + # fill in the known standard formats + if bv >= 50 and not self.xfcount: + # i.e. do this once before we process the first XF record + for x in std_format_code_types.keys(): + if not self.format_map.has_key(x): + ty = std_format_code_types[x] + fmt_str = std_format_strings[x] + fmtobj = Format(x, ty, fmt_str) + self.format_map[x] = fmtobj + if bv >= 80: + unpack_fmt = '> 2 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, reg & 1) + reg >>= 1 + upkbitsL(xf.border, pkd_brdbkg1, ( + (0, 0x0000000f, 'left_line_style'), + (4, 0x000000f0, 'right_line_style'), + (8, 0x00000f00, 'top_line_style'), + (12, 0x0000f000, 'bottom_line_style'), + (16, 0x007f0000, 'left_colour_index'), + (23, 0x3f800000, 'right_colour_index'), + (30, 0x40000000, 'diag_down'), + (31, 0x80000000L, 'diag_up'), + )) + upkbits(xf.border, pkd_brdbkg2, ( + (0, 0x0000007F, 'top_colour_index'), + (7, 0x00003F80, 'bottom_colour_index'), + (14, 0x001FC000, 'diag_colour_index'), + (21, 0x01E00000, 'diag_line_style'), + )) + upkbitsL(xf.background, pkd_brdbkg2, ( + (26, 0xFC000000L, 'fill_pattern'), 
+ )) + upkbits(xf.background, pkd_brdbkg3, ( + (0, 0x007F, 'pattern_colour_index'), + (7, 0x3F80, 'background_colour_index'), + )) + elif bv >= 50: + unpack_fmt = '> 2 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, reg & 1) + reg >>= 1 + upkbitsL(xf.background, pkd_brdbkg1, ( + ( 0, 0x0000007F, 'pattern_colour_index'), + ( 7, 0x00003F80, 'background_colour_index'), + (16, 0x003F0000, 'fill_pattern'), + )) + upkbitsL(xf.border, pkd_brdbkg1, ( + (22, 0x01C00000, 'bottom_line_style'), + (25, 0xFE000000L, 'bottom_colour_index'), + )) + upkbits(xf.border, pkd_brdbkg2, ( + ( 0, 0x00000007, 'top_line_style'), + ( 3, 0x00000038, 'left_line_style'), + ( 6, 0x000001C0, 'right_line_style'), + ( 9, 0x0000FE00, 'top_colour_index'), + (16, 0x007F0000, 'left_colour_index'), + (23, 0x3F800000, 'right_colour_index'), + )) + elif bv >= 40: + unpack_fmt = '> 6 + xf.alignment.rotation = [0, 255, 90, 180][orientation] + reg = pkd_used >> 2 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, reg & 1) + reg >>= 1 + upkbits(xf.background, pkd_bkg_34, ( + ( 0, 0x003F, 'fill_pattern'), + ( 6, 0x07C0, 'pattern_colour_index'), + (11, 0xF800, 'background_colour_index'), + )) + upkbitsL(xf.border, pkd_brd_34, ( + ( 0, 0x00000007, 'top_line_style'), + ( 3, 0x000000F8, 'top_colour_index'), + ( 8, 0x00000700, 'left_line_style'), + (11, 0x0000F800, 'left_colour_index'), + (16, 0x00070000, 'bottom_line_style'), + (19, 0x00F80000, 'bottom_colour_index'), + (24, 0x07000000, 'right_line_style'), + (27, 0xF8000000L, 'right_colour_index'), + )) + elif bv == 30: + unpack_fmt = '> 2 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, reg & 1) + reg >>= 1 + upkbits(xf.background, pkd_bkg_34, ( + ( 0, 0x003F, 'fill_pattern'), + ( 6, 0x07C0, 
'pattern_colour_index'), + (11, 0xF800, 'background_colour_index'), + )) + upkbitsL(xf.border, pkd_brd_34, ( + ( 0, 0x00000007, 'top_line_style'), + ( 3, 0x000000F8, 'top_colour_index'), + ( 8, 0x00000700, 'left_line_style'), + (11, 0x0000F800, 'left_colour_index'), + (16, 0x00070000, 'bottom_line_style'), + (19, 0x00F80000, 'bottom_colour_index'), + (24, 0x07000000, 'right_line_style'), + (27, 0xF8000000L, 'right_colour_index'), + )) + xf.alignment.vert_align = 2 # bottom + xf.alignment.rotation = 0 + elif bv == 21: + #### Warning: incomplete treatment; formatting_info not fully supported. + #### Probably need to offset incoming BIFF2 XF[n] to BIFF8-like XF[n+16], + #### and create XF[0:16] like the standard ones in BIFF8 + #### *AND* add 16 to all XF references in cell records :-( + (xf.font_index, format_etc, halign_etc) = unpack('= 3 + blah1 = DEBUG or self.verbosity >= 1 + if blah: + fprintf(self.logfile, "xf_epilogue called ...\n") + + def check_same(book_arg, xf_arg, parent_arg, attr): + # the _arg caper is to avoid a Warning msg from Python 2.1 :-( + if getattr(xf_arg, attr) != getattr(parent_arg, attr): + fprintf(book_arg.logfile, + "NOTE !!! XF[%d] parent[%d] %s different\n", + xf_arg.xf_index, parent_arg.xf_index, attr) + + for xfx in xrange(num_xfs): + xf = self.xf_list[xfx] + if not self.format_map.has_key(xf.format_key): + msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n" + fprintf(self.logfile, msg, + xf.xf_index, xf.format_key, xf.format_key) + xf.format_key = 0 + cellty_from_fmtty = { + FNU: XL_CELL_NUMBER, + FUN: XL_CELL_NUMBER, + FGE: XL_CELL_NUMBER, + FDT: XL_CELL_DATE, + FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text. 
+ } + fmt = self.format_map[xf.format_key] + cellty = cellty_from_fmtty[fmt.type] + self._xf_index_to_xl_type_map[xf.xf_index] = cellty + # Now for some assertions etc + if not self.formatting_info: + continue + if xf.is_style: + continue + if not(0 <= xf.parent_style_index < num_xfs): + fprintf(self.logfile, + "WARNING *** XF[%d]: is_style=%d but parent_style_index=%d\n", + xf.xf_index, xf.is_style, xf.parent_style_index) + # make it conform + xf.parent_style_index = 0 + if self.biff_version >= 30: + assert xf.parent_style_index != xf.xf_index + assert self.xf_list[xf.parent_style_index].is_style + if blah1 and xf.parent_style_index > xf.xf_index: + fprintf(self.logfile, + "NOTE !!! XF[%d]: parent_style_index is %d; out of order?\n", + xf.xf_index, xf.parent_style_index) + parent = self.xf_list[xf.parent_style_index] + if not xf._alignment_flag and not parent._alignment_flag: + if blah1: check_same(self, xf, parent, 'alignment') + if not xf._background_flag and not parent._background_flag: + if blah1: check_same(self, xf, parent, 'background') + if not xf._border_flag and not parent._border_flag: + if blah1: check_same(self, xf, parent, 'border') + if not xf._protection_flag and not parent._protection_flag: + if blah1: check_same(self, xf, parent, 'protection') + if not xf._format_flag and not parent._format_flag: + if blah1 and xf.format_key != parent.format_key: + fprintf(self.logfile, + "NOTE !!! XF[%d] fmtk=%d, parent[%d] fmtk=%r\n%r / %r\n", + xf.xf_index, xf.format_key, parent.xf_index, parent.format_key, + self.format_map[xf.format_key].format_str, + self.format_map[parent.format_key].format_str) + if not xf._font_flag and not parent._font_flag: + if blah1 and xf.font_index != parent.font_index: + fprintf(self.logfile, + "NOTE !!! 
XF[%d] fontx=%d, parent[%d] fontx=%r\n", + xf.xf_index, xf.font_index, parent.xf_index, parent.font_index) + +def initialise_book(book): + initialise_colour_map(book) + book._xf_epilogue_done = 0 + methods = ( + handle_font, + handle_efont, + handle_format, + is_date_format_string, + handle_palette, + palette_epilogue, + handle_style, + handle_xf, + xf_epilogue, + ) + for method in methods: + setattr(book.__class__, method.__name__, method) + +## +#

A collection of the border-related attributes of an XF record. +# Items correspond to those in the Excel UI's Format/Cells/Border tab.

+#

An explanation of "colour index" is given in the Formatting +# section at the start of this document. +# There are five line style attributes; possible values and the +# associated meanings are: +# 0 = No line, +# 1 = Thin, +# 2 = Medium, +# 3 = Dashed, +# 4 = Dotted, +# 5 = Thick, +# 6 = Double, +# 7 = Hair, +# 8 = Medium dashed, +# 9 = Thin dash-dotted, +# 10 = Medium dash-dotted, +# 11 = Thin dash-dot-dotted, +# 12 = Medium dash-dot-dotted, +# 13 = Slanted medium dash-dotted. +# The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only. +# For pictures of the line styles, refer to OOo docs s3.10 (p22) +# "Line Styles for Cell Borders (BIFF3-BIFF8)".

+#
-- New in version 0.6.1 +class XFBorder(BaseObject, EqNeAttrs): + + ## + # The colour index for the cell's top line + top_colour_index = 0 + ## + # The colour index for the cell's bottom line + bottom_colour_index = 0 + ## + # The colour index for the cell's left line + left_colour_index = 0 + ## + # The colour index for the cell's right line + right_colour_index = 0 + ## + # The colour index for the cell's diagonal lines, if any + diag_colour_index = 0 + ## + # The line style for the cell's top line + top_line_style = 0 + ## + # The line style for the cell's bottom line + bottom_line_style = 0 + ## + # The line style for the cell's left line + left_line_style = 0 + ## + # The line style for the cell's right line + right_line_style = 0 + ## + # The line style for the cell's diagonal lines, if any + diag_line_style = 0 + ## + # 1 = draw a diagonal from top left to bottom right + diag_down = 0 + ## + # 1 = draw a diagonal from bottom left to top right + diag_up = 0 + +## +# A collection of the background-related attributes of an XF record. +# Items correspond to those in the Excel UI's Format/Cells/Patterns tab. +# An explanation of "colour index" is given in the Formatting +# section at the start of this document. +#
-- New in version 0.6.1 +class XFBackground(BaseObject, EqNeAttrs): + + ## + # See section 3.11 of the OOo docs. + fill_pattern = 0 + ## + # See section 3.11 of the OOo docs. + background_colour_index = 0 + ## + # See section 3.11 of the OOo docs. + pattern_colour_index = 0 + +## +# A collection of the alignment and similar attributes of an XF record. +# Items correspond to those in the Excel UI's Format/Cells/Alignment tab. +#
-- New in version 0.6.1 + +class XFAlignment(BaseObject, EqNeAttrs): + + ## + # Values: section 6.115 (p 214) of OOo docs + hor_align = 0 + ## + # Values: section 6.115 (p 215) of OOo docs + vert_align = 0 + ## + # Values: section 6.115 (p 215) of OOo docs.
+ # Note: file versions BIFF7 and earlier use the documented + # "orientation" attribute; this will be mapped (without loss) + # into "rotation". + rotation = 0 + ## + # 1 = text is wrapped at right margin + text_wrapped = 0 + ## + # A number in range(15). + indent_level = 0 + ## + # 1 = shrink font size to fit text into cell. + shrink_to_fit = 0 + ## + # 0 = according to context; 1 = left-to-right; 2 = right-to-left + text_direction = 0 + +## +# A collection of the protection-related attributes of an XF record. +# Items correspond to those in the Excel UI's Format/Cells/Protection tab. +# Note the OOo docs include the "cell or style" bit +# in this bundle of attributes. +# This is incorrect; the bit is used in determining which bundles to use. +#
-- New in version 0.6.1 + +class XFProtection(BaseObject, EqNeAttrs): + + ## + # 1 = Cell is prevented from being changed, moved, resized, or deleted + # (only if the sheet is protected). + cell_locked = 0 + ## + # 1 = Hide formula so that it doesn't appear in the formula bar when + # the cell is selected (only if the sheet is protected). + formula_hidden = 0 + +## +# eXtended Formatting information for cells, rows, columns and styles. +#
-- New in version 0.6.1 +# +#

Each of the 6 flags below describes the validity of +# a specific group of attributes. +#
+# In cell XFs, flag==0 means the attributes of the parent style XF are used, +# (but only if the attributes are valid there); flag==1 means the attributes +# of this XF are used.
+# In style XFs, flag==0 means the attribute setting is valid; flag==1 means +# the attribute should be ignored.
+# Note that the API +# provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs +# have had the above inheritance mechanism applied. +#

+ +class XF(BaseObject): + + ## + # 0 = cell XF, 1 = style XF + is_style = 0 + ## + # cell XF: Index into Book.xf_list + # of this XF's style XF
+ # style XF: 0xFFF + parent_style_index = 0 + ## + # + _format_flag = 0 + ## + # + _font_flag = 0 + ## + # + _alignment_flag = 0 + ## + # + _border_flag = 0 + ## + # + _background_flag = 0 + ## + #   + _protection_flag = 0 + ## + # Index into Book.xf_list + xf_index = 0 + ## + # Index into Book.font_list + font_index = 0 + ## + # Key into Book.format_map + #

+ # Warning: OOo docs on the XF record call this "Index to FORMAT record". + # It is not an index in the Python sense. It is a key to a map. + # It is true only for Excel 4.0 and earlier files + # that the key into format_map from an XF instance + # is the same as the index into format_list, and only + # if the index is less than 164. + #

+ format_key = 0 + ## + # An instance of an XFProtection object. + protection = None + ## + # An instance of an XFBackground object. + background = None + ## + # An instance of an XFAlignment object. + alignment = None + ## + # An instance of an XFBorder object. + border = None diff --git a/tablib/packages/xlrd/formula.py b/tablib/packages/xlrd/formula.py new file mode 100644 index 0000000..4edbc29 --- /dev/null +++ b/tablib/packages/xlrd/formula.py @@ -0,0 +1,2092 @@ +# -*- coding: cp1252 -*- + +## +# Module for parsing/evaluating Microsoft Excel formulas. +# +#

Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under +# a BSD-style licence.

+## + +# No part of the content of this file was derived from the works of David Giffin. + +import copy +from struct import unpack +from timemachine import * +from biffh import unpack_unicode_update_pos, unpack_string_update_pos, \ + XLRDError, hex_char_dump, error_text_from_code, BaseObject + +__all__ = [ + 'oBOOL', 'oERR', 'oNUM', 'oREF', 'oREL', 'oSTRG', 'oUNK', + 'decompile_formula', + 'dump_formula', + 'evaluate_name_formula', + 'okind_dict', + 'rangename3d', 'rangename3drel', 'cellname', 'cellnameabs', 'colname', + ] + +# sztabN[opcode] -> the number of bytes to consume. +# -1 means variable +# -2 means this opcode not implemented in this version. +# Which N to use? Depends on biff_version; see szdict. +sztab0 = [-2, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 8, 4, 2, 2, 3, 9, 8, 2, 3, 8, 4, 7, 5, 5, 5, 2, 4, 7, 4, 7, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2] +sztab1 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 2, 3, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2] +sztab2 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 3, 4, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2] +sztab3 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 15, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 25, 18, 21, 18, 21, -2, -2] +sztab4 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 5, 5, 9, 7, 7, 7, 3, 5, 9, 5, 9, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 7, 7, 11, 7, 11, -2, -2] + +szdict = { + 20 : sztab0, + 30 : sztab1, + 40 : sztab2, + 45 : sztab2, + 50 : sztab3, + 70 : sztab3, + 80 : sztab4, + } + +# For debugging purposes ... 
the name for each opcode +# (without the prefix "t" used on OOo docs) +onames = ['Unk00', 'Exp', 'Tbl', 'Add', 'Sub', 'Mul', 'Div', 'Power', 'Concat', 'LT', 'LE', 'EQ', 'GE', 'GT', 'NE', 'Isect', 'List', 'Range', 'Uplus', 'Uminus', 'Percent', 'Paren', 'MissArg', 'Str', 'Extended', 'Attr', 'Sheet', 'EndSheet', 'Err', 'Bool', 'Int', 'Num', 'Array', 'Func', 'FuncVar', 'Name', 'Ref', 'Area', 'MemArea', 'MemErr', 'MemNoMem', 'MemFunc', 'RefErr', 'AreaErr', 'RefN', 'AreaN', 'MemAreaN', 'MemNoMemN', '', '', '', '', '', '', '', '', 'FuncCE', 'NameX', 'Ref3d', 'Area3d', 'RefErr3d', 'AreaErr3d', '', ''] + +func_defs = { + # index: (name, min#args, max#args, flags, #known_args, return_type, kargs) + 0 : ('COUNT', 0, 30, 0x04, 1, 'V', 'R'), + 1 : ('IF', 2, 3, 0x04, 3, 'V', 'VRR'), + 2 : ('ISNA', 1, 1, 0x02, 1, 'V', 'V'), + 3 : ('ISERROR', 1, 1, 0x02, 1, 'V', 'V'), + 4 : ('SUM', 0, 30, 0x04, 1, 'V', 'R'), + 5 : ('AVERAGE', 1, 30, 0x04, 1, 'V', 'R'), + 6 : ('MIN', 1, 30, 0x04, 1, 'V', 'R'), + 7 : ('MAX', 1, 30, 0x04, 1, 'V', 'R'), + 8 : ('ROW', 0, 1, 0x04, 1, 'V', 'R'), + 9 : ('COLUMN', 0, 1, 0x04, 1, 'V', 'R'), + 10 : ('NA', 0, 0, 0x02, 0, 'V', ''), + 11 : ('NPV', 2, 30, 0x04, 2, 'V', 'VR'), + 12 : ('STDEV', 1, 30, 0x04, 1, 'V', 'R'), + 13 : ('DOLLAR', 1, 2, 0x04, 1, 'V', 'V'), + 14 : ('FIXED', 2, 3, 0x04, 3, 'V', 'VVV'), + 15 : ('SIN', 1, 1, 0x02, 1, 'V', 'V'), + 16 : ('COS', 1, 1, 0x02, 1, 'V', 'V'), + 17 : ('TAN', 1, 1, 0x02, 1, 'V', 'V'), + 18 : ('ATAN', 1, 1, 0x02, 1, 'V', 'V'), + 19 : ('PI', 0, 0, 0x02, 0, 'V', ''), + 20 : ('SQRT', 1, 1, 0x02, 1, 'V', 'V'), + 21 : ('EXP', 1, 1, 0x02, 1, 'V', 'V'), + 22 : ('LN', 1, 1, 0x02, 1, 'V', 'V'), + 23 : ('LOG10', 1, 1, 0x02, 1, 'V', 'V'), + 24 : ('ABS', 1, 1, 0x02, 1, 'V', 'V'), + 25 : ('INT', 1, 1, 0x02, 1, 'V', 'V'), + 26 : ('SIGN', 1, 1, 0x02, 1, 'V', 'V'), + 27 : ('ROUND', 2, 2, 0x02, 2, 'V', 'VV'), + 28 : ('LOOKUP', 2, 3, 0x04, 2, 'V', 'VR'), + 29 : ('INDEX', 2, 4, 0x0c, 4, 'R', 'RVVV'), + 30 : ('REPT', 2, 2, 0x02, 2, 'V', 
'VV'), + 31 : ('MID', 3, 3, 0x02, 3, 'V', 'VVV'), + 32 : ('LEN', 1, 1, 0x02, 1, 'V', 'V'), + 33 : ('VALUE', 1, 1, 0x02, 1, 'V', 'V'), + 34 : ('TRUE', 0, 0, 0x02, 0, 'V', ''), + 35 : ('FALSE', 0, 0, 0x02, 0, 'V', ''), + 36 : ('AND', 1, 30, 0x04, 1, 'V', 'R'), + 37 : ('OR', 1, 30, 0x04, 1, 'V', 'R'), + 38 : ('NOT', 1, 1, 0x02, 1, 'V', 'V'), + 39 : ('MOD', 2, 2, 0x02, 2, 'V', 'VV'), + 40 : ('DCOUNT', 3, 3, 0x02, 3, 'V', 'RRR'), + 41 : ('DSUM', 3, 3, 0x02, 3, 'V', 'RRR'), + 42 : ('DAVERAGE', 3, 3, 0x02, 3, 'V', 'RRR'), + 43 : ('DMIN', 3, 3, 0x02, 3, 'V', 'RRR'), + 44 : ('DMAX', 3, 3, 0x02, 3, 'V', 'RRR'), + 45 : ('DSTDEV', 3, 3, 0x02, 3, 'V', 'RRR'), + 46 : ('VAR', 1, 30, 0x04, 1, 'V', 'R'), + 47 : ('DVAR', 3, 3, 0x02, 3, 'V', 'RRR'), + 48 : ('TEXT', 2, 2, 0x02, 2, 'V', 'VV'), + 49 : ('LINEST', 1, 4, 0x04, 4, 'A', 'RRVV'), + 50 : ('TREND', 1, 4, 0x04, 4, 'A', 'RRRV'), + 51 : ('LOGEST', 1, 4, 0x04, 4, 'A', 'RRVV'), + 52 : ('GROWTH', 1, 4, 0x04, 4, 'A', 'RRRV'), + 56 : ('PV', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 57 : ('FV', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 58 : ('NPER', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 59 : ('PMT', 3, 5, 0x04, 5, 'V', 'VVVVV'), + 60 : ('RATE', 3, 6, 0x04, 6, 'V', 'VVVVVV'), + 61 : ('MIRR', 3, 3, 0x02, 3, 'V', 'RVV'), + 62 : ('IRR', 1, 2, 0x04, 2, 'V', 'RV'), + 63 : ('RAND', 0, 0, 0x0a, 0, 'V', ''), + 64 : ('MATCH', 2, 3, 0x04, 3, 'V', 'VRR'), + 65 : ('DATE', 3, 3, 0x02, 3, 'V', 'VVV'), + 66 : ('TIME', 3, 3, 0x02, 3, 'V', 'VVV'), + 67 : ('DAY', 1, 1, 0x02, 1, 'V', 'V'), + 68 : ('MONTH', 1, 1, 0x02, 1, 'V', 'V'), + 69 : ('YEAR', 1, 1, 0x02, 1, 'V', 'V'), + 70 : ('WEEKDAY', 1, 2, 0x04, 2, 'V', 'VV'), + 71 : ('HOUR', 1, 1, 0x02, 1, 'V', 'V'), + 72 : ('MINUTE', 1, 1, 0x02, 1, 'V', 'V'), + 73 : ('SECOND', 1, 1, 0x02, 1, 'V', 'V'), + 74 : ('NOW', 0, 0, 0x0a, 0, 'V', ''), + 75 : ('AREAS', 1, 1, 0x02, 1, 'V', 'R'), + 76 : ('ROWS', 1, 1, 0x02, 1, 'V', 'R'), + 77 : ('COLUMNS', 1, 1, 0x02, 1, 'V', 'R'), + 78 : ('OFFSET', 3, 5, 0x04, 5, 'R', 'RVVVV'), + 82 : ('SEARCH', 2, 
3, 0x04, 3, 'V', 'VVV'), + 83 : ('TRANSPOSE', 1, 1, 0x02, 1, 'A', 'A'), + 86 : ('TYPE', 1, 1, 0x02, 1, 'V', 'V'), + 92 : ('SERIESSUM', 4, 4, 0x02, 4, 'V', 'VVVA'), + 97 : ('ATAN2', 2, 2, 0x02, 2, 'V', 'VV'), + 98 : ('ASIN', 1, 1, 0x02, 1, 'V', 'V'), + 99 : ('ACOS', 1, 1, 0x02, 1, 'V', 'V'), + 100: ('CHOOSE', 2, 30, 0x04, 2, 'V', 'VR'), + 101: ('HLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'), + 102: ('VLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'), + 105: ('ISREF', 1, 1, 0x02, 1, 'V', 'R'), + 109: ('LOG', 1, 2, 0x04, 2, 'V', 'VV'), + 111: ('CHAR', 1, 1, 0x02, 1, 'V', 'V'), + 112: ('LOWER', 1, 1, 0x02, 1, 'V', 'V'), + 113: ('UPPER', 1, 1, 0x02, 1, 'V', 'V'), + 114: ('PROPER', 1, 1, 0x02, 1, 'V', 'V'), + 115: ('LEFT', 1, 2, 0x04, 2, 'V', 'VV'), + 116: ('RIGHT', 1, 2, 0x04, 2, 'V', 'VV'), + 117: ('EXACT', 2, 2, 0x02, 2, 'V', 'VV'), + 118: ('TRIM', 1, 1, 0x02, 1, 'V', 'V'), + 119: ('REPLACE', 4, 4, 0x02, 4, 'V', 'VVVV'), + 120: ('SUBSTITUTE', 3, 4, 0x04, 4, 'V', 'VVVV'), + 121: ('CODE', 1, 1, 0x02, 1, 'V', 'V'), + 124: ('FIND', 2, 3, 0x04, 3, 'V', 'VVV'), + 125: ('CELL', 1, 2, 0x0c, 2, 'V', 'VR'), + 126: ('ISERR', 1, 1, 0x02, 1, 'V', 'V'), + 127: ('ISTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 128: ('ISNUMBER', 1, 1, 0x02, 1, 'V', 'V'), + 129: ('ISBLANK', 1, 1, 0x02, 1, 'V', 'V'), + 130: ('T', 1, 1, 0x02, 1, 'V', 'R'), + 131: ('N', 1, 1, 0x02, 1, 'V', 'R'), + 140: ('DATEVALUE', 1, 1, 0x02, 1, 'V', 'V'), + 141: ('TIMEVALUE', 1, 1, 0x02, 1, 'V', 'V'), + 142: ('SLN', 3, 3, 0x02, 3, 'V', 'VVV'), + 143: ('SYD', 4, 4, 0x02, 4, 'V', 'VVVV'), + 144: ('DDB', 4, 5, 0x04, 5, 'V', 'VVVVV'), + 148: ('INDIRECT', 1, 2, 0x0c, 2, 'R', 'VV'), + 162: ('CLEAN', 1, 1, 0x02, 1, 'V', 'V'), + 163: ('MDETERM', 1, 1, 0x02, 1, 'V', 'A'), + 164: ('MINVERSE', 1, 1, 0x02, 1, 'A', 'A'), + 165: ('MMULT', 2, 2, 0x02, 2, 'A', 'AA'), + 167: ('IPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'), + 168: ('PPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'), + 169: ('COUNTA', 0, 30, 0x04, 1, 'V', 'R'), + 183: ('PRODUCT', 0, 30, 0x04, 1, 'V', 'R'), + 184: ('FACT', 
1, 1, 0x02, 1, 'V', 'V'), + 189: ('DPRODUCT', 3, 3, 0x02, 3, 'V', 'RRR'), + 190: ('ISNONTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 193: ('STDEVP', 1, 30, 0x04, 1, 'V', 'R'), + 194: ('VARP', 1, 30, 0x04, 1, 'V', 'R'), + 195: ('DSTDEVP', 3, 3, 0x02, 3, 'V', 'RRR'), + 196: ('DVARP', 3, 3, 0x02, 3, 'V', 'RRR'), + 197: ('TRUNC', 1, 2, 0x04, 2, 'V', 'VV'), + 198: ('ISLOGICAL', 1, 1, 0x02, 1, 'V', 'V'), + 199: ('DCOUNTA', 3, 3, 0x02, 3, 'V', 'RRR'), + 204: ('USDOLLAR', 1, 2, 0x04, 2, 'V', 'VV'), + 205: ('FINDB', 2, 3, 0x04, 3, 'V', 'VVV'), + 206: ('SEARCHB', 2, 3, 0x04, 3, 'V', 'VVV'), + 207: ('REPLACEB', 4, 4, 0x02, 4, 'V', 'VVVV'), + 208: ('LEFTB', 1, 2, 0x04, 2, 'V', 'VV'), + 209: ('RIGHTB', 1, 2, 0x04, 2, 'V', 'VV'), + 210: ('MIDB', 3, 3, 0x02, 3, 'V', 'VVV'), + 211: ('LENB', 1, 1, 0x02, 1, 'V', 'V'), + 212: ('ROUNDUP', 2, 2, 0x02, 2, 'V', 'VV'), + 213: ('ROUNDDOWN', 2, 2, 0x02, 2, 'V', 'VV'), + 214: ('ASC', 1, 1, 0x02, 1, 'V', 'V'), + 215: ('DBCS', 1, 1, 0x02, 1, 'V', 'V'), + 216: ('RANK', 2, 3, 0x04, 3, 'V', 'VRV'), + 219: ('ADDRESS', 2, 5, 0x04, 5, 'V', 'VVVVV'), + 220: ('DAYS360', 2, 3, 0x04, 3, 'V', 'VVV'), + 221: ('TODAY', 0, 0, 0x0a, 0, 'V', ''), + 222: ('VDB', 5, 7, 0x04, 7, 'V', 'VVVVVVV'), + 227: ('MEDIAN', 1, 30, 0x04, 1, 'V', 'R'), + 228: ('SUMPRODUCT', 1, 30, 0x04, 1, 'V', 'A'), + 229: ('SINH', 1, 1, 0x02, 1, 'V', 'V'), + 230: ('COSH', 1, 1, 0x02, 1, 'V', 'V'), + 231: ('TANH', 1, 1, 0x02, 1, 'V', 'V'), + 232: ('ASINH', 1, 1, 0x02, 1, 'V', 'V'), + 233: ('ACOSH', 1, 1, 0x02, 1, 'V', 'V'), + 234: ('ATANH', 1, 1, 0x02, 1, 'V', 'V'), + 235: ('DGET', 3, 3, 0x02, 3, 'V', 'RRR'), + 244: ('INFO', 1, 1, 0x02, 1, 'V', 'V'), + 247: ('DB', 4, 5, 0x04, 5, 'V', 'VVVVV'), + 252: ('FREQUENCY', 2, 2, 0x02, 2, 'A', 'RR'), + 261: ('ERROR.TYPE', 1, 1, 0x02, 1, 'V', 'V'), + 269: ('AVEDEV', 1, 30, 0x04, 1, 'V', 'R'), + 270: ('BETADIST', 3, 5, 0x04, 1, 'V', 'V'), + 271: ('GAMMALN', 1, 1, 0x02, 1, 'V', 'V'), + 272: ('BETAINV', 3, 5, 0x04, 1, 'V', 'V'), + 273: ('BINOMDIST', 4, 4, 0x02, 4, 
'V', 'VVVV'), + 274: ('CHIDIST', 2, 2, 0x02, 2, 'V', 'VV'), + 275: ('CHIINV', 2, 2, 0x02, 2, 'V', 'VV'), + 276: ('COMBIN', 2, 2, 0x02, 2, 'V', 'VV'), + 277: ('CONFIDENCE', 3, 3, 0x02, 3, 'V', 'VVV'), + 278: ('CRITBINOM', 3, 3, 0x02, 3, 'V', 'VVV'), + 279: ('EVEN', 1, 1, 0x02, 1, 'V', 'V'), + 280: ('EXPONDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 281: ('FDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 282: ('FINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 283: ('FISHER', 1, 1, 0x02, 1, 'V', 'V'), + 284: ('FISHERINV', 1, 1, 0x02, 1, 'V', 'V'), + 285: ('FLOOR', 2, 2, 0x02, 2, 'V', 'VV'), + 286: ('GAMMADIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 287: ('GAMMAINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 288: ('CEILING', 2, 2, 0x02, 2, 'V', 'VV'), + 289: ('HYPGEOMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 290: ('LOGNORMDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 291: ('LOGINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 292: ('NEGBINOMDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 293: ('NORMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'), + 294: ('NORMSDIST', 1, 1, 0x02, 1, 'V', 'V'), + 295: ('NORMINV', 3, 3, 0x02, 3, 'V', 'VVV'), + 296: ('NORMSINV', 1, 1, 0x02, 1, 'V', 'V'), + 297: ('STANDARDIZE', 3, 3, 0x02, 3, 'V', 'VVV'), + 298: ('ODD', 1, 1, 0x02, 1, 'V', 'V'), + 299: ('PERMUT', 2, 2, 0x02, 2, 'V', 'VV'), + 300: ('POISSON', 3, 3, 0x02, 3, 'V', 'VVV'), + 301: ('TDIST', 3, 3, 0x02, 3, 'V', 'VVV'), + 302: ('WEIBULL', 4, 4, 0x02, 4, 'V', 'VVVV'), + 303: ('SUMXMY2', 2, 2, 0x02, 2, 'V', 'AA'), + 304: ('SUMX2MY2', 2, 2, 0x02, 2, 'V', 'AA'), + 305: ('SUMX2PY2', 2, 2, 0x02, 2, 'V', 'AA'), + 306: ('CHITEST', 2, 2, 0x02, 2, 'V', 'AA'), + 307: ('CORREL', 2, 2, 0x02, 2, 'V', 'AA'), + 308: ('COVAR', 2, 2, 0x02, 2, 'V', 'AA'), + 309: ('FORECAST', 3, 3, 0x02, 3, 'V', 'VAA'), + 310: ('FTEST', 2, 2, 0x02, 2, 'V', 'AA'), + 311: ('INTERCEPT', 2, 2, 0x02, 2, 'V', 'AA'), + 312: ('PEARSON', 2, 2, 0x02, 2, 'V', 'AA'), + 313: ('RSQ', 2, 2, 0x02, 2, 'V', 'AA'), + 314: ('STEYX', 2, 2, 0x02, 2, 'V', 'AA'), + 315: ('SLOPE', 2, 2, 0x02, 2, 'V', 'AA'), + 316: ('TTEST', 4, 4, 0x02, 4, 
'V', 'AAVV'), + 317: ('PROB', 3, 4, 0x04, 3, 'V', 'AAV'), + 318: ('DEVSQ', 1, 30, 0x04, 1, 'V', 'R'), + 319: ('GEOMEAN', 1, 30, 0x04, 1, 'V', 'R'), + 320: ('HARMEAN', 1, 30, 0x04, 1, 'V', 'R'), + 321: ('SUMSQ', 0, 30, 0x04, 1, 'V', 'R'), + 322: ('KURT', 1, 30, 0x04, 1, 'V', 'R'), + 323: ('SKEW', 1, 30, 0x04, 1, 'V', 'R'), + 324: ('ZTEST', 2, 3, 0x04, 2, 'V', 'RV'), + 325: ('LARGE', 2, 2, 0x02, 2, 'V', 'RV'), + 326: ('SMALL', 2, 2, 0x02, 2, 'V', 'RV'), + 327: ('QUARTILE', 2, 2, 0x02, 2, 'V', 'RV'), + 328: ('PERCENTILE', 2, 2, 0x02, 2, 'V', 'RV'), + 329: ('PERCENTRANK', 2, 3, 0x04, 2, 'V', 'RV'), + 330: ('MODE', 1, 30, 0x04, 1, 'V', 'A'), + 331: ('TRIMMEAN', 2, 2, 0x02, 2, 'V', 'RV'), + 332: ('TINV', 2, 2, 0x02, 2, 'V', 'VV'), + 336: ('CONCATENATE', 0, 30, 0x04, 1, 'V', 'V'), + 337: ('POWER', 2, 2, 0x02, 2, 'V', 'VV'), + 342: ('RADIANS', 1, 1, 0x02, 1, 'V', 'V'), + 343: ('DEGREES', 1, 1, 0x02, 1, 'V', 'V'), + 344: ('SUBTOTAL', 2, 30, 0x04, 2, 'V', 'VR'), + 345: ('SUMIF', 2, 3, 0x04, 3, 'V', 'RVR'), + 346: ('COUNTIF', 2, 2, 0x02, 2, 'V', 'RV'), + 347: ('COUNTBLANK', 1, 1, 0x02, 1, 'V', 'R'), + 350: ('ISPMT', 4, 4, 0x02, 4, 'V', 'VVVV'), + 351: ('DATEDIF', 3, 3, 0x02, 3, 'V', 'VVV'), + 352: ('DATESTRING', 1, 1, 0x02, 1, 'V', 'V'), + 353: ('NUMBERSTRING', 2, 2, 0x02, 2, 'V', 'VV'), + 354: ('ROMAN', 1, 2, 0x04, 2, 'V', 'VV'), + 358: ('GETPIVOTDATA', 2, 2, 0x02, 2, 'V', 'RV'), + 359: ('HYPERLINK', 1, 2, 0x04, 2, 'V', 'VV'), + 360: ('PHONETIC', 1, 1, 0x02, 1, 'V', 'V'), + 361: ('AVERAGEA', 1, 30, 0x04, 1, 'V', 'R'), + 362: ('MAXA', 1, 30, 0x04, 1, 'V', 'R'), + 363: ('MINA', 1, 30, 0x04, 1, 'V', 'R'), + 364: ('STDEVPA', 1, 30, 0x04, 1, 'V', 'R'), + 365: ('VARPA', 1, 30, 0x04, 1, 'V', 'R'), + 366: ('STDEVA', 1, 30, 0x04, 1, 'V', 'R'), + 367: ('VARA', 1, 30, 0x04, 1, 'V', 'R'), + 368: ('BAHTTEXT', 1, 1, 0x02, 1, 'V', 'V'), + 369: ('THAIDAYOFWEEK', 1, 1, 0x02, 1, 'V', 'V'), + 370: ('THAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'), + 371: ('THAIMONTHOFYEAR', 1, 1, 0x02, 1, 'V', 'V'), + 
372: ('THAINUMSOUND', 1, 1, 0x02, 1, 'V', 'V'), + 373: ('THAINUMSTRING', 1, 1, 0x02, 1, 'V', 'V'), + 374: ('THAISTRINGLENGTH', 1, 1, 0x02, 1, 'V', 'V'), + 375: ('ISTHAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'), + 376: ('ROUNDBAHTDOWN', 1, 1, 0x02, 1, 'V', 'V'), + 377: ('ROUNDBAHTUP', 1, 1, 0x02, 1, 'V', 'V'), + 378: ('THAIYEAR', 1, 1, 0x02, 1, 'V', 'V'), + 379: ('RTD', 2, 5, 0x04, 1, 'V', 'V'), + } + +tAttrNames = { + 0x00: "Skip??", # seen in SAMPLES.XLS which shipped with Excel 5.0 + 0x01: "Volatile", + 0x02: "If", + 0x04: "Choose", + 0x08: "Skip", + 0x10: "Sum", + 0x20: "Assign", + 0x40: "Space", + 0x41: "SpaceVolatile", + } + +_error_opcodes = {} +for _x in [0x07, 0x08, 0x0A, 0x0B, 0x1C, 0x1D, 0x2F]: + _error_opcodes[_x] = 1 +is_error_opcode = _error_opcodes.has_key + +tRangeFuncs = (min, max, min, max, min, max) +tIsectFuncs = (max, min, max, min, max, min) + +def do_box_funcs(box_funcs, boxa, boxb): + return tuple([ + func(numa, numb) + for func, numa, numb in zip(box_funcs, boxa.coords, boxb.coords) + ]) + +def adjust_cell_addr_biff8(rowval, colval, reldelta, browx=None, bcolx=None): + row_rel = (colval >> 15) & 1 + col_rel = (colval >> 14) & 1 + rowx = rowval + colx = colval & 0xff + if reldelta: + if row_rel and rowx >= 32768: + rowx -= 65536 + if col_rel and colx >= 128: + colx -= 256 + else: + if row_rel: + rowx -= browx + if col_rel: + colx -= bcolx + return rowx, colx, row_rel, col_rel + +def adjust_cell_addr_biff_le7( + rowval, colval, reldelta, browx=None, bcolx=None): + row_rel = (rowval >> 15) & 1 + col_rel = (rowval >> 14) & 1 + rowx = rowval & 0x3fff + colx = colval + if reldelta: + if row_rel and rowx >= 8192: + rowx -= 16384 + if col_rel and colx >= 128: + colx -= 256 + else: + if row_rel: + rowx -= browx + if col_rel: + colx -= bcolx + return rowx, colx, row_rel, col_rel + +def get_cell_addr(data, pos, bv, reldelta, browx=None, bcolx=None): + if bv >= 80: + rowval, colval = unpack("= 80: + row1val, row2val, col1val, col2val = unpack(" addins %r" % 
(refx, info) + assert ref_first_sheetx == 0xFFFE == ref_last_sheetx + return (-5, -5) + if ref_recordx != bk._supbook_locals_inx: + if blah: + print "/// get_externsheet_local_range(refx=%d) -> external %r" % (refx, info) + return (-4, -4) # external reference + if ref_first_sheetx == 0xFFFE == ref_last_sheetx: + if blah: + print "/// get_externsheet_local_range(refx=%d) -> unspecified sheet %r" % (refx, info) + return (-1, -1) # internal reference, any sheet + if ref_first_sheetx == 0xFFFF == ref_last_sheetx: + if blah: + print "/// get_externsheet_local_range(refx=%d) -> deleted sheet(s)" % (refx, ) + return (-2, -2) # internal reference, deleted sheet(s) + nsheets = len(bk._all_sheets_map) + if not(0 <= ref_first_sheetx <= ref_last_sheetx < nsheets): + if blah: + print "/// get_externsheet_local_range(refx=%d) -> %r" % (refx, info) + print "--- first/last sheet not in range(%d)" % nsheets + return (-102, -102) # stuffed up somewhere :-( + xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx] + xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx] + if not(0 <= xlrd_sheetx1 <= xlrd_sheetx2): + return (-3, -3) # internal reference, but to a macro sheet + return xlrd_sheetx1, xlrd_sheetx2 + +def get_externsheet_local_range_b57( + bk, raw_extshtx, ref_first_sheetx, ref_last_sheetx, blah=0): + if raw_extshtx > 0: + if blah: + print "/// get_externsheet_local_range_b57(raw_extshtx=%d) -> external" % raw_extshtx + return (-4, -4) # external reference + if ref_first_sheetx == -1 and ref_last_sheetx == -1: + return (-2, -2) # internal reference, deleted sheet(s) + nsheets = len(bk._all_sheets_map) + if not(0 <= ref_first_sheetx <= ref_last_sheetx < nsheets): + if blah: + print "/// get_externsheet_local_range_b57(%d, %d, %d) -> ???" 
\ + % (raw_extshtx, ref_first_sheetx, ref_last_sheetx) + print "--- first/last sheet not in range(%d)" % nsheets + return (-103, -103) # stuffed up somewhere :-( + xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx] + xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx] + if not(0 <= xlrd_sheetx1 <= xlrd_sheetx2): + return (-3, -3) # internal reference, but to a macro sheet + return xlrd_sheetx1, xlrd_sheetx2 + +class FormulaError(Exception): + pass + +oBOOL = 3 +oERR = 4 +oMSNG = 5 # tMissArg +oNUM = 2 +oREF = -1 +oREL = -2 +oSTRG = 1 +oUNK = 0 + +okind_dict = { + -2: "oREL", + -1: "oREF", + 0 : "oUNK", + 1 : "oSTRG", + 2 : "oNUM", + 3 : "oBOOL", + 4 : "oERR", + 5 : "oMSNG", + } + +listsep = ',' #### probably should depend on locale + +## +# Used in evaluating formulas. +# The following table describes the kinds and how their values +# are represented.

+# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +#
Kind symbol | Kind number | Value representation
oBOOL | 3 | integer: 0 => False; 1 => True
oERR | 4 | None, or an int error code (same as XL_CELL_ERROR in the Cell class). +#
oMSNG | 5 | Used by Excel as a placeholder for a missing (not supplied) function +# argument. Should *not* appear as a final formula result. Value is None.
oNUM | 2 | A float. Note that there is no way of distinguishing dates.
oREF | -1 | The value is either None or a non-empty list of +# absolute Ref3D instances.
+#
oREL | -2 | The value is None or a non-empty list of +# fully or partially relative Ref3D instances. +#
oSTRG | 1 | A Unicode string.
oUNK | 0 | The kind is unknown or ambiguous. The value is None.
+#


class Operand(object):

    ##
    # Constant value of the operand. None signals that the real value is a
    # variable (it depends on cell data) rather than a constant.
    value = None
    ##
    # Kind of the operand. oUNK signals that the kind could not be
    # determined unambiguously.
    kind = oUNK
    ##
    # Formula text rebuilt from the parsed tokens. Function names are always
    # English, whatever language the formula was written in (that language
    # doesn't seem to be recorded anywhere). Arguments are separated by ","
    # rather than ";" or any other separator that might suit the end-user's
    # locale better; patches welcome.
    text = '?'

    def __init__(self, akind=None, avalue=None, arank=0, atext='?'):
        # A None kind/value leaves the class-level default in force instead
        # of creating an instance attribute.
        if avalue is not None:
            self.value = avalue
        if akind is not None:
            self.kind = akind
        # rank is an internal gizmo (operator precedence) consulted when
        # reconstructing formula text.
        self.rank, self.text = arank, atext

    def __repr__(self):
        # Show the symbolic kind name where one is known.
        try:
            kind_name = okind_dict[self.kind]
        except KeyError:
            kind_name = "?Unknown kind?"
        return "Operand(kind=%s, value=%r, text=%r)" % (
            kind_name, self.value, self.text)

# Ref3D derives from tuple only when builtin types can be subclassed.
_ref3d_base = object
if CAN_SUBCLASS_BUILTIN:
    _ref3d_base = tuple

##
#

Represents an absolute or relative 3-dimensional reference to a box +# of one or more cells.
+# -- New in version 0.6.0 +#

+# +#

The coords attribute is a tuple of the form:
+# (shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)
+# where 0 <= thingxlo <= thingx < thingxhi.
+# Note that it is quite possible to have thingx > nthings; for example +# Print_Titles could have colxhi == 256 and/or rowxhi == 65536 +# irrespective of how many columns/rows are actually used in the worksheet. +# The caller will need to decide how to handle this situation. +# Keyword: IndexError :-) +#

+# +#

The components of the coords attribute are also available as individual +# attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.

+# +#

The relflags attribute is a 6-tuple of flags which indicate whether +# the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0).
+# Note that there is necessarily no information available as to what cell(s) +# the reference could possibly be relative to. The caller must decide what if +# any use to make of oREL operands. Note also that a partially relative +# reference may well be a typo. +# For example, define name A1Z10 as $a$1:$z10 (missing $ after z) +# while the cursor is on cell Sheet3!A27.
+# The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26) +# and relflags = (0, 0, 0, 1, 0, 0).
+# So far, only one possibility of a sheet-relative component in +# a reference has been noticed: a 2D reference located in the "current sheet". +#
# This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...).

class Ref3D(_ref3d_base):

    def __init__(self, atuple):
        # Items 0-5 are the box bounds; items 6-11, when supplied, are the
        # matching relative (1) / absolute (0) flags.
        self.coords = bounds = atuple[0:6]
        # No flags supplied => every component is absolute.
        self.relflags = atuple[6:12] or (0, 0, 0, 0, 0, 0)
        (self.shtxlo, self.shtxhi,
         self.rowxlo, self.rowxhi,
         self.colxlo, self.colxhi) = bounds

    def __repr__(self):
        # Mention relflags only when at least one component is relative.
        flags = self.relflags
        if flags and flags != (0, 0, 0, 0, 0, 0):
            return "Ref3D(coords=%r, relflags=%r)" \
                % (self.coords, flags)
        return "Ref3D(coords=%r)" % (self.coords, )

# Token codes of the binary operators (keys of binop_rules).
tAdd, tSub, tMul, tDiv, tPower, tConcat = range(0x03, 0x09)
tLT, tLE, tEQ, tGE, tGT, tNE = range(0x09, 0x0F)

import operator as opr

def nop(x):
    """Identity conversion: hand the argument back untouched."""
    return x

def _opr_pow(x, y):
    # x raised to the power y
    return x ** y

# Two-argument wrappers around the six relational operators.
def _opr_lt(x, y):
    return x < y

def _opr_le(x, y):
    return x <= y

def _opr_eq(x, y):
    return x == y

def _opr_ge(x, y):
    return x >= y

def _opr_gt(x, y):
    return x > y

def _opr_ne(x, y):
    return x != y

def num2strg(num):
    """Attempt to emulate Excel's default conversion
    from number to string: a trailing ".0" is dropped.
    """
    text = str(num)
    if text[-2:] == ".0":
        text = text[:-2]
    return text

# Per-kind argument converters used by the binary operator rules.
_arith_argdict = {oNUM: nop, oSTRG: float}
_cmp_argdict = {oNUM: nop, oSTRG: nop}
# Seems no conversions done on relops; in Excel, "1" > 9 produces TRUE.
+_strg_argdict = {oNUM:num2strg, oSTRG:nop} +binop_rules = { + tAdd: (_arith_argdict, oNUM, opr.add, 30, '+'), + tSub: (_arith_argdict, oNUM, opr.sub, 30, '-'), + tMul: (_arith_argdict, oNUM, opr.mul, 40, '*'), + tDiv: (_arith_argdict, oNUM, opr.div, 40, '/'), + tPower: (_arith_argdict, oNUM, _opr_pow, 50, '^',), + tConcat:(_strg_argdict, oSTRG, opr.add, 20, '&'), + tLT: (_cmp_argdict, oBOOL, _opr_lt, 10, '<'), + tLE: (_cmp_argdict, oBOOL, _opr_le, 10, '<='), + tEQ: (_cmp_argdict, oBOOL, _opr_eq, 10, '='), + tGE: (_cmp_argdict, oBOOL, _opr_ge, 10, '>='), + tGT: (_cmp_argdict, oBOOL, _opr_gt, 10, '>'), + tNE: (_cmp_argdict, oBOOL, _opr_ne, 10, '<>'), + } + +unop_rules = { + 0x13: (lambda x: -x, 70, '-', ''), # unary minus + 0x12: (lambda x: x, 70, '+', ''), # unary plus + 0x14: (lambda x: x / 100.0, 60, '', '%'),# percent + } + +LEAF_RANK = 90 +FUNC_RANK = 90 + +STACK_ALARM_LEVEL = 5 +STACK_PANIC_LEVEL = 10 + +def evaluate_name_formula(bk, nobj, namex, blah=0, level=0): + if level > STACK_ALARM_LEVEL: + blah = 1 + data = nobj.raw_formula + fmlalen = nobj.basic_formula_len + bv = bk.biff_version + reldelta = 1 # All defined name formulas use "Method B" [OOo docs] + if blah: + print "::: evaluate_name_formula %r %r %d %d %r level=%d" \ + % (namex, nobj.name, fmlalen, bv, data, level) + hex_char_dump(data, 0, fmlalen) + if level > STACK_PANIC_LEVEL: + raise XLRDError("Excessive indirect references in NAME formula") + sztab = szdict[bv] + pos = 0 + stack = [] + any_rel = 0 + any_err = 0 + any_external = 0 + unk_opnd = Operand(oUNK, None) + error_opnd = Operand(oERR, None) + spush = stack.append + + def do_binop(opcd, stk): + assert len(stk) >= 2 + bop = stk.pop() + aop = stk.pop() + argdict, result_kind, func, rank, sym = binop_rules[opcd] + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + resop = Operand(result_kind, None, rank, otext) + try: + bconv = 
argdict[bop.kind] + aconv = argdict[aop.kind] + except KeyError: + stk.append(resop) + return + if bop.value is None or aop.value is None: + stk.append(resop) + return + bval = bconv(bop.value) + aval = aconv(aop.value) + result = func(aval, bval) + if result_kind == oBOOL: + result = intbool(result) # -> 1 or 0 + resop.value = result + stk.append(resop) + + def do_unaryop(opcode, arglist, result_kind, stk): + assert len(stk) >= 1 + aop = stk.pop() + assert aop.kind in arglist + val = aop.value + func, rank, sym1, sym2 = unop_rules[opcode] + otext = ''.join([ + sym1, + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym2, + ]) + if val is not None: + val = func(val) + stk.append(Operand(result_kind, val, rank, otext)) + + def not_in_name_formula(op_arg, oname_arg): + msg = "ERROR *** Token 0x%02x (%s) found in NAME formula" \ + % (op_arg, oname_arg) + raise FormulaError(msg) + + if fmlalen == 0: + stack = [unk_opnd] + + while 0 <= pos < fmlalen: + op = ord(data[pos]) + opcode = op & 0x1f + optype = (op & 0x60) >> 5 + if optype: + opx = opcode + 32 + else: + opx = opcode + oname = onames[opx] # + [" RVA"][optype] + sz = sztab[opx] + if blah: + print "Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \ + % (pos, op, oname, sz, opcode, optype) + print "Stack =", stack + if sz == -2: + msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \ + % (op, oname, bv) + raise FormulaError(msg) + if not optype: + if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl + not_in_name_formula(op, oname) + elif 0x03 <= opcode <= 0x0E: + # Add, Sub, Mul, Div, Power + # tConcat + # tLT, ..., tNE + do_binop(opcode, stack) + elif opcode == 0x0F: # tIsect + if blah: print >> bk.logfile, "tIsect pre", stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ' ' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank 
< rank], + ]) + res = Operand(oREF) + res.text = otext + if bop.kind == oERR or aop.kind == oERR: + res.kind = oERR + elif bop.kind == oUNK or aop.kind == oUNK: + # This can happen with undefined + # (go search in the current sheet) labels. + # For example =Bob Sales + # Each label gets a NAME record with an empty formula (!) + # Evaluation of the tName token classifies it as oUNK + # res.kind = oREF + pass + elif bop.kind == oREF == aop.kind: + if aop.value is not None and bop.value is not None: + assert len(aop.value) == 1 + assert len(bop.value) == 1 + coords = do_box_funcs( + tIsectFuncs, aop.value[0], bop.value[0]) + res.value = [Ref3D(coords)] + elif bop.kind == oREL == aop.kind: + res.kind = oREL + if aop.value is not None and bop.value is not None: + assert len(aop.value) == 1 + assert len(bop.value) == 1 + coords = do_box_funcs( + tIsectFuncs, aop.value[0], bop.value[0]) + relfa = aop.value[0].relflags + relfb = bop.value[0].relflags + if relfa == relfb: + res.value = [Ref3D(coords + relfa)] + else: + pass + spush(res) + if blah: print >> bk.logfile, "tIsect post", stack + elif opcode == 0x10: # tList + if blah: print >> bk.logfile, "tList pre", stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ',' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF, None, rank, otext) + if bop.kind == oERR or aop.kind == oERR: + res.kind = oERR + elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL): + res.kind = oREF + if aop.kind == oREL or bop.kind == oREL: + res.kind = oREL + if aop.value is not None and bop.value is not None: + assert len(aop.value) >= 1 + assert len(bop.value) == 1 + res.value = aop.value + bop.value + else: + pass + spush(res) + if blah: print >> bk.logfile, "tList post", stack + elif opcode == 0x11: # tRange + if blah: print >> bk.logfile, "tRange pre", 
stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ':' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF, None, rank, otext) + if bop.kind == oERR or aop.kind == oERR: + res = oERR + elif bop.kind == oREF == aop.kind: + if aop.value is not None and bop.value is not None: + assert len(aop.value) == 1 + assert len(bop.value) == 1 + coords = do_box_funcs( + tRangeFuncs, aop.value[0], bop.value[0]) + res.value = [Ref3D(coords)] + elif bop.kind == oREL == aop.kind: + res.kind = oREL + if aop.value is not None and bop.value is not None: + assert len(aop.value) == 1 + assert len(bop.value) == 1 + coords = do_box_funcs( + tRangeFuncs, aop.value[0], bop.value[0]) + relfa = aop.value[0].relflags + relfb = bop.value[0].relflags + if relfa == relfb: + res.value = [Ref3D(coords + relfa)] + else: + pass + spush(res) + if blah: print >> bk.logfile, "tRange post", stack + elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent + do_unaryop(opcode, (oUNK, oNUM,), oNUM, stack) + elif opcode == 0x15: # tParen + # source cosmetics + pass + elif opcode == 0x16: # tMissArg + spush(Operand(oMSNG, None, LEAF_RANK, '')) + elif opcode == 0x17: # tStr + if bv <= 70: + strg, newpos = unpack_string_update_pos( + data, pos+1, bk.encoding, lenlen=1) + else: + strg, newpos = unpack_unicode_update_pos( + data, pos+1, lenlen=1) + sz = newpos - pos + if blah: print >> bk.logfile, " sz=%d strg=%r" % (sz, strg) + text = '"' + strg.replace('"', '""') + '"' + spush(Operand(oSTRG, strg, LEAF_RANK, text)) + elif opcode == 0x18: # tExtended + # new with BIFF 8 + assert bv >= 80 + # not in OOo docs + raise FormulaError("tExtended token not implemented") + elif opcode == 0x19: # tAttr + subop, nc = unpack("> bk.logfile, "tAttrSum", stack + assert len(stack) >= 1 + aop = stack[-1] + otext = 'SUM(%s)' % aop.text + 
stack[-1] = Operand(oNUM, None, FUNC_RANK, otext) + else: + sz = 4 + if blah: + print " subop=%02xh subname=t%s sz=%d nc=%02xh" \ + % (subop, subname, sz, nc) + elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet + assert bv < 50 + raise FormulaError("tSheet & tEndsheet tokens not implemented") + elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum + inx = opcode - 0x1C + nb = [1, 1, 2, 8][inx] + kind = [oERR, oBOOL, oNUM, oNUM][inx] + value, = unpack("<" + "BBHd"[inx], data[pos+1:pos+1+nb]) + if inx == 2: # tInt + value = float(value) + text = str(value) + elif inx == 3: # tNum + text = str(value) + elif inx == 1: # tBool + text = ('FALSE', 'TRUE')[value] + else: + text = '"' +error_text_from_code[value] + '"' + spush(Operand(kind, value, LEAF_RANK, text)) + else: + raise FormulaError("Unhandled opcode: 0x%02x" % opcode) + if sz <= 0: + raise FormulaError("Size not set for opcode 0x%02x" % opcode) + pos += sz + continue + if opcode == 0x00: # tArray + spush(unk_opnd) + elif opcode == 0x01: # tFunc + nb = 1 + int(bv >= 40) + funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])[0] + func_attrs = func_defs.get(funcx, None) + if not func_attrs: + print >> bk.logfile, "*** formula/tFunc unknown FuncID:%d" \ + % funcx + spush(unk_opnd) + else: + func_name, nargs = func_attrs[:2] + if blah: + print " FuncID=%d name=%s nargs=%d" \ + % (funcx, func_name, nargs) + assert len(stack) >= nargs + argtext = listsep.join([arg.text for arg in stack[-nargs:]]) + otext = "%s(%s)" % (func_name, argtext) + del stack[-nargs:] + res = Operand(oUNK, None, FUNC_RANK, otext) + spush(res) + elif opcode == 0x02: #tFuncVar + nb = 1 + int(bv >= 40) + nargs, funcx = unpack("> bk.logfile, "*** formula/tFuncVar unknown FuncID:%d" \ + % funcx + spush(unk_opnd) + else: + func_name, minargs, maxargs = func_attrs[:3] + if blah: + print " name: %r, min~max args: %d~%d" \ + % (func_name, minargs, maxargs) + assert minargs <= nargs <= maxargs + assert len(stack) >= nargs + assert len(stack) >= nargs + 
argtext = listsep.join([arg.text for arg in stack[-nargs:]]) + otext = "%s(%s)" % (func_name, argtext) + res = Operand(oUNK, None, FUNC_RANK, otext) + if funcx == 1: # IF + testarg = stack[-nargs] + if testarg.kind not in (oNUM, oBOOL): + if blah and testarg.kind != oUNK: + print "IF testarg kind?" + elif testarg.value not in (0, 1): + if blah and testarg.value is not None: + print "IF testarg value?" + else: + if nargs == 2 and not testarg.value: + # IF(FALSE, tv) => FALSE + res.kind, res.value = oBOOL, 0 + else: + respos = -nargs + 2 - int(testarg.value) + chosen = stack[respos] + if chosen.kind == oMSNG: + res.kind, res.value = oNUM, 0 + else: + res.kind, res.value = chosen.kind, chosen.value + if blah: + print "$$$$$$ IF => constant" + elif funcx == 100: # CHOOSE + testarg = stack[-nargs] + if testarg.kind == oNUM: + if 1 <= testarg.value < nargs: + chosen = stack[-nargs + int(testarg.value)] + if chosen.kind == oMSNG: + res.kind, res.value = oNUM, 0 + else: + res.kind, res.value = chosen.kind, chosen.value + del stack[-nargs:] + spush(res) + elif opcode == 0x03: #tName + tgtnamex = unpack("> bk.logfile, " tgtnamex=%d" % tgtnamex + tgtobj = bk.name_obj_list[tgtnamex] + if not tgtobj.evaluated: + ### recursive ### + evaluate_name_formula(bk, tgtobj, tgtnamex, blah, level+1) + if tgtobj.macro or tgtobj.binary \ + or tgtobj.any_err: + if blah: + tgtobj.dump( + bk.logfile, + header="!!! 
tgtobj has problems!!!", + footer="----------- --------", + ) + res = Operand(oUNK, None) + any_err = any_err or tgtobj.macro or tgtobj.binary or tgtobj.any_err + any_rel = any_rel or tgtobj.any_rel + else: + assert len(tgtobj.stack) == 1 + res = copy.deepcopy(tgtobj.stack[0]) + res.rank = LEAF_RANK + if tgtobj.scope == -1: + res.text = tgtobj.name + else: + res.text = "%s!%s" \ + % (bk._sheet_names[tgtobj.scope], tgtobj.name) + if blah: + print >> bk.logfile, " tName: setting text to", repr(res.text) + spush(res) + elif opcode == 0x04: # tRef + # not_in_name_formula(op, oname) + res = get_cell_addr(data, pos+1, bv, reldelta) + if blah: print >> bk.logfile, " ", res + rowx, colx, row_rel, col_rel = res + shx1 = shx2 = 0 ####### N.B. relative to the CURRENT SHEET + any_rel = 1 + coords = (shx1, shx2+1, rowx, rowx+1, colx, colx+1) + if blah: print >> bk.logfile, " ", coords + res = Operand(oUNK, None) + if optype == 1: + relflags = (1, 1, row_rel, row_rel, col_rel, col_rel) + res = Operand(oREL, [Ref3D(coords + relflags)]) + spush(res) + elif opcode == 0x05: # tArea + # not_in_name_formula(op, oname) + res1, res2 = get_cell_range_addr(data, pos+1, bv, reldelta) + if blah: print >> bk.logfile, " ", res1, res2 + rowx1, colx1, row_rel1, col_rel1 = res1 + rowx2, colx2, row_rel2, col_rel2 = res2 + shx1 = shx2 = 0 ####### N.B. 
relative to the CURRENT SHEET + any_rel = 1 + coords = (shx1, shx2+1, rowx1, rowx2+1, colx1, colx2+1) + if blah: print >> bk.logfile, " ", coords + res = Operand(oUNK, None) + if optype == 1: + relflags = (1, 1, row_rel1, row_rel2, col_rel1, col_rel2) + res = Operand(oREL, [Ref3D(coords + relflags)]) + spush(res) + elif opcode == 0x06: # tMemArea + not_in_name_formula(op, oname) + elif opcode == 0x09: # tMemFunc + nb = unpack("> bk.logfile, " %d bytes of cell ref formula" % nb + # no effect on stack + elif opcode == 0x0C: #tRefN + not_in_name_formula(op, oname) + # res = get_cell_addr(data, pos+1, bv, reldelta=1) + # # note *ALL* tRefN usage has signed offset for relative addresses + # any_rel = 1 + # if blah: print >> bk.logfile, " ", res + # spush(res) + elif opcode == 0x0D: #tAreaN + not_in_name_formula(op, oname) + # res = get_cell_range_addr(data, pos+1, bv, reldelta=1) + # # note *ALL* tAreaN usage has signed offset for relative addresses + # any_rel = 1 + # if blah: print >> bk.logfile, " ", res + elif opcode == 0x1A: # tRef3d + if bv >= 80: + res = get_cell_addr(data, pos+3, bv, reldelta) + refx = unpack("> bk.logfile, "tRef3d", raw_extshtx, raw_shx1, raw_shx2 + shx1, shx2 = get_externsheet_local_range_b57( + bk, raw_extshtx, raw_shx1, raw_shx2, blah) + rowx, colx, row_rel, col_rel = res + is_rel = row_rel or col_rel + any_rel = any_rel or is_rel + coords = (shx1, shx2+1, rowx, rowx+1, colx, colx+1) + any_err |= shx1 < -1 + if blah: print >> bk.logfile, " ", coords + res = Operand(oUNK, None) + if is_rel: + relflags = (0, 0, row_rel, row_rel, col_rel, col_rel) + ref3d = Ref3D(coords + relflags) + res.kind = oREL + res.text = rangename3drel(bk, ref3d) + else: + ref3d = Ref3D(coords) + res.kind = oREF + res.text = rangename3d(bk, ref3d) + res.rank = LEAF_RANK + if optype == 1: + res.value = [ref3d] + spush(res) + elif opcode == 0x1B: # tArea3d + if bv >= 80: + res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta) + refx = unpack("> bk.logfile, 
"tArea3d", raw_extshtx, raw_shx1, raw_shx2 + shx1, shx2 = get_externsheet_local_range_b57( + bk, raw_extshtx, raw_shx1, raw_shx2, blah) + any_err |= shx1 < -1 + rowx1, colx1, row_rel1, col_rel1 = res1 + rowx2, colx2, row_rel2, col_rel2 = res2 + is_rel = row_rel1 or col_rel1 or row_rel2 or col_rel2 + any_rel = any_rel or is_rel + coords = (shx1, shx2+1, rowx1, rowx2+1, colx1, colx2+1) + if blah: print >> bk.logfile, " ", coords + res = Operand(oUNK, None) + if is_rel: + relflags = (0, 0, row_rel1, row_rel2, col_rel1, col_rel2) + ref3d = Ref3D(coords + relflags) + res.kind = oREL + res.text = rangename3drel(bk, ref3d) + else: + ref3d = Ref3D(coords) + res.kind = oREF + res.text = rangename3d(bk, ref3d) + res.rank = LEAF_RANK + if optype == 1: + res.value = [ref3d] + + spush(res) + elif opcode == 0x19: # tNameX + dodgy = 0 + res = Operand(oUNK, None) + if bv >= 80: + refx, tgtnamex = unpack(" 0: + refx -= 1 + elif refx < 0: + refx = -refx - 1 + else: + dodgy = 1 + if blah: + print >> bk.logfile, \ + " origrefx=%d refx=%d tgtnamex=%d dodgy=%d" \ + % (origrefx, refx, tgtnamex, dodgy) + if tgtnamex == namex: + if blah: print >> bk.logfile, "!!!! Self-referential !!!!" + dodgy = any_err = 1 + if not dodgy: + if bv >= 80: + shx1, shx2 = get_externsheet_local_range(bk, refx, blah) + elif origrefx > 0: + shx1, shx2 = (-4, -4) # external ref + else: + exty = bk._externsheet_type_b57[refx] + if exty == 4: # non-specific sheet in own doc't + shx1, shx2 = (-1, -1) # internal, any sheet + else: + shx1, shx2 = (-666, -666) + if dodgy or shx1 < -1: + otext = "<>" \ + % (tgtnamex, origrefx) + res = Operand(oUNK, None, LEAF_RANK, otext) + else: + tgtobj = bk.name_obj_list[tgtnamex] + if not tgtobj.evaluated: + ### recursive ### + evaluate_name_formula(bk, tgtobj, tgtnamex, blah, level+1) + if tgtobj.macro or tgtobj.binary \ + or tgtobj.any_err: + if blah: + tgtobj.dump( + bk.logfile, + header="!!! 
bad tgtobj !!!", + footer="------------------", + ) + res = Operand(oUNK, None) + any_err = any_err or tgtobj.macro or tgtobj.binary or tgtobj.any_err + any_rel = any_rel or tgtobj.any_rel + else: + assert len(tgtobj.stack) == 1 + res = copy.deepcopy(tgtobj.stack[0]) + res.rank = LEAF_RANK + if tgtobj.scope == -1: + res.text = tgtobj.name + else: + res.text = "%s!%s" \ + % (bk._sheet_names[tgtobj.scope], tgtobj.name) + if blah: + print >> bk.logfile, " tNameX: setting text to", repr(res.text) + spush(res) + elif is_error_opcode(opcode): + any_err = 1 + spush(error_opnd) + else: + if blah: + print >> bk.logfile, "FORMULA: /// Not handled yet: t" + oname + any_err = 1 + if sz <= 0: + raise FormulaError("Fatal: token size is not positive") + pos += sz + any_rel = not not any_rel + if blah: + print "End of formula. level=%d any_rel=%d any_err=%d stack=%r" % \ + (level, not not any_rel, any_err, stack) + if len(stack) >= 2: + print "*** Stack has unprocessed args" + print + nobj.stack = stack + if len(stack) != 1: + nobj.result = None + else: + nobj.result = stack[0] + nobj.any_rel = any_rel + nobj.any_err = any_err + nobj.any_external = any_external + nobj.evaluated = 1 + +#### under construction #### +def decompile_formula(bk, fmla, fmlalen, + reldelta, browx=None, bcolx=None, + # browx & bcolx are required when reldelta == 0 + blah=0, level=0): + if level > STACK_ALARM_LEVEL: + blah = 1 + data = fmla + bv = bk.biff_version + if blah: + print "::: decompile_formula len=%d reldelta=%d %r level=%d" \ + % (fmlalen, reldelta, data, level) + hex_char_dump(data, 0, fmlalen) + if level > STACK_PANIC_LEVEL: + raise XLRDError("Excessive indirect references in formula") + sztab = szdict[bv] + pos = 0 + stack = [] + any_rel = 0 + any_err = 0 + any_external = 0 + unk_opnd = Operand(oUNK, None) + error_opnd = Operand(oERR, None) + spush = stack.append + + def do_binop(opcd, stk): + assert len(stk) >= 2 + bop = stk.pop() + aop = stk.pop() + argdict, result_kind, func, rank, sym = 
binop_rules[opcd] + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + resop = Operand(result_kind, None, rank, otext) + stk.append(resop) + + def do_unaryop(opcode, arglist, result_kind, stk): + assert len(stk) >= 1 + aop = stk.pop() + assert aop.kind in arglist + func, rank, sym1, sym2 = unop_rules[opcode] + otext = ''.join([ + sym1, + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym2, + ]) + stk.append(Operand(result_kind, None, rank, otext)) + + def not_in_name_formula(op_arg, oname_arg): + msg = "ERROR *** Unexpected token 0x%02x (%s) found in formula" \ + % (op_arg, oname_arg) + # print msg + raise FormulaError(msg) + + if fmlalen == 0: + stack = [unk_opnd] + + while 0 <= pos < fmlalen: + op = ord(data[pos]) + opcode = op & 0x1f + optype = (op & 0x60) >> 5 + if optype: + opx = opcode + 32 + else: + opx = opcode + oname = onames[opx] # + [" RVA"][optype] + sz = sztab[opx] + if blah: + print "Pos:%d Op:0x%02x opname:t%s Sz:%d opcode:%02xh optype:%02xh" \ + % (pos, op, oname, sz, opcode, optype) + print "Stack =", stack + if sz == -2: + msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \ + % (op, oname, bv) + raise FormulaError(msg) + if not optype: + if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl + not_in_name_formula(op, oname) + elif 0x03 <= opcode <= 0x0E: + # Add, Sub, Mul, Div, Power + # tConcat + # tLT, ..., tNE + do_binop(opcode, stack) + elif opcode == 0x0F: # tIsect + if blah: print >> bk.logfile, "tIsect pre", stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ' ' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF) + res.text = otext + if bop.kind == oERR or aop.kind == oERR: + res.kind = oERR + elif bop.kind == oUNK or 
aop.kind == oUNK: + # This can happen with undefined + # (go search in the current sheet) labels. + # For example =Bob Sales + # Each label gets a NAME record with an empty formula (!) + # Evaluation of the tName token classifies it as oUNK + # res.kind = oREF + pass + elif bop.kind == oREF == aop.kind: + pass + elif bop.kind == oREL == aop.kind: + res.kind = oREL + else: + pass + spush(res) + if blah: print >> bk.logfile, "tIsect post", stack + elif opcode == 0x10: # tList + if blah: print >> bk.logfile, "tList pre", stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ',' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF, None, rank, otext) + if bop.kind == oERR or aop.kind == oERR: + res.kind = oERR + elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL): + res.kind = oREF + if aop.kind == oREL or bop.kind == oREL: + res.kind = oREL + else: + pass + spush(res) + if blah: print >> bk.logfile, "tList post", stack + elif opcode == 0x11: # tRange + if blah: print >> bk.logfile, "tRange pre", stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + sym = ':' + rank = 80 ########## check ####### + otext = ''.join([ + '('[:aop.rank < rank], + aop.text, + ')'[:aop.rank < rank], + sym, + '('[:bop.rank < rank], + bop.text, + ')'[:bop.rank < rank], + ]) + res = Operand(oREF, None, rank, otext) + if bop.kind == oERR or aop.kind == oERR: + res = oERR + elif bop.kind == oREF == aop.kind: + pass + else: + pass + spush(res) + if blah: print >> bk.logfile, "tRange post", stack + elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent + do_unaryop(opcode, (oUNK, oNUM,), oNUM, stack) + elif opcode == 0x15: # tParen + # source cosmetics + pass + elif opcode == 0x16: # tMissArg + spush(Operand(oMSNG, None, LEAF_RANK, '')) + elif opcode == 0x17: # tStr + if bv <= 70: + strg, 
newpos = unpack_string_update_pos( + data, pos+1, bk.encoding, lenlen=1) + else: + strg, newpos = unpack_unicode_update_pos( + data, pos+1, lenlen=1) + sz = newpos - pos + if blah: print >> bk.logfile, " sz=%d strg=%r" % (sz, strg) + text = '"' + strg.replace('"', '""') + '"' + spush(Operand(oSTRG, None, LEAF_RANK, text)) + elif opcode == 0x18: # tExtended + # new with BIFF 8 + assert bv >= 80 + # not in OOo docs + raise FormulaError("tExtended token not implemented") + elif opcode == 0x19: # tAttr + subop, nc = unpack("> bk.logfile, "tAttrSum", stack + assert len(stack) >= 1 + aop = stack[-1] + otext = 'SUM(%s)' % aop.text + stack[-1] = Operand(oNUM, None, FUNC_RANK, otext) + else: + sz = 4 + if blah: + print " subop=%02xh subname=t%s sz=%d nc=%02xh" \ + % (subop, subname, sz, nc) + elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet + assert bv < 50 + raise FormulaError("tSheet & tEndsheet tokens not implemented") + elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum + inx = opcode - 0x1C + nb = [1, 1, 2, 8][inx] + kind = [oERR, oBOOL, oNUM, oNUM][inx] + value, = unpack("<" + "BBHd"[inx], data[pos+1:pos+1+nb]) + if inx == 2: # tInt + value = float(value) + text = str(value) + elif inx == 3: # tNum + text = str(value) + elif inx == 1: # tBool + text = ('FALSE', 'TRUE')[value] + else: + text = '"' +error_text_from_code[value] + '"' + spush(Operand(kind, None, LEAF_RANK, text)) + else: + raise FormulaError("Unhandled opcode: 0x%02x" % opcode) + if sz <= 0: + raise FormulaError("Size not set for opcode 0x%02x" % opcode) + pos += sz + continue + if opcode == 0x00: # tArray + spush(unk_opnd) + elif opcode == 0x01: # tFunc + nb = 1 + int(bv >= 40) + funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])[0] + func_attrs = func_defs.get(funcx, None) + if not func_attrs: + print >> bk.logfile, "*** formula/tFunc unknown FuncID:%d" % funcx + spush(unk_opnd) + else: + func_name, nargs = func_attrs[:2] + if blah: + print " FuncID=%d name=%s nargs=%d" \ + % (funcx, func_name, 
nargs) + assert len(stack) >= nargs + argtext = listsep.join([arg.text for arg in stack[-nargs:]]) + otext = "%s(%s)" % (func_name, argtext) + del stack[-nargs:] + res = Operand(oUNK, None, FUNC_RANK, otext) + spush(res) + elif opcode == 0x02: #tFuncVar + nb = 1 + int(bv >= 40) + nargs, funcx = unpack("> bk.logfile, "*** formula/tFuncVar unknown FuncID:%d" \ + % funcx + spush(unk_opnd) + else: + func_name, minargs, maxargs = func_attrs[:3] + if blah: + print " name: %r, min~max args: %d~%d" \ + % (func_name, minargs, maxargs) + assert minargs <= nargs <= maxargs + assert len(stack) >= nargs + assert len(stack) >= nargs + argtext = listsep.join([arg.text for arg in stack[-nargs:]]) + otext = "%s(%s)" % (func_name, argtext) + res = Operand(oUNK, None, FUNC_RANK, otext) + del stack[-nargs:] + spush(res) + elif opcode == 0x03: #tName + tgtnamex = unpack("> bk.logfile, " tgtnamex=%d" % tgtnamex + tgtobj = bk.name_obj_list[tgtnamex] + if tgtobj.scope == -1: + otext = tgtobj.name + else: + otext = "%s!%s" % (bk._sheet_names[tgtobj.scope], tgtobj.name) + if blah: + print >> bk.logfile, " tName: setting text to", repr(otext) + res = Operand(oUNK, None, LEAF_RANK, otext) + spush(res) + elif opcode == 0x04: # tRef + res = get_cell_addr(data, pos+1, bv, reldelta, browx, bcolx) + if blah: print >> bk.logfile, " ", res + rowx, colx, row_rel, col_rel = res + is_rel = row_rel or col_rel + if is_rel: + okind = oREL + else: + okind = oREF + otext = cellnamerel(rowx, colx, row_rel, col_rel) + res = Operand(okind, None, LEAF_RANK, otext) + spush(res) + elif opcode == 0x05: # tArea + res1, res2 = get_cell_range_addr( + data, pos+1, bv, reldelta, browx, bcolx) + if blah: print >> bk.logfile, " ", res1, res2 + rowx1, colx1, row_rel1, col_rel1 = res1 + rowx2, colx2, row_rel2, col_rel2 = res2 + coords = (rowx1, rowx2+1, colx1, colx2+1) + relflags = (row_rel1, row_rel2, col_rel1, col_rel2) + is_rel = intbool(sum(relflags)) + if is_rel: + okind = oREL + else: + okind = oREF + if blah: print 
>> bk.logfile, " ", coords, relflags + otext = rangename2drel(coords, relflags) + res = Operand(okind, None, LEAF_RANK, otext) + spush(res) + elif opcode == 0x06: # tMemArea + not_in_name_formula(op, oname) + elif opcode == 0x09: # tMemFunc + nb = unpack("> bk.logfile, " %d bytes of cell ref formula" % nb + # no effect on stack + elif opcode == 0x0C: #tRefN + not_in_name_formula(op, oname) + # res = get_cell_addr(data, pos+1, bv, reldelta=1) + # # note *ALL* tRefN usage has signed offset for relative addresses + # any_rel = 1 + # if blah: print >> bk.logfile, " ", res + # spush(res) + elif opcode == 0x0D: #tAreaN + not_in_name_formula(op, oname) + # res = get_cell_range_addr(data, pos+1, bv, reldelta=1) + # # note *ALL* tAreaN usage has signed offset for relative addresses + # any_rel = 1 + # if blah: print >> bk.logfile, " ", res + elif opcode == 0x1A: # tRef3d + if bv >= 80: + res = get_cell_addr(data, pos+3, bv, reldelta, browx, bcolx) + refx = unpack("> bk.logfile, "tRef3d", raw_extshtx, raw_shx1, raw_shx2 + shx1, shx2 = get_externsheet_local_range_b57( + bk, raw_extshtx, raw_shx1, raw_shx2, blah) + rowx, colx, row_rel, col_rel = res + is_rel = row_rel or col_rel + any_rel = any_rel or is_rel + coords = (shx1, shx2+1, rowx, rowx+1, colx, colx+1) + any_err |= shx1 < -1 + if blah: print >> bk.logfile, " ", coords + res = Operand(oUNK, None) + if is_rel: + relflags = (0, 0, row_rel, row_rel, col_rel, col_rel) + ref3d = Ref3D(coords + relflags) + res.kind = oREL + res.text = rangename3drel(bk, ref3d) + else: + ref3d = Ref3D(coords) + res.kind = oREF + res.text = rangename3d(bk, ref3d) + res.rank = LEAF_RANK + res.value = None + spush(res) + elif opcode == 0x1B: # tArea3d + if bv >= 80: + res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta) + refx = unpack("> bk.logfile, "tArea3d", raw_extshtx, raw_shx1, raw_shx2 + shx1, shx2 = get_externsheet_local_range_b57( + bk, raw_extshtx, raw_shx1, raw_shx2, blah) + any_err |= shx1 < -1 + rowx1, colx1, row_rel1, 
col_rel1 = res1 + rowx2, colx2, row_rel2, col_rel2 = res2 + is_rel = row_rel1 or col_rel1 or row_rel2 or col_rel2 + any_rel = any_rel or is_rel + coords = (shx1, shx2+1, rowx1, rowx2+1, colx1, colx2+1) + if blah: print >> bk.logfile, " ", coords + res = Operand(oUNK, None) + if is_rel: + relflags = (0, 0, row_rel1, row_rel2, col_rel1, col_rel2) + ref3d = Ref3D(coords + relflags) + res.kind = oREL + res.text = rangename3drel(bk, ref3d) + else: + ref3d = Ref3D(coords) + res.kind = oREF + res.text = rangename3d(bk, ref3d) + res.rank = LEAF_RANK + spush(res) + elif opcode == 0x19: # tNameX + dodgy = 0 + res = Operand(oUNK, None) + if bv >= 80: + refx, tgtnamex = unpack(" 0: + refx -= 1 + elif refx < 0: + refx = -refx - 1 + else: + dodgy = 1 + if blah: + print >> bk.logfile, \ + " origrefx=%d refx=%d tgtnamex=%d dodgy=%d" \ + % (origrefx, refx, tgtnamex, dodgy) + # if tgtnamex == namex: + # if blah: print >> bk.logfile, "!!!! Self-referential !!!!" + # dodgy = any_err = 1 + if not dodgy: + if bv >= 80: + shx1, shx2 = get_externsheet_local_range(bk, refx, blah) + elif origrefx > 0: + shx1, shx2 = (-4, -4) # external ref + else: + exty = bk._externsheet_type_b57[refx] + if exty == 4: # non-specific sheet in own doc't + shx1, shx2 = (-1, -1) # internal, any sheet + else: + shx1, shx2 = (-666, -666) + okind = oUNK + ovalue = None + if shx1 == -5: # addin func name + okind = oSTRG + ovalue = bk.addin_func_names[tgtnamex] + otext = '"' + ovalue.replace('"', '""') + '"' + elif dodgy or shx1 < -1: + otext = "<>" \ + % (tgtnamex, origrefx) + else: + tgtobj = bk.name_obj_list[tgtnamex] + if tgtobj.scope == -1: + otext = tgtobj.name + else: + otext = "%s!%s" \ + % (bk._sheet_names[tgtobj.scope], tgtobj.name) + if blah: + print >> bk.logfile, " tNameX: setting text to", repr(res.text) + res = Operand(okind, ovalue, LEAF_RANK, otext) + spush(res) + elif is_error_opcode(opcode): + any_err = 1 + spush(error_opnd) + else: + if blah: + print >> bk.logfile, "FORMULA: /// Not handled yet: 
t" + oname + any_err = 1 + if sz <= 0: + raise FormulaError("Fatal: token size is not positive") + pos += sz + any_rel = not not any_rel + if blah: + print "End of formula. level=%d any_rel=%d any_err=%d stack=%r" % \ + (level, not not any_rel, any_err, stack) + if len(stack) >= 2: + print "*** Stack has unprocessed args" + print + + if len(stack) != 1: + result = None + else: + result = stack[0].text + return result + +#### under deconstruction ### +def dump_formula(bk, data, fmlalen, bv, reldelta, blah=0, isname=0): + if blah: + print "dump_formula", fmlalen, bv, len(data) + hex_char_dump(data, 0, fmlalen) + assert bv >= 80 #### this function needs updating #### + sztab = szdict[bv] + pos = 0 + stack = [] + any_rel = 0 + any_err = 0 + spush = stack.append + while 0 <= pos < fmlalen: + op = ord(data[pos]) + opcode = op & 0x1f + optype = (op & 0x60) >> 5 + if optype: + opx = opcode + 32 + else: + opx = opcode + oname = onames[opx] # + [" RVA"][optype] + + sz = sztab[opx] + if blah: + print "Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \ + % (pos, op, oname, sz, opcode, optype) + if not optype: + if 0x01 <= opcode <= 0x02: # tExp, tTbl + # reference to a shared formula or table record + rowx, colx = unpack("> bk.logfile, " ", (rowx, colx) + elif opcode == 0x10: # tList + if blah: print >> bk.logfile, "tList pre", stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + spush(aop + bop) + if blah: print >> bk.logfile, "tlist post", stack + elif opcode == 0x11: # tRange + if blah: print >> bk.logfile, "tRange pre", stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + assert len(aop) == 1 + assert len(bop) == 1 + result = do_box_funcs(tRangeFuncs, aop[0], bop[0]) + spush(result) + if blah: print >> bk.logfile, "tRange post", stack + elif opcode == 0x0F: # tIsect + if blah: print >> bk.logfile, "tIsect pre", stack + assert len(stack) >= 2 + bop = stack.pop() + aop = stack.pop() + assert len(aop) == 1 + assert len(bop) == 
1 + result = do_box_funcs(tIsectFuncs, aop[0], bop[0]) + spush(result) + if blah: print >> bk.logfile, "tIsect post", stack + elif opcode == 0x19: # tAttr + subop, nc = unpack("> bk.logfile, " subop=%02xh subname=t%s sz=%d nc=%02xh" % (subop, subname, sz, nc) + elif opcode == 0x17: # tStr + if bv <= 70: + nc = ord(data[pos+1]) + strg = data[pos+2:pos+2+nc] # left in 8-bit encoding + sz = nc + 2 + else: + strg, newpos = unpack_unicode_update_pos(data, pos+1, lenlen=1) + sz = newpos - pos + if blah: print >> bk.logfile, " sz=%d strg=%r" % (sz, strg) + else: + if sz <= 0: + print "**** Dud size; exiting ****" + return + pos += sz + continue + if opcode == 0x00: # tArray + pass + elif opcode == 0x01: # tFunc + nb = 1 + int(bv >= 40) + funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb]) + if blah: print >> bk.logfile, " FuncID=%d" % funcx + elif opcode == 0x02: #tFuncVar + nb = 1 + int(bv >= 40) + nargs, funcx = unpack("> bk.logfile, " FuncID=%d nargs=%d macro=%d prompt=%d" % (funcx, nargs, macro, prompt) + elif opcode == 0x03: #tName + namex = unpack("> bk.logfile, " namex=%d" % namex + elif opcode == 0x04: # tRef + res = get_cell_addr(data, pos+1, bv, reldelta) + if blah: print >> bk.logfile, " ", res + elif opcode == 0x05: # tArea + res = get_cell_range_addr(data, pos+1, bv, reldelta) + if blah: print >> bk.logfile, " ", res + elif opcode == 0x09: # tMemFunc + nb = unpack("> bk.logfile, " %d bytes of cell ref formula" % nb + elif opcode == 0x0C: #tRefN + res = get_cell_addr(data, pos+1, bv, reldelta=1) + # note *ALL* tRefN usage has signed offset for relative addresses + any_rel = 1 + if blah: print >> bk.logfile, " ", res + elif opcode == 0x0D: #tAreaN + res = get_cell_range_addr(data, pos+1, bv, reldelta=1) + # note *ALL* tAreaN usage has signed offset for relative addresses + any_rel = 1 + if blah: print >> bk.logfile, " ", res + elif opcode == 0x1A: # tRef3d + refx = unpack("> bk.logfile, " ", refx, res + rowx, colx, row_rel, col_rel = res + any_rel = any_rel 
or row_rel or col_rel + shx1, shx2 = get_externsheet_local_range(bk, refx, blah) + any_err |= shx1 < -1 + coords = (shx1, shx2+1, rowx, rowx+1, colx, colx+1) + if blah: print >> bk.logfile, " ", coords + if optype == 1: spush([coords]) + elif opcode == 0x1B: # tArea3d + refx = unpack("> bk.logfile, " ", refx, res1, res2 + rowx1, colx1, row_rel1, col_rel1 = res1 + rowx2, colx2, row_rel2, col_rel2 = res2 + any_rel = any_rel or row_rel1 or col_rel1 or row_rel2 or col_rel2 + shx1, shx2 = get_externsheet_local_range(bk, refx, blah) + any_err |= shx1 < -1 + coords = (shx1, shx2+1, rowx1, rowx2+1, colx1, colx2+1) + if blah: print >> bk.logfile, " ", coords + if optype == 1: spush([coords]) + elif opcode == 0x19: # tNameX + refx, namex = unpack("> bk.logfile, " refx=%d namex=%d" % (refx, namex) + elif is_error_opcode(opcode): + any_err = 1 + else: + if blah: print >> bk.logfile, "FORMULA: /// Not handled yet: t" + oname + any_err = 1 + if sz <= 0: + print "**** Dud size; exiting ****" + return + pos += sz + if blah: + print >> bk.logfile, "End of formula. any_rel=%d any_err=%d stack=%r" % \ + (not not any_rel, any_err, stack) + if len(stack) >= 2: + print >> bk.logfile, "*** Stack has unprocessed args" + +# === Some helper functions for displaying cell references === + +# Note that a "non-standard" syntax is used in row and column +# components in relative references. +# For example, consider a relative reference: up two rows, right 3 columns. +# On screen, with cursor in cell D10, this would appear as G8. +# On screen, with cursor in cell Z100, this would appear as AC98. +# On screen, with cursor in cell A1, this would appear as D65535. +# These functions will display such a reference as [@+3,#-2]. +# "@" refers to the unknown base column. +# "#" refers to the unknown base row. +# +# I'm aware of only one possibility of a sheet-relative component in +# a reference: a 2D reference located in the "current sheet". +# xlrd stores this internally with bounds of (0, 1, ...) 
and +# relative flags of (1, 1, ...). These functions display the +# sheet component as empty, just like Excel etc. + +def rownamerel(rowx, rowxrel): + if not rowxrel: + return "$%d" % rowx + if rowx > 0: + return "#+%d" % rowx + if rowx < 0: + return "#-%d" % (-rowx) + return "#" + +def colnamerel(colx, colxrel): + if not colxrel: + return "$" + colname(colx) + if colx > 0: + return "@+%d" % colx + if colx < 0: + return "@-%d" % (-colx) + return "@" +## +# Utility function: (5, 7) => 'H6' +def cellname(rowx, colx): + """ (5, 7) => 'H6' """ + return "%s%d" % (colname(colx), rowx+1) + +## +# Utility function: (5, 7) => '$H$6' +def cellnameabs(rowx, colx): + """ (5, 7) => '$H$6' """ + return "$%s$%d" % (colname(colx), rowx+1) + +def cellnamerel(rowx, colx, rowxrel, colxrel): + if not rowxrel and not colxrel: + return cellnameabs(rowx, colx) + return "[%s,%s]" % ( + colnamerel(colx, colxrel), + rownamerel(rowx, rowxrel)) +## +# Utility function: 7 => 'H', 27 => 'AB' +def colname(colx): + """ 7 => 'H', 27 => 'AB' """ + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + if colx <= 25: + return alphabet[colx] + else: + xdiv26, xmod26 = divmod(colx, 26) + return alphabet[xdiv26 - 1] + alphabet[xmod26] + +def rangename2d(rlo, rhi, clo, chi): + """ (5, 20, 7, 10) => '$H$6:$J$20' """ + if rhi == rlo+1 and chi == clo+1: + return cellnameabs(rlo, clo) + return "%s:%s" % (cellnameabs(rlo, clo), cellnameabs(rhi-1, chi-1)) + +def rangename2drel((rlo, rhi, clo, chi), (rlorel, rhirel, clorel, chirel)): + return "%s:%s" % ( + cellnamerel(rlo, clo, rlorel, clorel), + cellnamerel(rhi-1, chi-1, rhirel, chirel) + ) +## +# Utility function: +#
Ref3D((1, 4, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20' +def rangename3d(book, ref3d): + """ Ref3D(1, 4, 5, 20, 7, 10) => 'Sheet2:Sheet3!$H$6:$J$20' + (assuming Excel's default sheetnames) """ + coords = ref3d.coords + return "%s!%s" % ( + sheetrange(book, *coords[:2]), + rangename2d(*coords[2:6])) + +## +# Utility function: +#
Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1)) +# => 'Sheet1![@-13,#-32]:[@+12,#-23]' +# where '@' refers to the current or base column and '#' +# refers to the current or base row. +def rangename3drel(book, ref3d): + coords = ref3d.coords + relflags = ref3d.relflags + shdesc = sheetrangerel(book, coords[:2], relflags[:2]) + rngdesc = rangename2drel(coords[2:6], relflags[2:6]) + if not shdesc: + return rngdesc + return "%s!%s" % (shdesc, rngdesc) + +def quotedsheetname(shnames, shx): + if shx >= 0: + shname = shnames[shx] + else: + shname = { + -1: "?internal; any sheet?", + -2: "internal; deleted sheet", + -3: "internal; macro sheet", + -4: "<>", + }.get(shx, "?error %d?" % shx) + if "'" in shname: + return "'" + shname.replace("'", "''") + "'" + if " " in shname: + return "'" + shname + "'" + return shname + +def sheetrange(book, slo, shi): + shnames = book.sheet_names() + shdesc = quotedsheetname(shnames, slo) + if slo != shi-1: + shdesc += ":" + quotedsheetname(shnames, shi-1) + return shdesc + +def sheetrangerel(book, (slo, shi), (slorel, shirel)): + if not slorel and not shirel: + return sheetrange(book, slo, shi) + assert (slo == 0 == shi-1) and slorel and shirel + return "" + +# ============================================================== diff --git a/tablib/packages/xlrd/licences.py b/tablib/packages/xlrd/licences.py new file mode 100644 index 0000000..1e262a9 --- /dev/null +++ b/tablib/packages/xlrd/licences.py @@ -0,0 +1,77 @@ +# -*- coding: cp1252 -*- + +""" +Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any +contributors may be used to endorse or promote products derived from this +software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. +""" + +""" +/*- + * Copyright (c) 2001 David Giffin. + * All rights reserved. + * + * Based on the the Java version: Andrew Khan Copyright (c) 2000. + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. 
All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by + * David Giffin ." + * + * 4. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by + * David Giffin ." + * + * THIS SOFTWARE IS PROVIDED BY DAVID GIFFIN ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID GIFFIN OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ +""" diff --git a/tablib/packages/xlrd/sheet.py b/tablib/packages/xlrd/sheet.py new file mode 100644 index 0000000..70f7779 --- /dev/null +++ b/tablib/packages/xlrd/sheet.py @@ -0,0 +1,1768 @@ +# -*- coding: cp1252 -*- + +## +#

Portions copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under a BSD-style licence.

+## + +# 2009-05-31 SJM Fixed problem with no CODEPAGE record on extremely minimal BIFF2.x 3rd-party file +# 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo +# 2008-02-09 SJM Excel 2.0: build XFs on the fly from cell attributes +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-10-11 SJM Added missing entry for blank cell type to ctype_text +# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file +# 2007-04-22 SJM Remove experimental "trimming" facility. + +from biffh import * +from timemachine import * +from struct import unpack +from formula import dump_formula, decompile_formula, rangename2d +from formatting import nearest_colour_index, Format +import time + +DEBUG = 0 +OBJ_MSO_DEBUG = 0 + +_WINDOW2_options = ( + # Attribute names and initial values to use in case + # a WINDOW2 record is not written. + ("show_formulas", 0), + ("show_grid_lines", 1), + ("show_sheet_headers", 1), + ("panes_are_frozen", 0), + ("show_zero_values", 1), + ("automatic_grid_line_colour", 1), + ("columns_from_right_to_left", 0), + ("show_outline_symbols", 1), + ("remove_splits_if_pane_freeze_is_removed", 0), + ("sheet_selected", 0), + # "sheet_visible" appears to be merely a clone of "sheet_selected". + # The real thing is the visibility attribute from the BOUNDSHEET record. + ("sheet_visible", 0), + ("show_in_page_break_preview", 0), + ) + +## +#

Contains the data for one worksheet.

+# +#

In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a +# column index, counting from zero. +# Negative values for row/column indexes and slice positions are supported in the expected fashion.

+# +#

For information about cell types and cell values, refer to the documentation of the Cell class.

+# +#

WARNING: You don't call this class yourself. You access Sheet objects via the Book object that +# was returned when you called xlrd.open_workbook("myfile.xls").

+ + +class Sheet(BaseObject): + ## + # Name of sheet. + name = '' + + ## + # Number of rows in sheet. A row index is in range(thesheet.nrows). + nrows = 0 + + ## + # Number of columns in sheet. A column index is in range(thesheet.ncols). + ncols = 0 + + ## + # The map from a column index to a Colinfo object. Often there is an entry + # in COLINFO records for all column indexes in range(257). + # Note that xlrd ignores the entry for the non-existent + # 257th column. On the other hand, there may be no entry for unused columns. + #
-- New in version 0.6.1 + colinfo_map = {} + + ## + # The map from a row index to a Rowinfo object. Note that it is possible + # to have missing entries -- at least one source of XLS files doesn't + # bother writing ROW records. + #
-- New in version 0.6.1 + rowinfo_map = {} + + ## + # List of address ranges of cells containing column labels. + # These are set up in Excel by Insert > Name > Labels > Columns. + #
-- New in version 0.6.0 + #
How to deconstruct the list: + #
+    # for crange in thesheet.col_label_ranges:
+    #     rlo, rhi, clo, chi = crange
+    #     for rx in xrange(rlo, rhi):
+    #         for cx in xrange(clo, chi):
+    #             print "Column label at (rowx=%d, colx=%d) is %r" \
+    #                 % (rx, cx, thesheet.cell_value(rx, cx))
+    # 
+ col_label_ranges = [] + + ## + # List of address ranges of cells containing row labels. + # For more details, see col_label_ranges above. + #
-- New in version 0.6.0 + row_label_ranges = [] + + ## + # List of address ranges of cells which have been merged. + # These are set up in Excel by Format > Cells > Alignment, then ticking + # the "Merge cells" box. + #
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True) + #
How to deconstruct the list: + #
+    # for crange in thesheet.merged_cells:
+    #     rlo, rhi, clo, chi = crange
+    #     for rowx in xrange(rlo, rhi):
+    #         for colx in xrange(clo, chi):
+    #             # cell (rlo, clo) (the top left one) will carry the data
+    #             # and formatting info; the remainder will be recorded as
+    #             # blank cells, but a renderer will apply the formatting info
+    #             # for the top left cell (e.g. border, pattern) to all cells in
+    #             # the range.
+    # 
+ merged_cells = [] + + ## + # Default column width from DEFCOLWIDTH record, else None. + # From the OOo docs:
+ # """Column width in characters, using the width of the zero character + # from default font (first FONT record in the file). Excel adds some + # extra space to the default width, depending on the default font and + # default font size. The algorithm how to exactly calculate the resulting + # column width is not known.
+ # Example: The default width of 8 set in this record results in a column + # width of 8.43 using Arial font with a size of 10 points."""
+ # For the default hierarchy, refer to the Colinfo class above. + #
-- New in version 0.6.1 + defcolwidth = None + + ## + # Default column width from STANDARDWIDTH record, else None. + # From the OOo docs:
+ # """Default width of the columns in 1/256 of the width of the zero + # character, using default font (first FONT record in the file)."""
+ # For the default hierarchy, refer to the Colinfo class above. + #
-- New in version 0.6.1 + standardwidth = None + + ## + # Default value to be used for a row if there is + # no ROW record for that row. + # From the optional DEFAULTROWHEIGHT record. + default_row_height = None + + ## + # Default value to be used for a row if there is + # no ROW record for that row. + # From the optional DEFAULTROWHEIGHT record. + default_row_height_mismatch = None + + ## + # Default value to be used for a row if there is + # no ROW record for that row. + # From the optional DEFAULTROWHEIGHT record. + default_row_hidden = None + + ## + # Default value to be used for a row if there is + # no ROW record for that row. + # From the optional DEFAULTROWHEIGHT record. + default_additional_space_above = None + + ## + # Default value to be used for a row if there is + # no ROW record for that row. + # From the optional DEFAULTROWHEIGHT record. + default_additional_space_below = None + + ## + # Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden + # by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden + # only by VBA macro). + visibility = 0 + + ## + # A 256-element tuple corresponding to the contents of the GCW record for this sheet. + # If no such record, treat as all bits zero. + # Applies to BIFF4-7 only. See docs of Colinfo class for discussion. 
gcw = (0, ) * 256

##
# Set up an empty sheet belonging to the given book.
# Nothing is read from the file here; the cell storage starts empty
# and every sheet-level setting starts at its default value.
# @param book The owning Book; several settings are copied from it.
# @param position Stored as _position (swapped into the book's
# position when the sheet is read).
# @param name Name of the sheet.
# @param number Index of the sheet; also used to look up its visibility.
def __init__(self, book, position, name, number):
    # Identity, plus settings copied from the owning book.
    self.book = book
    self.biff_version = book.biff_version
    self._position = position
    self.logfile = book.logfile
    self.pickleable = book.pickleable
    arrays_usable = array_array and (CAN_PICKLE_ARRAY or not book.pickleable)
    self.dont_use_array = not arrays_usable
    self.name = name
    self.number = number
    self.verbosity = book.verbosity
    self.formatting_info = book.formatting_info
    self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map

    # Dimensions.
    self.nrows = 0 # actual, including possibly empty cells
    self.ncols = 0
    self._maxdatarowx = self._maxdatacolx = -1 # highest rowx / colx containing a non-empty cell
    self._dimnrows = self._dimncols = 0 # as per DIMENSIONS record

    # Cell storage: one entry per row in each of three parallel lists.
    self._cell_values = []
    self._cell_types = []
    self._cell_xf_indexes = []
    self._need_fix_ragged_rows = 0

    # Column and row defaults.
    self.defcolwidth = None
    self.standardwidth = None
    self.default_row_height = None
    self.default_row_height_mismatch = 0
    self.default_row_hidden = 0
    self.default_additional_space_above = 0
    self.default_additional_space_below = 0
    self.colinfo_map = {}
    self.rowinfo_map = {}
    self.col_label_ranges = []
    self.row_label_ranges = []
    self.merged_cells = []
    self._xf_index_stats = [0, 0, 0, 0]
    self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record

    # Window settings: table-driven defaults first, then the rest.
    for option_name, option_default in _WINDOW2_options:
        setattr(self, option_name, option_default)
    self.first_visible_rowx = 0
    self.first_visible_colx = 0
    self.gridline_colour_index = 0x40
    self.gridline_colour_rgb = None # pre-BIFF8
    self.cached_page_break_preview_mag_factor = 0
    self.cached_normal_view_mag_factor = 0
    self._ixfe = None # BIFF2 only
    self._cell_attr_to_xfx = {} # BIFF2.0 only

    #### Don't initialise this here, use class attribute initialisation.
    #### self.gcw = (0, ) * 256 ####

    # Hard limits on the sheet size depend on the file format version.
    if self.biff_version < 80:
        self.utter_max_rows = 16384
    else:
        self.utter_max_rows = 65536
    self.utter_max_cols = 256

##
# Cell object in the given row and column.
# Its xf_index is None unless formatting_info is enabled.
def cell(self, rowx, colx):
    xf_index = None
    if self.formatting_info:
        xf_index = self.cell_xf_index(rowx, colx)
    ctype = self._cell_types[rowx][colx]
    value = self._cell_values[rowx][colx]
    return Cell(ctype, value, xf_index)

##
# Value of the cell in the given row and column.
def cell_value(self, rowx, colx):
    row_of_values = self._cell_values[rowx]
    return row_of_values[colx]

##
# Type of the cell in the given row and column.
# Refer to the documentation of the Cell class.
def cell_type(self, rowx, colx):
    row_of_types = self._cell_types[rowx]
    return row_of_types[colx]

##
# XF index of the cell in the given row and column.
# This is an index into Book.xf_list.
#
# -- New in version 0.6.1
def cell_xf_index(self, rowx, colx):
    self.req_fmt_info()
    stats = self._xf_index_stats
    # 1. An XF index stored for the cell itself wins.
    cell_xfx = self._cell_xf_indexes[rowx][colx]
    if cell_xfx > -1:
        stats[0] += 1
        return cell_xfx
    # 2. Otherwise use the row's xf_index, if the row has a usable one.
    if rowx in self.rowinfo_map:
        row_xfx = self.rowinfo_map[rowx].xf_index
        if row_xfx > -1:
            stats[1] += 1
            return row_xfx
    # 3. Otherwise use the column's xf_index.
    if colx in self.colinfo_map:
        col_xfx = self.colinfo_map[colx].xf_index
        assert col_xfx > -1
        stats[2] += 1
        return col_xfx
    # 4. If all else fails, 15 is used as hardwired global default xf_index.
    stats[3] += 1
    return 15

##
# Returns a sequence of the Cell objects in the given row.
def row(self, rowx):
    make_cell = self.cell
    return [make_cell(rowx, colx) for colx in xrange(self.ncols)]

##
# Returns a slice of the types
# of the cells in the given row.
def row_types(self, rowx, start_colx=0, end_colx=None):
    types_in_row = self._cell_types[rowx]
    if end_colx is not None:
        return types_in_row[start_colx:end_colx]
    return types_in_row[start_colx:]

##
# Returns a slice of the values
# of the cells in the given row.
def row_values(self, rowx, start_colx=0, end_colx=None):
    values_in_row = self._cell_values[rowx]
    if end_colx is not None:
        return values_in_row[start_colx:end_colx]
    return values_in_row[start_colx:]

##
# Returns a slice of the Cell objects in the given row.
# Negative bounds count back from ncols; bounds are clipped to the row.
def row_slice(self, rowx, start_colx=0, end_colx=None):
    ncols = self.ncols
    if start_colx < 0:
        start_colx = max(start_colx + ncols, 0)
    if end_colx is None or end_colx > ncols:
        end_colx = ncols
    elif end_colx < 0:
        end_colx += ncols
    make_cell = self.cell
    return [make_cell(rowx, colx) for colx in xrange(start_colx, end_colx)]

##
# Returns a slice of the Cell objects in the given column.
# Negative bounds count back from nrows; bounds are clipped to the sheet.
def col_slice(self, colx, start_rowx=0, end_rowx=None):
    nr = self.nrows
    if start_rowx < 0:
        start_rowx += nr
        if start_rowx < 0:
            start_rowx = 0
    if end_rowx is None or end_rowx > nr:
        end_rowx = nr
    elif end_rowx < 0:
        end_rowx += nr
    return [
        self.cell(rowx, colx)
        for rowx in xrange(start_rowx, end_rowx)
        ]

##
# Returns a slice of the values of the cells in the given column.
# Bounds are normalised exactly as in col_slice.
def col_values(self, colx, start_rowx=0, end_rowx=None):
    nr = self.nrows
    if start_rowx < 0:
        start_rowx += nr
        if start_rowx < 0:
            start_rowx = 0
    if end_rowx is None or end_rowx > nr:
        end_rowx = nr
    elif end_rowx < 0:
        end_rowx += nr
    return [
        self._cell_values[rowx][colx]
        for rowx in xrange(start_rowx, end_rowx)
        ]

##
# Returns a slice of the types of the cells in the given column.
# Bounds are normalised exactly as in col_slice.
def col_types(self, colx, start_rowx=0, end_rowx=None):
    nr = self.nrows
    if start_rowx < 0:
        start_rowx += nr
        if start_rowx < 0:
            start_rowx = 0
    if end_rowx is None or end_rowx > nr:
        end_rowx = nr
    elif end_rowx < 0:
        end_rowx += nr
    return [
        self._cell_types[rowx][colx]
        for rowx in xrange(start_rowx, end_rowx)
        ]

##
# Returns a sequence of the Cell objects in the given column.
def col(self, colx):
    return self.col_slice(colx)
# Above two lines just for the docs. Here's the real McCoy:
col = col_slice

# === Following methods are used in building the worksheet.
# === They are not part of the API.

# Grow the cell storage so that it holds at least nr rows and so that
# a cell at (nr - 1, nc - 1) can be stored. nr and nc are counts, not
# indexes; both must be at least 1 and within the sheet's hard limits.
def extend_cells(self, nr, nc):
    # print "extend_cells_2", self.nrows, self.ncols, nr, nc
    assert 1 <= nc <= self.utter_max_cols
    assert 1 <= nr <= self.utter_max_rows
    if nr <= self.nrows:
        # New cell is in an existing row, so extend that row (if necessary).
        # Note that nr < self.nrows means that the cell data
        # is not in ascending row order!!
        self._need_fix_ragged_rows = 1
        nrx = nr - 1
        trow = self._cell_types[nrx]
        tlen = len(trow)
        nextra = max(nc, self.ncols) - tlen
        if nextra > 0:
            xce = XL_CELL_EMPTY
            if self.dont_use_array:
                trow.extend([xce] * nextra)
                if self.formatting_info:
                    self._cell_xf_indexes[nrx].extend([-1] * nextra)
            else:
                # Types are held in 'B' arrays and XF indexes in 'h' arrays.
                aa = array_array
                trow.extend(aa('B', [xce]) * nextra)
                if self.formatting_info:
                    self._cell_xf_indexes[nrx].extend(aa('h', [-1]) * nextra)
            # Values are always held in plain lists.
            self._cell_values[nrx].extend([''] * nextra)
    if nc > self.ncols:
        # Only the row touched above has been widened; the other rows are
        # now too short, so flag them for padding by fix_ragged_rows().
        self.ncols = nc
        self._need_fix_ragged_rows = 1
    if nr > self.nrows:
        # Append empty rows, each as wide as the (possibly updated) ncols.
        scta = self._cell_types.append
        scva = self._cell_values.append
        scxa = self._cell_xf_indexes.append
        fmt_info = self.formatting_info
        xce = XL_CELL_EMPTY
        nc = self.ncols
        if self.dont_use_array:
            for _unused in xrange(self.nrows, nr):
                scta([xce] * nc)
                scva([''] * nc)
                if fmt_info:
                    scxa([-1] * nc)
        else:
            aa = array_array
            for _unused in xrange(self.nrows, nr):
                scta(aa('B', [xce]) * nc)
                scva([''] * nc)
                if fmt_info:
                    scxa(aa('h', [-1]) * nc)
        self.nrows = nr

# Pad every row that is shorter than ncols with empty cells, so that all
# rows end up the same length. The elapsed time is kept in
# _fix_ragged_rows_time.
def fix_ragged_rows(self):
    t0 = time.time()
    ncols = self.ncols
    xce = XL_CELL_EMPTY
    aa = array_array
    s_cell_types = self._cell_types
    s_cell_values = self._cell_values
    s_cell_xf_indexes = self._cell_xf_indexes
    s_dont_use_array = self.dont_use_array
    s_fmt_info = self.formatting_info
    totrowlen = 0
    for rowx in xrange(self.nrows):
        trow = s_cell_types[rowx]
        rlen = len(trow)
        totrowlen += rlen
        nextra = ncols - rlen
        if nextra > 0:
            # Assigning to the empty slice [rlen:] appends the padding.
            s_cell_values[rowx][rlen:] = [''] * nextra
            if s_dont_use_array:
                trow[rlen:] = [xce] * nextra
                if s_fmt_info:
                    s_cell_xf_indexes[rowx][rlen:] = [-1] * nextra
            else:
                trow.extend(aa('B', [xce]) * nextra)
                if s_fmt_info:
                    s_cell_xf_indexes[rowx][rlen:] = aa('h', [-1]) * nextra
    self._fix_ragged_rows_time = time.time() - t0
    # Debugging output, disabled by the constant 0.
    if 0 and self.nrows:
        avgrowlen = float(totrowlen) / self.nrows
        print >> self.logfile, \
            "sheet %d: avg row len %.1f; max row len %d" \
            % (self.number, avgrowlen, self.ncols)

# Final adjustments once all the sheet's records have been handled:
# make the sheet large enough to cover every merged-cell range, report a
# disagreement with the DIMENSIONS record, and pad ragged rows if needed.
def tidy_dimensions(self):
    if self.verbosity >= 3:
        fprintf(self.logfile,
            "tidy_dimensions: nrows=%d ncols=%d _need_fix_ragged_rows=%d\n",
            self.nrows, self.ncols, self._need_fix_ragged_rows,
            )
    if 1 and self.merged_cells:
        nr = nc = 0
        umaxrows = self.utter_max_rows
        umaxcols = self.utter_max_cols
        for crange in self.merged_cells:
            rlo, rhi, clo, chi = crange
            if not (0 <= rlo < rhi <= umaxrows) \
            or not (0 <= clo < chi <= umaxcols):
                fprintf(self.logfile,
                    "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n",
                    self.number, self.name, crange)
            # NOTE(review): a range that triggered the warning above still
            # feeds nr/nc here, so an oversized range would presumably trip
            # the asserts in extend_cells() -- confirm whether intended.
            if rhi > nr: nr = rhi
            if chi > nc: nc = chi
        self.extend_cells(nr, nc)
    if self.verbosity >= 1 \
    and (self.nrows != self._dimnrows or self.ncols != self._dimncols):
        fprintf(self.logfile,
            "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n",
            self.number,
            self.name,
            self._dimnrows,
            self._dimncols,
            self.nrows,
            self.ncols,
            )
    if self._need_fix_ragged_rows:
        self.fix_ragged_rows()

# Store type, value and (if formatting_info is enabled) XF index for one
# cell. The first attempt assumes the storage is already big enough; on
# IndexError the storage is extended and the store is retried. Any other
# failure is reported on the logfile with the cell position, then re-raised.
def put_cell(self, rowx, colx, ctype, value, xf_index):
    try:
        self._cell_types[rowx][colx] = ctype
        self._cell_values[rowx][colx] = value
        if self.formatting_info:
            self._cell_xf_indexes[rowx][colx] = xf_index
    except IndexError:
        # print >> self.logfile, "put_cell extending", rowx, colx
        self.extend_cells(rowx+1, colx+1)
        try:
            self._cell_types[rowx][colx] = ctype
            self._cell_values[rowx][colx] = value
            if self.formatting_info:
                self._cell_xf_indexes[rowx][colx] = xf_index
        except:
            print >> self.logfile, "put_cell", rowx, colx
            raise
    except:
        print >> self.logfile, "put_cell", rowx, colx
        raise

# Same store-then-extend-and-retry scheme as put_cell, for a blank cell.
# Unlike put_cell, the XF index is stored without checking formatting_info.
# Note that the logged label is "put_cell".
def put_blank_cell(self, rowx, colx, xf_index):
    # This is used for cells from BLANK and MULBLANK records
    ctype = XL_CELL_BLANK
    value = ''
    try:
        self._cell_types[rowx][colx] = ctype
        self._cell_values[rowx][colx] = value
        self._cell_xf_indexes[rowx][colx] = xf_index
    except IndexError:
        # print >> self.logfile, "put_cell extending", rowx, colx
        self.extend_cells(rowx+1, colx+1)
        try:
            self._cell_types[rowx][colx] = ctype
            self._cell_values[rowx][colx] = value
            self._cell_xf_indexes[rowx][colx] = xf_index
        except:
            print >> self.logfile, "put_cell", rowx, colx
            raise
    except:
        print >> self.logfile, "put_cell", rowx, colx
        raise

# Same store-then-extend-and-retry scheme as put_cell, except that the
# cell type is looked up from the XF index via _xf_index_to_xl_type_map.
def put_number_cell(self, rowx, colx, value, xf_index):
    ctype = self._xf_index_to_xl_type_map[xf_index]
    try:
        self._cell_types[rowx][colx] = ctype
        self._cell_values[rowx][colx] = value
        if self.formatting_info:
            self._cell_xf_indexes[rowx][colx] = xf_index
    except IndexError:
        # print >> self.logfile, "put_number_cell extending", rowx, colx
        self.extend_cells(rowx+1, colx+1)
        try:
            self._cell_types[rowx][colx] = ctype
            self._cell_values[rowx][colx] = value
            if self.formatting_info:
                self._cell_xf_indexes[rowx][colx] = xf_index
        except:
            print >> self.logfile, "put_number_cell", rowx, colx
            raise
    except:
        print >> self.logfile, "put_number_cell", rowx, colx
        raise

# === Methods after this line neither know nor care about how cells are stored.
+ + def read(self, bk): + global rc_stats + DEBUG = 0 + blah = DEBUG or self.verbosity >= 2 + blah_rows = DEBUG or self.verbosity >= 4 + blah_formulas = 1 and blah + oldpos = bk._position + bk._position = self._position + XL_SHRFMLA_ETC_ETC = ( + XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2, + XL_ARRAY2, XL_TABLEOP_B2, + ) + self_put_number_cell = self.put_number_cell + self_put_cell = self.put_cell + self_put_blank_cell = self.put_blank_cell + local_unpack = unpack + bk_get_record_parts = bk.get_record_parts + bv = self.biff_version + fmt_info = self.formatting_info + eof_found = 0 + while 1: + # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position + rc, data_len, data = bk_get_record_parts() + # if rc in rc_stats: + # rc_stats[rc] += 1 + # else: + # rc_stats[rc] = 1 + # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data) + if rc == XL_NUMBER: + rowx, colx, xf_index, d = local_unpack('> self.logfile, \ + "*** NOTE: ROW record has row index %d; " \ + "should have 0 <= rowx < %d -- record ignored!" 
\ + % (rowx, self.utter_max_rows) + continue + r = Rowinfo() + # Using upkbits() is far too slow on a file + # with 30 sheets each with 10K rows :-( + # upkbits(r, bits1, ( + # ( 0, 0x7FFF, 'height'), + # (15, 0x8000, 'has_default_height'), + # )) + # upkbits(r, bits2, ( + # ( 0, 0x00000007, 'outline_level'), + # ( 4, 0x00000010, 'outline_group_starts_ends'), + # ( 5, 0x00000020, 'hidden'), + # ( 6, 0x00000040, 'height_mismatch'), + # ( 7, 0x00000080, 'has_default_xf_index'), + # (16, 0x0FFF0000, 'xf_index'), + # (28, 0x10000000, 'additional_space_above'), + # (29, 0x20000000, 'additional_space_below'), + # )) + # So: + r.height = bits1 & 0x7fff + r.has_default_height = (bits1 >> 15) & 1 + r.outline_level = bits2 & 7 + r.outline_group_starts_ends = (bits2 >> 4) & 1 + r.hidden = (bits2 >> 5) & 1 + r.height_mismatch = (bits2 >> 6) & 1 + r.has_default_xf_index = (bits2 >> 7) & 1 + r.xf_index = (bits2 >> 16) & 0xfff + r.additional_space_above = (bits2 >> 28) & 1 + r.additional_space_below = (bits2 >> 29) & 1 + if not r.has_default_xf_index: + r.xf_index = -1 + self.rowinfo_map[rowx] = r + if 0 and r.xf_index > -1: + fprintf(self.logfile, + "**ROW %d %d %d\n", + self.number, rowx, r.xf_index) + if blah_rows: + print >> self.logfile, 'ROW', rowx, bits1, bits2 + r.dump(self.logfile, + header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) + elif rc in XL_FORMULA_OPCODES: # 06, 0206, 0406 + # DEBUG = 1 + # if DEBUG: print "FORMULA: rc: 0x%04x data: %r" % (rc, data) + if bv >= 50: + rowx, colx, xf_index, result_str, flags = local_unpack('= 30: + rowx, colx, xf_index, result_str, flags = local_unpack(' 20)) + else: + strg = unpack_unicode(data2, 0, lenlen=2) + self.put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index) + # if DEBUG: print "FORMULA strg %r" % strg + elif result_str[0] == '\x01': + # boolean formula result + value = ord(result_str[2]) + self.put_cell(rowx, colx, XL_CELL_BOOLEAN, value, xf_index) + elif result_str[0] == '\x02': + # Error in cell + value = 
ord(result_str[2]) + self.put_cell(rowx, colx, XL_CELL_ERROR, value, xf_index) + elif result_str[0] == '\x03': + # empty ... i.e. empty (zero-length) string, NOT an empty cell. + self.put_cell(rowx, colx, XL_CELL_TEXT, u"", xf_index) + else: + raise XLRDError("unexpected special case (0x%02x) in FORMULA" % ord(result_str[0])) + else: + # it is a number + d = local_unpack('> self.logfile, \ + "*** NOTE: COLINFO record has first col index %d, last %d; " \ + "should have 0 <= first <= last <= 255 -- record ignored!" \ + % (first_colx, last_colx) + del c + continue + upkbits(c, flags, ( + ( 0, 0x0001, 'hidden'), + ( 1, 0x0002, 'bit1_flag'), + # *ALL* colinfos created by Excel in "default" cases are 0x0002!! + # Maybe it's "locked" by analogy with XFProtection data. + ( 8, 0x0700, 'outline_level'), + (12, 0x1000, 'collapsed'), + )) + for colx in xrange(first_colx, last_colx+1): + if colx > 255: break # Excel does 0 to 256 inclusive + self.colinfo_map[colx] = c + if 0: + fprintf(self.logfile, + "**COL %d %d %d\n", + self.number, colx, c.xf_index) + if blah: + fprintf( + self.logfile, + "COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n", + self.number, first_colx, last_colx, c.width, c.xf_index, flags, + ) + c.dump(self.logfile, header='===') + elif rc == XL_DEFCOLWIDTH: + self.defcolwidth, = local_unpack("> self.logfile, 'DEFCOLWIDTH', self.defcolwidth + elif rc == XL_STANDARDWIDTH: + if data_len != 2: + print >> self.logfile, '*** ERROR *** STANDARDWIDTH', data_len, repr(data) + self.standardwidth, = local_unpack("> self.logfile, 'STANDARDWIDTH', self.standardwidth + elif rc == XL_GCW: + if not fmt_info: continue # useless w/o COLINFO + assert data_len == 34 + assert data[0:2] == "\x20\x00" + iguff = unpack("<8i", data[2:34]) + gcw = [] + for bits in iguff: + for j in xrange(32): + gcw.append(bits & 1) + bits >>= 1 + self.gcw = tuple(gcw) + if 0: + showgcw = "".join(map(lambda x: "F "[x], gcw)).rstrip().replace(' ', '.') + print "GCW:", showgcw + elif rc 
== XL_BLANK: + if not fmt_info: continue + rowx, colx, xf_index = local_unpack('> self.logfile, "BLANK", rowx, colx, xf_index + self_put_blank_cell(rowx, colx, xf_index) + elif rc == XL_MULBLANK: # 00BE + if not fmt_info: continue + mul_row, mul_first = local_unpack('> self.logfile, "MULBLANK", mul_row, mul_first, mul_last + pos = 4 + for colx in xrange(mul_first, mul_last+1): + xf_index, = local_unpack('> self.logfile, "SHEET.READ: EOF" + eof_found = 1 + break + elif rc == XL_OBJ: + # handle SHEET-level objects; note there's a separate Book.handle_obj + self.handle_obj(data) + elif rc == XL_MSO_DRAWING: + self.handle_msodrawingetc(rc, data_len, data) + elif rc == XL_TXO: + self.handle_txo(data) + elif rc == XL_NOTE: + self.handle_note(data) + elif rc == XL_FEAT11: + self.handle_feat11(data) + elif rc in bofcodes: ##### EMBEDDED BOF ##### + version, boftype = local_unpack('> self.logfile, \ + "*** Unexpected embedded BOF (0x%04x) at offset %d: version=0x%04x type=0x%04x" \ + % (rc, bk._position - data_len - 4, version, boftype) + while 1: + code, data_len, data = bk.get_record_parts() + if code == XL_EOF: + break + if DEBUG: print >> self.logfile, "---> found EOF" + elif rc == XL_COUNTRY: + bk.handle_country(data) + elif rc == XL_LABELRANGES: + pos = 0 + pos = unpack_cell_range_address_list_update_pos( + self.row_label_ranges, data, pos, bv, addr_size=8, + ) + pos = unpack_cell_range_address_list_update_pos( + self.col_label_ranges, data, pos, bv, addr_size=8, + ) + assert pos == data_len + elif rc == XL_ARRAY: + row1x, rownx, col1x, colnx, array_flags, tokslen = \ + local_unpack("= 80 + num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \ + unpack("<6H", data[0:12]) + if self.verbosity >= 1: + fprintf(self.logfile, + "\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n" \ + "*** in Sheet %d (%r).\n" \ + "*** %d CF record(s); needs_recalc_or_redraw = %d\n" \ + "*** Bounding box is %s\n", + self.number, self.name, num_CFs, needs_recalc, + 
rangename2d(browx1, browx2+1, bcolx1, bcolx2+1), + ) + olist = [] # updated by the function + pos = unpack_cell_range_address_list_update_pos( + olist, data, 12, bv, addr_size=8) + # print >> self.logfile, repr(result), len(result) + if self.verbosity >= 1: + fprintf(self.logfile, + "*** %d individual range(s):\n" \ + "*** %s\n", + len(olist), + ", ".join([rangename2d(*coords) for coords in olist]), + ) + elif rc == XL_CF: + if not fmt_info: continue + cf_type, cmp_op, sz1, sz2, flags = unpack("> 26) & 1 + bord_block = (flags >> 28) & 1 + patt_block = (flags >> 29) & 1 + if self.verbosity >= 1: + fprintf(self.logfile, + "\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n" \ + "*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n" \ + "*** optional data blocks: font=%d, border=%d, pattern=%d\n", + cf_type, cmp_op, sz1, sz2, flags, + font_block, bord_block, patt_block, + ) + # hex_char_dump(data, 0, data_len) + pos = 12 + if font_block: + (font_height, font_options, weight, escapement, underline, + font_colour_index, two_bits, font_esc, font_underl) = \ + unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118]) + font_style = (two_bits > 1) & 1 + posture = (font_options > 1) & 1 + font_canc = (two_bits > 7) & 1 + cancellation = (font_options > 7) & 1 + if self.verbosity >= 1: + fprintf(self.logfile, + "*** Font info: height=%d, weight=%d, escapement=%d,\n" \ + "*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n" \ + "*** style=%d, posture=%d, canc=%d, cancellation=%d\n", + font_height, weight, escapement, underline, + font_colour_index, font_esc, font_underl, + font_style, posture, font_canc, cancellation, + ) + pos += 118 + if bord_block: + pos += 8 + if patt_block: + pos += 4 + fmla1 = data[pos:pos+sz1] + pos += sz1 + if blah and sz1: + fprintf(self.logfile, + "*** formula 1:\n", + ) + dump_formula(bk, fmla1, sz1, bv, reldelta=0, blah=1) + fmla2 = data[pos:pos+sz2] + pos += sz2 + assert pos == data_len + if blah and sz2: + 
fprintf(self.logfile, + "*** formula 2:\n", + ) + dump_formula(bk, fmla2, sz2, bv, reldelta=0, blah=1) + elif rc == XL_DEFAULTROWHEIGHT: + if data_len == 4: + bits, self.default_row_height = unpack("> 1) & 1 + self.default_additional_space_above = (bits >> 2) & 1 + self.default_additional_space_below = (bits >> 3) & 1 + elif rc == XL_MERGEDCELLS: + if not fmt_info: continue + pos = unpack_cell_range_address_list_update_pos( + self.merged_cells, data, 0, bv, addr_size=8) + if blah: + fprintf(self.logfile, + "MERGEDCELLS: %d ranges\n", int_floor_div(pos - 2, 8)) + assert pos == data_len, \ + "MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len) + elif rc == XL_WINDOW2: + if bv >= 80: + (options, + self.first_visible_rowx, self.first_visible_colx, + self.gridline_colour_index, + self.cached_page_break_preview_mag_factor, + self.cached_normal_view_mag_factor + ) = unpack(">= 1 + # print "WINDOW2: visible=%d selected=%d" \ + # % (self.sheet_visible, self.sheet_selected) + #### all of the following are for BIFF <= 4W + elif bv <= 45: + if rc == XL_FORMAT or rc == XL_FORMAT2: + bk.handle_format(data, rc) + elif rc == XL_FONT or rc == XL_FONT_B3B4: + bk.handle_font(data) + elif rc == XL_STYLE: + if not self.book._xf_epilogue_done: + self.book.xf_epilogue() + bk.handle_style(data) + elif rc == XL_PALETTE: + bk.handle_palette(data) + elif rc == XL_BUILTINFMTCOUNT: + bk.handle_builtinfmtcount(data) + elif rc == XL_XF4 or rc == XL_XF3 or rc == XL_XF2: #### N.B. not XL_XF + bk.handle_xf(data) + elif rc == XL_DATEMODE: + bk.handle_datemode(data) + elif rc == XL_CODEPAGE: + bk.handle_codepage(data) + elif rc == XL_FILEPASS: + bk.handle_filepass(data) + elif rc == XL_WRITEACCESS: + bk.handle_writeaccess(data) + elif rc == XL_IXFE: + self._ixfe = local_unpack('> self.logfile, \ + "*** NOTE: ROW_B2 record has row index %d; " \ + "should have 0 <= rowx < %d -- record ignored!" 
\ + % (rowx, self.utter_max_rows) + continue + r = Rowinfo() + r.height = bits1 & 0x7fff + r.has_default_height = (bits1 >> 15) & 1 + r.outline_level = 0 + r.outline_group_starts_ends = 0 + r.hidden = 0 + r.height_mismatch = 0 + r.has_default_xf_index = has_defaults & 1 + r.additional_space_above = 0 + r.additional_space_below = 0 + if not r.has_default_xf_index: + r.xf_index = -1 + elif data_len == 18: + # Seems the XF index in the cell_attr is dodgy + xfx = local_unpack(' -1: + fprintf(self.logfile, + "**ROW %d %d %d\n", + self.number, rowx, r.xf_index) + if blah_rows: + print >> self.logfile, 'ROW_B2', rowx, bits1, has_defaults + r.dump(self.logfile, + header="--- sh #%d, rowx=%d ---" % (self.number, rowx)) + elif rc == XL_COLWIDTH: # BIFF2 only + if not fmt_info: continue + first_colx, last_colx, width\ + = local_unpack("> self.logfile, \ + "*** NOTE: COLWIDTH record has first col index %d, last %d; " \ + "should have first <= last -- record ignored!" \ + % (first_colx, last_colx) + continue + for colx in xrange(first_colx, last_colx+1): + if self.colinfo_map.has_key(colx): + c = self.colinfo_map[colx] + else: + c = Colinfo() + self.colinfo_map[colx] = c + c.width = width + if blah: + fprintf( + self.logfile, + "COLWIDTH sheet #%d cols %d-%d: wid=%d\n", + self.number, first_colx, last_colx, width + ) + elif rc == XL_COLUMNDEFAULT: # BIFF2 only + if not fmt_info: continue + first_colx, last_colx = local_unpack("> self.logfile, \ + "*** NOTE: COLUMNDEFAULT record has first col index %d, last %d; " \ + "should have 0 <= first < last <= 256" \ + % (first_colx, last_colx) + last_colx = min(last_colx, 256) + for colx in xrange(first_colx, last_colx): + offset = 4 + 3 * (colx - first_colx) + cell_attr = data[offset:offset+3] + xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx=-1, colx=colx) + if self.colinfo_map.has_key(colx): + c = self.colinfo_map[colx] + else: + c = Colinfo() + self.colinfo_map[colx] = c + c.xf_index = xf_index + else: + # if DEBUG: print 
"SHEET.READ: Unhandled record type %02x %d bytes %r" % (rc, data_len, data) + pass + if not eof_found: + raise XLRDError("Sheet %d (%r) missing EOF record" \ + % (self.number, self.name)) + self.tidy_dimensions() + bk._position = oldpos + return 1 + + def fixed_BIFF2_xfindex(self, cell_attr, rowx, colx, true_xfx=None): + DEBUG = 0 + blah = DEBUG or self.verbosity >= 2 + if self.biff_version == 21: + if self._xf_index_to_xl_type_map: + if true_xfx is not None: + xfx = true_xfx + else: + xfx = ord(cell_attr[0]) & 0x3F + if xfx == 0x3F: + if self._ixfe is None: + raise XLRDError("BIFF2 cell record has XF index 63 but no preceding IXFE record.") + xfx = self._ixfe + # OOo docs are capable of interpretation that each + # cell record is preceded immediately by its own IXFE record. + # Empirical evidence is that (sensibly) an IXFE record applies to all + # following cell records until another IXFE comes along. + return xfx + # Have either Excel 2.0, or broken 2.1 w/o XF records -- same effect. 
+ self.biff_version = self.book.biff_version = 20 + #### check that XF slot in cell_attr is zero + xfx_slot = ord(cell_attr[0]) & 0x3F + assert xfx_slot == 0 + xfx = self._cell_attr_to_xfx.get(cell_attr) + if xfx is not None: + return xfx + if blah: + fprintf(self.logfile, "New cell_attr %r at (%r, %r)\n", cell_attr, rowx, colx) + book = self.book + xf = self.fake_XF_from_BIFF20_cell_attr(cell_attr) + xfx = len(book.xf_list) + xf.xf_index = xfx + book.xf_list.append(xf) + if blah: + xf.dump(self.logfile, header="=== Faked XF %d ===" % xfx, footer="======") + if not book.format_map.has_key(xf.format_key): + msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n" + fprintf(self.logfile, msg, + xf.xf_index, xf.format_key, xf.format_key) + fmt = Format(xf.format_key, FUN, u"General") + book.format_map[xf.format_key] = fmt + while len(book.format_list) <= xf.format_key: + book.format_list.append(fmt) + cellty_from_fmtty = { + FNU: XL_CELL_NUMBER, + FUN: XL_CELL_NUMBER, + FGE: XL_CELL_NUMBER, + FDT: XL_CELL_DATE, + FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text. 
+ } + fmt = book.format_map[xf.format_key] + cellty = cellty_from_fmtty[fmt.type] + self._xf_index_to_xl_type_map[xf.xf_index] = cellty + self._cell_attr_to_xfx[cell_attr] = xfx + return xfx + + def fake_XF_from_BIFF20_cell_attr(self, cell_attr): + from formatting import XF, XFAlignment, XFBorder, XFBackground, XFProtection + xf = XF() + xf.alignment = XFAlignment() + xf.alignment.indent_level = 0 + xf.alignment.shrink_to_fit = 0 + xf.alignment.text_direction = 0 + xf.border = XFBorder() + xf.border.diag_up = 0 + xf.border.diag_down = 0 + xf.border.diag_colour_index = 0 + xf.border.diag_line_style = 0 # no line + xf.background = XFBackground() + xf.protection = XFProtection() + (prot_bits, font_and_format, halign_etc) = unpack('> 6 + upkbits(xf.protection, prot_bits, ( + (6, 0x40, 'cell_locked'), + (7, 0x80, 'formula_hidden'), + )) + xf.alignment.hor_align = halign_etc & 0x07 + for mask, side in ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom')): + if halign_etc & mask: + colour_index, line_style = 8, 1 # black, thin + else: + colour_index, line_style = 0, 0 # none, none + setattr(xf.border, side + '_colour_index', colour_index) + setattr(xf.border, side + '_line_style', line_style) + bg = xf.background + if halign_etc & 0x80: + bg.fill_pattern = 17 + else: + bg.fill_pattern = 0 + bg.background_colour_index = 9 # white + bg.pattern_colour_index = 8 # black + xf.parent_style_index = 0 # ??????????? + xf.alignment.vert_align = 2 # bottom + xf.alignment.rotation = 0 + for attr_stem in \ + "format font alignment border background protection".split(): + attr = "_" + attr_stem + "_flag" + setattr(xf, attr, 1) + return xf + + def req_fmt_info(self): + if not self.formatting_info: + raise XLRDError("Feature requires open_workbook(..., formatting_info=True)") + + ## + # Determine column display width. + #
-- New in version 0.6.1 + #
+ # @param colx Index of the queried column, range 0 to 255. + # Note that it is possible to find out the width that will be used to display + # columns with no cell information e.g. column IV (colx=255). + # @return The column width that will be used for displaying + # the given column by Excel, in units of 1/256th of the width of a + # standard character (the digit zero in the first font). + + def computed_column_width(self, colx): + self.req_fmt_info() + if self.biff_version >= 80: + colinfo = self.colinfo_map.get(colx, None) + if colinfo is not None: + return colinfo.width + if self.standardwidth is not None: + return self.standardwidth + elif self.biff_version >= 40: + if self.gcw[colx]: + if self.standardwidth is not None: + return self.standardwidth + else: + colinfo = self.colinfo_map.get(colx, None) + if colinfo is not None: + return colinfo.width + elif self.biff_version == 30: + colinfo = self.colinfo_map.get(colx, None) + if colinfo is not None: + return colinfo.width + # All roads lead to Rome and the DEFCOLWIDTH ... + if self.defcolwidth is not None: + return self.defcolwidth * 256 + return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record + + def handle_msodrawingetc(self, recid, data_len, data): + if not OBJ_MSO_DEBUG: + return + DEBUG = 1 + if self.biff_version < 80: + return + o = MSODrawing() + pos = 0 + while pos < data_len: + tmp, fbt, cb = unpack('> 4) & 0xFFF + if ver == 0xF: + ndb = 0 # container + else: + ndb = cb + if DEBUG: + hex_char_dump(data, pos, ndb + 8, base=0, fout=self.logfile) + fprintf(self.logfile, + "fbt:0x%04X inst:%d ver:0x%X cb:%d (0x%04X)\n", + fbt, inst, ver, cb, cb) + if fbt == 0xF010: # Client Anchor + assert ndb == 18 + (o.anchor_unk, + o.anchor_colx_lo, o.anchor_rowx_lo, + o.anchor_colx_hi, o.anchor_rowx_hi) = unpack('> 1) & 1 + # Docs say NULL [sic] bytes padding between string count and string data + # to ensure that string is word-aligned. Appears to be nonsense. + # There also seems to be a random(?) 
byte after the string (not counted in the + # string length. + o.original_author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2) + assert endpos == data_len - 1 + o.last_byte = data[-1] + if DEBUG: + o.dump(self.logfile, header="=== MSNote ===", footer= " ") + + def handle_txo(self, data): + if not OBJ_MSO_DEBUG: + return + DEBUG = 1 + if self.biff_version < 80: + return + o = MSTxo() + data_len = len(data) + option_flags, o.rot, cchText, cbRuns = unpack('>= 2 # div by 4 to drop the 2 flag bits + if flags & 1: + return i / 100.0 + return float(i) + else: + # It's the most significant 30 bits of an IEEE 754 64-bit FP number + d, = unpack('Contains the data for one cell.

+# +#

WARNING: You don't call this class yourself. You access Cell objects +# via methods of the Sheet object(s) that you found in the Book object that +# was returned when you called xlrd.open_workbook("myfile.xls").

+#

Cell objects have three attributes: ctype is an int, value +# (which depends on ctype) and xf_index. +# If "formatting_info" is not enabled when the workbook is opened, xf_index will be None. +# The following table describes the types of cells and how their values +# are represented in Python.

+# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +# +#
Type symbol | Type number | Python value
XL_CELL_EMPTY | 0 | empty string u''
XL_CELL_TEXT | 1 | a Unicode string
XL_CELL_NUMBER | 2 | float
XL_CELL_DATE | 3 | float
XL_CELL_BOOLEAN | 4 | int; 1 means TRUE, 0 means FALSE
XL_CELL_ERROR | 5 | int representing internal Excel codes; for a text representation, +# refer to the supplied dictionary error_text_from_code
XL_CELL_BLANK | 6 | empty string u''. Note: this type will appear only when +# open_workbook(..., formatting_info=True) is used.
+#

class Cell(BaseObject):

    # Instances carry exactly these three attributes.
    __slots__ = ['ctype', 'value', 'xf_index']

    ##
    # @param ctype One of the XL_CELL_* type numbers.
    # @param value The cell's value; its Python type depends on ctype.
    # @param xf_index XF index of the cell, or None.
    def __init__(self, ctype, value, xf_index=None):
        self.ctype = ctype
        self.value = value
        self.xf_index = xf_index

    ##
    # Shows the type name and the value, plus the XF index when there is one.
    def __repr__(self):
        type_name = ctype_text[self.ctype]
        if self.xf_index is None:
            return "%s:%r" % (type_name, self.value)
        return "%s:%r (XF:%r)" % (type_name, self.value, self.xf_index)

##
# There is one and only one instance of an empty cell -- it's a singleton. This is it.
# You may use a test like "acell is empty_cell".
empty_cell = Cell(XL_CELL_EMPTY, '')

##### =============== Colinfo and Rowinfo ============================== #####

##
# Width and default formatting information that applies to one or
# more columns in a sheet. Derived from COLINFO records.
#
#

Here is the default hierarchy for width, according to the OOo docs: +# +#
"""In BIFF3, if a COLINFO record is missing for a column, +# the width specified in the record DEFCOLWIDTH is used instead. +# +#
In BIFF4-BIFF7, the width set in this [COLINFO] record is only used, +# if the corresponding bit for this column is cleared in the GCW +# record, otherwise the column width set in the DEFCOLWIDTH record +# is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]). +# +#
In BIFF8, if a COLINFO record is missing for a column, +# the width specified in the record STANDARDWIDTH is used. +# If this [STANDARDWIDTH] record is also missing, +# the column width of the record DEFCOLWIDTH is used instead.""" +#
+# +# Footnote: The docs on the GCW record say this: +# """
+# If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH +# record. If a bit is cleared, the corresponding column uses the width set in the +# COLINFO record for this column. +#
If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if +# the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH +# record of the worksheet will be used instead. +#
"""
+# At the moment (2007-01-17) xlrd is going with the GCW version of the story. +# Reference to the source may be useful: see the computed_column_width(colx) method +# of the Sheet class. +#
-- New in version 0.6.1 +#

+ +class Colinfo(BaseObject): + ## + # Width of the column in 1/256 of the width of the zero character, + # using default font (first FONT record in the file). + width = 0 + ## + # XF index to be used for formatting empty cells. + xf_index = -1 + ## + # 1 = column is hidden + hidden = 0 + ## + # Value of a 1-bit flag whose purpose is unknown + # but is often seen set to 1 + bit1_flag = 0 + ## + # Outline level of the column, in range(7). + # (0 = no outline) + outline_level = 0 + ## + # 1 = column is collapsed + collapsed = 0 + +## +# Height and default formatting information that applies to a row in a sheet. +# Derived from ROW records. +#
-- New in version 0.6.1 + +class Rowinfo(BaseObject): + ## + # Height of the row, in twips. One twip == 1/20 of a point + height = 0 + ## + # 0 = Row has custom height; 1 = Row has default height + has_default_height = 0 + ## + # Outline level of the row + outline_level = 0 + ## + # 1 = Outline group starts or ends here (depending on where the + # outline buttons are located, see WSBOOL record [TODO ??]), + # and is collapsed + outline_group_starts_ends = 0 + ## + # 1 = Row is hidden (manually, or by a filter or outline group) + hidden = 0 + ## + # 1 = Row height and default font height do not match + height_mismatch = 0 + ## + # 1 = the xf_index attribute is usable; 0 = ignore it + has_default_xf_index = 0 + ## + # Index to default XF record for empty cells in this row. + # Don't use this if has_default_xf_index == 0. + xf_index = -9999 + ## + # This flag is set, if the upper border of at least one cell in this row + # or if the lower border of at least one cell in the row above is + # formatted with a thick line style. Thin and medium line styles are not + # taken into account. + additional_space_above = 0 + ## + # This flag is set, if the lower border of at least one cell in this row + # or if the upper border of at least one cell in the row below is + # formatted with a medium or thick line style. Thin line styles are not + # taken into account. + additional_space_below = 0 diff --git a/tablib/packages/xlrd/timemachine.py b/tablib/packages/xlrd/timemachine.py new file mode 100644 index 0000000..1718c03 --- /dev/null +++ b/tablib/packages/xlrd/timemachine.py @@ -0,0 +1,44 @@ +# -*- coding: cp1252 -*- + +## +#

Copyright © 2006-2008 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under a BSD-style licence.

+## + +# timemachine.py -- adaptation for earlier Pythons e.g. 2.1 +# usage: from timemachine import * + +# 2008-02-08 SJM Generalised method of detecting IronPython + +import sys + +python_version = sys.version_info[:2] # e.g. version 2.4 -> (2, 4) + +CAN_PICKLE_ARRAY = python_version >= (2, 5) +CAN_SUBCLASS_BUILTIN = python_version >= (2, 2) + +if sys.version.find("IronPython") >= 0: + array_array = None +else: + from array import array as array_array + +if python_version < (2, 2): + class object: + pass + False = 0 + True = 1 + +def int_floor_div(x, y): + return divmod(x, y)[0] + +def intbool(x): + if x: + return 1 + return 0 + +if python_version < (2, 3): + def sum(sequence, start=0): + tot = start + for item in aseq: + tot += item + return tot diff --git a/tablib/packages/xlrd/xldate.py b/tablib/packages/xlrd/xldate.py new file mode 100644 index 0000000..e5f7591 --- /dev/null +++ b/tablib/packages/xlrd/xldate.py @@ -0,0 +1,171 @@ +# -*- coding: cp1252 -*- + +# No part of the content of this file was derived from the works of David Giffin. + +## +#

Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd

+#

This module is part of the xlrd package, which is released under a BSD-style licence.

+# +#

Provides function(s) for dealing with Microsoft Excel ™ dates.

+## + +# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099) + +# The conversion from days to (year, month, day) starts with +# an integral "julian day number" aka JDN. +# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713. +# More importantly: +# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0 +# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0 + +from timemachine import int_floor_div as ifd + +_JDN_delta = (2415080 - 61, 2416482 - 1) +assert _JDN_delta[1] - _JDN_delta[0] == 1462 + +class XLDateError(ValueError): pass + +class XLDateNegative(XLDateError): pass +class XLDateAmbiguous(XLDateError): pass +class XLDateTooLarge(XLDateError): pass +class XLDateBadDatemode(XLDateError): pass +class XLDateBadTuple(XLDateError): pass + +_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01 + +## +# Convert an Excel number (presumed to represent a date, a datetime or a time) into +# a tuple suitable for feeding to datetime or mx.DateTime constructors. +# @param xldate The Excel number +# @param datemode 0: 1900-based, 1: 1904-based. +#
WARNING: when using this function to +# interpret the contents of a workbook, you should pass in the Book.datemode +# attribute of that workbook. Whether +# the workbook has ever been anywhere near a Macintosh is irrelevant. +# @return Gregorian (year, month, day, hour, minute, nearest_second). +#
Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a time; +# (0, 0, 0, hour, minute, second) will be returned. +#
Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number" +# is zero. +# @throws XLDateNegative xldate < 0.00 +# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0) +# @throws XLDateTooLarge Gregorian year 10000 or later +# @throws XLDateBadDatemode datemode arg is neither 0 nor 1 +# @throws XLDateError Covers the 4 specific errors + +def xldate_as_tuple(xldate, datemode): + if datemode not in (0, 1): + raise XLDateBadDatemode(datemode) + if xldate == 0.00: + return (0, 0, 0, 0, 0, 0) + if xldate < 0.00: + raise XLDateNegative(xldate) + xldays = int(xldate) + frac = xldate - xldays + seconds = int(round(frac * 86400.0)) + assert 0 <= seconds <= 86400 + if seconds == 86400: + hour = minute = second = 0 + xldays += 1 + else: + # second = seconds % 60; minutes = seconds // 60 + minutes, second = divmod(seconds, 60) + # minute = minutes % 60; hour = minutes // 60 + hour, minute = divmod(minutes, 60) + if xldays >= _XLDAYS_TOO_LARGE[datemode]: + raise XLDateTooLarge(xldate) + + if xldays == 0: + return (0, 0, 0, hour, minute, second) + + if xldays < 61 and datemode == 0: + raise XLDateAmbiguous(xldate) + + jdn = xldays + _JDN_delta[datemode] + yreg = (ifd(ifd(jdn * 4 + 274277, 146097) * 3, 4) + jdn + 1363) * 4 + 3 + mp = ifd(yreg % 1461, 4) * 535 + 333 + d = ifd(mp % 16384, 535) + 1 + # mp /= 16384 + mp >>= 14 + if mp >= 10: + return (ifd(yreg, 1461) - 4715, mp - 9, d, hour, minute, second) + else: + return (ifd(yreg, 1461) - 4716, mp + 3, d, hour, minute, second) + +# === conversions from date/time to xl numbers + +def _leap(y): + if y % 4: return 0 + if y % 100: return 1 + if y % 400: return 0 + return 1 + +_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + +## +# Convert a date tuple (year, month, day) to an Excel date. +# @param year Gregorian year. 
+# @param month 1 <= month <= 12 +# @param day 1 <= day <= last day of that (year, month) +# @param datemode 0: 1900-based, 1: 1904-based. +# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0) +# @throws XLDateBadDatemode datemode arg is neither 0 nor 1 +# @throws XLDateBadTuple (year, month, day) is too early/late or has invalid component(s) +# @throws XLDateError Covers the specific errors + +def xldate_from_date_tuple((year, month, day), datemode): + + if datemode not in (0, 1): + raise XLDateBadDatemode(datemode) + + if year == 0 and month == 0 and day == 0: + return 0.00 + + if not (1900 <= year <= 9999): + raise XLDateBadTuple("Invalid year: %r" % ((year, month, day),)) + if not (1 <= month <= 12): + raise XLDateBadTuple("Invalid month: %r" % ((year, month, day),)) + if day < 1 \ + or (day > _days_in_month[month] and not(day == 29 and month == 2 and _leap(year))): + raise XLDateBadTuple("Invalid day: %r" % ((year, month, day),)) + + Yp = year + 4716 + M = month + if M <= 2: + Yp = Yp - 1 + Mp = M + 9 + else: + Mp = M - 3 + jdn = ifd(1461 * Yp, 4) + ifd(979 * Mp + 16, 32) + \ + day - 1364 - ifd(ifd(Yp + 184, 100) * 3, 4) + xldays = jdn - _JDN_delta[datemode] + if xldays <= 0: + raise XLDateBadTuple("Invalid (year, month, day): %r" % ((year, month, day),)) + if xldays < 61 and datemode == 0: + raise XLDateAmbiguous("Before 1900-03-01: %r" % ((year, month, day),)) + return float(xldays) + +## +# Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day). 
+# @param hour 0 <= hour < 24 +# @param minute 0 <= minute < 60 +# @param second 0 <= second < 60 +# @throws XLDateBadTuple Out-of-range hour, minute, or second + +def xldate_from_time_tuple((hour, minute, second)): + if 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60: + return ((second / 60.0 + minute) / 60.0 + hour) / 24.0 + raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),)) + +## +# Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value. +# For more details, refer to other xldate_from_*_tuple functions. +# @param datetime_tuple (year, month, day, hour, minute, second) +# @param datemode 0: 1900-based, 1: 1904-based. + +def xldate_from_datetime_tuple(datetime_tuple, datemode): + return ( + xldate_from_date_tuple(datetime_tuple[:3], datemode) + + + xldate_from_time_tuple(datetime_tuple[3:]) + ) diff --git a/test_tablib.py b/test_tablib.py index 5fdf65a..6223098 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -552,6 +552,31 @@ class TablibTestCase(unittest.TestCase): data.sort(target_header) self.assertEquals(self.founders[orig_target_header], data[target_header]) + + def test_xls_import_set(self): + """Generate and import XLS set serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + _xls = data.xls + + data.xls = _xls + + self.assertEqual(_xls, data.xls) + + def test_xls_import_book(self): + """Generate and import XLS book serialization.""" + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + book.add_sheet(data) + _xls = book.xls + + book.xls = _xls + + self.assertEqual(_xls, book.xls) if __name__ == '__main__': From c19e2f2c5bcc2c3bf2b77594c00433704b7111af Mon Sep 17 00:00:00 2001 From: Greg Thornton Date: Thu, 14 Jul 2011 13:11:33 -0500 Subject: [PATCH 12/96] Added xlrd license to NOTICE. 
--- NOTICE | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/NOTICE b/NOTICE index 637455c..217ea25 100644 --- a/NOTICE +++ b/NOTICE @@ -172,4 +172,81 @@ Libknecht St., 4 +7(0712)56-09-83 -Subject: pyExcelerator \ No newline at end of file +Subject: pyExcelerator + + + + +XLRD License +============ + +Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any +contributors may be used to endorse or promote products derived from this +software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +THE POSSIBILITY OF SUCH DAMAGE. + + +Copyright (c) 2001 David Giffin. +All rights reserved. 
+ +Based on the the Java version: Andrew Khan Copyright (c) 2000. + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + +3. All advertising materials mentioning features or use of this + software must display the following acknowledgment: + "This product includes software developed by + David Giffin ." + +4. Redistributions of any form whatsoever must retain the following + acknowledgment: + "This product includes software developed by + David Giffin ." + +THIS SOFTWARE IS PROVIDED BY DAVID GIFFIN ``AS IS'' AND ANY +EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID GIFFIN OR +ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file From f6d7888d9e28222b8df9c5ba5359e358e9c1677b Mon Sep 17 00:00:00 2001 From: Greg Thornton Date: Thu, 14 Jul 2011 13:47:07 -0500 Subject: [PATCH 13/96] Added xls detection. 
--- tablib/formats/_xls.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 1282a43..c801658 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -112,3 +112,11 @@ def dset_sheet(dataset, ws): ws.write(i, j, col) +def detect(stream): + """Returns True if given stream is valid XLS.""" + + try: + xlrd.open_workbook(file_contents=stream) + return True + except xlrd.XLRDError: + return False From a2b4e4c6ba6315571e476b09f69312c7d5788e45 Mon Sep 17 00:00:00 2001 From: Mike Waldner Date: Sun, 7 Aug 2011 19:19:54 -0400 Subject: [PATCH 14/96] Replace None with empty string before creating td --- tablib/formats/_html.py | 8 ++++++-- test_tablib.py | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index 485536c..ffde95f 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -30,13 +30,17 @@ def export_set(dataset): page.table.open() if dataset.headers is not None: + new_header = [item if item is not None else '' for item in dataset.headers] + page.thead.open() - headers = markup.oneliner.th(dataset.headers) + headers = markup.oneliner.th(new_header) page.tr(headers) page.thead.close() for row in dataset: - html_row = markup.oneliner.td(row) + new_row = [item if item is not None else '' for item in row] + + html_row = markup.oneliner.td(new_row) page.tr(html_row) page.table.close() diff --git a/test_tablib.py b/test_tablib.py index 48990a7..1ad514a 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -224,6 +224,26 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(html, self.founders.html) + def test_html_export_none_value(self): + """HTML export""" + + html = markup.page() + html.table.open() + html.thead.open() + + html.tr(markup.oneliner.th(['foo','', 'bar'])) + html.thead.close() + + html.tr(markup.oneliner.td(['foo','', 'bar'])) + + html.table.close() + html = str(html) + + headers 
= ['foo', None, 'bar']; + d = tablib.Dataset(['foo', None, 'bar'], headers=headers) + + self.assertEqual(html, d.html) + def test_unicode_append(self): """Passes in a single unicode charecter and exports.""" From bfd211854a5974459083c7afac98265cbbc94b55 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 8 Aug 2011 06:48:34 -0400 Subject: [PATCH 15/96] Added Mike Waldner to Authors. #34 --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index abd4d8f..4b0a00a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -16,4 +16,5 @@ Patches and Suggestions - Benjamin Wohlwend - Erik Youngren - Mark Rogers -- Mark Walling \ No newline at end of file +- Mark Walling +- Mike Waldner \ No newline at end of file From 887ee2fbacc749002a1d70538c42fb93fd22b7bd Mon Sep 17 00:00:00 2001 From: Mike Waldner Date: Tue, 9 Aug 2011 20:51:24 -0400 Subject: [PATCH 16/96] Adding documentation changes for append_col Related #21 --- README.rst | 2 +- tablib/core.py | 52 +++++++++++++++++++++++++++----------------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/README.rst b/README.rst index e46228b..aa6b73a 100644 --- a/README.rst +++ b/README.rst @@ -55,7 +55,7 @@ Intelligently add new rows: :: Intelligently add new columns: :: - >>> data.append(col=(90, 67, 83), header='age') + >>> data.append_col((90, 67, 83), header='age') Slice rows: :: diff --git a/tablib/core.py b/tablib/core.py index 53f3767..72700eb 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -105,7 +105,7 @@ class Dataset(object): functionality. Usually you create a :class:`Dataset` instance in your main module, and append - rows and columns as you collect data. :: + rows as you collect data. :: data = tablib.Dataset() data.headers = ('name', 'age') @@ -113,6 +113,19 @@ class Dataset(object): for (name, age) in some_collector(): data.append((name, age)) + + Setting columns is similar. 
The column data length must equal the + current height of the data and headers must be set :: + + data = tablib.Dataset() + data.headers = ('first_name', 'last_name') + + data.append(('John', 'Adams')) + data.append(('George', 'Washington')) + + data.append_col((90, 67), header='age') + + You can also set rows and headers upon instantiation. This is useful if dealing with dozens or hundres of :class:`Dataset` objects. :: @@ -121,7 +134,6 @@ class Dataset(object): data = tablib.Dataset(*data, headers=headers) - :param \*args: (optional) list of rows to populate Dataset :param headers: (optional) list strings for Dataset header row @@ -518,30 +530,11 @@ class Dataset(object): def insert(self, index, row, tags=list()): """Inserts a row to the :class:`Dataset` at the given index. - Rows and columns inserted must be the correct size (height or width). + Rows inserted must be the correct size (height or width). The default behaviour is to insert the given row to the :class:`Dataset` - object at the given index. If the ``col`` parameter is given, however, - a new column will be insert to the :class:`Dataset` object instead. - - You can also insert a column of a single callable object, which will - add a new column with the return values of the callable each as an - item in the column. :: - - data.append(col=random.randint) - - See :ref:`dyncols` for an in-depth example. - - .. versionchanged:: 0.9.0 - If inserting a column, and :class:`Dataset.headers` is set, the - header attribute must be set, and will be considered the header for - that row. - - .. versionadded:: 0.9.0 - If inserting a row, you can add :ref:`tags ` to the row you are inserting. - This gives you the ability to :class:`filter ` your - :class:`Dataset` later. - """ + object at the given index. + """ self._validate(row) self._data.insert(index, Row(row, tags=tags)) @@ -615,6 +608,17 @@ class Dataset(object): that row. See :ref:`dyncols` for an in-depth example. + + .. 
versionchanged:: 0.9.0 + If inserting a column, and :class:`Dataset.headers` is set, the + header attribute must be set, and will be considered the header for + that row. + + .. versionadded:: 0.9.0 + If inserting a row, you can add :ref:`tags ` to the row you are inserting. + This gives you the ability to :class:`filter ` your + :class:`Dataset` later. + """ # Callable Columns... From d611233c80b6cfbba8681211e02aab3e468ed5e9 Mon Sep 17 00:00:00 2001 From: Mike Waldner Date: Wed, 10 Aug 2011 19:50:31 -0400 Subject: [PATCH 17/96] Throwing InvalidDimensions when append_col with header is called but only headers exists Related #33 --- tablib/core.py | 6 +++++ test_tablib.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index f52ca92..166d18e 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -649,8 +649,14 @@ class Dataset(object): # pop the first item off, add to headers if not header: raise HeadersNeeded() + + # corner case - if header is set without data + elif header and self.height == 0: + raise InvalidDimensions + self.headers.insert(index, header) + if self.height and self.width: for i, row in enumerate(self._data): diff --git a/test_tablib.py b/test_tablib.py index 998233b..0c07d3a 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -58,6 +58,18 @@ class TablibTestCase(unittest.TestCase): self.assertRaises(tablib.InvalidDimensions, data.append, new_row) + def test_set_headers_with_incorrect_dimension(self): + """Verify headers correctly detects mismatch of number of + headers and data. 
+ """ + + data.append(self.john) + + def set_header_callable(): + data.headers = ['first_name'] + + self.assertRaises(tablib.InvalidDimensions, set_header_callable) + def test_add_column(self): """Verify adding column works with/without headers.""" @@ -92,6 +104,53 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(data.height, len(new_col)) + def test_add_column_with_header_ignored(self): + """Verify append_col() ignores the header if data.headers has + not previously been set + """ + + new_col = ('reitz', 'monke') + + data.append_col(new_col, header='first_name') + + self.assertEqual(data[0], tuple([new_col[0]])) + self.assertEqual(data.width, 1) + self.assertEqual(data.height, len(new_col)) + self.assertEqual(data.headers, None) + + + def test_add_column_with_header_and_headers_only_exist(self): + """Verify append_col() with header correctly detects mismatch when + headers exist but there is no existing row data + """ + + data.headers = ['first_name'] + #no data + + new_col = ('allen') + + def append_col_callable(): + data.append_col(new_col, header='middle_name') + + self.assertRaises(tablib.InvalidDimensions, append_col_callable) + + + def test_add_column_with_header_and_data_exists(self): + """Verify append_col() works when headers and rows exists""" + + data.headers = self.headers + data.append(self.john) + + new_col = [10]; + + data.append_col(new_col, header='age') + + self.assertEqual(data.height, 1) + self.assertEqual(data.width, 4) + self.assertEqual(data['age'], new_col) + self.assertEqual(len(data.headers), len(self.headers) + 1) + + def test_add_callable_column(self): """Verify adding column with values specified as callable.""" @@ -128,7 +187,7 @@ class TablibTestCase(unittest.TestCase): self.founders.get_col(self.headers.index('gpa')), [self.john[2], self.george[2], self.tom[2]]) - + def test_data_slicing(self): """Verify slicing by data.""" @@ -224,6 +283,7 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(html, 
self.founders.html) + def test_html_export_none_value(self): """HTML export""" @@ -547,7 +607,7 @@ class TablibTestCase(unittest.TestCase): data.csv - + def test_csv_column_select(self): """Build up a CSV and test selecting a column""" @@ -588,7 +648,7 @@ class TablibTestCase(unittest.TestCase): data.sort(target_header) self.assertEquals(self.founders[orig_target_header], data[target_header]) - + def test_xls_import_set(self): """Generate and import XLS set serialization.""" data.append(self.john) From 43c68b396fd51a567f954297a686dcd828778037 Mon Sep 17 00:00:00 2001 From: Mike Waldner Date: Wed, 10 Aug 2011 20:05:17 -0400 Subject: [PATCH 18/96] Fixing magic number in test --- test_tablib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_tablib.py b/test_tablib.py index 0c07d3a..1391097 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -146,7 +146,7 @@ class TablibTestCase(unittest.TestCase): data.append_col(new_col, header='age') self.assertEqual(data.height, 1) - self.assertEqual(data.width, 4) + self.assertEqual(data.width, len(self.john) + 1) self.assertEqual(data['age'], new_col) self.assertEqual(len(data.headers), len(self.headers) + 1) From 906138b138e05670913ecf233b8430d59cb6d29b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Thu, 11 Aug 2011 00:47:23 -0400 Subject: [PATCH 19/96] a column w/ no length could work --- tablib/core.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 166d18e..485d610 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -130,7 +130,7 @@ class Dataset(object): data.append(('John', 'Adams')) data.append(('George', 'Washington')) - data.append_col((90, 67), header='age') + data.append_col((90, 67), header='age') You can also set rows and headers upon instantiation. This is useful if dealing @@ -470,7 +470,7 @@ class Dataset(object): data.csv = 'age, first_name, last_name\\n90, John, Adams' Import assumes (for now) that headers exist. 
- + .. admonition:: Binary Warning :class:`Dataset.csv` uses \\r\\n line endings by default, so make @@ -478,7 +478,7 @@ class Dataset(object): with open('output.csv', 'wb') as f: f.write(data.csv)' - + If you do not do this, and you export the file on Windows, your CSV file will open in Excel with a blank line between each row. """ @@ -550,7 +550,7 @@ class Dataset(object): Rows inserted must be the correct size (height or width). The default behaviour is to insert the given row to the :class:`Dataset` - object at the given index. + object at the given index. """ self._validate(row) @@ -635,9 +635,12 @@ class Dataset(object): If inserting a row, you can add :ref:`tags ` to the row you are inserting. This gives you the ability to :class:`filter ` your :class:`Dataset` later. - + """ + if col is None: + col = [] + # Callable Columns... if hasattr(col, '__call__'): col = list(map(col, self._data)) @@ -651,7 +654,7 @@ class Dataset(object): raise HeadersNeeded() # corner case - if header is set without data - elif header and self.height == 0: + elif header and self.height == 0 and len(col): raise InvalidDimensions self.headers.insert(index, header) From 9778a96351dbbadc3c7a522a44ca4ce7b33c84d0 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 15 Aug 2011 22:43:12 -0400 Subject: [PATCH 20/96] tuples didn't have index method in the past. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …why? 
--- test_tablib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_tablib.py b/test_tablib.py index 1391097..692426d 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -176,15 +176,15 @@ class TablibTestCase(unittest.TestCase): """Verify getting columns by index""" self.assertEqual( - self.founders.get_col(self.headers.index('first_name')), + self.founders.get_col(list(self.headers).index('first_name')), [self.john[0], self.george[0], self.tom[0]]) self.assertEqual( - self.founders.get_col(self.headers.index('last_name')), + self.founders.get_col(list(self.headers).index('last_name')), [self.john[1], self.george[1], self.tom[1]]) self.assertEqual( - self.founders.get_col(self.headers.index('gpa')), + self.founders.get_col(list(self.headers).index('gpa')), [self.john[2], self.george[2], self.tom[2]]) From f01d65c2e9d288743302acc2b494af0a7c24ce09 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 15 Aug 2011 22:45:35 -0400 Subject: [PATCH 21/96] I don't remember merging that.. 
--- tablib/formats/_xls.py | 43 +----------------------------------------- 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index c801658..89d2845 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -5,7 +5,7 @@ import sys -from tablib.compat import BytesIO, xlwt, xlrd +from tablib.compat import BytesIO, xlwt import tablib title = 'xls' @@ -16,37 +16,6 @@ wrap = xlwt.easyxf("alignment: wrap on") bold = xlwt.easyxf("font: bold on") -def import_set(dset, in_stream, headers=True): - """Returns dataset from XLS stream.""" - - dset.wipe() - - wb = xlrd.open_workbook(file_contents=in_stream) - ws = wb.sheet_by_index(0) - - for i in range(ws.nrows): - if (i == 0) and (headers): - dset.headers = ws.row_values(i) - else: - dset.append(ws.row_values(i)) - - -def import_book(dbook, in_stream, headers=True): - """Returns databook from XLS stream.""" - - dbook.wipe() - - wb = xlrd.open_workbook(file_contents=in_stream) - for ws in wb.sheets(): - data = tablib.Dataset() - data.title = ws.name - for i in range(ws.nrows): - if (i == 0) and (headers): - data.headers = ws.row_values(i) - else: - data.append(ws.row_values(i)) - dbook.add_sheet(data) - def export_set(dataset): """Returns XLS representation of Dataset.""" @@ -110,13 +79,3 @@ def dset_sheet(dataset, ws): ws.write(i, j, col) except TypeError: ws.write(i, j, col) - - -def detect(stream): - """Returns True if given stream is valid XLS.""" - - try: - xlrd.open_workbook(file_contents=stream) - return True - except xlrd.XLRDError: - return False From 19ee1997b570183c082c656ccf0e2545f47fc69a Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 15 Aug 2011 22:49:14 -0400 Subject: [PATCH 22/96] really need to use testing branches.. 
--- tablib/core.py | 2 +- test_tablib.py | 26 +------------------------- 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 485d610..1bdd49f 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -14,7 +14,7 @@ from operator import itemgetter from tablib import formats -from tablib.compat import OrderedDict +from tablib.compat import OrderedDict, unicode __title__ = 'tablib' diff --git a/test_tablib.py b/test_tablib.py index 692426d..5455c28 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -7,7 +7,7 @@ import unittest import sys import tablib -from tablib.compat import markup +from tablib.compat import markup, unicode @@ -649,30 +649,6 @@ class TablibTestCase(unittest.TestCase): self.assertEquals(self.founders[orig_target_header], data[target_header]) - def test_xls_import_set(self): - """Generate and import XLS set serialization.""" - data.append(self.john) - data.append(self.george) - data.headers = self.headers - - _xls = data.xls - - data.xls = _xls - - self.assertEqual(_xls, data.xls) - - def test_xls_import_book(self): - """Generate and import XLS book serialization.""" - data.append(self.john) - data.append(self.george) - data.headers = self.headers - - book.add_sheet(data) - _xls = book.xls - - book.xls = _xls - - self.assertEqual(_xls, book.xls) if __name__ == '__main__': From ec7273d02ddfc68f0a48be6018482bd26ca88b11 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 15 Aug 2011 23:29:19 -0400 Subject: [PATCH 23/96] that wasn't right. 
--- tablib/packages/odf3/opendocument.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tablib/packages/odf3/opendocument.py b/tablib/packages/odf3/opendocument.py index c0d556f..006cd08 100644 --- a/tablib/packages/odf3/opendocument.py +++ b/tablib/packages/odf3/opendocument.py @@ -39,7 +39,7 @@ UNIXPERMS = 0o100644 << 16 # -rw-r--r-- IS_FILENAME = 0 IS_IMAGE = 1 # We need at least Python 2.2 -assert sys.version_info[0]>=2 and sys.version_info[1] >= 2 +assert sys.version_info[0]>=2 #sys.setrecursionlimit(100) #The recursion limit is set conservative so mistakes like From 05c9b330034597a6097fb175460dbf0e0f34323a Mon Sep 17 00:00:00 2001 From: Joel Friedly Date: Thu, 25 Aug 2011 23:33:29 -0300 Subject: [PATCH 24/96] Fixed a few typos. --- docs/tutorial.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 117196d..bd2dbc0 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -68,7 +68,7 @@ Adding Headers -------------- -It's time enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. :: +It's time to enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. :: data.headers = ['First Name', 'Last Name'] @@ -251,7 +251,7 @@ Filtering Datasets with Tags When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter. -This allows you to filter your :class:`Dataset` later. This can be useful so separate rows of data based on +This allows you to filter your :class:`Dataset` later. This can be useful to separate rows of data based on arbitrary criteria (*e.g.* origin) that you don't want to include in your :class:`Dataset`. Let's tag some students. :: @@ -263,7 +263,7 @@ Let's tag some students. 
:: students.rpush(['Kenneth', 'Reitz'], tags=['male', 'technical']) students.rpush(['Bessie', 'Monke'], tags=['female', 'creative']) -Now that we have extra meta-data on our rows, we can use easily filter our :class:`Dataset`. Let's just see Male students. :: +Now that we have extra meta-data on our rows, we can easily filter our :class:`Dataset`. Let's just see Male students. :: >>> students.filter(['male']).yaml From 32a09ccd6ac26a3cbf4d632871ef383787acbb80 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 31 Aug 2011 02:16:16 -0300 Subject: [PATCH 25/96] Edited AUTHORS via GitHub --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 4b0a00a..b94e63b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -17,4 +17,5 @@ Patches and Suggestions - Erik Youngren - Mark Rogers - Mark Walling -- Mike Waldner \ No newline at end of file +- Mike Waldner +- Joel Friedly \ No newline at end of file From 23ab6c4724cded5dac64c164773b76794ec3b00e Mon Sep 17 00:00:00 2001 From: Alexander Artemenko Date: Sun, 16 Oct 2011 11:00:06 +0400 Subject: [PATCH 26/96] Render table in Markdown format on unicode(dataset). Closes #41. 
--- tablib/core.py | 17 +++++++++++++++++ test_tablib.py | 16 ++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/tablib/core.py b/tablib/core.py index 1bdd49f..80f4418 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -223,6 +223,23 @@ class Dataset(object): except AttributeError: return '' + def __unicode__(self): + result = [self.__headers] + + result.extend(map(unicode, row) for row in self._data) + + # here, we calculate max width for each column + lens = (map(len, row) for row in result) + field_lens = map(max, zip(*lens)) + + # delimiter between header and data + result.insert(1, [u'-' * length for length in field_lens]) + + format_string = u'|'.join(u'{%s:%s}' % item for item in enumerate(field_lens)) + + return u'\n'.join(format_string.format(*row) for row in result) + + # --------- # Internals diff --git a/test_tablib.py b/test_tablib.py index 5455c28..0bfb417 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -649,6 +649,22 @@ class TablibTestCase(unittest.TestCase): self.assertEquals(self.founders[orig_target_header], data[target_header]) + def test_unicode_renders_markdown_table(self): + # add another entry to test right field width for + # integer + self.founders.append(('Old', 'Man', 100500)) + + self.assertEquals( + u""" +first_name|last_name |gpa +----------|----------|------ +John |Adams |90 +George |Washington|67 +Thomas |Jefferson |50 +Old |Man |100500 +""".strip(), + unicode(self.founders) + ) if __name__ == '__main__': From 42e40ed0ab4859995264abb7ee3ba338b5e0371b Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 2 Nov 2011 02:35:59 -0300 Subject: [PATCH 27/96] use yaml safe_load (thanks @toastdriven) --- tablib/formats/_yaml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index 974228b..0eab78e 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -38,7 +38,7 @@ def import_set(dset, in_stream): """Returns dataset from YAML 
stream.""" dset.wipe() - dset.dict = yaml.load(in_stream) + dset.dict = yaml.safe_load(in_stream) def import_book(dbook, in_stream): @@ -46,7 +46,7 @@ def import_book(dbook, in_stream): dbook.wipe() - for sheet in yaml.load(in_stream): + for sheet in yaml.safe_load(in_stream): data = tablib.Dataset() data.title = sheet['title'] data.dict = sheet['data'] @@ -55,7 +55,7 @@ def import_book(dbook, in_stream): def detect(stream): """Returns True if given stream is valid YAML.""" try: - _yaml = yaml.load(stream) + _yaml = yaml.safe_load(stream) if isinstance(_yaml, (list, tuple, dict)): return True else: From 9c65515e7a7d466be2168c65ea5a88a40cd3f248 Mon Sep 17 00:00:00 2001 From: VanL Date: Fri, 6 Jan 2012 00:12:06 +0000 Subject: [PATCH 28/96] Add detect function in _xls. Update yaml, csv, and tsv detection functions to catch other errors when faced with invalid input. --- tablib/formats/_csv.py | 4 ++-- tablib/formats/_tsv.py | 2 +- tablib/formats/_xls.py | 20 ++++++++++++++++++++ tablib/formats/_yaml.py | 4 ++-- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 3a4c374..549259c 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -51,5 +51,5 @@ def detect(stream): try: csv.Sniffer().sniff(stream) return True - except csv.Error: - return False \ No newline at end of file + except (csv.Error, TypeError): + return False diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index f581f81..ac9554c 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -55,5 +55,5 @@ def detect(stream): try: csv.Sniffer().sniff(stream, delimiters='\t') return True - except csv.Error: + except (csv.Error, TypeError): return False diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 89d2845..6530d91 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -6,6 +6,8 @@ import sys from tablib.compat import BytesIO, xlwt +from tablib.packages import xlrd +from 
tablib.packages.xlrd.biffh import XLRDError import tablib title = 'xls' @@ -16,6 +18,24 @@ wrap = xlwt.easyxf("alignment: wrap on") bold = xlwt.easyxf("font: bold on") +def detect(stream): + """Returns True if given stream is a readable excel file.""" + try: + xlrd.open_workbook(file_contents=stream) + return True + except (TypeError, XLRDError): + pass + try: + xlrd.open_workbook(file_contents=stream.read()) + return True + except (AttributeError, XLRDError): + pass + try: + xlrd.open_workbook(filename=stream) + return True + except: + return False + def export_set(dataset): """Returns XLS representation of Dataset.""" diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index 0eab78e..c9ffbd3 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -60,5 +60,5 @@ def detect(stream): return True else: return False - except yaml.parser.ParserError: - return False \ No newline at end of file + except (yaml.parser.ParserError, yaml.reader.ReaderError): + return False From 7c963a0f4dc681f4fb086575188c340fefd7a2e5 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Wed, 18 Jan 2012 11:24:18 -0500 Subject: [PATCH 29/96] SOPA --- docs/_themes/kr/layout.html | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/_themes/kr/layout.html b/docs/_themes/kr/layout.html index 696413a..1431b49 100644 --- a/docs/_themes/kr/layout.html +++ b/docs/_themes/kr/layout.html @@ -15,6 +15,12 @@ Fork me on GitHub + + + +