From eaa2b9b8ea778e7fbe1fe5c659fe1b267d52d4b1 Mon Sep 17 00:00:00 2001
From: Greg Thornton Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd This module is part of the xlrd package, which is released under a
+# BSD-style licence. A Python module for extracting data from MS Excel ™ spreadsheet files.
+#
+# Development of this module would not have been possible without the document
+# "OpenOffice.org's Documentation of the Microsoft Excel File Format"
+# ("OOo docs" for short).
+# The latest version is available from OpenOffice.org in
+# PDF format
+# and
+# ODT format.
+# Small portions of the OOo docs are reproduced in this
+# document. A study of the OOo docs is recommended for those who wish a
+# deeper understanding of the Excel file layout than the xlrd docs can provide.
+# Backporting to Python 2.1 was partially funded by
+#
+# Journyx - provider of timesheet and project accounting solutions.
+#
+# Provision of formatting information in version 0.6.1 was funded by
+#
+# Simplistix Ltd.
+#
+# This module presents all text strings as Python unicode objects.
+# From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode.
+# Older files (Excel 95 and earlier) don't keep strings in Unicode;
+# a CODEPAGE record provides a codepage number (for example, 1252) which is
+# used by xlrd to derive the encoding (for same example: "cp1252") which is
+# used to translate to Unicode. If the CODEPAGE record is missing (possible if the file was created
+# by third-party software), xlrd will assume that the encoding is ascii, and keep going.
+# If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and
+# you will need to determine the encoding yourself, and tell xlrd:
+#
+# Version 0.7.1 -- 2009-05-31
+# General information
+#
+# Acknowledgements
+#
+# Unicode
+#
+#
+# book = xlrd.open_workbook(..., encoding_override="cp1252")
+#
If the CODEPAGE record exists but is wrong (for example, the codepage +# number is 1251, but the strings are actually encoded in koi8_r), +# it can be overridden using the same mechanism. +# The supplied runxlrd.py has a corresponding command-line argument, which +# may be used for experimentation: +#
+# runxlrd.py -e koi8_r 3rows myfile.xls +#+#
The first place to look for an encoding ("codec name") is +# +# the Python documentation. +#
+# +# +#In reality, there are no such things. What you have are floating point +# numbers and pious hope. +# There are several problems with Excel dates:
+# +#(1) Dates are not stored as a separate data type; they are stored as +# floating point numbers and you have to rely on +# (a) the "number format" applied to them in Excel and/or +# (b) knowing which cells are supposed to have dates in them. +# This module helps with (a) by inspecting the +# format that has been applied to each number cell; +# if it appears to be a date format, the cell +# is classified as a date rather than a number. Feedback on this feature, +# especially from non-English-speaking locales, would be appreciated.
+# +#(2) Excel for Windows stores dates by default as the number of +# days (or fraction thereof) since 1899-12-31T00:00:00. Excel for +# Macintosh uses a default start date of 1904-01-01T00:00:00. The date +# system can be changed in Excel on a per-workbook basis (for example: +# Tools -> Options -> Calculation, tick the "1904 date system" box). +# This is of course a bad idea if there are already dates in the +# workbook. There is no good reason to change it even if there are no +# dates in the workbook. Which date system is in use is recorded in the +# workbook. A workbook transported from Windows to Macintosh (or vice +# versa) will work correctly with the host Excel. When using this +# module's xldate_as_tuple function to convert numbers from a workbook, +# you must use the datemode attribute of the Book object. If you guess, +# or make a judgement depending on where you believe the workbook was +# created, you run the risk of being 1462 days out of kilter.
+# +#Reference: +# http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162
+# +# +#(3) The Excel implementation of the Windows-default 1900-based date system works on the +# incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29, +# which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the +# result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc +# program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored. +# Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.
+# +#Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326
+# +#(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero. +# Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00), +# independent of the calendar, +# or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)? +# The xldate_* functions in this module +# take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both +# date systems. This is consistent with more recent Microsoft documentation +# (for example, the help file for Excel 2002 which says that the first day +# in the 1904 date system is 1904-01-02). +# +#
(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file, +# in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive), +# Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)." +# This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era, +# means that DATE(1899, 12, 31) is interpreted as 3799-12-31.
+# +#For further information, please refer to the documentation for the xldate_* functions.
+# +#+# A name is used to refer to a cell, a group of cells, a constant +# value, a formula, or a macro. Usually the scope of a name is global +# across the whole workbook. However it can be local to a worksheet. +# For example, if the sales figures are in different cells in +# different sheets, the user may define the name "Sales" in each +# sheet. There are built-in names, like "Print_Area" and +# "Print_Titles"; these two are naturally local to a sheet. +#
+# To inspect the names with a user interface like MS Excel, OOo Calc, +# or Gnumeric, click on Insert/Names/Define. This will show the global +# names, plus those local to the currently selected sheet. +#
+# A Book object provides two dictionaries (name_map and +# name_and_scope_map) and a list (name_obj_list) which allow various +# ways of accessing the Name objects. There is one Name object for +# each NAME record found in the workbook. Name objects have many +# attributes, several of which are relevant only when obj.macro is 1. +#
+# In the examples directory you will find namesdemo.xls which +# showcases the many different ways that names can be used, and +# xlrdnamesAPIdemo.py which offers 3 different queries for inspecting +# the names in your files, and shows how to extract whatever a name is +# referring to. There is currently one "convenience method", +# Name.cell(), which extracts the value in the case where the name +# refers to a single cell. More convenience methods are planned. The +# source code for Name.cell (in __init__.py) is an extra source of +# information on how the Name attributes hang together. +#
+# +#Name information is not extracted from files older than +# Excel 5.0 (Book.biff_version < 50)
+# +#This collection of features, new in xlrd version 0.6.1, is intended +# to provide the information needed to (1) display/render spreadsheet contents +# (say) on a screen or in a PDF file, and (2) copy spreadsheet data to another +# file without losing the ability to display/render it.
+# +#A colour is represented in Excel as a (red, green, blue) ("RGB") tuple +# with each component in range(256). However it is not possible to access an +# unlimited number of colours; each spreadsheet is limited to a palette of 64 different +# colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index +# ("colour index") into this palette. +# +# Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue, +# yellow, magenta, and cyan.
+#
+# The remaining colours in the palette (8 to 63 in Excel 5.0 and later)
+# can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette
+# of 7 rows of 8 colours. The last two rows are reserved for use in charts.
+# The correspondence between this grid and the assigned
+# colour indexes is NOT left-to-right top-to-bottom.
+# Indexes 8 to 15 correspond to changeable
+# parallels of the 8 fixed colours -- for example, index 7 is forever cyan;
+# index 15 starts off being cyan but can be changed by the user.
+#
+# The default colour for each index depends on the file version; tables of the defaults
+# are available in the source code. If the user changes one or more colours,
+# a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable
+# indexes.
+# Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer
+# to colour index 7; "[COLOR16]...." will produce cyan
+# unless the user changes colour index 15 to something else.
+#
+#
In addition, there are several "magic" colour indexes used by Excel:
+# 0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines
+# (used in XF, CF, and WINDOW2 records)
+# 0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background
+# (used in XF and CF records )
+# 0x43: System face colour (dialogue background colour)
+# 0x4D: System window text colour for chart border lines
+# 0x4E: System window background colour for chart areas
+# 0x4F: Automatic colour for chart border lines (seems to be always Black)
+# 0x50: System ToolTip background colour (used in note objects)
+# 0x51: System ToolTip text colour (used in note objects)
+# 0x7FFF: System window text colour for fonts (used in FONT and CF records)
+# Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT
+# records.
+#
+#
This feature, new in version 0.7.1, is governed by the on_demand argument +# to the open_workbook() function and allows saving memory and time by loading +# only those sheets that the caller is interested in, and releasing sheets +# when no longer required.
+# +#on_demand=False (default): No change. open_workbook() loads global data +# and all sheets, releases resources no longer required (principally the +# str or mmap object containing the Workbook stream), and returns.
+# +#on_demand=True and BIFF version < 5.0: A warning message is emitted, +# on_demand is recorded as False, and the old process is followed.
+# +#on_demand=True and BIFF version >= 5.0: open_workbook() loads global +# data and returns without releasing resources. At this stage, the only +# information available about sheets is Book.nsheets and Book.sheet_names().
+# +#Book.sheet_by_name() and Book.sheet_by_index() will load the requested +# sheet if it is not already loaded.
+# +#Book.sheets() will load all/any unloaded sheets.
+# +#The caller may save memory by calling +# Book.unload_sheet(sheet_name_or_index) when finished with the sheet. +# This applies irrespective of the state of on_demand.
+# +#The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx() +# -- except if those required resources have been released (which will +# have happened automatically when on_demand is false). This is the only +# case where an exception will be raised.
+# +#The caller may query the state of a sheet: +# Book.sheet_loaded(sheet_name_or_index) -> a bool
+# +## + +# 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo +# 2008-11-23 SJM Support dumping FILEPASS and EXTERNNAME records; extra info from SUPBOOK records +# 2008-11-23 SJM colname utility function now supports more than 256 columns +# 2008-04-24 SJM Recovery code for file with out-of-order/missing/wrong CODEPAGE record needed to be called for EXTERNSHEET/BOUNDSHEET/NAME/SHEETHDR records. +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-03 SJM Minor tweaks for IronPython support +# 2008-02-02 SJM Previous change stopped dump() and count_records() ... fixed +# 2007-12-25 SJM Decouple Book initialisation & loading -- to allow for multiple loaders. +# 2007-12-20 SJM Better error message for unsupported file format. +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-11-20 SJM Wasn't handling EXTERNSHEET record that needed CONTINUE record(s) +# 2007-07-07 SJM Version changed to 0.7.0 (alpha 1) +# 2007-07-07 SJM Logfile arg wasn't being passed from open_workbook to compdoc.CompDoc +# 2007-05-21 SJM If no CODEPAGE record in pre-8.0 file, assume ascii and keep going. +# 2007-04-22 SJM Removed antique undocumented Book.get_name_dict method. + +from timemachine import * +from biffh import * +from struct import unpack +import sys +import time +import sheet +import compdoc +from xldate import xldate_as_tuple, XLDateError +from formula import * +import formatting +if sys.version.startswith("IronPython"): + # print >> sys.stderr, "...importing encodings" + import encodings + +empty_cell = sheet.empty_cell # for exposure to the world ... + +DEBUG = 0 + +USE_FANCY_CD = 1 + +TOGGLE_GC = 0 +import gc +# gc.set_debug(gc.DEBUG_STATS) + +try: + import mmap + MMAP_AVAILABLE = 1 +except ImportError: + MMAP_AVAILABLE = 0 +USE_MMAP = MMAP_AVAILABLE + +MY_EOF = 0xF00BAAA # not a 16-bit number + +SUPBOOK_UNK, SUPBOOK_INTERNAL, SUPBOOK_EXTERNAL, SUPBOOK_ADDIN, SUPBOOK_DDEOLE = range(5) + +SUPPORTED_VERSIONS = (80, 70, 50, 45, 40, 30, 21, 20) + +code_from_builtin_name = { + u"Consolidate_Area": u"\x00", + u"Auto_Open": u"\x01", + u"Auto_Close": u"\x02", + u"Extract": u"\x03", + u"Database": u"\x04", + u"Criteria": u"\x05", + u"Print_Area": u"\x06", + u"Print_Titles": u"\x07", + u"Recorder": u"\x08", + u"Data_Form": u"\x09", + u"Auto_Activate": u"\x0A", + u"Auto_Deactivate": u"\x0B", + u"Sheet_Title": u"\x0C", + u"_FilterDatabase": u"\x0D", + } +builtin_name_from_code = {} +for _bin, _bic in code_from_builtin_name.items(): + builtin_name_from_code[_bic] = _bin +del _bin, _bic + +## +# +# Open a spreadsheet file for data extraction. +# +# @param filename The path to the spreadsheet file to be opened. +# +# @param logfile An open file to which messages and diagnostics are written. +# +# @param verbosity Increases the volume of trace material written to the logfile. +# +# @param pickleable Default is true. In Python 2.4 or earlier, setting to false +# will cause use of array.array objects which save some memory but can't be pickled. +# In Python 2.5, array.arrays are used unconditionally. Note: if you have large files that +# you need to read multiple times, it can be much faster to cPickle.dump() the xlrd.Book object +# once, and use cPickle.load() multiple times. +# @param use_mmap Whether to use the mmap module is determined heuristically. +# Use this arg to override the result. Current heuristic: mmap is used if it exists. +# +# @param file_contents ... as a string or an mmap.mmap object or some other behave-alike object. +# If file_contents is supplied, filename will not be used, except (possibly) in messages. +# +# @param encoding_override Used to overcome missing or bad codepage information +# in older-version files. Refer to discussion in the Unicode section above. +#WARNING: You don't call this class yourself. You use the Book object that +# was returned when you called xlrd.open_workbook("myfile.xls").
+ +class Book(BaseObject): + + ## + # The number of worksheets present in the workbook file. + # This information is available even when no sheets have yet been loaded. + nsheets = 0 + + ## + # Which date system was in force when this file was last saved.Portions copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+## + +# 2008-02-10 SJM BIFF2 BLANK record +# 2008-02-08 SJM Preparation for Excel 2.0 support +# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count +# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files. +# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte. +# 2007-04-22 SJM Remove experimental "trimming" facility. + +DEBUG = 0 + +from struct import unpack +import sys +from timemachine import * + +class XLRDError(Exception): + pass + +## +# Parent of almost all other classes in the package. Defines a common "dump" method +# for debugging. + +class BaseObject(object): + + _repr_these = [] + + ## + # @param f open file object, to which the dump is written + # @param header text to write before the dump + # @param footer text to write after the dump + # @param indent number of leading spaces (for recursive calls) + + def dump(self, f=None, header=None, footer=None, indent=0): + if f is None: + f = sys.stderr + alist = self.__dict__.items() + alist.sort() + pad = " " * indent + if header is not None: print >> f, header + list_type = type([]) + dict_type = type({}) + for attr, value in alist: + if getattr(value, 'dump', None) and attr != 'book': + value.dump(f, + header="%s%s (%s object):" % (pad, attr, value.__class__.__name__), + indent=indent+4) + elif attr not in self._repr_these and ( + isinstance(value, list_type) or isinstance(value, dict_type) + ): + print >> f, "%s%s: %s, len = %d" % (pad, attr, type(value), len(value)) + else: + print >> f, "%s%s: %r" % (pad, attr, value) + if footer is not None: print >> f, footer + +FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text +DATEFORMAT = FDT +NUMBERFORMAT = FNU + +( + XL_CELL_EMPTY, + XL_CELL_TEXT, + XL_CELL_NUMBER, + XL_CELL_DATE, + XL_CELL_BOOLEAN, + XL_CELL_ERROR, + XL_CELL_BLANK, # for use in debugging, gathering stats, etc +) = range(7) + +biff_text_from_num = { + 0: "(not BIFF)", + 20: "2.0", + 21: "2.1", + 30: "3", + 40: "4S", + 45: "4W", + 50: "5", + 70: "7", + 80: "8", + 85: "8X", + } + +## +#This dictionary can be used to produce a text version of the internal codes +# that Excel uses for error cells. Here are its contents: +#
+# 0x00: '#NULL!', # Intersection of two cell ranges is empty +# 0x07: '#DIV/0!', # Division by zero +# 0x0F: '#VALUE!', # Wrong type of operand +# 0x17: '#REF!', # Illegal or deleted cell reference +# 0x1D: '#NAME?', # Wrong function or range name +# 0x24: '#NUM!', # Value range overflow +# 0x2A: '#N/A!', # Argument or function not available +#+ +error_text_from_code = { + 0x00: '#NULL!', # Intersection of two cell ranges is empty + 0x07: '#DIV/0!', # Division by zero + 0x0F: '#VALUE!', # Wrong type of operand + 0x17: '#REF!', # Illegal or deleted cell reference + 0x1D: '#NAME?', # Wrong function or range name + 0x24: '#NUM!', # Value range overflow + 0x2A: '#N/A!', # Argument or function not available +} + +BIFF_FIRST_UNICODE = 80 + +XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5 +XL_WORKBOOK_GLOBALS_4W = 0x100 +XL_WORKSHEET = WRKSHEET = 0x10 + +XL_BOUNDSHEET_WORKSHEET = 0x00 +XL_BOUNDSHEET_CHART = 0x02 +XL_BOUNDSHEET_VB_MODULE = 0x06 + +# XL_RK2 = 0x7e +XL_ARRAY = 0x0221 +XL_ARRAY2 = 0x0021 +XL_BLANK = 0x0201 +XL_BLANK_B2 = 0x01 +XL_BOF = 0x809 +XL_BOOLERR = 0x205 +XL_BOOLERR_B2 = 0x5 +XL_BOUNDSHEET = 0x85 +XL_BUILTINFMTCOUNT = 0x56 +XL_CF = 0x01B1 +XL_CODEPAGE = 0x42 +XL_COLINFO = 0x7D +XL_COLUMNDEFAULT = 0x20 # BIFF2 only +XL_COLWIDTH = 0x24 # BIFF2 only +XL_CONDFMT = 0x01B0 +XL_CONTINUE = 0x3c +XL_COUNTRY = 0x8C +XL_DATEMODE = 0x22 +XL_DEFAULTROWHEIGHT = 0x0225 +XL_DEFCOLWIDTH = 0x55 +XL_DIMENSION = 0x200 +XL_DIMENSION2 = 0x0 +XL_EFONT = 0x45 +XL_EOF = 0x0a +XL_EXTERNNAME = 0x23 +XL_EXTERNSHEET = 0x17 +XL_EXTSST = 0xff +XL_FEAT11 = 0x872 +XL_FILEPASS = 0x2f +XL_FONT = 0x31 +XL_FONT_B3B4 = 0x231 +XL_FORMAT = 0x41e +XL_FORMAT2 = 0x1E # BIFF2, BIFF3 +XL_FORMULA = 0x6 +XL_FORMULA3 = 0x206 +XL_FORMULA4 = 0x406 +XL_GCW = 0xab +XL_INDEX = 0x20b +XL_INTEGER = 0x2 # BIFF2 only +XL_IXFE = 0x44 # BIFF2 only +XL_LABEL = 0x204 +XL_LABEL_B2 = 0x04 +XL_LABELRANGES = 0x15f +XL_LABELSST = 0xfd +XL_MERGEDCELLS = 0xE5 +XL_MSO_DRAWING = 0x00EC +XL_MSO_DRAWING_GROUP = 0x00EB +XL_MSO_DRAWING_SELECTION = 0x00ED +XL_MULRK = 0xbd +XL_MULBLANK = 0xbe +XL_NAME = 0x18 +XL_NOTE = 0x1c +XL_NUMBER = 0x203 +XL_NUMBER_B2 = 0x3 +XL_OBJ = 0x5D +XL_PALETTE = 0x92 +XL_RK = 0x27e +XL_ROW = 0x208 +XL_ROW_B2 = 0x08 +XL_RSTRING = 0xd6 +XL_SHEETHDR = 0x8F # BIFF4W only +XL_SHEETSOFFSET = 0x8E # BIFF4W only +XL_SHRFMLA = 0x04bc +XL_SST = 0xfc +XL_STANDARDWIDTH = 0x99 +XL_STRING = 0x207 +XL_STRING_B2 = 0x7 +XL_STYLE = 0x293 +XL_SUPBOOK = 0x1AE +XL_TABLEOP = 0x236 +XL_TABLEOP2 = 0x37 +XL_TABLEOP_B2 = 0x36 +XL_TXO = 0x1b6 +XL_UNCALCED = 0x5e +XL_UNKNOWN = 0xffff +XL_WINDOW2 = 0x023E +XL_WRITEACCESS = 0x5C +XL_XF = 0xe0 +XL_XF2 = 0x0043 # BIFF2 version of XF record +XL_XF3 = 0x0243 # BIFF3 version of XF record +XL_XF4 = 0x0443 # BIFF4 version of XF record + +boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4} +bofcodes = (0x0809, 0x0409, 0x0209, 0x0009) + +XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206) + +_cell_opcode_list = [ + XL_BOOLERR, + XL_FORMULA, + XL_FORMULA3, + XL_FORMULA4, + XL_LABEL, + XL_LABELSST, + XL_MULRK, + XL_NUMBER, + XL_RK, + XL_RSTRING, + ] +_cell_opcode_dict = {} +for _cell_opcode in _cell_opcode_list: + _cell_opcode_dict[_cell_opcode] = 1 +is_cell_opcode = _cell_opcode_dict.has_key + +# def fprintf(f, fmt, *vargs): f.write(fmt % vargs) + +def fprintf(f, fmt, *vargs): + if fmt.endswith('\n'): + print >> f, fmt[:-1] % vargs + else: + print >> f, fmt % vargs, + +def upkbits(tgt_obj, src, manifest, local_setattr=setattr): + for n, mask, attr in manifest: + local_setattr(tgt_obj, attr, (src & mask) >> n) + +def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int): + for n, mask, attr in manifest: + local_setattr(tgt_obj, attr, local_int((src & mask) >> n)) + +def unpack_string(data, pos, encoding, lenlen=1): + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += lenlen + return unicode(data[pos:pos+nchars], encoding) + +def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None): + if known_len is not None: + # On a NAME record, the length byte is detached from the front of the string. + nchars = known_len + else: + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + pos += lenlen + newpos = pos + nchars + return (unicode(data[pos:newpos], encoding), newpos) + +def unpack_unicode(data, pos, lenlen=2): + "Return unicode_strg" + nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0] + if not nchars: + # Ambiguous whether 0-length string should have an "options" byte. + # Avoid crash if missing. + return u"" + pos += lenlen + options = ord(data[pos]) + pos += 1 + # phonetic = options & 0x04 + # richtext = options & 0x08 + if options & 0x08: + # rt = unpack('
Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+## + +# No part of the content of this file was derived from the works of David Giffin. + +# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SCSS [Frank Hoffsuemmer] +# 2007-09-08 SJM Warning message if sector sizes are extremely large. +# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted. +# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms. + + +import sys +from struct import unpack +from timemachine import * + +## +# Magic cookie that should appear in the first 8 bytes of the file. +SIGNATURE = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1" + +EOCSID = -2 +FREESID = -1 +SATSID = -3 +MSATSID = -4 + +class CompDocError(Exception): + pass + +class DirNode(object): + + def __init__(self, DID, dent, DEBUG=0): + # dent is the 128-byte directory entry + self.DID = DID + # (cbufsize, self.etype, self.colour, self.left_DID, self.right_DID, + # self.root_DID, + # self.first_SID, + # self.tot_size) = \ + # unpack('Implements the minimal functionality required +to extract a "Workbook" or "Book" stream (as one big string) +from an OLE2 Compound Document file. +
Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
+This module is part of the xlrd package, which is released under a BSD-style licence.
+Compound document handler.
+For more information about this class, see The CompDoc Class.
+Magic cookie that should appear in the first 8 bytes of the file.
+Compound document handler.
+Interrogate the compound document's directory; return the stream as a string if found, otherwise +return None.
+Interrogate the compound document's directory. +If the named stream is not found, (None, 0, 0) will be returned. +If the named stream is found and is contiguous within the original byte sequence ("mem") +used when the document was opened, +then (mem, offset_to_start_of_stream, length_of_stream) is returned. +Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.
+A Python module for extracting data from MS Excel ™ spreadsheet files.
+
+Version 0.7.1 -- 2009-05-31
+
+Development of this module would not have been possible without the document +"OpenOffice.org's Documentation of the Microsoft Excel File Format" +("OOo docs" for short). +The latest version is available from OpenOffice.org in + PDF format +and + ODT format. +Small portions of the OOo docs are reproduced in this +document. A study of the OOo docs is recommended for those who wish a +deeper understanding of the Excel file layout than the xlrd docs can provide. +
+ +Backporting to Python 2.1 was partially funded by + + Journyx - provider of timesheet and project accounting solutions. + +
+ +Provision of formatting information in version 0.6.1 was funded by + + Simplistix Ltd. + +
+ +This module presents all text strings as Python unicode objects. +From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode. +Older files (Excel 95 and earlier) don't keep strings in Unicode; +a CODEPAGE record provides a codepage number (for example, 1252) which is +used by xlrd to derive the encoding (for same example: "cp1252") which is +used to translate to Unicode.
+ +If the CODEPAGE record is missing (possible if the file was created +by third-party software), xlrd will assume that the encoding is ascii, and keep going. +If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and +you will need to determine the encoding yourself, and tell xlrd: +
+ book = xlrd.open_workbook(..., encoding_override="cp1252") ++
If the CODEPAGE record exists but is wrong (for example, the codepage +number is 1251, but the strings are actually encoded in koi8_r), +it can be overridden using the same mechanism. +The supplied runxlrd.py has a corresponding command-line argument, which +may be used for experimentation: +
+ runxlrd.py -e koi8_r 3rows myfile.xls ++
The first place to look for an encoding ("codec name") is + +the Python documentation. +
+ + +In reality, there are no such things. What you have are floating point +numbers and pious hope. +There are several problems with Excel dates:
+ +(1) Dates are not stored as a separate data type; they are stored as +floating point numbers and you have to rely on +(a) the "number format" applied to them in Excel and/or +(b) knowing which cells are supposed to have dates in them. +This module helps with (a) by inspecting the +format that has been applied to each number cell; +if it appears to be a date format, the cell +is classified as a date rather than a number. Feedback on this feature, +especially from non-English-speaking locales, would be appreciated.
+ +(2) Excel for Windows stores dates by default as the number of +days (or fraction thereof) since 1899-12-31T00:00:00. Excel for +Macintosh uses a default start date of 1904-01-01T00:00:00. The date +system can be changed in Excel on a per-workbook basis (for example: +Tools -> Options -> Calculation, tick the "1904 date system" box). +This is of course a bad idea if there are already dates in the +workbook. There is no good reason to change it even if there are no +dates in the workbook. Which date system is in use is recorded in the +workbook. A workbook transported from Windows to Macintosh (or vice +versa) will work correctly with the host Excel. When using this +module's xldate_as_tuple function to convert numbers from a workbook, +you must use the datemode attribute of the Book object. If you guess, +or make a judgement depending on where you believe the workbook was +created, you run the risk of being 1462 days out of kilter.
+ +Reference: +http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162
+ + +(3) The Excel implementation of the Windows-default 1900-based date system works on the +incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29, +which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the +result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc +program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored. +Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.
+ +Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326
+ +(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero. +Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00), +independent of the calendar, +or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)? +The xldate_* functions in this module +take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both +date systems. This is consistent with more recent Microsoft documentation +(for example, the help file for Excel 2002 which says that the first day +in the 1904 date system is 1904-01-02). + +
(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file, +in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive), +Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)." +This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era, +means that DATE(1899, 12, 31) is interpreted as 3799-12-31.
+ +For further information, please refer to the documentation for the xldate_* functions.
+ ++A name is used to refer to a cell, a group of cells, a constant +value, a formula, or a macro. Usually the scope of a name is global +across the whole workbook. However it can be local to a worksheet. +For example, if the sales figures are in different cells in +different sheets, the user may define the name "Sales" in each +sheet. There are built-in names, like "Print_Area" and +"Print_Titles"; these two are naturally local to a sheet. +
+To inspect the names with a user interface like MS Excel, OOo Calc, +or Gnumeric, click on Insert/Names/Define. This will show the global +names, plus those local to the currently selected sheet. +
+A Book object provides two dictionaries (name_map and +name_and_scope_map) and a list (name_obj_list) which allow various +ways of accessing the Name objects. There is one Name object for +each NAME record found in the workbook. Name objects have many +attributes, several of which are relevant only when obj.macro is 1. +
+In the examples directory you will find namesdemo.xls which +showcases the many different ways that names can be used, and +xlrdnamesAPIdemo.py which offers 3 different queries for inspecting +the names in your files, and shows how to extract whatever a name is +referring to. There is currently one "convenience method", +Name.cell(), which extracts the value in the case where the name +refers to a single cell. More convenience methods are planned. The +source code for Name.cell (in __init__.py) is an extra source of +information on how the Name attributes hang together. +
+ +Name information is not extracted from files older than +Excel 5.0 (Book.biff_version < 50)
+ +This collection of features, new in xlrd version 0.6.1, is intended +to provide the information needed to (1) display/render spreadsheet contents +(say) on a screen or in a PDF file, and (2) copy spreadsheet data to another +file without losing the ability to display/render it.
+ +A colour is represented in Excel as a (red, green, blue) ("RGB") tuple +with each component in range(256). However it is not possible to access an +unlimited number of colours; each spreadsheet is limited to a palette of 64 different +colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index +("colour index") into this palette. + +Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue, +yellow, magenta, and cyan.
+
+The remaining colours in the palette (8 to 63 in Excel 5.0 and later)
+can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette
+of 7 rows of 8 colours. The last two rows are reserved for use in charts.
+The correspondence between this grid and the assigned
+colour indexes is NOT left-to-right top-to-bottom.
+Indexes 8 to 15 correspond to changeable
+parallels of the 8 fixed colours -- for example, index 7 is forever cyan;
+index 15 starts off being cyan but can be changed by the user.
+
+The default colour for each index depends on the file version; tables of the defaults
+are available in the source code. If the user changes one or more colours,
+a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable
+indexes.
+Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer
+to colour index 7; "[COLOR16]...." will produce cyan
+unless the user changes colour index 15 to something else.
+
+
In addition, there are several "magic" colour indexes used by Excel:
+0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines
+(used in XF, CF, and WINDOW2 records)
+0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background
+(used in XF and CF records )
+0x43: System face colour (dialogue background colour)
+0x4D: System window text colour for chart border lines
+0x4E: System window background colour for chart areas
+0x4F: Automatic colour for chart border lines (seems to be always Black)
+0x50: System ToolTip background colour (used in note objects)
+0x51: System ToolTip text colour (used in note objects)
+0x7FFF: System window text colour for fonts (used in FONT and CF records)
+Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT
+records.
+
+
This feature, new in version 0.7.1, is governed by the on_demand argument +to the open_workbook() function and allows saving memory and time by loading +only those sheets that the caller is interested in, and releasing sheets +when no longer required.
+ +on_demand=False (default): No change. open_workbook() loads global data +and all sheets, releases resources no longer required (principally the +str or mmap object containing the Workbook stream), and returns.
+ +on_demand=True and BIFF version < 5.0: A warning message is emitted, +on_demand is recorded as False, and the old process is followed.
+ +on_demand=True and BIFF version >= 5.0: open_workbook() loads global +data and returns without releasing resources. At this stage, the only +information available about sheets is Book.nsheets and Book.sheet_names().
+ +Book.sheet_by_name() and Book.sheet_by_index() will load the requested +sheet if it is not already loaded.
+ +Book.sheets() will load all/any unloaded sheets.
+ +The caller may save memory by calling +Book.unload_sheet(sheet_name_or_index) when finished with the sheet. +This applies irrespective of the state of on_demand.
+ +The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx() + -- except if those required resources have been released (which will +have happened automatically when on_demand is false). This is the only +case where an exception will be raised.
+ +The caller may query the state of a sheet: +Book.sheet_loaded(sheet_name_or_index) -> a bool
+ +Parent of almost all other classes in the package.
+For more information about this class, see The BaseObject Class.
+Contents of a "workbook".
+For more information about this class, see The Book Class.
+Contains the data for one cell.
+For more information about this class, see The Cell Class.
+Utility function: (5, 7) => 'H6'
+Utility function: (5, 7) => '$H$6'
+Width and default formatting information that applies to one or +more columns in a sheet.
+For more information about this class, see The Colinfo Class.
+Utility function: 7 => 'H', 27 => 'AB'
+For debugging and analysis: summarise the file's BIFF records. +I.e. produce a sorted file of (record_name, count).
+For debugging: dump the file's BIFF records in char & hex. +
There is one and only one instance of an empty cell -- it's a singleton. This is it. +You may use a test like "acell is empty_cell".
+This mixin class exists solely so that Format, Font, and XF....
+For more information about this class, see The EqNeAttrs Class.
+This dictionary can be used to produce a text version of the internal codes +that Excel uses for error cells. Here are its contents: +
+0x00: '#NULL!', # Intersection of two cell ranges is empty +0x07: '#DIV/0!', # Division by zero +0x0F: '#VALUE!', # Wrong type of operand +0x17: '#REF!', # Illegal or deleted cell reference +0x1D: '#NAME?', # Wrong function or range name +0x24: '#NUM!', # Value range overflow +0x2A: '#N/A!', # Argument or function not available ++
An Excel "font" contains the details of not only what is normally +considered a font, but also several other display attributes.
+For more information about this class, see The Font Class.
+"Number format" information from a FORMAT record.
+For more information about this class, see The Format Class.
+Information relating to a named reference, formula, macro, etc.
+For more information about this class, see The Name Class.
+Open a spreadsheet file for data extraction.
+Used in evaluating formulas.
+For more information about this class, see The Operand Class.
+Utility function:
+
Ref3D((1, 4, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20'
+
Utility function:
+
Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1))
+=> 'Sheet1![@-13,#-32]:[@+12,#-23]'
+where '@' refers to the current or base column and '#'
+refers to the current or base row.
+
Represents an absolute or relative 3-dimensional reference to a box +of one or more cells.
+For more information about this class, see The Ref3D Class.
+Height and default formatting information that applies to a row in a sheet.
+For more information about this class, see The Rowinfo Class.
+Contains the data for one worksheet.
+For more information about this class, see The Sheet Class.
+eXtended Formatting information for cells, rows, columns and styles.
+For more information about this class, see The XF Class.
+A collection of the alignment and similar attributes of an XF record.
+For more information about this class, see The XFAlignment Class.
+A collection of the background-related attributes of an XF record.
+For more information about this class, see The XFBackground Class.
+A collection of the border-related attributes of an XF record.
+For more information about this class, see The XFBorder Class.
+A collection of the protection-related attributes of an XF record.
+For more information about this class, see The XFProtection Class.
+Convert an Excel number (presumed to represent a date, a datetime or a time) into +a tuple suitable for feeding to datetime or mx.DateTime constructors.
+Convert a date tuple (year, month, day) to an Excel date.
+Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value. +For more details, refer to other xldate_from_*_tuple functions.
+Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day).
+Parent of almost all other classes in the package. Defines a common "dump" method +for debugging.
+Contents of a "workbook". +
WARNING: You don't call this class yourself. You use the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").
+Version of BIFF (Binary Interchange File Format) used to create the file. +Latest is 8.0 (represented here as 80), introduced with Excel 97. +Earliest supported by this module: 2.0 (represented as 20).
+An integer denoting the character set used for strings in this file. +For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE. +For earlier versions, this is used to derive the appropriate Python encoding +to be used to convert to Unicode. +Examples: 1252 -> 'cp1252', 10000 -> 'mac_roman'
+This provides definitions for colour indexes. Please refer to the
+above section "The Palette; Colour Indexes" for an explanation
+of how colours are represented in Excel.
+Colour indexes into the palette map into (red, green, blue) tuples.
+"Magic" indexes e.g. 0x7FFF map to None.
+colour_map is what you need if you want to render cells on screen or in a PDF
+file. If you are writing an output XLS file, use palette_record.
+
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+
A tuple containing the (telephone system) country code for:
+ [0]: the user-interface setting when the file was created.
+ [1]: the regional settings.
+Example: (1, 61) meaning (USA, Australia).
+This information may give a clue to the correct encoding for an unknown codepage.
+For a long list of observed values, refer to the OpenOffice.org documentation for
+the COUNTRY record.
+
Which date system was in force when this file was last saved.
+ 0 => 1900 system (the Excel for Windows default).
+ 1 => 1904 system (the Excel for Macintosh default).
+
The encoding that was derived from the codepage.
+A list of Font class instances, each corresponding to a FONT record.
+
-- New in version 0.6.1
+
A list of Format objects, each corresponding to a FORMAT record, in
+the order that they appear in the input file.
+It does not contain builtin formats.
+If you are creating an output file using (for example) pyExcelerator,
+use this list.
+The collection to be used for all visual rendering purposes is format_map.
+
-- New in version 0.6.1
+
The mapping from XF.format_key to Format object.
+
-- New in version 0.6.1
+
Time in seconds to extract the XLS image as a contiguous string (or mmap equivalent).
+Time in seconds to parse the data from the contiguous string (or mmap equivalent).
+A mapping from (lower_case_name, scope) to a single Name object.
+
-- New in version 0.6.0
+
A mapping from lower_case_name to a list of Name objects. The list is
+sorted in scope order. Typically there will be one item (of global scope)
+in the list.
+
-- New in version 0.6.0
+
List containing a Name object for each NAME record in the workbook.
+
-- New in version 0.6.0
+
The number of worksheets present in the workbook file. +This information is available even when no sheets have yet been loaded.
+If the user has changed any of the colours in the standard palette, the XLS
+file will contain a PALETTE record with 56 (16 for Excel 4.0 and earlier)
+RGB values in it, and this list will be e.g. [(r0, b0, g0), ..., (r55, b55, g55)].
+Otherwise this list will be empty. This is what you need if you are
+writing an output XLS file. If you want to render cells on screen or in a PDF
+file, use colour_map.
+
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+
This provides access via name to the extended format information for
+both built-in styles and user-defined styles.
+It maps name to (built_in, xf_index), where:
+name is either the name of a user-defined style,
+or the name of one of the built-in styles. Known built-in names are
+Normal, RowLevel_1 to RowLevel_7,
+ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]",
+"Currency [0]", Hyperlink, and "Followed Hyperlink".
+built_in 1 = built-in style, 0 = user-defined
+xf_index is an index into Book.xf_list.
+References: OOo docs s6.99 (STYLE record); Excel UI Format/Style
+
-- New in version 0.6.1
+
What (if anything) is recorded as the name of the last user to save the file.
+A list of XF class instances, each corresponding to an XF record.
+
-- New in version 0.6.1
+
Contains the data for one cell.
+ +WARNING: You don't call this class yourself. You access Cell objects +via methods of the Sheet object(s) that you found in the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").
+Cell objects have three attributes: ctype is an int, value +(which depends on ctype) and xf_index. +If "formatting_info" is not enabled when the workbook is opened, xf_index will be None. +The following table describes the types of cells and how their values +are represented in Python.
+ +| Type symbol | +Type number | +Python value | +
|---|---|---|
| XL_CELL_EMPTY | +0 | +empty string u'' | +
| XL_CELL_TEXT | +1 | +a Unicode string | +
| XL_CELL_NUMBER | +2 | +float | +
| XL_CELL_DATE | +3 | +float | +
| XL_CELL_BOOLEAN | +4 | +int; 1 means TRUE, 0 means FALSE | +
| XL_CELL_ERROR | +5 | +int representing internal Excel codes; for a text representation, +refer to the supplied dictionary error_text_from_code | +
| XL_CELL_BLANK | +6 | +empty string u''. Note: this type will appear only when +open_workbook(..., formatting_info=True) is used. | +
Width and default formatting information that applies to one or +more columns in a sheet. Derived from COLINFO records. + +
Here is the default hierarchy for width, according to the OOo docs:
+
+
"""In BIFF3, if a COLINFO record is missing for a column,
+the width specified in the record DEFCOLWIDTH is used instead.
+
+
In BIFF4-BIFF7, the width set in this [COLINFO] record is only used,
+if the corresponding bit for this column is cleared in the GCW
+record, otherwise the column width set in the DEFCOLWIDTH record
+is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]).
+
+
In BIFF8, if a COLINFO record is missing for a column,
+the width specified in the record STANDARDWIDTH is used.
+If this [STANDARDWIDTH] record is also missing,
+the column width of the record DEFCOLWIDTH is used instead."""
+
+
+Footnote: The docs on the GCW record say this:
+"""
+If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH
+record. If a bit is cleared, the corresponding column uses the width set in the
+COLINFO record for this column.
+
If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if
+the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH
+record of the worksheet will be used instead.
+
"""
+At the moment (2007-01-17) xlrd is going with the GCW version of the story.
+Reference to the source may be useful: see the computed_column_width(colx) method
+of the Sheet class.
+
-- New in version 0.6.1
+
Value of a 1-bit flag whose purpose is unknown +but is often seen set to 1
+1 = column is collapsed
+1 = column is hidden
+Outline level of the column, in range(7). +(0 = no outline)
+Width of the column in 1/256 of the width of the zero character, +using default font (first FONT record in the file).
+XF index to be used for formatting empty cells.
+This mixin class exists solely so that Format, Font, and XF.... objects +can be compared by value of their attributes.
+An Excel "font" contains the details of not only what is normally
+considered a font, but also several other display attributes.
+Items correspond to those in the Excel UI's Format/Cells/Font tab.
+
-- New in version 0.6.1
+
1 = Characters are bold. Redundant; see "weight" attribute.
+Values: 0 = ANSI Latin, 1 = System default, 2 = Symbol, +77 = Apple Roman, +128 = ANSI Japanese Shift-JIS, +129 = ANSI Korean (Hangul), +130 = ANSI Korean (Johab), +134 = ANSI Chinese Simplified GBK, +136 = ANSI Chinese Traditional BIG5, +161 = ANSI Greek, +162 = ANSI Turkish, +163 = ANSI Vietnamese, +177 = ANSI Hebrew, +178 = ANSI Arabic, +186 = ANSI Baltic, +204 = ANSI Cyrillic, +222 = ANSI Thai, +238 = ANSI Latin II (Central European), +255 = OEM Latin I
+An explanation of "colour index" is given in the Formatting +section at the start of this document.
+1 = Superscript, 2 = Subscript.
+0 = None (unknown or don't care)
+1 = Roman (variable width, serifed)
+2 = Swiss (variable width, sans-serifed)
+3 = Modern (fixed width, serifed or sans-serifed)
+4 = Script (cursive)
+5 = Decorative (specialised, for example Old English, Fraktur)
+
The 0-based index used to refer to this Font() instance. +Note that index 4 is never used; xlrd supplies a dummy place-holder.
+Height of the font (in twips). A twip = 1/20 of a point.
+1 = Characters are italic.
+The name of the font. Example: u"Arial"
+1 = Font is outline style (Macintosh only)
+1 = Font is shadow style (Macintosh only)
+1 = Characters are struck out.
+0 = None
+1 = Single; 0x21 (33) = Single accounting
+2 = Double; 0x22 (34) = Double accounting
+
1 = Characters are underlined. Redundant; see "underline_type" attribute.
+Font weight (100-1000). Standard values are 400 for normal text +and 700 for bold text.
+"Number format" information from a FORMAT record.
+
-- New in version 0.6.1
+
The key into Book.format_map
+The format string
+A classification that has been inferred from the format string.
+Currently, this is used only to distinguish between numbers and dates.
+
Values:
+
FUN = 0 # unknown
+
FDT = 1 # date
+
FNU = 2 # number
+
FGE = 3 # general
+
FTX = 4 # text
+
Information relating to a named reference, formula, macro, etc.
+
-- New in version 0.6.0
+
-- Name information is not extracted from files older than
+Excel 5.0 (Book.biff_version < 50)
+
This is a convenience method for the use case where the name +refers to one rectangular area in one worksheet.
+0 = Formula definition; 1 = Binary data
No examples have been sighted.
+
0 = User-defined name; 1 = Built-in name +(common examples: Print_Area, Print_Titles; see OOo docs for full list)
+This is a convenience method for the frequent use case where the name +refers to a single cell.
+0 = Simple formula; 1 = Complex formula (array formula or user defined)
+No examples have been sighted.
+
0 = Command macro; 1 = Function macro. Relevant only if macro == 1
+Function group. Relevant only if macro == 1; see OOo docs for values.
+0 = Visible; 1 = Hidden
+0 = Standard name; 1 = Macro name
+A Unicode string. If builtin, decoded as per OOo docs.
+The index of this object in book.name_obj_list
+An 8-bit string.
+The result of evaluating the formula, if any. +If no formula, or evaluation of the formula encountered problems, +the result is None. Otherwise the result is a single instance of the +Operand class.
+-1: The name is global (visible in all calculation sheets).
+-2: The name belongs to a macro sheet or VBA sheet.
+-3: The name is invalid.
+0 <= scope < book.nsheets: The name is local to the sheet whose index is scope.
+
0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1
+Used in evaluating formulas. +The following table describes the kinds and how their values +are represented.
+ +| Kind symbol | +Kind number | +Value representation | +
|---|---|---|
| oBOOL | +3 | +integer: 0 => False; 1 => True | +
| oERR | +4 | +None, or an int error code (same as XL_CELL_ERROR in the Cell class). + | +
| oMSNG | +5 | +Used by Excel as a placeholder for a missing (not supplied) function +argument. Should *not* appear as a final formula result. Value is None. | +
| oNUM | +2 | +A float. Note that there is no way of distinguishing dates. | +
| oREF | +-1 | +The value is either None or a non-empty list of
+absolute Ref3D instances. + |
+
| oREL | +-2 | +The value is None or a non-empty list of +fully or partially relative Ref3D instances. + | +
| oSTRG | +1 | +A Unicode string. | +
| oUNK | +0 | +The kind is unknown or ambiguous. The value is None | +
oUNK means that the kind of operand is not known unambiguously.
+The reconstituted text of the original formula. Function names will be +in English irrespective of the original language, which doesn't seem +to be recorded anywhere. The separator is ",", not ";" or whatever else +might be more appropriate for the end-user's locale; patches welcome.
+None means that the actual value of the operand is a variable +(depends on cell data), not a constant.
+Represents an absolute or relative 3-dimensional reference to a box
+of one or more cells.
+-- New in version 0.6.0
+
The coords attribute is a tuple of the form:
+(shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)
+where 0 <= thingxlo <= thingx < thingxhi.
+Note that it is quite possible to have thingx > nthings; for example
+Print_Titles could have colxhi == 256 and/or rowxhi == 65536
+irrespective of how many columns/rows are actually used in the worksheet.
+The caller will need to decide how to handle this situation.
+Keyword: IndexError :-)
+
The components of the coords attribute are also available as individual +attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.
+ +The relflags attribute is a 6-tuple of flags which indicate whether
+the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0).
+Note that there is necessarily no information available as to what cell(s)
+the reference could possibly be relative to. The caller must decide what if
+any use to make of oREL operands. Note also that a partially relative
+reference may well be a typo.
+For example, define name A1Z10 as $a$1:$z10 (missing $ after z)
+while the cursor is on cell Sheet3!A27.
+The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26)
+and relflags = (0, 0, 0, 1, 0, 0).
+So far, only one possibility of a sheet-relative component in
+a reference has been noticed: a 2D reference located in the "current sheet".
+
This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...).
+
Height and default formatting information that applies to a row in a sheet.
+Derived from ROW records.
+
-- New in version 0.6.1
+
This flag is set, if the upper border of at least one cell in this row +or if the lower border of at least one cell in the row above is +formatted with a thick line style. Thin and medium line styles are not +taken into account.
+This flag is set, if the lower border of at least one cell in this row +or if the upper border of at least one cell in the row below is +formatted with a medium or thick line style. Thin line styles are not +taken into account.
+0 = Row has custom height; 1 = Row has default height
+1 = the xf_index attribute is usable; 0 = ignore it
+Height of the row, in twips. One twip == 1/20 of a point
+1 = Row height and default font height do not match
+1 = Row is hidden (manually, or by a filter or outline group)
+1 = Outline group starts or ends here (depending on where the +outline buttons are located, see WSBOOL record [TODO ??]), +and is collapsed +
Outline level of the row
+Index to default XF record for empty cells in this row. +Don't use this if has_default_xf_index == 0.
+Contains the data for one worksheet.
+ +In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a +column index, counting from zero. +Negative values for row/column indexes and slice positions are supported in the expected fashion.
+ +For information about cell types and cell values, refer to the documentation of the Cell class.
+ +WARNING: You don't call this class yourself. You access Sheet objects via the Book object that +was returned when you called xlrd.open_workbook("myfile.xls").
+Cell object in the given row and column.
+Type of the cell in the given row and column. +Refer to the documentation of the Cell class.
+Value of the cell in the given row and column.
+XF index of the cell in the given row and column.
+This is an index into Book.xf_list.
+
-- New in version 0.6.1
+
Returns a sequence of the Cell objects in the given column.
+List of address ranges of cells containing column labels.
+These are set up in Excel by Insert > Name > Labels > Columns.
+
-- New in version 0.6.0
+
How to deconstruct the list:
+
+for crange in thesheet.col_label_ranges: + rlo, rhi, clo, chi = crange + for rx in xrange(rlo, rhi): + for cx in xrange(clo, chi): + print "Column label at (rowx=%d, colx=%d) is %r" \ + (rx, cx, thesheet.cell_value(rx, cx)) ++
Returns a slice of the Cell objects in the given column.
+Returns a slice of the types of the cells in the given column.
+Returns a slice of the values of the cells in the given column.
+The map from a column index to a Colinfo object. Often there is an entry
+in COLINFO records for all column indexes in range(257).
+Note that xlrd ignores the entry for the non-existent
+257th column. On the other hand, there may be no entry for unused columns.
+
-- New in version 0.6.1
+
Determine column display width.
+
-- New in version 0.6.1
+
+
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default value to be used for a row if there is +no ROW record for that row. +From the optional DEFAULTROWHEIGHT record. +
Default column width from DEFCOLWIDTH record, else None.
+From the OOo docs:
+"""Column width in characters, using the width of the zero character
+from default font (first FONT record in the file). Excel adds some
+extra space to the default width, depending on the default font and
+default font size. The algorithm how to exactly calculate the resulting
+column width is not known.
+Example: The default width of 8 set in this record results in a column
+width of 8.43 using Arial font with a size of 10 points."""
+For the default hierarchy, refer to the Colinfo class above.
+
-- New in version 0.6.1
+
A 256-element tuple corresponding to the contents of the GCW record for this sheet. +If no such record, treat as all bits zero. +Applies to BIFF4-7 only. See docs of Colinfo class for discussion.
+List of address ranges of cells which have been merged.
+These are set up in Excel by Format > Cells > Alignment, then ticking
+the "Merge cells" box.
+
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+
How to deconstruct the list:
+
+for crange in thesheet.merged_cells: + rlo, rhi, clo, chi = crange + for rowx in xrange(rlo, rhi): + for colx in xrange(clo, chi): + # cell (rlo, clo) (the top left one) will carry the data + # and formatting info; the remainder will be recorded as + # blank cells, but a renderer will apply the formatting info + # for the top left cell (e.g. border, pattern) to all cells in + # the range. ++
Name of sheet.
+Number of columns in sheet. A column index is in range(thesheet.ncols).
+Number of rows in sheet. A row index is in range(thesheet.nrows).
+Returns a sequence of the Cell objects in the given row.
+List of address ranges of cells containing row labels.
+For more details, see col_label_ranges above.
+
-- New in version 0.6.0
+
Returns a slice of the Cell objects in the given row.
+Returns a slice of the types +of the cells in the given row.
+Returns a slice of the values +of the cells in the given row.
+The map from a row index to a Rowinfo object. Note that it is possible
+to have missing entries -- at least one source of XLS files doesn't
+bother writing ROW records.
+
-- New in version 0.6.1
+
Default column width from STANDARDWIDTH record, else None.
+From the OOo docs:
+"""Default width of the columns in 1/256 of the width of the zero
+character, using default font (first FONT record in the file)."""
+For the default hierarchy, refer to the Colinfo class above.
+
-- New in version 0.6.1
+
Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden +by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden +only by VBA macro).
+eXtended Formatting information for cells, rows, columns and styles.
+
-- New in version 0.6.1
+
+
Each of the 6 flags below describes the validity of
+a specific group of attributes.
+
+In cell XFs, flag==0 means the attributes of the parent style XF are used,
+(but only if the attributes are valid there); flag==1 means the attributes
+of this XF are used.
+In style XFs, flag==0 means the attribute setting is valid; flag==1 means
+the attribute should be ignored.
+Note that the API
+provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs
+have had the above inheritance mechanism applied.
+
+
An instance of an XFAlignment object.
+An instance of an XFBackground object.
+An instance of an XFBorder object.
+Index into Book.font_list
+Key into Book.format_map +
+Warning: OOo docs on the XF record call this "Index to FORMAT record". +It is not an index in the Python sense. It is a key to a map. +It is true only for Excel 4.0 and earlier files +that the key into format_map from an XF instance +is the same as the index into format_list, and only +if the index is less than 164. +
+0 = cell XF, 1 = style XF
+cell XF: Index into Book.xf_list
+of this XF's style XF
+style XF: 0xFFF
+
An instance of an XFProtection object.
+Index into Book.xf_list
+A collection of the alignment and similar attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Alignment tab.
+
-- New in version 0.6.1
+
Values: section 6.115 (p 214) of OOo docs
+A number in range(15).
+Values: section 6.115 (p 215) of OOo docs.
+Note: file versions BIFF7 and earlier use the documented
+"orientation" attribute; this will be mapped (without loss)
+into "rotation".
+
1 = shrink font size to fit text into cell.
+0 = according to context; 1 = left-to-right; 2 = right-to-left
+1 = text is wrapped at right margin
+Values: section 6.115 (p 215) of OOo docs
+A collection of the background-related attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Patterns tab.
+An explanation of "colour index" is given in the Formatting
+section at the start of this document.
+
-- New in version 0.6.1
+
See section 3.11 of the OOo docs.
+See section 3.11 of the OOo docs.
+See section 3.11 of the OOo docs.
+A collection of the border-related attributes of an XF record. +Items correspond to those in the Excel UI's Format/Cells/Border tab.
+An explanations of "colour index" is given in the Formatting +section at the start of this document. +There are five line style attributes; possible values and the +associated meanings are: +0 = No line, +1 = Thin, +2 = Medium, +3 = Dashed, +4 = Dotted, +5 = Thick, +6 = Double, +7 = Hair, +8 = Medium dashed, +9 = Thin dash-dotted, +10 = Medium dash-dotted, +11 = Thin dash-dot-dotted, +12 = Medium dash-dot-dotted, +13 = Slanted medium dash-dotted. +The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only. +For pictures of the line styles, refer to OOo docs s3.10 (p22) +"Line Styles for Cell Borders (BIFF3-BIFF8)".
+The colour index for the cell's bottom line
+The line style for the cell's bottom line
+The colour index for the cell's diagonal lines, if any
+1 = draw a diagonal from top left to bottom right
+The line style for the cell's diagonal lines, if any
+1 = draw a diagonal from bottom left to top right
+The colour index for the cell's left line
+The line style for the cell's left line
+The colour index for the cell's right line
+The line style for the cell's right line
+The colour index for the cell's top line
+The line style for the cell's top line
+A collection of the protection-related attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Protection tab.
+Note the OOo docs include the "cell or style" bit
+in this bundle of attributes.
+This is incorrect; the bit is used in determining which bundles to use.
+
-- New in version 0.6.1
+
1 = Cell is prevented from being changed, moved, resized, or deleted +(only if the sheet is protected).
+1 = Hide formula so that it doesn't appear in the formula bar when +the cell is selected (only if the sheet is protected).
+Dj_rJeq^}#ifbpuu5YHJKI0o-nN-Hz6qlGd|aA|D6cSRrm3N!vB}Yi
zT( Copyright © 2006 Stephen John Machin, Lingfo Pty Ltd This module is part of the xlrd package, which is released under a BSD-style licence. Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd This module is part of the xlrd package, which is released under
+# a BSD-style licence. A collection of the border-related attributes of an XF record.
+# Items correspond to those in the Excel UI's Format/Cells/Border tab. An explanations of "colour index" is given in the Formatting
+# section at the start of this document.
+# There are five line style attributes; possible values and the
+# associated meanings are:
+# 0 = No line,
+# 1 = Thin,
+# 2 = Medium,
+# 3 = Dashed,
+# 4 = Dotted,
+# 5 = Thick,
+# 6 = Double,
+# 7 = Hair,
+# 8 = Medium dashed,
+# 9 = Thin dash-dotted,
+# 10 = Medium dash-dotted,
+# 11 = Thin dash-dot-dotted,
+# 12 = Medium dash-dot-dotted,
+# 13 = Slanted medium dash-dotted.
+# The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only.
+# For pictures of the line styles, refer to OOo docs s3.10 (p22)
+# "Line Styles for Cell Borders (BIFF3-BIFF8)". Each of the 6 flags below describes the validity of
+# a specific group of attributes.
+#
+ # Warning: OOo docs on the XF record call this "Index to FORMAT record".
+ # It is not an index in the Python sense. It is a key to a map.
+ # It is true only for Excel 4.0 and earlier files
+ # that the key into format_map from an XF instance
+ # is the same as the index into format_list, and only
+ # if the index is less than 164.
+ # Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd This module is part of the xlrd package, which is released under
+# a BSD-style licence.
-- New in version 0.6.1
+class Font(BaseObject, EqNeAttrs):
+ ##
+ # 1 = Characters are bold. Redundant; see "weight" attribute.
+ bold = 0
+ ##
+ # Values: 0 = ANSI Latin, 1 = System default, 2 = Symbol,
+ # 77 = Apple Roman,
+ # 128 = ANSI Japanese Shift-JIS,
+ # 129 = ANSI Korean (Hangul),
+ # 130 = ANSI Korean (Johab),
+ # 134 = ANSI Chinese Simplified GBK,
+ # 136 = ANSI Chinese Traditional BIG5,
+ # 161 = ANSI Greek,
+ # 162 = ANSI Turkish,
+ # 163 = ANSI Vietnamese,
+ # 177 = ANSI Hebrew,
+ # 178 = ANSI Arabic,
+ # 186 = ANSI Baltic,
+ # 204 = ANSI Cyrillic,
+ # 222 = ANSI Thai,
+ # 238 = ANSI Latin II (Central European),
+ # 255 = OEM Latin I
+ character_set = 0
+ ##
+ # An explanation of "colour index" is given in the Formatting
+ # section at the start of this document.
+ colour_index = 0
+ ##
+ # 1 = Superscript, 2 = Subscript.
+ escapement = 0
+ ##
+ # 0 = None (unknown or don't care)
+ # 1 = Roman (variable width, serifed)
+ # 2 = Swiss (variable width, sans-serifed)
+ # 3 = Modern (fixed width, serifed or sans-serifed)
+ # 4 = Script (cursive)
+ # 5 = Decorative (specialised, for example Old English, Fraktur)
+ family = 0
+ ##
+ # The 0-based index used to refer to this Font() instance.
+ # Note that index 4 is never used; xlrd supplies a dummy place-holder.
+ font_index = 0
+ ##
+ # Height of the font (in twips). A twip = 1/20 of a point.
+ height = 0
+ ##
+ # 1 = Characters are italic.
+ italic = 0
+ ##
+ # The name of the font. Example: u"Arial"
+ name = u""
+ ##
+ # 1 = Characters are struck out.
+ struck_out = 0
+ ##
+ # 0 = None
+ # 1 = Single; 0x21 (33) = Single accounting
+ # 2 = Double; 0x22 (34) = Double accounting
+ underline_type = 0
+ ##
+ # 1 = Characters are underlined. Redundant; see "underline_type" attribute.
+ underlined = 0
+ ##
+ # Font weight (100-1000). Standard values are 400 for normal text
+ # and 700 for bold text.
+ weight = 400
+ ##
+ # 1 = Font is outline style (Macintosh only)
+ outline = 0
+ ##
+ # 1 = Font is shadow style (Macintosh only)
+ shadow = 0
+
+ # No methods ...
+
+def handle_efont(book, data): # BIFF2 only
+ if not book.formatting_info:
+ return
+ book.font_list[-1].colour_index = unpack('
-- New in version 0.6.1
+class Format(BaseObject, EqNeAttrs):
+ ##
+ # The key into Book.format_map
+ format_key = 0
+ ##
+ # A classification that has been inferred from the format string.
+ # Currently, this is used only to distinguish between numbers and dates.
+ #
Values:
+ #
FUN = 0 # unknown
+ #
FDT = 1 # date
+ #
FNU = 2 # number
+ #
FGE = 3 # general
+ #
FTX = 4 # text
+ type = FUN
+ ##
+ # The format string
+ format_str = u''
+
+ def __init__(self, format_key, ty, format_str):
+ self.format_key = format_key
+ self.type = ty
+ self.format_str = format_str
+
+std_format_strings = {
+ # "std" == "standard for US English locale"
+ # #### TODO ... a lot of work to tailor these to the user's locale.
+ # See e.g. gnumeric-1.x.y/src/formats.c
+ 0x00: "General",
+ 0x01: "0",
+ 0x02: "0.00",
+ 0x03: "#,##0",
+ 0x04: "#,##0.00",
+ 0x05: "$#,##0_);($#,##0)",
+ 0x06: "$#,##0_);[Red]($#,##0)",
+ 0x07: "$#,##0.00_);($#,##0.00)",
+ 0x08: "$#,##0.00_);[Red]($#,##0.00)",
+ 0x09: "0%",
+ 0x0a: "0.00%",
+ 0x0b: "0.00E+00",
+ 0x0c: "# ?/?",
+ 0x0d: "# ??/??",
+ 0x0e: "m/d/yy",
+ 0x0f: "d-mmm-yy",
+ 0x10: "d-mmm",
+ 0x11: "mmm-yy",
+ 0x12: "h:mm AM/PM",
+ 0x13: "h:mm:ss AM/PM",
+ 0x14: "h:mm",
+ 0x15: "h:mm:ss",
+ 0x16: "m/d/yy h:mm",
+ 0x25: "#,##0_);(#,##0)",
+ 0x26: "#,##0_);[Red](#,##0)",
+ 0x27: "#,##0.00_);(#,##0.00)",
+ 0x28: "#,##0.00_);[Red](#,##0.00)",
+ 0x29: "_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)",
+ 0x2a: "_($* #,##0_);_($* (#,##0);_($* \"-\"_);_(@_)",
+ 0x2b: "_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)",
+ 0x2c: "_($* #,##0.00_);_($* (#,##0.00);_($* \"-\"??_);_(@_)",
+ 0x2d: "mm:ss",
+ 0x2e: "[h]:mm:ss",
+ 0x2f: "mm:ss.0",
+ 0x30: "##0.0E+0",
+ 0x31: "@",
+ }
+
+fmt_code_ranges = [ # both-inclusive ranges of "standard" format codes
+ # Source: the openoffice.org doc't
+ ( 0, 0, FGE),
+ ( 1, 13, FNU),
+ (14, 22, FDT),
+ #### (27, 36, FDT), # Japanese dates -- not sure of reliability of this
+ (37, 44, FNU),
+ (45, 47, FDT),
+ (48, 48, FNU),
+ (49, 49, FTX),
+ ####(50, 58, FDT), # Japanese dates -- but Gnumeric assumes
+ # built-in formats finish at 49, not at 163
+ ]
+
+std_format_code_types = {}
+for lo, hi, ty in fmt_code_ranges:
+ for x in xrange(lo, hi+1):
+ std_format_code_types[x] = ty
+del lo, hi, ty, x
+
+date_chars = u'ymdhs' # year, month/minute, day, hour, second
+date_char_dict = {}
+for _c in date_chars + date_chars.upper():
+ date_char_dict[_c] = 5
+del _c, date_chars
+
+skip_char_dict = {}
+for _c in u'$-+/(): ':
+ skip_char_dict[_c] = 1
+
+num_char_dict = {
+ u'0': 5,
+ u'#': 5,
+ u'?': 5,
+ }
+
+non_date_formats = {
+ u'0.00E+00':1,
+ u'##0.0E+0':1,
+ u'General' :1,
+ u'GENERAL' :1, # OOo Calc 1.1.4 does this.
+ u'general' :1, # pyExcelerator 0.6.3 does this.
+ u'@' :1,
+ }
+
+fmt_bracketed_sub = re.compile(r'\[[^]]*\]').sub
+
+# Boolean format strings (actual cases)
+# u'"Yes";"Yes";"No"'
+# u'"True";"True";"False"'
+# u'"On";"On";"Off"'
+
+def is_date_format_string(book, fmt):
+ # Heuristics:
+ # Ignore "text" and [stuff in square brackets (aarrgghh -- see below)].
+ # Handle backslashed-escaped chars properly.
+ # E.g. hh\hmm\mss\s should produce a display like 23h59m59s
+ # Date formats have one or more of ymdhs (caseless) in them.
+ # Numeric formats have # and 0.
+ # N.B. u'General"."' hence get rid of "text" first.
+ # TODO: Find where formats are interpreted in Gnumeric
+ # TODO: u'[h]\\ \\h\\o\\u\\r\\s' ([h] means don't care about hours > 23)
+ state = 0
+ s = ''
+ ignorable = skip_char_dict.has_key
+ for c in fmt:
+ if state == 0:
+ if c == u'"':
+ state = 1
+ elif c in ur"\_*":
+ state = 2
+ elif ignorable(c):
+ pass
+ else:
+ s += c
+ elif state == 1:
+ if c == u'"':
+ state = 0
+ elif state == 2:
+ # Ignore char after backslash, underscore or asterisk
+ state = 0
+ assert 0 <= state <= 2
+ if book.verbosity >= 4:
+ print "is_date_format_string: reduced format is %r" % s
+ s = fmt_bracketed_sub('', s)
+ if non_date_formats.has_key(s):
+ return False
+ state = 0
+ separator = ";"
+ got_sep = 0
+ date_count = num_count = 0
+ for c in s:
+ if date_char_dict.has_key(c):
+ date_count += date_char_dict[c]
+ elif num_char_dict.has_key(c):
+ num_count += num_char_dict[c]
+ elif c == separator:
+ got_sep = 1
+ # print num_count, date_count, repr(fmt)
+ if date_count and not num_count:
+ return True
+ if num_count and not date_count:
+ return False
+ if date_count:
+ fprintf(book.logfile,
+ 'WARNING *** is_date_format: ambiguous d=%d n=%d fmt=%r\n',
+ date_count, num_count, fmt)
+ elif not got_sep:
+ fprintf(book.logfile,
+ "WARNING *** format %r produces constant result\n",
+ fmt)
+ return date_count > num_count
+
+def handle_format(self, data, rectype=XL_FORMAT):
+ DEBUG = 0
+ bv = self.biff_version
+ if rectype == XL_FORMAT2:
+ bv = min(bv, 30)
+ if not self.encoding:
+ self.derive_encoding()
+ strpos = 2
+ if bv >= 50:
+ fmtkey = unpack('
-- New in version 0.6.1
+class XFBorder(BaseObject, EqNeAttrs):
+
+ ##
+ # The colour index for the cell's top line
+ top_colour_index = 0
+ ##
+ # The colour index for the cell's bottom line
+ bottom_colour_index = 0
+ ##
+ # The colour index for the cell's left line
+ left_colour_index = 0
+ ##
+ # The colour index for the cell's right line
+ right_colour_index = 0
+ ##
+ # The colour index for the cell's diagonal lines, if any
+ diag_colour_index = 0
+ ##
+ # The line style for the cell's top line
+ top_line_style = 0
+ ##
+ # The line style for the cell's bottom line
+ bottom_line_style = 0
+ ##
+ # The line style for the cell's left line
+ left_line_style = 0
+ ##
+ # The line style for the cell's right line
+ right_line_style = 0
+ ##
+ # The line style for the cell's diagonal lines, if any
+ diag_line_style = 0
+ ##
+ # 1 = draw a diagonal from top left to bottom right
+ diag_down = 0
+ ##
+ # 1 = draw a diagonal from bottom left to top right
+ diag_up = 0
+
+##
+# A collection of the background-related attributes of an XF record.
+# Items correspond to those in the Excel UI's Format/Cells/Patterns tab.
+# An explanation of "colour index" is given in the Formatting
+# section at the start of this document.
+#
-- New in version 0.6.1
+class XFBackground(BaseObject, EqNeAttrs):
+
+ ##
+ # See section 3.11 of the OOo docs.
+ fill_pattern = 0
+ ##
+ # See section 3.11 of the OOo docs.
+ background_colour_index = 0
+ ##
+ # See section 3.11 of the OOo docs.
+ pattern_colour_index = 0
+
+##
+# A collection of the alignment and similar attributes of an XF record.
+# Items correspond to those in the Excel UI's Format/Cells/Alignment tab.
+#
-- New in version 0.6.1
+
+class XFAlignment(BaseObject, EqNeAttrs):
+
+ ##
+ # Values: section 6.115 (p 214) of OOo docs
+ hor_align = 0
+ ##
+ # Values: section 6.115 (p 215) of OOo docs
+ vert_align = 0
+ ##
+ # Values: section 6.115 (p 215) of OOo docs.
+ # Note: file versions BIFF7 and earlier use the documented
+ # "orientation" attribute; this will be mapped (without loss)
+ # into "rotation".
+ rotation = 0
+ ##
+ # 1 = text is wrapped at right margin
+ text_wrapped = 0
+ ##
+ # A number in range(15).
+ indent_level = 0
+ ##
+ # 1 = shrink font size to fit text into cell.
+ shrink_to_fit = 0
+ ##
+ # 0 = according to context; 1 = left-to-right; 2 = right-to-left
+ text_direction = 0
+
+##
+# A collection of the protection-related attributes of an XF record.
+# Items correspond to those in the Excel UI's Format/Cells/Protection tab.
+# Note the OOo docs include the "cell or style" bit
+# in this bundle of attributes.
+# This is incorrect; the bit is used in determining which bundles to use.
+#
-- New in version 0.6.1
+
+class XFProtection(BaseObject, EqNeAttrs):
+
+ ##
+ # 1 = Cell is prevented from being changed, moved, resized, or deleted
+ # (only if the sheet is protected).
+ cell_locked = 0
+ ##
+ # 1 = Hide formula so that it doesn't appear in the formula bar when
+ # the cell is selected (only if the sheet is protected).
+ formula_hidden = 0
+
+##
+# eXtended Formatting information for cells, rows, columns and styles.
+#
-- New in version 0.6.1
+#
+#
+# In cell XFs, flag==0 means the attributes of the parent style XF are used,
+# (but only if the attributes are valid there); flag==1 means the attributes
+# of this XF are used.
+# In style XFs, flag==0 means the attribute setting is valid; flag==1 means
+# the attribute should be ignored.
+# Note that the API
+# provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs
+# have had the above inheritance mechanism applied.
+#
+ # style XF: 0xFFF
+ parent_style_index = 0
+ ##
+ #
+ _format_flag = 0
+ ##
+ #
+ _font_flag = 0
+ ##
+ #
+ _alignment_flag = 0
+ ##
+ #
+ _border_flag = 0
+ ##
+ #
+ _background_flag = 0
+ ##
+ #
+ _protection_flag = 0
+ ##
+ # Index into Book.xf_list
+ xf_index = 0
+ ##
+ # Index into Book.font_list
+ font_index = 0
+ ##
+ # Key into Book.format_map
+ #
| Kind symbol | +#Kind number | +#Value representation | +#
|---|---|---|
| oBOOL | +#3 | +#integer: 0 => False; 1 => True | +#
| oERR | +#4 | +#None, or an int error code (same as XL_CELL_ERROR in the Cell class). +# | +#
| oMSNG | +#5 | +#Used by Excel as a placeholder for a missing (not supplied) function +# argument. Should *not* appear as a final formula result. Value is None. | +#
| oNUM | +#2 | +#A float. Note that there is no way of distinguishing dates. | +#
| oREF | +#-1 | +#The value is either None or a non-empty list of
+# absolute Ref3D instances. +# |
+#
| oREL | +#-2 | +#The value is None or a non-empty list of +# fully or partially relative Ref3D instances. +# | +#
| oSTRG | +#1 | +#A Unicode string. | +#
| oUNK | +#0 | +#The kind is unknown or ambiguous. The value is None | +#
Represents an absolute or relative 3-dimensional reference to a box
+# of one or more cells.
+# -- New in version 0.6.0
+#
The coords attribute is a tuple of the form:
+# (shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)
+# where 0 <= thingxlo <= thingx < thingxhi.
+# Note that it is quite possible to have thingx > nthings; for example
+# Print_Titles could have colxhi == 256 and/or rowxhi == 65536
+# irrespective of how many columns/rows are actually used in the worksheet.
+# The caller will need to decide how to handle this situation.
+# Keyword: IndexError :-)
+#
The components of the coords attribute are also available as individual +# attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.
+# +#The relflags attribute is a 6-tuple of flags which indicate whether
+# the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0). Portions copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd This module is part of the xlrd package, which is released under a BSD-style licence. Contains the data for one worksheet. In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a
+# column index, counting from zero.
+# Negative values for row/column indexes and slice positions are supported in the expected fashion. For information about cell types and cell values, refer to the documentation of the Cell class. WARNING: You don't call this class yourself. You access Sheet objects via the Book object that
+# was returned when you called xlrd.open_workbook("myfile.xls").
+# Note that there is necessarily no information available as to what cell(s)
+# the reference could possibly be relative to. The caller must decide what if
+# any use to make of oREL operands. Note also that a partially relative
+# reference may well be a typo.
+# For example, define name A1Z10 as $a$1:$z10 (missing $ after z)
+# while the cursor is on cell Sheet3!A27.
+# The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26)
+# and relflags = (0, 0, 0, 1, 0, 0).
+# So far, only one possibility of a sheet-relative component in
+# a reference has been noticed: a 2D reference located in the "current sheet".
+#
This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...).
+
+class Ref3D(_ref3d_base):
+
+ def __init__(self, atuple):
+ self.coords = atuple[0:6]
+ self.relflags = atuple[6:12]
+ if not self.relflags:
+ self.relflags = (0, 0, 0, 0, 0, 0)
+ (self.shtxlo, self.shtxhi,
+ self.rowxlo, self.rowxhi,
+ self.colxlo, self.colxhi) = self.coords
+
+ def __repr__(self):
+ if not self.relflags or self.relflags == (0, 0, 0, 0, 0, 0):
+ return "Ref3D(coords=%r)" % (self.coords, )
+ else:
+ return "Ref3D(coords=%r, relflags=%r)" \
+ % (self.coords, self.relflags)
+
+tAdd = 0x03
+tSub = 0x04
+tMul = 0x05
+tDiv = 0x06
+tPower = 0x07
+tConcat = 0x08
+tLT, tLE, tEQ, tGE, tGT, tNE = range(0x09, 0x0F)
+
+import operator as opr
+
+def nop(x):
+ return x
+
+def _opr_pow(x, y): return x ** y
+
+def _opr_lt(x, y): return x < y
+def _opr_le(x, y): return x <= y
+def _opr_eq(x, y): return x == y
+def _opr_ge(x, y): return x >= y
+def _opr_gt(x, y): return x > y
+def _opr_ne(x, y): return x != y
+
+def num2strg(num):
+ """Attempt to emulate Excel's default conversion
+ from number to string.
+ """
+ s = str(num)
+ if s.endswith(".0"):
+ s = s[:-2]
+ return s
+
+_arith_argdict = {oNUM: nop, oSTRG: float}
+_cmp_argdict = {oNUM: nop, oSTRG: nop}
+# Seems no conversions done on relops; in Excel, "1" > 9 produces TRUE.
+_strg_argdict = {oNUM:num2strg, oSTRG:nop}
+binop_rules = {
+ tAdd: (_arith_argdict, oNUM, opr.add, 30, '+'),
+ tSub: (_arith_argdict, oNUM, opr.sub, 30, '-'),
+ tMul: (_arith_argdict, oNUM, opr.mul, 40, '*'),
+ tDiv: (_arith_argdict, oNUM, opr.div, 40, '/'),
+ tPower: (_arith_argdict, oNUM, _opr_pow, 50, '^',),
+ tConcat:(_strg_argdict, oSTRG, opr.add, 20, '&'),
+ tLT: (_cmp_argdict, oBOOL, _opr_lt, 10, '<'),
+ tLE: (_cmp_argdict, oBOOL, _opr_le, 10, '<='),
+ tEQ: (_cmp_argdict, oBOOL, _opr_eq, 10, '='),
+ tGE: (_cmp_argdict, oBOOL, _opr_ge, 10, '>='),
+ tGT: (_cmp_argdict, oBOOL, _opr_gt, 10, '>'),
+ tNE: (_cmp_argdict, oBOOL, _opr_ne, 10, '<>'),
+ }
+
+unop_rules = {
+ 0x13: (lambda x: -x, 70, '-', ''), # unary minus
+ 0x12: (lambda x: x, 70, '+', ''), # unary plus
+ 0x14: (lambda x: x / 100.0, 60, '', '%'),# percent
+ }
+
+LEAF_RANK = 90
+FUNC_RANK = 90
+
+STACK_ALARM_LEVEL = 5
+STACK_PANIC_LEVEL = 10
+
+def evaluate_name_formula(bk, nobj, namex, blah=0, level=0):
+ if level > STACK_ALARM_LEVEL:
+ blah = 1
+ data = nobj.raw_formula
+ fmlalen = nobj.basic_formula_len
+ bv = bk.biff_version
+ reldelta = 1 # All defined name formulas use "Method B" [OOo docs]
+ if blah:
+ print "::: evaluate_name_formula %r %r %d %d %r level=%d" \
+ % (namex, nobj.name, fmlalen, bv, data, level)
+ hex_char_dump(data, 0, fmlalen)
+ if level > STACK_PANIC_LEVEL:
+ raise XLRDError("Excessive indirect references in NAME formula")
+ sztab = szdict[bv]
+ pos = 0
+ stack = []
+ any_rel = 0
+ any_err = 0
+ any_external = 0
+ unk_opnd = Operand(oUNK, None)
+ error_opnd = Operand(oERR, None)
+ spush = stack.append
+
+ def do_binop(opcd, stk):
+ assert len(stk) >= 2
+ bop = stk.pop()
+ aop = stk.pop()
+ argdict, result_kind, func, rank, sym = binop_rules[opcd]
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ resop = Operand(result_kind, None, rank, otext)
+ try:
+ bconv = argdict[bop.kind]
+ aconv = argdict[aop.kind]
+ except KeyError:
+ stk.append(resop)
+ return
+ if bop.value is None or aop.value is None:
+ stk.append(resop)
+ return
+ bval = bconv(bop.value)
+ aval = aconv(aop.value)
+ result = func(aval, bval)
+ if result_kind == oBOOL:
+ result = intbool(result) # -> 1 or 0
+ resop.value = result
+ stk.append(resop)
+
+ def do_unaryop(opcode, arglist, result_kind, stk):
+ assert len(stk) >= 1
+ aop = stk.pop()
+ assert aop.kind in arglist
+ val = aop.value
+ func, rank, sym1, sym2 = unop_rules[opcode]
+ otext = ''.join([
+ sym1,
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym2,
+ ])
+ if val is not None:
+ val = func(val)
+ stk.append(Operand(result_kind, val, rank, otext))
+
+ def not_in_name_formula(op_arg, oname_arg):
+ msg = "ERROR *** Token 0x%02x (%s) found in NAME formula" \
+ % (op_arg, oname_arg)
+ raise FormulaError(msg)
+
+ if fmlalen == 0:
+ stack = [unk_opnd]
+
+ while 0 <= pos < fmlalen:
+ op = ord(data[pos])
+ opcode = op & 0x1f
+ optype = (op & 0x60) >> 5
+ if optype:
+ opx = opcode + 32
+ else:
+ opx = opcode
+ oname = onames[opx] # + [" RVA"][optype]
+ sz = sztab[opx]
+ if blah:
+ print "Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \
+ % (pos, op, oname, sz, opcode, optype)
+ print "Stack =", stack
+ if sz == -2:
+ msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \
+ % (op, oname, bv)
+ raise FormulaError(msg)
+ if not optype:
+ if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl
+ not_in_name_formula(op, oname)
+ elif 0x03 <= opcode <= 0x0E:
+ # Add, Sub, Mul, Div, Power
+ # tConcat
+ # tLT, ..., tNE
+ do_binop(opcode, stack)
+ elif opcode == 0x0F: # tIsect
+ if blah: print >> bk.logfile, "tIsect pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ' '
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF)
+ res.text = otext
+ if bop.kind == oERR or aop.kind == oERR:
+ res.kind = oERR
+ elif bop.kind == oUNK or aop.kind == oUNK:
+ # This can happen with undefined
+ # (go search in the current sheet) labels.
+ # For example =Bob Sales
+ # Each label gets a NAME record with an empty formula (!)
+ # Evaluation of the tName token classifies it as oUNK
+ # res.kind = oREF
+ pass
+ elif bop.kind == oREF == aop.kind:
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) == 1
+ assert len(bop.value) == 1
+ coords = do_box_funcs(
+ tIsectFuncs, aop.value[0], bop.value[0])
+ res.value = [Ref3D(coords)]
+ elif bop.kind == oREL == aop.kind:
+ res.kind = oREL
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) == 1
+ assert len(bop.value) == 1
+ coords = do_box_funcs(
+ tIsectFuncs, aop.value[0], bop.value[0])
+ relfa = aop.value[0].relflags
+ relfb = bop.value[0].relflags
+ if relfa == relfb:
+ res.value = [Ref3D(coords + relfa)]
+ else:
+ pass
+ spush(res)
+ if blah: print >> bk.logfile, "tIsect post", stack
+ elif opcode == 0x10: # tList
+ if blah: print >> bk.logfile, "tList pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ','
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF, None, rank, otext)
+ if bop.kind == oERR or aop.kind == oERR:
+ res.kind = oERR
+ elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL):
+ res.kind = oREF
+ if aop.kind == oREL or bop.kind == oREL:
+ res.kind = oREL
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) >= 1
+ assert len(bop.value) == 1
+ res.value = aop.value + bop.value
+ else:
+ pass
+ spush(res)
+ if blah: print >> bk.logfile, "tList post", stack
+ elif opcode == 0x11: # tRange
+ if blah: print >> bk.logfile, "tRange pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ':'
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF, None, rank, otext)
+ if bop.kind == oERR or aop.kind == oERR:
+ res = oERR
+ elif bop.kind == oREF == aop.kind:
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) == 1
+ assert len(bop.value) == 1
+ coords = do_box_funcs(
+ tRangeFuncs, aop.value[0], bop.value[0])
+ res.value = [Ref3D(coords)]
+ elif bop.kind == oREL == aop.kind:
+ res.kind = oREL
+ if aop.value is not None and bop.value is not None:
+ assert len(aop.value) == 1
+ assert len(bop.value) == 1
+ coords = do_box_funcs(
+ tRangeFuncs, aop.value[0], bop.value[0])
+ relfa = aop.value[0].relflags
+ relfb = bop.value[0].relflags
+ if relfa == relfb:
+ res.value = [Ref3D(coords + relfa)]
+ else:
+ pass
+ spush(res)
+ if blah: print >> bk.logfile, "tRange post", stack
+ elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent
+ do_unaryop(opcode, (oUNK, oNUM,), oNUM, stack)
+ elif opcode == 0x15: # tParen
+ # source cosmetics
+ pass
+ elif opcode == 0x16: # tMissArg
+ spush(Operand(oMSNG, None, LEAF_RANK, ''))
+ elif opcode == 0x17: # tStr
+ if bv <= 70:
+ strg, newpos = unpack_string_update_pos(
+ data, pos+1, bk.encoding, lenlen=1)
+ else:
+ strg, newpos = unpack_unicode_update_pos(
+ data, pos+1, lenlen=1)
+ sz = newpos - pos
+ if blah: print >> bk.logfile, " sz=%d strg=%r" % (sz, strg)
+ text = '"' + strg.replace('"', '""') + '"'
+ spush(Operand(oSTRG, strg, LEAF_RANK, text))
+ elif opcode == 0x18: # tExtended
+ # new with BIFF 8
+ assert bv >= 80
+ # not in OOo docs
+ raise FormulaError("tExtended token not implemented")
+ elif opcode == 0x19: # tAttr
+ subop, nc = unpack("
Ref3D((1, 4, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20'
+def rangename3d(book, ref3d):
+ """ Ref3D(1, 4, 5, 20, 7, 10) => 'Sheet2:Sheet3!$H$6:$J$20'
+ (assuming Excel's default sheetnames) """
+ coords = ref3d.coords
+ return "%s!%s" % (
+ sheetrange(book, *coords[:2]),
+ rangename2d(*coords[2:6]))
+
+##
+# Utility function:
+#
Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1))
+# => 'Sheet1![@-13,#-32]:[@+12,#-23]'
+# where '@' refers to the current or base column and '#'
+# refers to the current or base row.
+def rangename3drel(book, ref3d):
+ coords = ref3d.coords
+ relflags = ref3d.relflags
+ shdesc = sheetrangerel(book, coords[:2], relflags[:2])
+ rngdesc = rangename2drel(coords[2:6], relflags[2:6])
+ if not shdesc:
+ return rngdesc
+ return "%s!%s" % (shdesc, rngdesc)
+
+def quotedsheetname(shnames, shx):
+ if shx >= 0:
+ shname = shnames[shx]
+ else:
+ shname = {
+ -1: "?internal; any sheet?",
+ -2: "internal; deleted sheet",
+ -3: "internal; macro sheet",
+ -4: "<
-- New in version 0.6.1
+ colinfo_map = {}
+
+ ##
+ # The map from a row index to a Rowinfo object. Note that it is possible
+ # to have missing entries -- at least one source of XLS files doesn't
+ # bother writing ROW records.
+ #
-- New in version 0.6.1
+ rowinfo_map = {}
+
+ ##
+ # List of address ranges of cells containing column labels.
+ # These are set up in Excel by Insert > Name > Labels > Columns.
+ #
-- New in version 0.6.0
+ #
How to deconstruct the list:
+ #
+ # for crange in thesheet.col_label_ranges:
+ # rlo, rhi, clo, chi = crange
+ # for rx in xrange(rlo, rhi):
+ # for cx in xrange(clo, chi):
+ # print "Column label at (rowx=%d, colx=%d) is %r" \
+ # (rx, cx, thesheet.cell_value(rx, cx))
+ #
+ col_label_ranges = []
+
+ ##
+ # List of address ranges of cells containing row labels.
+ # For more details, see col_label_ranges above.
+ #
-- New in version 0.6.0
+ row_label_ranges = []
+
+ ##
+ # List of address ranges of cells which have been merged.
+ # These are set up in Excel by Format > Cells > Alignment, then ticking
+ # the "Merge cells" box.
+ #
-- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+ #
How to deconstruct the list:
+ #
+ # for crange in thesheet.merged_cells:
+ # rlo, rhi, clo, chi = crange
+ # for rowx in xrange(rlo, rhi):
+ # for colx in xrange(clo, chi):
+ # # cell (rlo, clo) (the top left one) will carry the data
+ # # and formatting info; the remainder will be recorded as
+ # # blank cells, but a renderer will apply the formatting info
+ # # for the top left cell (e.g. border, pattern) to all cells in
+ # # the range.
+ #
+ merged_cells = []
+
+ ##
+ # Default column width from DEFCOLWIDTH record, else None.
+ # From the OOo docs:
+ # """Column width in characters, using the width of the zero character
+ # from default font (first FONT record in the file). Excel adds some
+ # extra space to the default width, depending on the default font and
+ # default font size. The algorithm how to exactly calculate the resulting
+ # column width is not known.
+ # Example: The default width of 8 set in this record results in a column
+ # width of 8.43 using Arial font with a size of 10 points."""
+ # For the default hierarchy, refer to the Colinfo class above.
+ #
-- New in version 0.6.1
+ defcolwidth = None
+
+ ##
+ # Default column width from STANDARDWIDTH record, else None.
+ # From the OOo docs:
+ # """Default width of the columns in 1/256 of the width of the zero
+ # character, using default font (first FONT record in the file)."""
+ # For the default hierarchy, refer to the Colinfo class above.
+ #
-- New in version 0.6.1
+ standardwidth = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_row_height = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_row_height_mismatch = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_row_hidden = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_additional_space_above = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the optional DEFAULTROWHEIGHT record.
+ default_additional_space_below = None
+
+ ##
+ # Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden
+ # by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden
+ # only by VBA macro).
+ visibility = 0
+
+ ##
+ # A 256-element tuple corresponding to the contents of the GCW record for this sheet.
+ # If no such record, treat as all bits zero.
+ # Applies to BIFF4-7 only. See docs of Colinfo class for discussion.
+ gcw = (0, ) * 256
+
+ def __init__(self, book, position, name, number):
+ self.book = book
+ self.biff_version = book.biff_version
+ self._position = position
+ self.logfile = book.logfile
+ self.pickleable = book.pickleable
+ self.dont_use_array = not(array_array and (CAN_PICKLE_ARRAY or not book.pickleable))
+ self.name = name
+ self.number = number
+ self.verbosity = book.verbosity
+ self.formatting_info = book.formatting_info
+ self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map
+ self.nrows = 0 # actual, including possibly empty cells
+ self.ncols = 0
+ self._maxdatarowx = -1 # highest rowx containing a non-empty cell
+ self._maxdatacolx = -1 # highest colx containing a non-empty cell
+ self._dimnrows = 0 # as per DIMENSIONS record
+ self._dimncols = 0
+ self._cell_values = []
+ self._cell_types = []
+ self._cell_xf_indexes = []
+ self._need_fix_ragged_rows = 0
+ self.defcolwidth = None
+ self.standardwidth = None
+ self.default_row_height = None
+ self.default_row_height_mismatch = 0
+ self.default_row_hidden = 0
+ self.default_additional_space_above = 0
+ self.default_additional_space_below = 0
+ self.colinfo_map = {}
+ self.rowinfo_map = {}
+ self.col_label_ranges = []
+ self.row_label_ranges = []
+ self.merged_cells = []
+ self._xf_index_stats = [0, 0, 0, 0]
+ self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record
+ for attr, defval in _WINDOW2_options:
+ setattr(self, attr, defval)
+ self.first_visible_rowx = 0
+ self.first_visible_colx = 0
+ self.gridline_colour_index = 0x40
+ self.gridline_colour_rgb = None # pre-BIFF8
+ self.cached_page_break_preview_mag_factor = 0
+ self.cached_normal_view_mag_factor = 0
+ self._ixfe = None # BIFF2 only
+ self._cell_attr_to_xfx = {} # BIFF2.0 only
+
+ #### Don't initialise this here, use class attribute initialisation.
+ #### self.gcw = (0, ) * 256 ####
+
+ if self.biff_version >= 80:
+ self.utter_max_rows = 65536
+ else:
+ self.utter_max_rows = 16384
+ self.utter_max_cols = 256
+
+ ##
+ # Cell object in the given row and column.
+ def cell(self, rowx, colx):
+ if self.formatting_info:
+ xfx = self.cell_xf_index(rowx, colx)
+ else:
+ xfx = None
+ return Cell(
+ self._cell_types[rowx][colx],
+ self._cell_values[rowx][colx],
+ xfx,
+ )
+
+ ##
+ # Value of the cell in the given row and column.
+ def cell_value(self, rowx, colx):
+ return self._cell_values[rowx][colx]
+
+ ##
+ # Type of the cell in the given row and column.
+ # Refer to the documentation of the Cell class.
+ def cell_type(self, rowx, colx):
+ return self._cell_types[rowx][colx]
+
+ ##
+ # XF index of the cell in the given row and column.
+ # This is an index into Book.xf_list.
+ #
-- New in version 0.6.1
+ def cell_xf_index(self, rowx, colx):
+ self.req_fmt_info()
+ xfx = self._cell_xf_indexes[rowx][colx]
+ if xfx > -1:
+ self._xf_index_stats[0] += 1
+ return xfx
+ # Check for a row xf_index
+ try:
+ xfx = self.rowinfo_map[rowx].xf_index
+ if xfx > -1:
+ self._xf_index_stats[1] += 1
+ return xfx
+ except KeyError:
+ pass
+ # Check for a column xf_index
+ try:
+ xfx = self.colinfo_map[colx].xf_index
+ assert xfx > -1
+ self._xf_index_stats[2] += 1
+ return xfx
+ except KeyError:
+ # If all else fails, 15 is used as hardwired global default xf_index.
+ self._xf_index_stats[3] += 1
+ return 15
+
+ ##
+ # Returns a sequence of the Cell objects in the given row.
+ def row(self, rowx):
+ return [
+ self.cell(rowx, colx)
+ for colx in xrange(self.ncols)
+ ]
+
+ ##
+ # Returns a slice of the types
+ # of the cells in the given row.
+ def row_types(self, rowx, start_colx=0, end_colx=None):
+ if end_colx is None:
+ return self._cell_types[rowx][start_colx:]
+ return self._cell_types[rowx][start_colx:end_colx]
+
+ ##
+ # Returns a slice of the values
+ # of the cells in the given row.
+ def row_values(self, rowx, start_colx=0, end_colx=None):
+ if end_colx is None:
+ return self._cell_values[rowx][start_colx:]
+ return self._cell_values[rowx][start_colx:end_colx]
+
+ ##
+ # Returns a slice of the Cell objects in the given row.
+ def row_slice(self, rowx, start_colx=0, end_colx=None):
+ nc = self.ncols
+ if start_colx < 0:
+ start_colx += nc
+ if start_colx < 0:
+ start_colx = 0
+ if end_colx is None or end_colx > nc:
+ end_colx = nc
+ elif end_colx < 0:
+ end_colx += nc
+ return [
+ self.cell(rowx, colx)
+ for colx in xrange(start_colx, end_colx)
+ ]
+
+ ##
+ # Returns a slice of the Cell objects in the given column.
+ def col_slice(self, colx, start_rowx=0, end_rowx=None):
+ nr = self.nrows
+ if start_rowx < 0:
+ start_rowx += nr
+ if start_rowx < 0:
+ start_rowx = 0
+ if end_rowx is None or end_rowx > nr:
+ end_rowx = nr
+ elif end_rowx < 0:
+ end_rowx += nr
+ return [
+ self.cell(rowx, colx)
+ for rowx in xrange(start_rowx, end_rowx)
+ ]
+
+ ##
+ # Returns a slice of the values of the cells in the given column.
+ def col_values(self, colx, start_rowx=0, end_rowx=None):
+ nr = self.nrows
+ if start_rowx < 0:
+ start_rowx += nr
+ if start_rowx < 0:
+ start_rowx = 0
+ if end_rowx is None or end_rowx > nr:
+ end_rowx = nr
+ elif end_rowx < 0:
+ end_rowx += nr
+ return [
+ self._cell_values[rowx][colx]
+ for rowx in xrange(start_rowx, end_rowx)
+ ]
+
+ ##
+ # Returns a slice of the types of the cells in the given column.
+ def col_types(self, colx, start_rowx=0, end_rowx=None):
+ nr = self.nrows
+ if start_rowx < 0:
+ start_rowx += nr
+ if start_rowx < 0:
+ start_rowx = 0
+ if end_rowx is None or end_rowx > nr:
+ end_rowx = nr
+ elif end_rowx < 0:
+ end_rowx += nr
+ return [
+ self._cell_types[rowx][colx]
+ for rowx in xrange(start_rowx, end_rowx)
+ ]
+
+ ##
+ # Returns a sequence of the Cell objects in the given column.
+ def col(self, colx):
+ return self.col_slice(colx)
+ # Above two lines just for the docs. Here's the real McCoy:
+ col = col_slice
+
+ # === Following methods are used in building the worksheet.
+ # === They are not part of the API.
+
+ def extend_cells(self, nr, nc):
+ # print "extend_cells_2", self.nrows, self.ncols, nr, nc
+ assert 1 <= nc <= self.utter_max_cols
+ assert 1 <= nr <= self.utter_max_rows
+ if nr <= self.nrows:
+ # New cell is in an existing row, so extend that row (if necessary).
+ # Note that nr < self.nrows means that the cell data
+ # is not in ascending row order!!
+ self._need_fix_ragged_rows = 1
+ nrx = nr - 1
+ trow = self._cell_types[nrx]
+ tlen = len(trow)
+ nextra = max(nc, self.ncols) - tlen
+ if nextra > 0:
+ xce = XL_CELL_EMPTY
+ if self.dont_use_array:
+ trow.extend([xce] * nextra)
+ if self.formatting_info:
+ self._cell_xf_indexes[nrx].extend([-1] * nextra)
+ else:
+ aa = array_array
+ trow.extend(aa('B', [xce]) * nextra)
+ if self.formatting_info:
+ self._cell_xf_indexes[nrx].extend(aa('h', [-1]) * nextra)
+ self._cell_values[nrx].extend([''] * nextra)
+ if nc > self.ncols:
+ self.ncols = nc
+ self._need_fix_ragged_rows = 1
+ if nr > self.nrows:
+ scta = self._cell_types.append
+ scva = self._cell_values.append
+ scxa = self._cell_xf_indexes.append
+ fmt_info = self.formatting_info
+ xce = XL_CELL_EMPTY
+ nc = self.ncols
+ if self.dont_use_array:
+ for _unused in xrange(self.nrows, nr):
+ scta([xce] * nc)
+ scva([''] * nc)
+ if fmt_info:
+ scxa([-1] * nc)
+ else:
+ aa = array_array
+ for _unused in xrange(self.nrows, nr):
+ scta(aa('B', [xce]) * nc)
+ scva([''] * nc)
+ if fmt_info:
+ scxa(aa('h', [-1]) * nc)
+ self.nrows = nr
+
+ def fix_ragged_rows(self):
+ t0 = time.time()
+ ncols = self.ncols
+ xce = XL_CELL_EMPTY
+ aa = array_array
+ s_cell_types = self._cell_types
+ s_cell_values = self._cell_values
+ s_cell_xf_indexes = self._cell_xf_indexes
+ s_dont_use_array = self.dont_use_array
+ s_fmt_info = self.formatting_info
+ totrowlen = 0
+ for rowx in xrange(self.nrows):
+ trow = s_cell_types[rowx]
+ rlen = len(trow)
+ totrowlen += rlen
+ nextra = ncols - rlen
+ if nextra > 0:
+ s_cell_values[rowx][rlen:] = [''] * nextra
+ if s_dont_use_array:
+ trow[rlen:] = [xce] * nextra
+ if s_fmt_info:
+ s_cell_xf_indexes[rowx][rlen:] = [-1] * nextra
+ else:
+ trow.extend(aa('B', [xce]) * nextra)
+ if s_fmt_info:
+ s_cell_xf_indexes[rowx][rlen:] = aa('h', [-1]) * nextra
+ self._fix_ragged_rows_time = time.time() - t0
+ if 0 and self.nrows:
+ avgrowlen = float(totrowlen) / self.nrows
+ print >> self.logfile, \
+ "sheet %d: avg row len %.1f; max row len %d" \
+ % (self.number, avgrowlen, self.ncols)
+
+ def tidy_dimensions(self):
+ if self.verbosity >= 3:
+ fprintf(self.logfile,
+ "tidy_dimensions: nrows=%d ncols=%d _need_fix_ragged_rows=%d\n",
+ self.nrows, self.ncols, self._need_fix_ragged_rows,
+ )
+ if 1 and self.merged_cells:
+ nr = nc = 0
+ umaxrows = self.utter_max_rows
+ umaxcols = self.utter_max_cols
+ for crange in self.merged_cells:
+ rlo, rhi, clo, chi = crange
+ if not (0 <= rlo < rhi <= umaxrows) \
+ or not (0 <= clo < chi <= umaxcols):
+ fprintf(self.logfile,
+ "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n",
+ self.number, self.name, crange)
+ if rhi > nr: nr = rhi
+ if chi > nc: nc = chi
+ self.extend_cells(nr, nc)
+ if self.verbosity >= 1 \
+ and (self.nrows != self._dimnrows or self.ncols != self._dimncols):
+ fprintf(self.logfile,
+ "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n",
+ self.number,
+ self.name,
+ self._dimnrows,
+ self._dimncols,
+ self.nrows,
+ self.ncols,
+ )
+ if self._need_fix_ragged_rows:
+ self.fix_ragged_rows()
+
+ def put_cell(self, rowx, colx, ctype, value, xf_index):
+ try:
+ self._cell_types[rowx][colx] = ctype
+ self._cell_values[rowx][colx] = value
+ if self.formatting_info:
+ self._cell_xf_indexes[rowx][colx] = xf_index
+ except IndexError:
+ # print >> self.logfile, "put_cell extending", rowx, colx
+ self.extend_cells(rowx+1, colx+1)
+ try:
+ self._cell_types[rowx][colx] = ctype
+ self._cell_values[rowx][colx] = value
+ if self.formatting_info:
+ self._cell_xf_indexes[rowx][colx] = xf_index
+ except:
+ print >> self.logfile, "put_cell", rowx, colx
+ raise
+ except:
+ print >> self.logfile, "put_cell", rowx, colx
+ raise
+
+ def put_blank_cell(self, rowx, colx, xf_index):
+ # This is used for cells from BLANK and MULBLANK records
+ ctype = XL_CELL_BLANK
+ value = ''
+ try:
+ self._cell_types[rowx][colx] = ctype
+ self._cell_values[rowx][colx] = value
+ self._cell_xf_indexes[rowx][colx] = xf_index
+ except IndexError:
+ # print >> self.logfile, "put_cell extending", rowx, colx
+ self.extend_cells(rowx+1, colx+1)
+ try:
+ self._cell_types[rowx][colx] = ctype
+ self._cell_values[rowx][colx] = value
+ self._cell_xf_indexes[rowx][colx] = xf_index
+ except:
+ print >> self.logfile, "put_cell", rowx, colx
+ raise
+ except:
+ print >> self.logfile, "put_cell", rowx, colx
+ raise
+
+ def put_number_cell(self, rowx, colx, value, xf_index):
+ ctype = self._xf_index_to_xl_type_map[xf_index]
+ try:
+ self._cell_types[rowx][colx] = ctype
+ self._cell_values[rowx][colx] = value
+ if self.formatting_info:
+ self._cell_xf_indexes[rowx][colx] = xf_index
+ except IndexError:
+ # print >> self.logfile, "put_number_cell extending", rowx, colx
+ self.extend_cells(rowx+1, colx+1)
+ try:
+ self._cell_types[rowx][colx] = ctype
+ self._cell_values[rowx][colx] = value
+ if self.formatting_info:
+ self._cell_xf_indexes[rowx][colx] = xf_index
+ except:
+ print >> self.logfile, "put_number_cell", rowx, colx
+ raise
+ except:
+ print >> self.logfile, "put_number_cell", rowx, colx
+ raise
+
+ # === Methods after this line neither know nor care about how cells are stored.
+
+ def read(self, bk):
+ global rc_stats
+ DEBUG = 0
+ blah = DEBUG or self.verbosity >= 2
+ blah_rows = DEBUG or self.verbosity >= 4
+ blah_formulas = 1 and blah
+ oldpos = bk._position
+ bk._position = self._position
+ XL_SHRFMLA_ETC_ETC = (
+ XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2,
+ XL_ARRAY2, XL_TABLEOP_B2,
+ )
+ self_put_number_cell = self.put_number_cell
+ self_put_cell = self.put_cell
+ self_put_blank_cell = self.put_blank_cell
+ local_unpack = unpack
+ bk_get_record_parts = bk.get_record_parts
+ bv = self.biff_version
+ fmt_info = self.formatting_info
+ eof_found = 0
+ while 1:
+ # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position
+ rc, data_len, data = bk_get_record_parts()
+ # if rc in rc_stats:
+ # rc_stats[rc] += 1
+ # else:
+ # rc_stats[rc] = 1
+ # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data)
+ if rc == XL_NUMBER:
+ rowx, colx, xf_index, d = local_unpack('
-- New in version 0.6.1
+ #
+ # @param colx Index of the queried column, range 0 to 255.
+ # Note that it is possible to find out the width that will be used to display
+ # columns with no cell information e.g. column IV (colx=255).
+ # @return The column width that will be used for displaying
+ # the given column by Excel, in units of 1/256th of the width of a
+ # standard character (the digit zero in the first font).
+
+ def computed_column_width(self, colx):
+ self.req_fmt_info()
+ if self.biff_version >= 80:
+ colinfo = self.colinfo_map.get(colx, None)
+ if colinfo is not None:
+ return colinfo.width
+ if self.standardwidth is not None:
+ return self.standardwidth
+ elif self.biff_version >= 40:
+ if self.gcw[colx]:
+ if self.standardwidth is not None:
+ return self.standardwidth
+ else:
+ colinfo = self.colinfo_map.get(colx, None)
+ if colinfo is not None:
+ return colinfo.width
+ elif self.biff_version == 30:
+ colinfo = self.colinfo_map.get(colx, None)
+ if colinfo is not None:
+ return colinfo.width
+ # All roads lead to Rome and the DEFCOLWIDTH ...
+ if self.defcolwidth is not None:
+ return self.defcolwidth * 256
+ return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record
+
+ def handle_msodrawingetc(self, recid, data_len, data):
+ if not OBJ_MSO_DEBUG:
+ return
+ DEBUG = 1
+ if self.biff_version < 80:
+ return
+ o = MSODrawing()
+ pos = 0
+ while pos < data_len:
+ tmp, fbt, cb = unpack('
WARNING: You don't call this class yourself. You access Cell objects +# via methods of the Sheet object(s) that you found in the Book object that +# was returned when you called xlrd.open_workbook("myfile.xls").
+#Cell objects have three attributes: ctype is an int, value +# (which depends on ctype) and xf_index. +# If "formatting_info" is not enabled when the workbook is opened, xf_index will be None. +# The following table describes the types of cells and how their values +# are represented in Python.
+# +#| Type symbol | +#Type number | +#Python value | +#
|---|---|---|
| XL_CELL_EMPTY | +#0 | +#empty string u'' | +#
| XL_CELL_TEXT | +#1 | +#a Unicode string | +#
| XL_CELL_NUMBER | +#2 | +#float | +#
| XL_CELL_DATE | +#3 | +#float | +#
| XL_CELL_BOOLEAN | +#4 | +#int; 1 means TRUE, 0 means FALSE | +#
| XL_CELL_ERROR | +#5 | +#int representing internal Excel codes; for a text representation, +# refer to the supplied dictionary error_text_from_code | +#
| XL_CELL_BLANK | +#6 | +#empty string u''. Note: this type will appear only when +# open_workbook(..., formatting_info=True) is used. | +#
Here is the default hierarchy for width, according to the OOo docs:
+#
+#
"""In BIFF3, if a COLINFO record is missing for a column,
+# the width specified in the record DEFCOLWIDTH is used instead.
+#
+#
In BIFF4-BIFF7, the width set in this [COLINFO] record is only used,
+# if the corresponding bit for this column is cleared in the GCW
+# record, otherwise the column width set in the DEFCOLWIDTH record
+# is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]).
+#
+#
In BIFF8, if a COLINFO record is missing for a column,
+# the width specified in the record STANDARDWIDTH is used.
+# If this [STANDARDWIDTH] record is also missing,
+# the column width of the record DEFCOLWIDTH is used instead."""
+#
+#
+# Footnote: The docs on the GCW record say this:
+# """
+# If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH
+# record. If a bit is cleared, the corresponding column uses the width set in the
+# COLINFO record for this column.
+#
If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if
+# the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH
+# record of the worksheet will be used instead.
+#
"""
+# At the moment (2007-01-17) xlrd is going with the GCW version of the story.
+# Reference to the source may be useful: see the computed_column_width(colx) method
+# of the Sheet class.
+#
-- New in version 0.6.1
+#
Copyright © 2006-2008 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+## + +# timemachine.py -- adaptation for earlier Pythons e.g. 2.1 +# usage: from timemachine import * + +# 2008-02-08 SJM Generalised method of detecting IronPython + +import sys + +python_version = sys.version_info[:2] # e.g. version 2.4 -> (2, 4) + +CAN_PICKLE_ARRAY = python_version >= (2, 5) +CAN_SUBCLASS_BUILTIN = python_version >= (2, 2) + +if sys.version.find("IronPython") >= 0: + array_array = None +else: + from array import array as array_array + +if python_version < (2, 2): + class object: + pass + False = 0 + True = 1 + +def int_floor_div(x, y): + return divmod(x, y)[0] + +def intbool(x): + if x: + return 1 + return 0 + +if python_version < (2, 3): + def sum(sequence, start=0): + tot = start + for item in aseq: + tot += item + return tot diff --git a/tablib/packages/xlrd/xldate.py b/tablib/packages/xlrd/xldate.py new file mode 100644 index 0000000..e5f7591 --- /dev/null +++ b/tablib/packages/xlrd/xldate.py @@ -0,0 +1,171 @@ +# -*- coding: cp1252 -*- + +# No part of the content of this file was derived from the works of David Giffin. + +## +#Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd
+#This module is part of the xlrd package, which is released under a BSD-style licence.
+# +#Provides function(s) for dealing with Microsoft Excel ™ dates.
+## + +# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099) + +# The conversion from days to (year, month, day) starts with +# an integral "julian day number" aka JDN. +# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713. +# More importantly: +# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0 +# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0 + +from timemachine import int_floor_div as ifd + +_JDN_delta = (2415080 - 61, 2416482 - 1) +assert _JDN_delta[1] - _JDN_delta[0] == 1462 + +class XLDateError(ValueError): pass + +class XLDateNegative(XLDateError): pass +class XLDateAmbiguous(XLDateError): pass +class XLDateTooLarge(XLDateError): pass +class XLDateBadDatemode(XLDateError): pass +class XLDateBadTuple(XLDateError): pass + +_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01 + +## +# Convert an Excel number (presumed to represent a date, a datetime or a time) into +# a tuple suitable for feeding to datetime or mx.DateTime constructors. +# @param xldate The Excel number +# @param datemode 0: 1900-based, 1: 1904-based. +#