mirror of
https://github.com/kennethreitz/tablib.git
synced 2026-06-05 23:10:17 +00:00
Added XLS import support
This commit is contained in:
@@ -39,6 +39,7 @@ else:
|
||||
from cStringIO import StringIO as BytesIO
|
||||
from cStringIO import StringIO
|
||||
import tablib.packages.xlwt as xlwt
|
||||
import tablib.packages.xlrd as xlrd
|
||||
from tablib.packages import markup
|
||||
from itertools import ifilter
|
||||
from tablib.packages import openpyxl
|
||||
|
||||
+34
-2
@@ -5,8 +5,8 @@
|
||||
|
||||
import sys
|
||||
|
||||
from tablib.compat import BytesIO, xlwt
|
||||
|
||||
from tablib.compat import BytesIO, xlwt, xlrd
|
||||
import tablib
|
||||
|
||||
title = 'xls'
|
||||
extentions = ('xls',)
|
||||
@@ -16,6 +16,38 @@ wrap = xlwt.easyxf("alignment: wrap on")
|
||||
bold = xlwt.easyxf("font: bold on")
|
||||
|
||||
|
||||
def import_set(dset, in_stream, headers=True):
    """Populate *dset* in place from the first worksheet of an XLS workbook.

    Nothing is returned; the dataset passed in is wiped and refilled.

    :param dset: dataset to fill.
    :param in_stream: raw contents of the ``.xls`` file. It is handed to
        ``xlrd.open_workbook`` as ``file_contents``, so it must be the file's
        data, not an open file object.
    :param headers: when true, the first row becomes ``dset.headers``
        instead of a data row.
    """

    dset.wipe()

    wb = xlrd.open_workbook(file_contents=in_stream)
    ws = wb.sheet_by_index(0)

    # Keep the worksheet name, as import_book() does for every sheet it reads.
    dset.title = ws.name

    for i in range(ws.nrows):
        if (i == 0) and (headers):
            dset.headers = ws.row_values(i)
        else:
            dset.append(ws.row_values(i))
|
||||
|
||||
|
||||
def import_book(dbook, in_stream, headers=True):
    """Populate *dbook* in place with one Dataset per worksheet of an XLS file.

    Nothing is returned; the databook passed in is wiped and refilled.

    :param dbook: databook to fill.
    :param in_stream: raw contents of the ``.xls`` file (passed to
        ``xlrd.open_workbook`` as ``file_contents``).
    :param headers: when true, the first row of every sheet becomes that
        dataset's headers instead of a data row.
    """

    dbook.wipe()

    workbook = xlrd.open_workbook(file_contents=in_stream)

    for sheet in workbook.sheets():
        sheet_data = tablib.Dataset()
        sheet_data.title = sheet.name

        # Row 0 is consumed as the header row only when one was requested
        # and the sheet actually has a row 0.
        first_data_row = 0
        if headers and sheet.nrows > 0:
            sheet_data.headers = sheet.row_values(0)
            first_data_row = 1

        for row_index in range(first_data_row, sheet.nrows):
            sheet_data.append(sheet.row_values(row_index))

        dbook.add_sheet(sheet_data)
|
||||
|
||||
|
||||
def export_set(dataset):
|
||||
"""Returns XLS representation of Dataset."""
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,639 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
##
|
||||
# Support module for the xlrd package.
|
||||
#
|
||||
# <p>Portions copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
##
|
||||
|
||||
# 2008-02-10 SJM BIFF2 BLANK record
|
||||
# 2008-02-08 SJM Preparation for Excel 2.0 support
|
||||
# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count
|
||||
# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
|
||||
# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
|
||||
# 2007-04-22 SJM Remove experimental "trimming" facility.
|
||||
|
||||
DEBUG = 0
|
||||
|
||||
from struct import unpack
|
||||
import sys
|
||||
from timemachine import *
|
||||
|
||||
class XLRDError(Exception):
    """xlrd-specific exception type; adds nothing to ``Exception`` itself."""
|
||||
|
||||
##
|
||||
# Parent of almost all other classes in the package. Defines a common "dump" method
|
||||
# for debugging.
|
||||
|
||||
class BaseObject(object):

    # Names of list/dict attributes that dump() should print in full
    # (via %r) instead of summarising as "type, len".
    _repr_these = []

    ##
    # Write every instance attribute, sorted by name, to a file.
    # Attributes that themselves have a "dump" method are dumped recursively
    # (indented by a further 4 spaces), except the one named "book".
    # @param f open file object, to which the dump is written (default: sys.stderr)
    # @param header text to write before the dump
    # @param footer text to write after the dump
    # @param indent number of leading spaces (for recursive calls)

    def dump(self, f=None, header=None, footer=None, indent=0):
        out = f
        if out is None:
            out = sys.stderr
        prefix = " " * indent
        if header is not None:
            print >> out, header
        for name, val in sorted(self.__dict__.items()):
            if getattr(val, 'dump', None) and name != 'book':
                val.dump(out,
                    header="%s%s (%s object):" % (prefix, name, val.__class__.__name__),
                    indent=indent+4)
                continue
            if name not in self._repr_these and isinstance(val, (list, dict)):
                # Containers are summarised, not printed in full.
                print >> out, "%s%s: %s, len = %d" % (prefix, name, type(val), len(val))
            else:
                print >> out, "%s%s: %r" % (prefix, name, val)
        if footer is not None:
            print >> out, footer
|
||||
|
||||
# Format classification codes: unknown, date, number, general, text
FUN = 0
FDT = 1
FNU = 2
FGE = 3
FTX = 4
DATEFORMAT = FDT
NUMBERFORMAT = FNU

# Cell type codes.
XL_CELL_EMPTY = 0
XL_CELL_TEXT = 1
XL_CELL_NUMBER = 2
XL_CELL_DATE = 3
XL_CELL_BOOLEAN = 4
XL_CELL_ERROR = 5
XL_CELL_BLANK = 6 # for use in debugging, gathering stats, etc
|
||||
|
||||
# Display text for each integer BIFF version code used as a key here
# (for example 80 -> "8"); key 0 is the text for data that is not BIFF.
biff_text_from_num = {
    0: "(not BIFF)",
    20: "2.0",
    21: "2.1",
    30: "3",
    40: "4S",
    45: "4W",
    50: "5",
    70: "7",
    80: "8",
    85: "8X",
    }
|
||||
|
||||
##
# <p>This dictionary can be used to produce a text version of the internal codes
# that Excel uses for error cells. Here are its contents:
# <pre>
# 0x00: '#NULL!',  # Intersection of two cell ranges is empty
# 0x07: '#DIV/0!', # Division by zero
# 0x0F: '#VALUE!', # Wrong type of operand
# 0x17: '#REF!',   # Illegal or deleted cell reference
# 0x1D: '#NAME?',  # Wrong function or range name
# 0x24: '#NUM!',   # Value range overflow
# 0x2A: '#N/A',    # Argument or function not available
# </pre></p>

error_text_from_code = {
    0x00: '#NULL!',  # Intersection of two cell ranges is empty
    0x07: '#DIV/0!', # Division by zero
    0x0F: '#VALUE!', # Wrong type of operand
    0x17: '#REF!',   # Illegal or deleted cell reference
    0x1D: '#NAME?',  # Wrong function or range name
    0x24: '#NUM!',   # Value range overflow
    # Excel spells this error without a trailing "!".
    0x2A: '#N/A',    # Argument or function not available
}
|
||||
|
||||
BIFF_FIRST_UNICODE = 80
|
||||
|
||||
XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
|
||||
XL_WORKBOOK_GLOBALS_4W = 0x100
|
||||
XL_WORKSHEET = WRKSHEET = 0x10
|
||||
|
||||
XL_BOUNDSHEET_WORKSHEET = 0x00
|
||||
XL_BOUNDSHEET_CHART = 0x02
|
||||
XL_BOUNDSHEET_VB_MODULE = 0x06
|
||||
|
||||
# XL_RK2 = 0x7e
|
||||
XL_ARRAY = 0x0221
|
||||
XL_ARRAY2 = 0x0021
|
||||
XL_BLANK = 0x0201
|
||||
XL_BLANK_B2 = 0x01
|
||||
XL_BOF = 0x809
|
||||
XL_BOOLERR = 0x205
|
||||
XL_BOOLERR_B2 = 0x5
|
||||
XL_BOUNDSHEET = 0x85
|
||||
XL_BUILTINFMTCOUNT = 0x56
|
||||
XL_CF = 0x01B1
|
||||
XL_CODEPAGE = 0x42
|
||||
XL_COLINFO = 0x7D
|
||||
XL_COLUMNDEFAULT = 0x20 # BIFF2 only
|
||||
XL_COLWIDTH = 0x24 # BIFF2 only
|
||||
XL_CONDFMT = 0x01B0
|
||||
XL_CONTINUE = 0x3c
|
||||
XL_COUNTRY = 0x8C
|
||||
XL_DATEMODE = 0x22
|
||||
XL_DEFAULTROWHEIGHT = 0x0225
|
||||
XL_DEFCOLWIDTH = 0x55
|
||||
XL_DIMENSION = 0x200
|
||||
XL_DIMENSION2 = 0x0
|
||||
XL_EFONT = 0x45
|
||||
XL_EOF = 0x0a
|
||||
XL_EXTERNNAME = 0x23
|
||||
XL_EXTERNSHEET = 0x17
|
||||
XL_EXTSST = 0xff
|
||||
XL_FEAT11 = 0x872
|
||||
XL_FILEPASS = 0x2f
|
||||
XL_FONT = 0x31
|
||||
XL_FONT_B3B4 = 0x231
|
||||
XL_FORMAT = 0x41e
|
||||
XL_FORMAT2 = 0x1E # BIFF2, BIFF3
|
||||
XL_FORMULA = 0x6
|
||||
XL_FORMULA3 = 0x206
|
||||
XL_FORMULA4 = 0x406
|
||||
XL_GCW = 0xab
|
||||
XL_INDEX = 0x20b
|
||||
XL_INTEGER = 0x2 # BIFF2 only
|
||||
XL_IXFE = 0x44 # BIFF2 only
|
||||
XL_LABEL = 0x204
|
||||
XL_LABEL_B2 = 0x04
|
||||
XL_LABELRANGES = 0x15f
|
||||
XL_LABELSST = 0xfd
|
||||
XL_MERGEDCELLS = 0xE5
|
||||
XL_MSO_DRAWING = 0x00EC
|
||||
XL_MSO_DRAWING_GROUP = 0x00EB
|
||||
XL_MSO_DRAWING_SELECTION = 0x00ED
|
||||
XL_MULRK = 0xbd
|
||||
XL_MULBLANK = 0xbe
|
||||
XL_NAME = 0x18
|
||||
XL_NOTE = 0x1c
|
||||
XL_NUMBER = 0x203
|
||||
XL_NUMBER_B2 = 0x3
|
||||
XL_OBJ = 0x5D
|
||||
XL_PALETTE = 0x92
|
||||
XL_RK = 0x27e
|
||||
XL_ROW = 0x208
|
||||
XL_ROW_B2 = 0x08
|
||||
XL_RSTRING = 0xd6
|
||||
XL_SHEETHDR = 0x8F # BIFF4W only
|
||||
XL_SHEETSOFFSET = 0x8E # BIFF4W only
|
||||
XL_SHRFMLA = 0x04bc
|
||||
XL_SST = 0xfc
|
||||
XL_STANDARDWIDTH = 0x99
|
||||
XL_STRING = 0x207
|
||||
XL_STRING_B2 = 0x7
|
||||
XL_STYLE = 0x293
|
||||
XL_SUPBOOK = 0x1AE
|
||||
XL_TABLEOP = 0x236
|
||||
XL_TABLEOP2 = 0x37
|
||||
XL_TABLEOP_B2 = 0x36
|
||||
XL_TXO = 0x1b6
|
||||
XL_UNCALCED = 0x5e
|
||||
XL_UNKNOWN = 0xffff
|
||||
XL_WINDOW2 = 0x023E
|
||||
XL_WRITEACCESS = 0x5C
|
||||
XL_XF = 0xe0
|
||||
XL_XF2 = 0x0043 # BIFF2 version of XF record
|
||||
XL_XF3 = 0x0243 # BIFF3 version of XF record
|
||||
XL_XF4 = 0x0443 # BIFF4 version of XF record
|
||||
|
||||
boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4}
|
||||
bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
|
||||
|
||||
XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
|
||||
|
||||
# Record opcodes that is_cell_opcode() answers True for.
_cell_opcode_list = [
    XL_BOOLERR,
    XL_FORMULA,
    XL_FORMULA3,
    XL_FORMULA4,
    XL_LABEL,
    XL_LABELSST,
    XL_MULRK,
    XL_NUMBER,
    XL_RK,
    XL_RSTRING,
    ]
# Dict used purely as a set, for O(1) membership tests.
_cell_opcode_dict = dict.fromkeys(_cell_opcode_list, 1)
# is_cell_opcode(opcode) -> True if opcode is in _cell_opcode_list.
# __contains__ gives the same answer as the deprecated has_key.
is_cell_opcode = _cell_opcode_dict.__contains__
|
||||
|
||||
# def fprintf(f, fmt, *vargs): f.write(fmt % vargs)
|
||||
|
||||
def fprintf(f, fmt, *vargs):
    """printf-style output: write ``fmt % vargs`` to file *f*.

    A format ending in a newline produces exactly one line; any other format
    is printed with a trailing comma, so no newline follows the text.
    """
    if not fmt.endswith('\n'):
        print >> f, fmt % vargs,
        return
    # print appends its own newline, so strip the one in fmt before formatting.
    print >> f, fmt[:-1] % vargs
|
||||
|
||||
def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
    """Unpack bit fields of integer *src* into attributes of *tgt_obj*.

    *manifest* is an iterable of ``(shift, mask, attribute_name)`` triples;
    each attribute is set to ``(src & mask) >> shift``.
    """
    for entry in manifest:
        shift, bitmask, attr_name = entry
        field = (src & bitmask) >> shift
        local_setattr(tgt_obj, attr_name, field)
|
||||
|
||||
def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
    """Like upkbits(), but pass each extracted field through *local_int*.

    *manifest* is an iterable of ``(shift, mask, attribute_name)`` triples;
    each attribute is set to ``local_int((src & mask) >> shift)``.
    """
    for entry in manifest:
        shift, bitmask, attr_name = entry
        field = local_int((src & bitmask) >> shift)
        local_setattr(tgt_obj, attr_name, field)
|
||||
|
||||
def unpack_string(data, pos, encoding, lenlen=1):
    """Decode the length-prefixed byte string that starts at ``data[pos]``.

    The prefix is an unsigned little-endian count of bytes, 1 byte wide when
    *lenlen* is 1 and 2 bytes wide when it is 2. Returns the decoded text.
    """
    count_fmt = '<' + 'BH'[lenlen-1]
    nchars = unpack(count_fmt, data[pos:pos+lenlen])[0]
    start = pos + lenlen
    raw = data[start:start+nchars]
    return raw.decode(encoding)
|
||||
|
||||
def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
    """Decode a byte string at ``data[pos]``; return ``(text, new_pos)``.

    When *known_len* is given it is used as the byte count and no length
    prefix is read; otherwise a *lenlen*-byte little-endian count is read
    first. *new_pos* is the offset just past the string's bytes.
    """
    if known_len is None:
        nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    newpos = pos + nchars
    text = data[pos:newpos].decode(encoding)
    return (text, newpos)
|
||||
|
||||
def unpack_unicode(data, pos, lenlen=2):
    """Return the text of the BIFF8-style Unicode string at ``data[pos]``.

    Layout read here: a *lenlen*-byte little-endian character count, one
    "options" byte, an optional 2-byte rich-text field (options & 0x08), an
    optional 4-byte phonetic field (options & 0x04), then the characters.
    Characters are UTF-16-LE when ``options & 0x01`` is set, otherwise one
    byte per character, decoded as latin_1.

    *data* may be a Python 2 ``str`` or a Python 3 ``bytes`` object.
    A zero character count returns ``u""`` without reading an options byte.
    """
    nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
    if not nchars:
        # Ambiguous whether 0-length string should have an "options" byte.
        # Avoid crash if missing.
        return u""
    pos += lenlen
    options = data[pos]
    if not isinstance(options, int):
        # Python 2: indexing a str gives a 1-character str, not an int.
        options = ord(options)
    pos += 1
    if options & 0x08:
        # rich text: skip the 2-byte run count (value not needed here)
        pos += 2
    if options & 0x04:
        # phonetic: skip the 4-byte size (value not needed here)
        pos += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        strg = data[pos:pos+2*nchars].decode('utf_16_le')
    else:
        # Note: this is COMPRESSED (not ASCII!) encoding!!!
        # Merely returning the raw bytes would work OK 99.99% of the time
        # if the local codepage was cp1252 -- however this would rapidly go
        # pear-shaped for other codepages, so Unicode is returned instead.
        strg = data[pos:pos+nchars].decode("latin_1")
    return strg
|
||||
|
||||
def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
    """Return ``(unicode_strg, updated value of pos)``.

    Reads the same string layout as unpack_unicode() but also steps over the
    trailing rich-text runs (4 bytes each) and phonetic data, so the returned
    position is the offset just past the whole string structure.

    When *known_len* is given it is used as the character count and no length
    prefix is read. *data* may be a Python 2 ``str`` or Python 3 ``bytes``.
    """
    if known_len is not None:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    else:
        nchars = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])[0]
        pos += lenlen
    if not nchars and not data[pos:]:
        # Zero-length string with no options byte
        return (u"", pos)
    options = data[pos]
    if not isinstance(options, int):
        # Python 2: indexing a str gives a 1-character str, not an int.
        options = ord(options)
    pos += 1
    phonetic = options & 0x04
    richtext = options & 0x08
    if richtext:
        rt = unpack('<H', data[pos:pos+2])[0]
        pos += 2
    if phonetic:
        sz = unpack('<i', data[pos:pos+4])[0]
        pos += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        strg = data[pos:pos+2*nchars].decode('utf_16_le')
        pos += 2*nchars
    else:
        # Note: this is COMPRESSED (not ASCII!) encoding!!!
        strg = data[pos:pos+nchars].decode("latin_1")
        pos += nchars
    # The rich-text runs and the phonetic block follow the characters.
    if richtext:
        pos += 4 * rt
    if phonetic:
        pos += sz
    return (strg, pos)
|
||||
|
||||
def unpack_cell_range_address_list_update_pos(
    output_list, data, pos, biff_version, addr_size=6):
    """Append the cell ranges stored at ``data[pos]`` to *output_list*.

    The data holds a 2-byte little-endian count followed by that many
    addresses of *addr_size* bytes each: two 16-bit row indexes, then two
    column indexes (8-bit each for size 6, 16-bit each for size 8).
    Each appended tuple is ``(first_row, last_row+1, first_col, last_col+1)``.

    output_list is updated in situ; the return value is the position just
    past the last address read.
    """
    if biff_version < 80:
        assert addr_size == 6
    else:
        assert addr_size in (6, 8)
    count, = unpack("<H", data[pos:pos+2])
    pos += 2
    if not count:
        return pos
    if addr_size == 6:
        fmt = "<HHBB"
    else:
        fmt = "<HHHH"
    end = pos + count * addr_size
    for start in range(pos, end, addr_size):
        ra, rb, ca, cb = unpack(fmt, data[start:start+addr_size])
        output_list.append((ra, rb+1, ca, cb+1))
    return end
|
||||
|
||||
_brecstrg = """\
|
||||
0000 DIMENSIONS_B2
|
||||
0001 BLANK_B2
|
||||
0002 INTEGER_B2_ONLY
|
||||
0003 NUMBER_B2
|
||||
0004 LABEL_B2
|
||||
0005 BOOLERR_B2
|
||||
0006 FORMULA
|
||||
0007 STRING_B2
|
||||
0008 ROW_B2
|
||||
0009 BOF_B2
|
||||
000A EOF
|
||||
000B INDEX_B2_ONLY
|
||||
000C CALCCOUNT
|
||||
000D CALCMODE
|
||||
000E PRECISION
|
||||
000F REFMODE
|
||||
0010 DELTA
|
||||
0011 ITERATION
|
||||
0012 PROTECT
|
||||
0013 PASSWORD
|
||||
0014 HEADER
|
||||
0015 FOOTER
|
||||
0016 EXTERNCOUNT
|
||||
0017 EXTERNSHEET
|
||||
0018 NAME_B2,5+
|
||||
0019 WINDOWPROTECT
|
||||
001A VERTICALPAGEBREAKS
|
||||
001B HORIZONTALPAGEBREAKS
|
||||
001C NOTE
|
||||
001D SELECTION
|
||||
001E FORMAT_B2-3
|
||||
001F BUILTINFMTCOUNT_B2
|
||||
0020 COLUMNDEFAULT_B2_ONLY
|
||||
0021 ARRAY_B2_ONLY
|
||||
0022 DATEMODE
|
||||
0023 EXTERNNAME
|
||||
0024 COLWIDTH_B2_ONLY
|
||||
0025 DEFAULTROWHEIGHT_B2_ONLY
|
||||
0026 LEFTMARGIN
|
||||
0027 RIGHTMARGIN
|
||||
0028 TOPMARGIN
|
||||
0029 BOTTOMMARGIN
|
||||
002A PRINTHEADERS
|
||||
002B PRINTGRIDLINES
|
||||
002F FILEPASS
|
||||
0031 FONT
|
||||
0032 FONT2_B2_ONLY
|
||||
0036 TABLEOP_B2
|
||||
0037 TABLEOP2_B2
|
||||
003C CONTINUE
|
||||
003D WINDOW1
|
||||
003E WINDOW2_B2
|
||||
0040 BACKUP
|
||||
0041 PANE
|
||||
0042 CODEPAGE
|
||||
0043 XF_B2
|
||||
0044 IXFE_B2_ONLY
|
||||
0045 EFONT_B2_ONLY
|
||||
004D PLS
|
||||
0051 DCONREF
|
||||
0055 DEFCOLWIDTH
|
||||
0056 BUILTINFMTCOUNT_B3-4
|
||||
0059 XCT
|
||||
005A CRN
|
||||
005B FILESHARING
|
||||
005C WRITEACCESS
|
||||
005D OBJECT
|
||||
005E UNCALCED
|
||||
005F SAVERECALC
|
||||
0063 OBJECTPROTECT
|
||||
007D COLINFO
|
||||
007E RK2_mythical_?
|
||||
0080 GUTS
|
||||
0081 WSBOOL
|
||||
0082 GRIDSET
|
||||
0083 HCENTER
|
||||
0084 VCENTER
|
||||
0085 BOUNDSHEET
|
||||
0086 WRITEPROT
|
||||
008C COUNTRY
|
||||
008D HIDEOBJ
|
||||
008E SHEETSOFFSET
|
||||
008F SHEETHDR
|
||||
0090 SORT
|
||||
0092 PALETTE
|
||||
0099 STANDARDWIDTH
|
||||
009B FILTERMODE
|
||||
009C FNGROUPCOUNT
|
||||
009D AUTOFILTERINFO
|
||||
009E AUTOFILTER
|
||||
00A0 SCL
|
||||
00A1 SETUP
|
||||
00AB GCW
|
||||
00BD MULRK
|
||||
00BE MULBLANK
|
||||
00C1 MMS
|
||||
00D6 RSTRING
|
||||
00D7 DBCELL
|
||||
00DA BOOKBOOL
|
||||
00DD SCENPROTECT
|
||||
00E0 XF
|
||||
00E1 INTERFACEHDR
|
||||
00E2 INTERFACEEND
|
||||
00E5 MERGEDCELLS
|
||||
00E9 BITMAP
|
||||
00EB MSO_DRAWING_GROUP
|
||||
00EC MSO_DRAWING
|
||||
00ED MSO_DRAWING_SELECTION
|
||||
00EF PHONETIC
|
||||
00FC SST
|
||||
00FD LABELSST
|
||||
00FF EXTSST
|
||||
013D TABID
|
||||
015F LABELRANGES
|
||||
0160 USESELFS
|
||||
0161 DSF
|
||||
01AE SUPBOOK
|
||||
01AF PROTECTIONREV4
|
||||
01B0 CONDFMT
|
||||
01B1 CF
|
||||
01B2 DVAL
|
||||
01B6 TXO
|
||||
01B7 REFRESHALL
|
||||
01B8 HLINK
|
||||
01BC PASSWORDREV4
|
||||
01BE DV
|
||||
01C0 XL9FILE
|
||||
01C1 RECALCID
|
||||
0200 DIMENSIONS
|
||||
0201 BLANK
|
||||
0203 NUMBER
|
||||
0204 LABEL
|
||||
0205 BOOLERR
|
||||
0206 FORMULA_B3
|
||||
0207 STRING
|
||||
0208 ROW
|
||||
0209 BOF
|
||||
020B INDEX_B3+
|
||||
0218 NAME
|
||||
0221 ARRAY
|
||||
0223 EXTERNNAME_B3-4
|
||||
0225 DEFAULTROWHEIGHT
|
||||
0231 FONT_B3B4
|
||||
0236 TABLEOP
|
||||
023E WINDOW2
|
||||
0243 XF_B3
|
||||
027E RK
|
||||
0293 STYLE
|
||||
0406 FORMULA_B4
|
||||
0409 BOF
|
||||
041E FORMAT
|
||||
0443 XF_B4
|
||||
04BC SHRFMLA
|
||||
0800 QUICKTIP
|
||||
0809 BOF
|
||||
0862 SHEETLAYOUT
|
||||
0867 SHEETPROTECTION
|
||||
0868 RANGEPROTECTION
|
||||
"""
|
||||
|
||||
# Maps record opcode (int) -> record name, parsed from the "HEX NAME" lines
# of _brecstrg.
biff_rec_name_dict = {}
for _buff in _brecstrg.splitlines():
    _numh, _name = _buff.split()
    biff_rec_name_dict[int(_numh, 16)] = _name
# Drop every temporary used to build the table, _numh included.
del _buff, _numh, _name, _brecstrg
|
||||
|
||||
def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
    """Write a hex + character dump of ``strg[ofs:ofs+dlen]`` to *fout*.

    Output is 16 bytes per line. Unless *unnumbered* is true, each line is
    prefixed with its offset, counted from *base*. In the character column
    NUL bytes show as '~' and anything outside ' '..'~' shows as '?'.
    """
    limit = min(ofs + dlen, len(strg))
    show_offsets = not unnumbered
    num_prefix = ''
    cursor = ofs
    while cursor < limit:
        chunk_end = min(cursor + 16, limit)
        chunk = strg[cursor:chunk_end]
        expected = chunk_end - cursor
        if expected <= 0 or expected != len(chunk):
            # Sanity check failed. NOTE: this diagnostic goes to sys.stdout,
            # not to fout.
            fprintf(
                sys.stdout,
                '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
                ofs, dlen, base, limit, cursor, chunk_end, chunk)
            break
        hex_parts = []
        text_parts = []
        for ch in chunk:
            hex_parts.append("%02x " % ord(ch))
            if ch == '\0':
                text_parts.append('~')
            elif ' ' <= ch <= '~':
                text_parts.append(ch)
            else:
                text_parts.append('?')
        if show_offsets:
            num_prefix = "%5d: " % (base+cursor-ofs)
        fprintf(fout, "%s %-48s %s\n", num_prefix, ''.join(hex_parts), ''.join(text_parts))
        cursor = chunk_end
|
||||
|
||||
def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False):
    """Write a record-by-record dump of a BIFF stream to *fout*.

    The stream is ``mem[stream_offset:stream_offset+stream_len]``. Each record
    is a 4-byte header (little-endian opcode and data length) followed by its
    data; the header is printed with the record's name, then the data as a
    hex dump. Runs of all-zero headers are collapsed into one
    "zero bytes skipped" line. Offsets are shown relative to *base* unless
    *unnumbered* is true.
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    adj = base - stream_offset
    dummies = 0
    numbered = not unnumbered
    num_prefix = ''
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # Compare only the rest of THIS stream: slicing to the end of mem
            # could never match when the stream stops before mem does.
            if mem[pos:stream_end] == '\0' * (stream_end - pos):
                dummies = stream_end - pos
                savpos = pos
                pos = stream_end
                break
            if dummies:
                dummies += 4
            else:
                # start of a new run of zero headers
                savpos = pos
                dummies = 4
            pos += 4
        else:
            if dummies:
                # A real record ends the run of zeros: report the run first.
                if numbered:
                    num_prefix = "%5d: " % (adj + savpos)
                fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
                dummies = 0
            recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
            if numbered:
                num_prefix = "%5d: " % (adj + pos)
            fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length)
            pos += 4
            hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
            pos += length
    if dummies:
        if numbered:
            num_prefix = "%5d: " % (adj + savpos)
        fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
    if pos < stream_end:
        # Fewer than 4 bytes left: too short to be a record header.
        if numbered:
            num_prefix = "%5d: " % (adj + pos)
        fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
        hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
    elif pos > stream_end:
        fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
|
||||
|
||||
def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
    """Write a per-record-type count for a BIFF stream to *fout*.

    The stream is ``mem[stream_offset:stream_offset+stream_len]``. Records are
    tallied by name; opcodes with no known name are tallied as
    ``Unknown_0xNNNN`` and all-zero headers as ``<Dummy (zero)>``. Counting
    stops early once the remainder of the stream is entirely zero bytes.
    One "count name" line is written per name, sorted by name.
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    tally = {}
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # Compare only the rest of THIS stream: slicing to the end of mem
            # could never match when the stream stops before mem does.
            if mem[pos:stream_end] == '\0' * (stream_end - pos):
                break
            recname = "<Dummy (zero)>"
        else:
            recname = biff_rec_name_dict.get(rc, None)
            if recname is None:
                recname = "Unknown_0x%04X" % rc
        tally[recname] = tally.get(recname, 0) + 1
        pos += length + 4
    for recname, count in sorted(tally.items()):
        print >> fout, "%8d %s" % (count, recname)
|
||||
|
||||
# Maps a codepage number to the name of the Python codec to decode with.
# Entries marked "guess" were flagged as unverified by the original author.
encoding_from_codepage = {
    1200 : 'utf_16_le',
    10000: 'mac_roman',
    10006: 'mac_greek', # guess
    10007: 'mac_cyrillic', # guess
    10029: 'mac_latin2', # guess
    10079: 'mac_iceland', # guess
    10081: 'mac_turkish', # guess
    32768: 'mac_roman',
    32769: 'cp1252',
    }
|
||||
# some more guessing, for Indic scripts
|
||||
# codepage 57000 range:
|
||||
# 2 Devanagari [0]
|
||||
# 3 Bengali [1]
|
||||
# 4 Tamil [5]
|
||||
# 5 Telugu [6]
|
||||
# 6 Assamese [1] c.f. Bengali
|
||||
# 7 Oriya [4]
|
||||
# 8 Kannada [7]
|
||||
# 9 Malayalam [8]
|
||||
# 10 Gujarati [3]
|
||||
# 11 Gurmukhi [2]
|
||||
@@ -0,0 +1,358 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
##
|
||||
# Implements the minimal functionality required
|
||||
# to extract a "Workbook" or "Book" stream (as one big string)
|
||||
# from an OLE2 Compound Document file.
|
||||
# <p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
##
|
||||
|
||||
# No part of the content of this file was derived from the works of David Giffin.
|
||||
|
||||
# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SSCS [Frank Hoffsuemmer]
|
||||
# 2007-09-08 SJM Warning message if sector sizes are extremely large.
|
||||
# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted.
|
||||
# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms.
|
||||
|
||||
|
||||
import sys
|
||||
from struct import unpack
|
||||
from timemachine import *
|
||||
|
||||
##
|
||||
# Magic cookie that should appear in the first 8 bytes of the file.
|
||||
SIGNATURE = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
|
||||
|
||||
EOCSID = -2
|
||||
FREESID = -1
|
||||
SATSID = -3
|
||||
MSATSID = -4
|
||||
|
||||
class CompDocError(Exception):
|
||||
pass
|
||||
|
||||
class DirNode(object):
    """One parsed 128-byte directory entry of a compound document.

    Attributes set from the entry: name, etype, colour, left_DID, right_DID,
    root_DID, first_SID, tot_size and tsinfo (four unsigned 32-bit values).
    ``children`` and ``parent`` start empty / -1 and are filled in later.
    """

    def __init__(self, DID, dent, DEBUG=0):
        # dent is the 128-byte directory entry
        self.DID = DID
        # Bytes 64-79: name buffer size, entry type, colour, then three DIDs.
        fixed = unpack('<HBBiii', dent[64:80])
        cbufsize = fixed[0]
        self.etype, self.colour = fixed[1], fixed[2]
        self.left_DID, self.right_DID, self.root_DID = fixed[3:]
        # Bytes 116-123: first sector id and total size of the entry's stream.
        self.first_SID, self.tot_size = unpack('<ii', dent[116:124])
        if cbufsize != 0:
            # omit the trailing U+0000
            self.name = dent[0:cbufsize-2].decode('utf_16_le')
        else:
            self.name = u''
        self.children = [] # filled in later
        self.parent = -1 # indicates orphan; fixed up later
        self.tsinfo = unpack('<IIII', dent[100:116])
        if DEBUG:
            self.dump(DEBUG)

    def dump(self, DEBUG=1):
        """Print a one-line summary to stdout; DEBUG == 2 adds the timestamp fields."""
        summary = "DID=%d name=%r etype=%d DIDs(left=%d right=%d root=%d parent=%d kids=%r) first_SID=%d tot_size=%d" \
            % (self.DID, self.name, self.etype, self.left_DID,
            self.right_DID, self.root_DID, self.parent, self.children, self.first_SID, self.tot_size)
        print(summary)
        if DEBUG == 2:
            # cre_lo, cre_hi, mod_lo, mod_hi = tsinfo
            print(" ".join(["timestamp info", str(self.tsinfo)]))
|
||||
|
||||
def _build_family_tree(dirlist, parent_DID, child_DID):
    """Record *child_DID* and everything reachable from it under *parent_DID*.

    Follows the entry's left_DID link, then registers the entry itself (its
    DID is appended to the parent's ``children`` and its ``parent`` is set),
    then follows its right_DID link. If the entry is a storage (etype 1),
    the entry at its root_DID is processed with this entry as the parent.
    A negative *child_DID* means "no entry" and ends the recursion.
    """
    if child_DID >= 0:
        node = dirlist[child_DID]
        _build_family_tree(dirlist, parent_DID, node.left_DID)
        dirlist[parent_DID].children.append(child_DID)
        node.parent = parent_DID
        _build_family_tree(dirlist, parent_DID, node.right_DID)
        if node.etype == 1: # storage
            _build_family_tree(dirlist, child_DID, node.root_DID)
|
||||
|
||||
##
|
||||
# Compound document handler.
|
||||
# @param mem The raw contents of the file, as a string, or as an mmap.mmap() object. The
|
||||
# only operation it needs to support is slicing.
|
||||
|
||||
class CompDoc(object):
|
||||
|
||||
def __init__(self, mem, logfile=sys.stdout, DEBUG=0):
    """Parse the compound-document structures needed to extract streams.

    Reads the 512-byte header of *mem*, then builds in turn: the MSAT, the
    SAT (``self.SAT``), the directory (``self.dirlist``), the short-stream
    container stream (``self.SSCS``) and the SSAT (``self.SSAT``).

    @param mem The raw contents of the file; it is only ever sliced.
    @param logfile File that warnings and debug output are written to.
    @param DEBUG Non-zero enables debug output; the value is also passed to
        each directory entry's dump().
    Raises CompDocError if the signature or byte-order marker is wrong, or
    if an MSAT extension sector lies outside the file.
    """
    self.logfile = logfile
    if mem[0:8] != SIGNATURE:
        raise CompDocError('Not an OLE2 compound document')
    if mem[28:30] != '\xFE\xFF':
        raise CompDocError('Expected "little-endian" marker, found %r' % mem[28:30])
    revision, version = unpack('<HH', mem[24:28])
    if DEBUG:
        print >> logfile, "\nCompDoc format: version=0x%04x revision=0x%04x" % (version, revision)
    self.mem = mem
    # Sector sizes are stored as powers of two.
    ssz, sssz = unpack('<HH', mem[30:34])
    if ssz > 20: # allows for 2**20 bytes i.e. 1MB
        print >> logfile, \
            "WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..." \
            % ssz
        ssz = 9
    if sssz > ssz:
        print >> logfile, \
            "WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..." \
            % sssz
        sssz = 6
    self.sec_size = sec_size = 1 << ssz
    self.short_sec_size = 1 << sssz
    (
        SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream,
        SSAT_first_sec_sid, SSAT_tot_secs,
        MSAT_first_sec_sid, MSAT_tot_secs,
    ) = unpack('<iiiiiiii', mem[44:76])
    # Everything after the 512-byte header is sector data.
    mem_data_len = len(mem) - 512
    mem_data_secs, left_over = divmod(mem_data_len, sec_size)
    if left_over:
        # Deliberately only a warning, not a CompDocError.
        print >> logfile, \
            "WARNING *** file size (%d) not 512 + multiple of sector size (%d)" \
            % (len(mem), sec_size)
    if DEBUG:
        print >> logfile, 'sec sizes', ssz, sssz, sec_size, self.short_sec_size
        print >> logfile, "mem data: %d bytes == %d sectors" % (mem_data_len, mem_data_secs)
        print >> logfile, "SAT_tot_secs=%d, dir_first_sec_sid=%d, min_size_std_stream=%d" \
            % (SAT_tot_secs, self.dir_first_sec_sid, self.min_size_std_stream,)
        print >> logfile, "SSAT_first_sec_sid=%d, SSAT_tot_secs=%d" % (SSAT_first_sec_sid, SSAT_tot_secs,)
        print >> logfile, "MSAT_first_sec_sid=%d, MSAT_tot_secs=%d" % (MSAT_first_sec_sid, MSAT_tot_secs,)
    nent = int_floor_div(sec_size, 4) # number of SID entries in a sector
    fmt = "<%di" % nent
    trunc_warned = 0
    #
    # === build the MSAT ===
    #
    # The first 109 entries live in the header; the rest are chained through
    # extension sectors whose last entry is the SID of the next one.
    MSAT = list(unpack('<109i', mem[76:512]))
    sid = MSAT_first_sec_sid
    while sid >= 0:
        if sid >= mem_data_secs:
            raise CompDocError(
                "MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs)
                )
        offset = 512 + sec_size * sid
        news = list(unpack(fmt, mem[offset:offset+sec_size]))
        sid = news.pop()
        MSAT.extend(news)
    if DEBUG:
        print >> logfile, "MSAT: len =", len(MSAT)
        print >> logfile, MSAT
    #
    # === build the SAT ===
    #
    self.SAT = []
    for msid in MSAT:
        if msid == FREESID: continue
        if msid >= mem_data_secs:
            # Warn once, then skip every sector that is outside the file.
            if not trunc_warned:
                print >> logfile, "WARNING *** File is truncated, or OLE2 MSAT is corrupt!!"
                print >> logfile, \
                    "INFO: Trying to access sector %d but only %d available" \
                    % (msid, mem_data_secs)
                trunc_warned = 1
            continue
        offset = 512 + sec_size * msid
        news = list(unpack(fmt, mem[offset:offset+sec_size]))
        self.SAT.extend(news)
    if DEBUG:
        print >> logfile, "SAT: len =", len(self.SAT)
        print >> logfile, self.SAT
        # Blank separator line; sent to logfile like all other debug output.
        print >> logfile
    #
    # === build the directory ===
    #
    dbytes = self._get_stream(
        self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid,
        name="directory")
    dirlist = []
    did = -1
    for pos in xrange(0, len(dbytes), 128):
        did += 1
        dirlist.append(DirNode(did, dbytes[pos:pos+128], 0))
    self.dirlist = dirlist
    _build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ...
    if DEBUG:
        for d in dirlist:
            d.dump(DEBUG)
    #
    # === get the SSCS ===
    #
    sscs_dir = self.dirlist[0]
    assert sscs_dir.etype == 5 # root entry
    if sscs_dir.first_SID < 0 and sscs_dir.tot_size == 0:
        # Problem reported by Frank Hoffsuemmer: some software was
        # writing -1 instead of -2 (EOCSID) for the first_SID
        # when the SSCS was empty. Not having EOCSID caused assertion
        # failure in _get_stream.
        # Solution: avoid calling _get_stream in any case when the
        # SSCS appears to be empty.
        self.SSCS = ""
    else:
        self.SSCS = self._get_stream(
            self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID,
            sscs_dir.tot_size, name="SSCS")
    #
    # === build the SSAT ===
    #
    self.SSAT = []
    if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0:
        print >> logfile, \
            "WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero"
    if sscs_dir.tot_size > 0:
        sid = SSAT_first_sec_sid
        nsecs = SSAT_tot_secs
        # Follow the SSAT's own sector chain through the SAT.
        while sid >= 0 and nsecs > 0:
            nsecs -= 1
            start_pos = 512 + sid * sec_size
            news = list(unpack(fmt, mem[start_pos:start_pos+sec_size]))
            self.SSAT.extend(news)
            sid = self.SAT[sid]
        if DEBUG: print >> logfile, "SSAT last sid %d; remaining sectors %d" % (sid, nsecs)
        assert nsecs == 0 and sid == EOCSID
    if DEBUG: print >> logfile, "SSAT", self.SSAT
|
||||
|
||||
def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name=''):
    """Assemble a stream by following its sector chain; return it as a string.

    Starting at *start_sid*, each sector is the slice of *mem* at
    ``base + sid * sec_size`` and the next sector id is ``sat[sid]``; the
    chain ends at the first negative id, which must be EOCSID.
    With *size* given, at most *size* bytes are collected in total and a
    warning is written to ``self.logfile`` if the chain supplied fewer.
    *name* is used only in messages.
    Raises CompDocError if a sector id is not a valid index into *sat*.
    """
    pieces = []
    sid = start_sid
    bounded = size is not None
    remaining = size
    while sid >= 0:
        offset = base + sid * sec_size
        take = sec_size
        if bounded:
            # Never take more than is still owed.
            if take > remaining:
                take = remaining
            remaining -= take
        pieces.append(mem[offset:offset+take])
        try:
            sid = sat[sid]
        except IndexError:
            raise CompDocError(
                "OLE2 stream %r: sector allocation table invalid entry (%d)" %
                (name, sid)
                )
    assert sid == EOCSID
    if bounded and remaining != 0:
        print >> self.logfile, \
            "WARNING *** OLE2 stream %r: expected size %d, actual size %d" \
            % (name, size, size - remaining)
    return ''.join(pieces)
|
||||
|
||||
def _dir_search(self, path, storage_DID=0):
|
||||
# Return matching DirNode instance, or None
|
||||
head = path[0]
|
||||
tail = path[1:]
|
||||
dl = self.dirlist
|
||||
for child in dl[storage_DID].children:
|
||||
if dl[child].name.lower() == head.lower():
|
||||
et = dl[child].etype
|
||||
if et == 2:
|
||||
return dl[child]
|
||||
if et == 1:
|
||||
if not tail:
|
||||
raise CompDocError("Requested component is a 'storage'")
|
||||
return self._dir_search(tail, child)
|
||||
dl[child].dump(1)
|
||||
raise CompDocError("Requested stream is not a 'user stream'")
|
||||
return None
|
||||
|
||||
##
|
||||
# Interrogate the compound document's directory; return the stream as a string if found, otherwise
|
||||
# return None.
|
||||
# @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
|
||||
|
||||
def get_named_stream(self, qname):
    """Return the contents of the stream called *qname*, or None.

    qname -- "/"-separated path of the desired stream, e.g. u'Workbook'.

    The directory entry's total size decides where the data is read
    from: entries of at least self.min_size_std_stream bytes are read
    from self.mem via the SAT, smaller ones from self.SSCS via the SSAT.
    """
    node = self._dir_search(qname.split("/"))
    if node is None:
        return None
    if node.tot_size < self.min_size_std_stream:
        # Small stream: read from the SSCS with the short-sector table.
        return self._get_stream(
            self.SSCS, 0, self.SSAT, self.short_sec_size, node.first_SID,
            node.tot_size, name=qname + " (from SSCS)")
    # Standard stream: sectors live in the raw file image; the base offset
    # of 512 presumably skips the file header -- confirm against __init__.
    return self._get_stream(
        self.mem, 512, self.SAT, self.sec_size, node.first_SID,
        node.tot_size, name=qname)
|
||||
|
||||
##
|
||||
# Interrogate the compound document's directory.
|
||||
# If the named stream is not found, (None, 0, 0) will be returned.
|
||||
# If the named stream is found and is contiguous within the original byte sequence ("mem")
|
||||
# used when the document was opened,
|
||||
# then (mem, offset_to_start_of_stream, length_of_stream) is returned.
|
||||
# Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.
|
||||
# @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
|
||||
|
||||
def locate_named_stream(self, qname):
    """Locate the stream called *qname* without copying it if possible.

    qname -- "/"-separated path of the desired stream, e.g. u'Workbook'.

    Returns a 3-tuple:
      (None, 0, 0) when the stream is not in the directory;
      whatever self._locate_stream() returns for streams of at least
        self.min_size_std_stream bytes;
      (new_string, 0, length_of_stream) for smaller streams, which are
        always rebuilt from the SSCS.
    """
    d = self._dir_search(qname.split("/"))
    if d is None:
        return (None, 0, 0)
    if d.tot_size >= self.min_size_std_stream:
        return self._locate_stream(
            self.mem, 512, self.SAT, self.sec_size, d.first_SID, d.tot_size)
    # Small stream: assemble it from the SSCS.  (The original code had one
    # more "not found" return after this if/else; it could never execute
    # and has been removed.)
    return (
        self._get_stream(
            self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
            d.tot_size, qname + " (from SSCS)"),
        0,
        d.tot_size
        )
|
||||
|
||||
def _locate_stream(self, mem, base, sat, sec_size, start_sid, size):
    """Follow a sector chain and report where the stream's bytes live.

    mem -- sliceable byte container holding the sectors.
    base -- offset in mem of sector 0.
    sat -- sector allocation table: sat[s] is the sector following s.
    sec_size -- sector size in bytes.
    start_sid -- first sector of the chain; must not be negative.
    size -- stream length, passed through unchanged in the result.

    Returns (mem, start_offset, size) when all sectors are contiguous,
    otherwise (joined_copy_of_the_sectors, 0, size).
    Raises CompDocError for a negative start_sid or when the chain points
    outside the allocation table.
    """
    # print >> self.logfile, "_locate_stream", base, sec_size, start_sid, size
    s = start_sid
    if s < 0:
        raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid)
    p = -99 # dummy previous SID
    start_pos = -9999
    end_pos = -8888
    slices = []
    while s >= 0:
        if s == p+1:
            # contiguous sectors
            end_pos += sec_size
        else:
            # start new slice
            if p >= 0:
                # not first time
                slices.append((start_pos, end_pos))
            start_pos = base + s * sec_size
            end_pos = start_pos + sec_size
        p = s
        try:
            s = sat[s]
        except IndexError:
            # Same treatment as _get_stream: a chain that leaves the table
            # means a corrupt file, not a programming error.
            raise CompDocError(
                "_locate_stream: sector allocation table invalid entry (%d)" % s
                )
    assert s == EOCSID
    # print >> self.logfile, len(slices) + 1, "slices"
    if not slices:
        # The stream is contiguous ... just what we like!
        return (mem, start_pos, size)
    slices.append((start_pos, end_pos))
    return (''.join([mem[lo:hi] for lo, hi in slices]), 0, size)
|
||||
|
||||
# ==========================================================================================
|
||||
@@ -0,0 +1,69 @@
|
||||
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv='Content-Type' content='text/html; charset=us-ascii' />
|
||||
<title>The compdoc Module</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>The compdoc Module</h1>
|
||||
<p>Implements the minimal functionality required
|
||||
to extract a "Workbook" or "Book" stream (as one big string)
|
||||
from an OLE2 Compound Document file.
|
||||
</p><p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
<p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
<h2>Module Contents</h2>
|
||||
<dl>
|
||||
<dt><b>CompDoc(mem, logfile=sys.stdout, DEBUG=0)</b> (class) [<a href='#compdoc.CompDoc-class'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Compound document handler.</p>
|
||||
<dl>
|
||||
<dt><i>mem</i></dt>
|
||||
<dd>
|
||||
The raw contents of the file, as a string, or as an mmap.mmap() object. The
|
||||
only operation it needs to support is slicing.</dd>
|
||||
</dl><br />
|
||||
<p>For more information about this class, see <a href='#compdoc.CompDoc-class'><i>The CompDoc Class</i></a>.</p>
|
||||
</dd>
|
||||
<dt><a id='compdoc.SIGNATURE-variable' name='compdoc.SIGNATURE-variable'><b>SIGNATURE</b></a> (variable) [<a href='#compdoc.SIGNATURE-variable'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Magic cookie that should appear in the first 8 bytes of the file.</p>
|
||||
</dd>
|
||||
</dl>
|
||||
<h2><a id='compdoc.CompDoc-class' name='compdoc.CompDoc-class'>The CompDoc Class</a></h2>
|
||||
<dl>
|
||||
<dt><b>CompDoc(mem, logfile=sys.stdout, DEBUG=0)</b> (class) [<a href='#compdoc.CompDoc-class'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Compound document handler.</p>
|
||||
<dl>
|
||||
<dt><i>mem</i></dt>
|
||||
<dd>
|
||||
The raw contents of the file, as a string, or as an mmap.mmap() object. The
|
||||
only operation it needs to support is slicing.</dd>
|
||||
</dl><br />
|
||||
</dd>
|
||||
<dt><a id='compdoc.CompDoc.get_named_stream-method' name='compdoc.CompDoc.get_named_stream-method'><b>get_named_stream(qname)</b></a> [<a href='#compdoc.CompDoc.get_named_stream-method'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Interrogate the compound document's directory; return the stream as a string if found, otherwise
|
||||
return None.</p>
|
||||
<dl>
|
||||
<dt><i>qname</i></dt>
|
||||
<dd>
|
||||
Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.</dd>
|
||||
</dl><br />
|
||||
</dd>
|
||||
<dt><a id='compdoc.CompDoc.locate_named_stream-method' name='compdoc.CompDoc.locate_named_stream-method'><b>locate_named_stream(qname)</b></a> [<a href='#compdoc.CompDoc.locate_named_stream-method'>#</a>]</dt>
|
||||
<dd>
|
||||
<p>Interrogate the compound document's directory.
|
||||
If the named stream is not found, (None, 0, 0) will be returned.
|
||||
If the named stream is found and is contiguous within the original byte sequence ("mem")
|
||||
used when the document was opened,
|
||||
then (mem, offset_to_start_of_stream, length_of_stream) is returned.
|
||||
Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.</p>
|
||||
<dl>
|
||||
<dt><i>qname</i></dt>
|
||||
<dd>
|
||||
Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.</dd>
|
||||
</dl><br />
|
||||
</dd>
|
||||
</dl>
|
||||
</body></html>
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -0,0 +1,178 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
##
|
||||
# Module/script example of the xlrd API for extracting information
|
||||
# about named references, named constants, etc.
|
||||
#
|
||||
# <p>Copyright © 2006 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
##
|
||||
|
||||
import xlrd
|
||||
import sys
|
||||
import glob
|
||||
|
||||
def scope_as_string(book, scope):
    """Describe a name's scope value as human-readable text.

    book -- object providing nsheets and sheet_names().
    scope -- a sheet index, -1 (global) or -2 (macro/VBA).

    Any other value yields an "Unknown scope value" message.
    """
    in_sheet_range = 0 <= scope < book.nsheets
    if in_sheet_range:
        names = book.sheet_names()
        return "sheet #%d (%r)" % (scope, names[scope])
    if scope == -1:
        label = "Global"
    elif scope == -2:
        label = "Macro/VBA"
    else:
        label = "Unknown scope value (%r)" % scope
    return label
|
||||
|
||||
def do_scope_query(book, scope_strg, show_contents=0, f=sys.stdout):
    """Show every name object of *book* that belongs to one scope.

    book -- Book object providing name_obj_list and sheet_names().
    scope_strg -- "*" for all scopes, an integer scope in text form, or
        else a sheet name (its index is looked up and echoed to f).
    show_contents -- passed through to show_name_object().
    f -- Open output file handle.
    """
    if scope_strg == "*":
        qscope = None # means "all"
    else:
        try:
            qscope = int(scope_strg)
        except ValueError:
            # Not a number, so assume it's a sheet name ...
            qscope = book.sheet_names().index(scope_strg)
            print >> f, "%r => %d" % (scope_strg, qscope)
    for nobj in book.name_obj_list:
        if qscope is not None and nobj.scope != qscope:
            continue
        show_name_object(book, nobj, show_contents, f)
|
||||
|
||||
def show_name_details(book, name, show_contents=0, f=sys.stdout):
    """
    Show every name object registered under *name*, whatever its scope.

    book -- Book object obtained from xlrd.open_workbook().
    name -- The name that's being investigated.
    show_contents -- 0: Don't; 1: Non-empty cells only; 2: All cells
    f -- Open output file handle.
    """
    # Excel names are case-insensitive, so the map is probed in lower case.
    matches = book.name_map.get(name.lower())
    if matches:
        for nobj in matches:
            show_name_object(book, nobj, show_contents, f)
    else:
        print >> f, "%r: unknown name" % name
|
||||
|
||||
def show_name_details_in_scope(
    book, name, scope_strg, show_contents=0, f=sys.stdout,
    ):
    """Show the name object visible for *name* in a given scope.

    book -- Book object providing name_and_scope_map and sheet_names().
    name -- The name that's being investigated.
    scope_strg -- an integer scope in text form, or else a sheet name
        (its index is looked up and echoed to f).
    show_contents -- passed through to show_name_object().
    f -- Open output file handle.

    The requested scope is tried first and then the global scope (-1);
    each miss is reported on f.  Nothing is shown if both lookups fail.
    """
    try:
        first_scope = int(scope_strg)
    except ValueError:
        # Not a number, so assume it's a sheet name ...
        first_scope = book.sheet_names().index(scope_strg)
        print >> f, "%r => %d" % (scope_strg, first_scope)
    key_name = name.lower() # Excel names are case-insensitive.
    lookup = book.name_and_scope_map
    for scope in (first_scope, -1):
        nobj = lookup.get((key_name, scope))
        if nobj:
            break
        print >> f, "Name %r not found in scope %d" % (name, scope)
        if scope == -1:
            # Global scope already tried: nothing more to look at.
            return
    print >> f, "Name %r found in scope %d" % (name, scope)
    show_name_object(book, nobj, show_contents, f)
|
||||
|
||||
def showable_cell_value(celltype, cellvalue, datemode):
    """Convert a raw cell value into something worth printing.

    Date cells become the tuple from xlrd.xldate_as_tuple(), or an
    "ExceptionName:message" string when the conversion fails.  Error cells
    become their text from xlrd.error_text_from_code, with a placeholder
    for unknown codes.  Every other value is returned unchanged.
    """
    if celltype == xlrd.XL_CELL_DATE:
        try:
            return xlrd.xldate_as_tuple(cellvalue, datemode)
        except xlrd.XLDateError:
            exc_type, exc_value = sys.exc_info()[:2]
            return "%s:%s" % (exc_type.__name__, exc_value)
    if celltype == xlrd.XL_CELL_ERROR:
        fallback = '<Unknown error code 0x%02x>' % cellvalue
        return xlrd.error_text_from_code.get(cellvalue, fallback)
    return cellvalue
|
||||
|
||||
def show_name_object(book, nobj, show_contents=0, f=sys.stdout):
    """Print one name object: its name, scope and formula result.

    book -- Book object the name belongs to.
    nobj -- name object providing name, scope and result.
    show_contents -- 0: Don't; 1: Non-empty cells only; 2: All cells
        (only used for results of kind xlrd.oREF).
    f -- Open output file handle.
    """
    scope_text = scope_as_string(book, nobj.scope)
    print >> f, "\nName: %r, scope: %r (%s)" \
        % (nobj.name, nobj.scope, scope_text)
    operand = nobj.result
    print >> f, "Formula eval result: %r" % operand
    if operand is None:
        return
    # result should be an instance of the Operand class
    kind = operand.kind
    ranges = operand.value
    if kind >= 0:
        # A scalar, or unknown ... you've seen all there is to see.
        return
    if kind == xlrd.oREL:
        # A list of Ref3D objects representing *relative* ranges
        for idx, ref3d in enumerate(ranges):
            print >> f, "Range %d: %r ==> %s"% (idx, ref3d.coords, xlrd.rangename3drel(book, ref3d))
        return
    if kind != xlrd.oREF:
        return
    # A list of Ref3D objects
    for idx, ref3d in enumerate(ranges):
        print >> f, "Range %d: %r ==> %s"% (idx, ref3d.coords, xlrd.rangename3d(book, ref3d))
        if not show_contents:
            continue
        datemode = book.datemode
        for shx in xrange(ref3d.shtxlo, ref3d.shtxhi):
            sheet = book.sheet_by_index(shx)
            print >> f, " Sheet #%d (%s)" % (shx, sheet.name)
            # Clip the referenced area to what the sheet actually holds.
            row_stop = min(ref3d.rowxhi, sheet.nrows)
            col_stop = min(ref3d.colxhi, sheet.ncols)
            for rowx in xrange(ref3d.rowxlo, row_stop):
                for colx in xrange(ref3d.colxlo, col_stop):
                    cty = sheet.cell_type(rowx, colx)
                    if cty == xlrd.XL_CELL_EMPTY and show_contents == 1:
                        continue
                    shown = showable_cell_value(
                        cty, sheet.cell_value(rowx, colx), datemode)
                    print >> f, " (%3d,%3d) %-5s: %r" \
                        % (rowx, colx, xlrd.cellname(rowx, colx), shown)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line driver: apply one of the three query styles to every
    # workbook matched by a glob pattern.

    def usage():
        # Write the command-line help to stdout.
        text = """
usage: xlrdnameAPIdemo.py glob_pattern name scope show_contents

where:
"glob_pattern" designates a set of files
"name" is a name or '*' (all names)
"scope" is -1 (global) or a sheet number
or a sheet name or * (all scopes)
"show_contents" is one of 0 (no show),
1 (only non-empty cells), or 2 (all cells)

Examples (script name and glob_pattern arg omitted for brevity)
[Searching through book.name_obj_list]
* * 0 lists all names
* * 1 lists all names, showing referenced non-empty cells
* 1 0 lists all names local to the 2nd sheet
* Northern 0 lists all names local to the 'Northern' sheet
* -1 0 lists all names with global scope
[Initial direct access through book.name_map]
Sales * 0 lists all occurrences of "Sales" in any scope
[Direct access through book.name_and_scope_map]
Revenue -1 0 checks if "Revenue" exists in global scope

"""
        sys.stdout.write(text)

    if len(sys.argv) != 5:
        usage()
        sys.exit(0)
    arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls"
    arg_name = sys.argv[2] # see below
    arg_scope = sys.argv[3] # see below
    arg_show_contents = int(sys.argv[4]) # 0: no show, 1: only non-empty cells,
                                         # 2: all cells
    for fname in glob.glob(arg_pattern):
        book = xlrd.open_workbook(fname)
        if arg_name == "*":
            # Examine book.name_obj_list to find all names
            # in a given scope ("*" => all scopes)
            do_scope_query(book, arg_scope, arg_show_contents)
        elif arg_scope == "*":
            # Using book.name_map to find all usage of a name.
            show_name_details(book, arg_name, arg_show_contents)
        else:
            # Using book.name_and_scope_map to find which if any instances
            # of a name are visible in the given scope, which can be supplied
            # as -1 (global) or a sheet number or a sheet name.
            show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,77 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
"""
|
||||
Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
|
||||
"""
|
||||
/*-
|
||||
* Copyright (c) 2001 David Giffin.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Based on the the Java version: Andrew Khan Copyright (c) 2000.
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this
|
||||
* software must display the following acknowledgment:
|
||||
* "This product includes software developed by
|
||||
* David Giffin <david@giffin.org>."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by
|
||||
* David Giffin <david@giffin.org>."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY DAVID GIFFIN ``AS IS'' AND ANY
|
||||
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID GIFFIN OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
||||
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
"""
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,44 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
##
|
||||
# <p>Copyright © 2006-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
##
|
||||
|
||||
# timemachine.py -- adaptation for earlier Pythons e.g. 2.1
|
||||
# usage: from timemachine import *
|
||||
|
||||
# 2008-02-08 SJM Generalised method of detecting IronPython
|
||||
|
||||
import sys
|
||||
|
||||
# Interpreter version as a (major, minor) pair; every switch below keys on it.
python_version = sys.version_info[:2] # e.g. version 2.4 -> (2, 4)

# Capability flags derived purely from the interpreter version.
CAN_PICKLE_ARRAY = python_version >= (2, 5)
CAN_SUBCLASS_BUILTIN = python_version >= (2, 2)

# array_array is the array.array type, or None when the interpreter
# identifies itself as IronPython (the array module is not imported there).
if sys.version.find("IronPython") >= 0:
    array_array = None
else:
    from array import array as array_array

# Stand-ins for names that interpreters older than 2.2 do not provide.
if python_version < (2, 2):
    class object:
        pass
    False = 0
    True = 1
|
||||
|
||||
def int_floor_div(x, y):
    """Return the floor of x / y, i.e. the quotient part of divmod(x, y)."""
    quotient, _remainder = divmod(x, y)
    return quotient
|
||||
|
||||
def intbool(x):
    """Return the truth value of x as the plain integer 1 or 0."""
    if not x:
        return 0
    return 1
|
||||
|
||||
if python_version < (2, 3):
|
||||
def sum(sequence, start=0):
    """Return start plus every item of sequence, added left to right.

    Fallback for interpreters that lack the sum() builtin.  The original
    loop iterated over an undefined name ("aseq") and so raised NameError
    on every call; it now walks the *sequence* argument.
    """
    tot = start
    for item in sequence:
        tot += item
    return tot
|
||||
@@ -0,0 +1,171 @@
|
||||
# -*- coding: cp1252 -*-
|
||||
|
||||
# No part of the content of this file was derived from the works of David Giffin.
|
||||
|
||||
##
|
||||
# <p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
|
||||
# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
|
||||
#
|
||||
# <p>Provides function(s) for dealing with Microsoft Excel ™ dates.</p>
|
||||
##
|
||||
|
||||
# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099)
|
||||
|
||||
# The conversion from days to (year, month, day) starts with
|
||||
# an integral "julian day number" aka JDN.
|
||||
# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713.
|
||||
# More importantly:
|
||||
# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
|
||||
# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0
|
||||
|
||||
from timemachine import int_floor_div as ifd
|
||||
|
||||
# Julian day number minus Excel day number, indexed by datemode
# (0: 1900-based, 1: 1904-based); the two epochs are 1462 days apart.
_JDN_delta = (2415080 - 61, 2416482 - 1)
assert _JDN_delta[1] - _JDN_delta[0] == 1462


class XLDateError(ValueError):
    """Base class of all errors raised by the xldate functions."""


class XLDateNegative(XLDateError):
    """The Excel date number is negative."""


class XLDateAmbiguous(XLDateError):
    """The 1900 leap-year problem (datemode 0, before day 61)."""


class XLDateTooLarge(XLDateError):
    """The date falls in Gregorian year 10000 or later."""


class XLDateBadDatemode(XLDateError):
    """The datemode argument is neither 0 nor 1."""


class XLDateBadTuple(XLDateError):
    """A date or time tuple has an out-of-range component."""


_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01
|
||||
|
||||
##
|
||||
# Convert an Excel number (presumed to represent a date, a datetime or a time) into
|
||||
# a tuple suitable for feeding to datetime or mx.DateTime constructors.
|
||||
# @param xldate The Excel number
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
# <br>WARNING: when using this function to
|
||||
# interpret the contents of a workbook, you should pass in the Book.datemode
|
||||
# attribute of that workbook. Whether
|
||||
# the workbook has ever been anywhere near a Macintosh is irrelevant.
|
||||
# @return Gregorian (year, month, day, hour, minute, nearest_second).
|
||||
# <br>Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a time;
|
||||
# (0, 0, 0, hour, minute, second) will be returned.
|
||||
# <br>Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number"
|
||||
# is zero.
|
||||
# @throws XLDateNegative xldate < 0.00
|
||||
# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
|
||||
# @throws XLDateTooLarge Gregorian year 10000 or later
|
||||
# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
|
||||
# @throws XLDateError Covers the 4 specific errors
|
||||
|
||||
def xldate_as_tuple(xldate, datemode):
    """Convert an Excel date number into a Gregorian datetime tuple.

    xldate -- the Excel number (date, datetime or time).
    datemode -- 0: 1900-based, 1: 1904-based.

    Returns (year, month, day, hour, minute, second); year, month and day
    are all 0 when the whole-day part of the number is zero.
    Raises XLDateBadDatemode, XLDateNegative, XLDateTooLarge or
    XLDateAmbiguous, as checked below.
    """
    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)
    if xldate == 0.00:
        return (0, 0, 0, 0, 0, 0)
    if xldate < 0.00:
        raise XLDateNegative(xldate)
    # Split into whole days and the fraction of a day, in rounded seconds.
    xldays = int(xldate)
    frac = xldate - xldays
    seconds = int(round(frac * 86400.0))
    assert 0 <= seconds <= 86400
    if seconds == 86400:
        # The fraction rounded up to a full day: carry it into the date.
        hour = minute = second = 0
        xldays += 1
    else:
        # second = seconds % 60; minutes = seconds // 60
        minutes, second = divmod(seconds, 60)
        # minute = minutes % 60; hour = minutes // 60
        hour, minute = divmod(minutes, 60)
    if xldays >= _XLDAYS_TOO_LARGE[datemode]:
        raise XLDateTooLarge(xldate)

    if xldays == 0:
        # Time-only value: no date part to report.
        return (0, 0, 0, hour, minute, second)

    if xldays < 61 and datemode == 0:
        raise XLDateAmbiguous(xldate)

    # Shift to a Julian day number, then decode year/month/day with integer
    # arithmetic only (ifd is floor division).
    jdn = xldays + _JDN_delta[datemode]
    yreg = (ifd(ifd(jdn * 4 + 274277, 146097) * 3, 4) + jdn + 1363) * 4 + 3
    mp = ifd(yreg % 1461, 4) * 535 + 333
    d = ifd(mp % 16384, 535) + 1
    # mp /= 16384
    mp >>= 14
    # mp counts months in a March-based year: values of 10 and above map to
    # months 1 and up of the following calendar year.
    if mp >= 10:
        return (ifd(yreg, 1461) - 4715, mp - 9, d, hour, minute, second)
    else:
        return (ifd(yreg, 1461) - 4716, mp + 3, d, hour, minute, second)
|
||||
|
||||
# === conversions from date/time to xl numbers
|
||||
|
||||
def _leap(y):
|
||||
if y % 4: return 0
|
||||
if y % 100: return 1
|
||||
if y % 400: return 0
|
||||
return 1
|
||||
|
||||
_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
|
||||
|
||||
##
|
||||
# Convert a date tuple (year, month, day) to an Excel date.
|
||||
# @param year Gregorian year.
|
||||
# @param month 1 <= month <= 12
|
||||
# @param day 1 <= day <= last day of that (year, month)
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
|
||||
# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
|
||||
# @throws XLDateBadTuple (year, month, day) is too early/late or has invalid component(s)
|
||||
# @throws XLDateError Covers the specific errors
|
||||
|
||||
def xldate_from_date_tuple((year, month, day), datemode):
    """Convert a (year, month, day) tuple into an Excel date number.

    datemode -- 0: 1900-based, 1: 1904-based.

    Returns the day number as a float; (0, 0, 0) maps to 0.00.
    Raises XLDateBadDatemode, XLDateBadTuple or XLDateAmbiguous, as
    checked below.
    """

    if datemode not in (0, 1):
        raise XLDateBadDatemode(datemode)

    if year == 0 and month == 0 and day == 0:
        return 0.00

    if not (1900 <= year <= 9999):
        raise XLDateBadTuple("Invalid year: %r" % ((year, month, day),))
    if not (1 <= month <= 12):
        raise XLDateBadTuple("Invalid month: %r" % ((year, month, day),))
    # A day beyond the month's usual length is accepted only for
    # 29 February in a leap year.
    if  day < 1 \
    or (day > _days_in_month[month] and not(day == 29 and month == 2 and _leap(year))):
        raise XLDateBadTuple("Invalid day: %r" % ((year, month, day),))

    # Move to a March-based year (January and February count as months
    # 10 and 11 of the previous year) before computing the Julian day number.
    Yp = year + 4716
    M = month
    if M <= 2:
        Yp = Yp - 1
        Mp = M + 9
    else:
        Mp = M - 3
    jdn = ifd(1461 * Yp, 4) + ifd(979 * Mp + 16, 32) + \
        day - 1364 - ifd(ifd(Yp + 184, 100) * 3, 4)
    # Back from Julian day number to the Excel day number for this datemode.
    xldays = jdn - _JDN_delta[datemode]
    if xldays <= 0:
        raise XLDateBadTuple("Invalid (year, month, day): %r" % ((year, month, day),))
    if xldays < 61 and datemode == 0:
        raise XLDateAmbiguous("Before 1900-03-01: %r" % ((year, month, day),))
    return float(xldays)
|
||||
|
||||
##
|
||||
# Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day).
|
||||
# @param hour 0 <= hour < 24
|
||||
# @param minute 0 <= minute < 60
|
||||
# @param second 0 <= second < 60
|
||||
# @throws XLDateBadTuple Out-of-range hour, minute, or second
|
||||
|
||||
def xldate_from_time_tuple((hour, minute, second)):
|
||||
if 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60:
|
||||
return ((second / 60.0 + minute) / 60.0 + hour) / 24.0
|
||||
raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),))
|
||||
|
||||
##
|
||||
# Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value.
|
||||
# For more details, refer to other xldate_from_*_tuple functions.
|
||||
# @param datetime_tuple (year, month, day, hour, minute, second)
|
||||
# @param datemode 0: 1900-based, 1: 1904-based.
|
||||
|
||||
def xldate_from_datetime_tuple(datetime_tuple, datemode):
    """Convert (year, month, day, hour, minute, second) to an Excel date.

    The first three items go to xldate_from_date_tuple() and the rest to
    xldate_from_time_tuple(); the result is the sum of the two values.
    datemode -- 0: 1900-based, 1: 1904-based.
    """
    date_part = xldate_from_date_tuple(datetime_tuple[:3], datemode)
    time_part = xldate_from_time_tuple(datetime_tuple[3:])
    return date_part + time_part
|
||||
@@ -552,6 +552,31 @@ class TablibTestCase(unittest.TestCase):
|
||||
data.sort(target_header)
|
||||
|
||||
self.assertEquals(self.founders[orig_target_header], data[target_header])
|
||||
|
||||
def test_xls_import_set(self):
    """Export a dataset to XLS, import it back, and compare the exports."""
    data.append(self.john)
    data.append(self.george)
    data.headers = self.headers

    # Serialise once, then feed the result straight back in.
    exported = data.xls
    data.xls = exported

    # A second export must match the first one.
    self.assertEqual(exported, data.xls)
|
||||
|
||||
def test_xls_import_book(self):
    """Export a databook to XLS, import it back, and compare the exports."""
    data.append(self.john)
    data.append(self.george)
    data.headers = self.headers

    # Serialise a one-sheet book, then feed the result straight back in.
    book.add_sheet(data)
    exported = book.xls
    book.xls = exported

    # A second export must match the first one.
    self.assertEqual(exported, book.xls)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user