add xlwt 3.x

This commit is contained in:
Kenneth Reitz
2011-03-23 01:13:03 -04:00
parent 4c8b5e72e3
commit 58bc1c7dcf
32 changed files with 11947 additions and 12164 deletions
File diff suppressed because it is too large Load Diff
+258 -262
View File
@@ -1,262 +1,258 @@
# -*- coding: windows-1251 -*-
# Portions are Copyright (C) 2005 Roman V. Kiseliov
# Portions are Copyright (c) 2004 Evgeny Filatov <fufff@users.sourceforge.net>
# Portions are Copyright (c) 2002-2004 John McNamara (Perl Spreadsheet::WriteExcel)
from BIFFRecords import BiffRecord
from struct import *
def _size_col(sheet, col):
return sheet.col_width(col)
def _size_row(sheet, row):
return sheet.row_height(row)
def _position_image(sheet, row_start, col_start, x1, y1, width, height):
"""Calculate the vertices that define the position of the image as required by
the OBJ record.
+------------+------------+
| A | B |
+-----+------------+------------+
| |(x1,y1) | |
| 1 |(A1)._______|______ |
| | | | |
| | | | |
+-----+----| BITMAP |-----+
| | | | |
| 2 | |______________. |
| | | (B2)|
| | | (x2,y2)|
+---- +------------+------------+
Example of a bitmap that covers some of the area from cell A1 to cell B2.
Based on the width and height of the bitmap we need to calculate 8 vars:
col_start, row_start, col_end, row_end, x1, y1, x2, y2.
The width and height of the cells are also variable and have to be taken into
account.
The values of col_start and row_start are passed in from the calling
function. The values of col_end and row_end are calculated by subtracting
the width and height of the bitmap from the width and height of the
underlying cells.
The vertices are expressed as a percentage of the underlying cell width as
follows (rhs values are in pixels):
x1 = X / W *1024
y1 = Y / H *256
x2 = (X-1) / W *1024
y2 = (Y-1) / H *256
Where: X is distance from the left side of the underlying cell
Y is distance from the top of the underlying cell
W is the width of the cell
H is the height of the cell
Note: the SDK incorrectly states that the height should be expressed as a
percentage of 1024.
col_start - Col containing upper left corner of object
row_start - Row containing top left corner of object
x1 - Distance to left side of object
y1 - Distance to top of object
width - Width of image frame
height - Height of image frame
"""
# Adjust start column for offsets that are greater than the col width
while x1 >= _size_col(sheet, col_start):
x1 -= _size_col(sheet, col_start)
col_start += 1
# Adjust start row for offsets that are greater than the row height
while y1 >= _size_row(sheet, row_start):
y1 -= _size_row(sheet, row_start)
row_start += 1
# Initialise end cell to the same as the start cell
row_end = row_start # Row containing bottom right corner of object
col_end = col_start # Col containing lower right corner of object
width = width + x1 - 1
height = height + y1 - 1
# Subtract the underlying cell widths to find the end cell of the image
while (width >= _size_col(sheet, col_end)):
width -= _size_col(sheet, col_end)
col_end += 1
# Subtract the underlying cell heights to find the end cell of the image
while (height >= _size_row(sheet, row_end)):
height -= _size_row(sheet, row_end)
row_end += 1
# Bitmap isn't allowed to start or finish in a hidden cell, i.e. a cell
# with zero height or width.
if ((_size_col(sheet, col_start) == 0) or (_size_col(sheet, col_end) == 0)
or (_size_row(sheet, row_start) == 0) or (_size_row(sheet, row_end) == 0)):
return
# Convert the pixel values to the percentage value expected by Excel
x1 = int(float(x1) / _size_col(sheet, col_start) * 1024)
y1 = int(float(y1) / _size_row(sheet, row_start) * 256)
# Distance to right side of object
x2 = int(float(width) / _size_col(sheet, col_end) * 1024)
# Distance to bottom of object
y2 = int(float(height) / _size_row(sheet, row_end) * 256)
return (col_start, x1, row_start, y1, col_end, x2, row_end, y2)
class ObjBmpRecord(BiffRecord):
    """OBJ record that precedes an IMDATA record and positions the picture.

    This could be generalised to support other Excel objects.
    """

    _REC_ID = 0x005D  # Record identifier

    def __init__(self, row, col, sheet, im_data_bmp, x, y, scale_x, scale_y):
        """Build the record data for a bitmap anchored at (``row``, ``col``).

        ``x`` and ``y`` are the offsets handed to ``_position_image``;
        ``scale_x`` and ``scale_y`` scale the width and height read from
        ``im_data_bmp``.
        """
        # Scale the frame of the image.
        frame_w = im_data_bmp.width * scale_x
        frame_h = im_data_bmp.height * scale_y

        # Calculate the vertices of the image.
        anchors = _position_image(sheet, row, col, x, y, frame_w, frame_h)
        col_l, dx_l, row_t, dy_t, col_r, dx_r, row_b, dy_b = anchors

        # One little-endian pack call; the field order and sizes match the
        # per-field packing of the original implementation.
        self._rec_data = pack(
            "<L12HLH8BHL4HL",
            0x0001,   # cObj: count of objects in file (set to 1)
            0x0008,   # OT: object type, 8 = picture
            0x0001,   # id: object ID
            0x0614,   # grbit: option flags
            col_l,    # colL: col containing upper left corner of object
            dx_l,     # dxL: distance from left side of cell
            row_t,    # rwT: row containing top left corner of object
            dy_t,     # dyT: distance from top of cell
            col_r,    # colR: col containing lower right corner of object
            dx_r,     # dxR: distance from right of cell
            row_b,    # rwB: row containing bottom right corner of object
            dy_b,     # dyB: distance from bottom of cell
            0x0000,   # cbMacro: length of FMLA structure
            0x0000,   # Reserved1
            0x0000,   # Reserved2
            0x09,     # icvBack: background colour
            0x09,     # icvFore: foreground colour
            0x00,     # fls: fill pattern
            0x00,     # fAuto: automatic fill
            0x08,     # icv: line colour
            0xff,     # lns: line style
            0x01,     # lnw: line weight
            0x00,     # fAutoB: automatic border
            0x0000,   # frs: frame style
            0x0009,   # cf: image format, 9 = bitmap
            0x0000,   # Reserved3
            0x0000,   # cbPictFmla: length of FMLA structure
            0x0000,   # Reserved4
            0x0001,   # grbit2: option flags
            0x0000,   # Reserved5
        )
def _process_bitmap(bitmap):
"""Convert a 24 bit bitmap into the modified internal format used by Windows.
This is described in BITMAPCOREHEADER and BITMAPCOREINFO structures in the
MSDN library.
"""
# Open file and binmode the data in case the platform needs it.
fh = file(bitmap, "rb")
try:
# Slurp the file into a string.
data = fh.read()
finally:
fh.close()
# Check that the file is big enough to be a bitmap.
if len(data) <= 0x36:
raise Exception("bitmap doesn't contain enough data.")
# The first 2 bytes are used to identify the bitmap.
if (data[:2] != "BM"):
raise Exception("bitmap doesn't appear to to be a valid bitmap image.")
# Remove bitmap data: ID.
data = data[2:]
# Read and remove the bitmap size. This is more reliable than reading
# the data size at offset 0x22.
#
size = unpack("<L", data[:4])[0]
size -= 0x36 # Subtract size of bitmap header.
size += 0x0C # Add size of BIFF header.
data = data[4:]
# Remove bitmap data: reserved, offset, header length.
data = data[12:]
# Read and remove the bitmap width and height. Verify the sizes.
width, height = unpack("<LL", data[:8])
data = data[8:]
if (width > 0xFFFF):
raise Exception("bitmap: largest image width supported is 65k.")
if (height > 0xFFFF):
raise Exception("bitmap: largest image height supported is 65k.")
# Read and remove the bitmap planes and bpp data. Verify them.
planes, bitcount = unpack("<HH", data[:4])
data = data[4:]
if (bitcount != 24):
raise Exception("bitmap isn't a 24bit true color bitmap.")
if (planes != 1):
raise Exception("bitmap: only 1 plane supported in bitmap image.")
# Read and remove the bitmap compression. Verify compression.
compression = unpack("<L", data[:4])[0]
data = data[4:]
if (compression != 0):
raise Exception("bitmap: compression not supported in bitmap image.")
# Remove bitmap data: data size, hres, vres, colours, imp. colours.
data = data[20:]
# Add the BITMAPCOREHEADER data
header = pack("<LHHHH", 0x000c, width, height, 0x01, 0x18)
data = header + data
return (width, height, size, data)
class ImDataBmpRecord(BiffRecord):
    """IMDATA record carrying the data of a 24bit bitmap image.

    The record must be preceded by an OBJ record that defines the position
    of the image.
    """

    _REC_ID = 0x007F

    def __init__(self, filename):
        """Read the bitmap at ``filename`` and build the record data.

        The width, height and size returned by ``_process_bitmap`` are kept
        on the instance as ``width``, ``height`` and ``size``.
        """
        BiffRecord.__init__(self)
        bmp_width, bmp_height, bmp_size, payload = _process_bitmap(filename)
        self.width = bmp_width
        self.height = bmp_height
        self.size = bmp_size

        # Record data: cf (0x09), env (0x01), lcb (payload size), then payload.
        prefix = pack("<HHL", 0x09, 0x01, self.size)
        self._rec_data = prefix + payload
# Portions are Copyright (C) 2005 Roman V. Kiseliov
# Portions are Copyright (c) 2004 Evgeny Filatov <fufff@users.sourceforge.net>
# Portions are Copyright (c) 2002-2004 John McNamara (Perl Spreadsheet::WriteExcel)
from .BIFFRecords import BiffRecord
from struct import *
def _size_col(sheet, col):
return sheet.col_width(col)
def _size_row(sheet, row):
return sheet.row_height(row)
def _position_image(sheet, row_start, col_start, x1, y1, width, height):
"""Calculate the vertices that define the position of the image as required by
the OBJ record.
+------------+------------+
| A | B |
+-----+------------+------------+
| |(x1,y1) | |
| 1 |(A1)._______|______ |
| | | | |
| | | | |
+-----+----| BITMAP |-----+
| | | | |
| 2 | |______________. |
| | | (B2)|
| | | (x2,y2)|
+---- +------------+------------+
Example of a bitmap that covers some of the area from cell A1 to cell B2.
Based on the width and height of the bitmap we need to calculate 8 vars:
col_start, row_start, col_end, row_end, x1, y1, x2, y2.
The width and height of the cells are also variable and have to be taken into
account.
The values of col_start and row_start are passed in from the calling
function. The values of col_end and row_end are calculated by subtracting
the width and height of the bitmap from the width and height of the
underlying cells.
The vertices are expressed as a percentage of the underlying cell width as
follows (rhs values are in pixels):
x1 = X / W *1024
y1 = Y / H *256
x2 = (X-1) / W *1024
y2 = (Y-1) / H *256
Where: X is distance from the left side of the underlying cell
Y is distance from the top of the underlying cell
W is the width of the cell
H is the height of the cell
Note: the SDK incorrectly states that the height should be expressed as a
percentage of 1024.
col_start - Col containing upper left corner of object
row_start - Row containing top left corner of object
x1 - Distance to left side of object
y1 - Distance to top of object
width - Width of image frame
height - Height of image frame
"""
# Adjust start column for offsets that are greater than the col width
while x1 >= _size_col(sheet, col_start):
x1 -= _size_col(sheet, col_start)
col_start += 1
# Adjust start row for offsets that are greater than the row height
while y1 >= _size_row(sheet, row_start):
y1 -= _size_row(sheet, row_start)
row_start += 1
# Initialise end cell to the same as the start cell
row_end = row_start # Row containing bottom right corner of object
col_end = col_start # Col containing lower right corner of object
width = width + x1 - 1
height = height + y1 - 1
# Subtract the underlying cell widths to find the end cell of the image
while (width >= _size_col(sheet, col_end)):
width -= _size_col(sheet, col_end)
col_end += 1
# Subtract the underlying cell heights to find the end cell of the image
while (height >= _size_row(sheet, row_end)):
height -= _size_row(sheet, row_end)
row_end += 1
# Bitmap isn't allowed to start or finish in a hidden cell, i.e. a cell
# with zero height or width.
if ((_size_col(sheet, col_start) == 0) or (_size_col(sheet, col_end) == 0)
or (_size_row(sheet, row_start) == 0) or (_size_row(sheet, row_end) == 0)):
return
# Convert the pixel values to the percentage value expected by Excel
x1 = int(float(x1) / _size_col(sheet, col_start) * 1024)
y1 = int(float(y1) / _size_row(sheet, row_start) * 256)
# Distance to right side of object
x2 = int(float(width) / _size_col(sheet, col_end) * 1024)
# Distance to bottom of object
y2 = int(float(height) / _size_row(sheet, row_end) * 256)
return (col_start, x1, row_start, y1, col_end, x2, row_end, y2)
class ObjBmpRecord(BiffRecord):
    """OBJ record that precedes an IMDATA record and positions the picture.

    This could be generalised to support other Excel objects.
    """

    _REC_ID = 0x005D  # Record identifier

    def __init__(self, row, col, sheet, im_data_bmp, x, y, scale_x, scale_y):
        """Build the record data for a bitmap anchored at (``row``, ``col``).

        ``x`` and ``y`` are the offsets handed to ``_position_image``;
        ``scale_x`` and ``scale_y`` scale the width and height read from
        ``im_data_bmp``.
        """
        # Scale the frame of the image.
        frame_w = im_data_bmp.width * scale_x
        frame_h = im_data_bmp.height * scale_y

        # Calculate the vertices of the image.
        anchors = _position_image(sheet, row, col, x, y, frame_w, frame_h)
        col_l, dx_l, row_t, dy_t, col_r, dx_r, row_b, dy_b = anchors

        # One little-endian pack call; the field order and sizes match the
        # per-field packing of the original implementation.
        self._rec_data = pack(
            "<L12HLH8BHL4HL",
            0x0001,   # cObj: count of objects in file (set to 1)
            0x0008,   # OT: object type, 8 = picture
            0x0001,   # id: object ID
            0x0614,   # grbit: option flags
            col_l,    # colL: col containing upper left corner of object
            dx_l,     # dxL: distance from left side of cell
            row_t,    # rwT: row containing top left corner of object
            dy_t,     # dyT: distance from top of cell
            col_r,    # colR: col containing lower right corner of object
            dx_r,     # dxR: distance from right of cell
            row_b,    # rwB: row containing bottom right corner of object
            dy_b,     # dyB: distance from bottom of cell
            0x0000,   # cbMacro: length of FMLA structure
            0x0000,   # Reserved1
            0x0000,   # Reserved2
            0x09,     # icvBack: background colour
            0x09,     # icvFore: foreground colour
            0x00,     # fls: fill pattern
            0x00,     # fAuto: automatic fill
            0x08,     # icv: line colour
            0xff,     # lns: line style
            0x01,     # lnw: line weight
            0x00,     # fAutoB: automatic border
            0x0000,   # frs: frame style
            0x0009,   # cf: image format, 9 = bitmap
            0x0000,   # Reserved3
            0x0000,   # cbPictFmla: length of FMLA structure
            0x0000,   # Reserved4
            0x0001,   # grbit2: option flags
            0x0000,   # Reserved5
        )
def _process_bitmap(bitmap):
"""Convert a 24 bit bitmap into the modified internal format used by Windows.
This is described in BITMAPCOREHEADER and BITMAPCOREINFO structures in the
MSDN library.
"""
# Open file and binmode the data in case the platform needs it.
fh = open(bitmap, 'rb')
try:
# Slurp the file into a string.
data = fh.read()
finally:
fh.close()
# Check that the file is big enough to be a bitmap.
if len(data) <= 0x36:
raise Exception("bitmap doesn't contain enough data.")
# The first 2 bytes are used to identify the bitmap.
if (data[:2] != b"BM"):
raise Exception("bitmap doesn't appear to to be a valid bitmap image.")
# Remove bitmap data: ID.
data = data[2:]
# Read and remove the bitmap size. This is more reliable than reading
# the data size at offset 0x22.
#
size = unpack("<L", data[:4])[0]
size -= 0x36 # Subtract size of bitmap header.
size += 0x0C # Add size of BIFF header.
data = data[4:]
# Remove bitmap data: reserved, offset, header length.
data = data[12:]
# Read and remove the bitmap width and height. Verify the sizes.
width, height = unpack("<LL", data[:8])
data = data[8:]
if (width > 0xFFFF):
raise Exception("bitmap: largest image width supported is 65k.")
if (height > 0xFFFF):
raise Exception("bitmap: largest image height supported is 65k.")
# Read and remove the bitmap planes and bpp data. Verify them.
planes, bitcount = unpack("<HH", data[:4])
data = data[4:]
if (bitcount != 24):
raise Exception("bitmap isn't a 24bit true color bitmap.")
if (planes != 1):
raise Exception("bitmap: only 1 plane supported in bitmap image.")
# Read and remove the bitmap compression. Verify compression.
compression = unpack("<L", data[:4])[0]
data = data[4:]
if (compression != 0):
raise Exception("bitmap: compression not supported in bitmap image.")
# Remove bitmap data: data size, hres, vres, colours, imp. colours.
data = data[20:]
# Add the BITMAPCOREHEADER data
header = pack("<LHHHH", 0x000c, width, height, 0x01, 0x18)
data = header + data
return (width, height, size, data)
class ImDataBmpRecord(BiffRecord):
    """IMDATA record carrying the data of a 24bit bitmap image.

    The record must be preceded by an OBJ record that defines the position
    of the image.
    """

    _REC_ID = 0x007F

    def __init__(self, filename):
        """Read the bitmap at ``filename`` and build the record data.

        The width, height and size returned by ``_process_bitmap`` are kept
        on the instance as ``width``, ``height`` and ``size``.
        """
        BiffRecord.__init__(self)
        bmp_width, bmp_height, bmp_size, payload = _process_bitmap(filename)
        self.width = bmp_width
        self.height = bmp_height
        self.size = bmp_size

        # Record data: cf (0x09), env (0x01), lcb (payload size), then payload.
        prefix = pack("<HHL", 0x09, 0x01, self.size)
        self._rec_data = prefix + payload
+233 -243
View File
@@ -1,243 +1,233 @@
# -*- coding: windows-1252 -*-
from struct import unpack, pack
import BIFFRecords
class StrCell(object):
    """Cell that refers to a string by its ``sst_idx`` index."""

    __slots__ = ["rowx", "colx", "xf_idx", "sst_idx"]

    def __init__(self, rowx, colx, xf_idx, sst_idx):
        self.rowx, self.colx = rowx, colx
        self.xf_idx, self.sst_idx = xf_idx, sst_idx

    def get_biff_data(self):
        """Return the packed record: id 0x00FD, length 10, row, col, xf index, sst index."""
        record_head = (0x00FD, 10)
        record_body = (self.rowx, self.colx, self.xf_idx, self.sst_idx)
        return pack('<5HL', *(record_head + record_body))
class BlankCell(object):
    """Cell with no value, carrying only a format (``xf_idx``)."""

    __slots__ = ["rowx", "colx", "xf_idx"]

    def __init__(self, rowx, colx, xf_idx):
        self.rowx, self.colx, self.xf_idx = rowx, colx, xf_idx

    def get_biff_data(self):
        """Return the packed record: id 0x0201, length 6, row, col, xf index."""
        fields = (0x0201, 6, self.rowx, self.colx, self.xf_idx)
        return pack('<5H', *fields)
class MulBlankCell(object):
    """Blank cells spanning columns ``colx1`` to ``colx2`` of one row, sharing one format."""

    __slots__ = ["rowx", "colx1", "colx2", "xf_idx"]

    def __init__(self, rowx, colx1, colx2, xf_idx):
        self.rowx, self.xf_idx = rowx, xf_idx
        self.colx1, self.colx2 = colx1, colx2

    def get_biff_data(self):
        """Return the data produced by ``BIFFRecords.MulBlankRecord`` for this span."""
        record = BIFFRecords.MulBlankRecord(
            self.rowx, self.colx1, self.colx2, self.xf_idx)
        return record.get()
class NumberCell(object):
    """Numeric cell; the value is stored as a float."""

    __slots__ = ["rowx", "colx", "xf_idx", "number"]

    def __init__(self, rowx, colx, xf_idx, number):
        self.rowx, self.colx, self.xf_idx = rowx, colx, xf_idx
        self.number = float(number)

    def get_encoded_data(self):
        """Encode the number as compactly as possible.

        Returns ``(1, rk_value)`` when the number fits an RK encoding, else
        ``(0, packed_record)`` where ``packed_record`` is a complete,
        already packed NUMBER record (id 0x0203).

        The four possible kinds of RK encoding are *not* mutually exclusive;
        the 30-bit integer variety picks up the most.  Only the two integer
        varieties are tried here (SJM 2007-10-01).
        """
        value = self.number

        # Variety 1: the number itself is a 30-bit *signed* integer.
        if -0x20000000 <= value < 0x20000000:
            as_int = int(value)
            if as_int == value:  # survives round-trip
                return 1, 2 | (as_int << 2)

        # Variety 2: the number times 100 is a 30-bit signed integer.
        scaled = value * 100
        if -0x20000000 <= scaled < 0x20000000:
            # Best candidate coded value ...
            candidate = int(round(scaled, 0))
            # ... accepted only if simulated decoding gives the number back.
            if candidate / 100.0 == value:
                return 1, 3 | (candidate << 2)

        # The two float-based RK varieties are deliberately not attempted:
        # the cost of the extra pack+unpack is not justified by the tiny yield.
        return 0, pack('<5Hd', 0x0203, 14, self.rowx, self.colx, self.xf_idx, value)

    def get_biff_data(self):
        """Return the packed RK record (id 0x27E) or the packed NUMBER record."""
        is_rk, payload = self.get_encoded_data()
        if not is_rk:
            return payload  # NUMBER record already packed
        return pack('<5Hi', 0x27E, 10, self.rowx, self.colx, self.xf_idx, payload)
class BooleanCell(object):
    """Cell holding a boolean value in ``number``."""

    __slots__ = ["rowx", "colx", "xf_idx", "number"]

    def __init__(self, rowx, colx, xf_idx, number):
        self.rowx, self.colx = rowx, colx
        self.xf_idx, self.number = xf_idx, number

    def get_biff_data(self):
        """Return the data of a ``BIFFRecords.BoolErrRecord`` built with a final argument of 0."""
        record = BIFFRecords.BoolErrRecord(
            self.rowx, self.colx, self.xf_idx, self.number, 0)
        return record.get()
# Maps both numeric error codes and error strings to the error code that is
# stored in the record.
error_code_map = {
    0x00: 0,  # Intersection of two cell ranges is empty
    0x07: 7,  # Division by zero
    0x0F: 15,  # Wrong type of operand
    0x17: 23,  # Illegal or deleted cell reference
    0x1D: 29,  # Wrong function or range name
    0x24: 36,  # Value range overflow
    0x2A: 42,  # Argument or function not available
    '#NULL!': 0,  # Intersection of two cell ranges is empty
    '#DIV/0!': 7,  # Division by zero
    # Was 36, which is the code for #NUM!; "wrong type of operand" is 0x0F.
    '#VALUE!': 15,  # Wrong type of operand
    '#REF!': 23,  # Illegal or deleted cell reference
    '#NAME?': 29,  # Wrong function or range name
    '#NUM!': 36,  # Value range overflow
    '#N/A': 42,  # Argument or function not available
    '#N/A!': 42,  # Historical spelling, kept for backward compatibility
}


class ErrorCell(object):
    """Cell holding an error value.

    ``error_string_or_code`` may be any key of ``error_code_map``: a numeric
    error code or an error string such as ``'#DIV/0!'``.  The mapped code is
    stored in ``number``.
    """

    __slots__ = ["rowx", "colx", "xf_idx", "number"]

    def __init__(self, rowx, colx, xf_idx, error_string_or_code):
        """Store the position and format and look up the error code.

        Raises ``Exception`` when ``error_string_or_code`` is not a known
        error string or code.
        """
        self.rowx = rowx
        self.colx = colx
        self.xf_idx = xf_idx
        try:
            self.number = error_code_map[error_string_or_code]
        except KeyError:
            # Wrap in a 1-tuple so a tuple argument cannot break the formatting.
            raise Exception('Illegal error value (%r)' % (error_string_or_code,))

    def get_biff_data(self):
        """Return the data of a ``BIFFRecords.BoolErrRecord`` built with a final argument of 1."""
        return BIFFRecords.BoolErrRecord(self.rowx,
            self.colx, self.xf_idx, self.number, 1).get()
class FormulaCell(object):
    """Cell holding a formula object (``frmla``) and its calculation flags."""

    __slots__ = ["rowx", "colx", "xf_idx", "frmla", "calc_flags"]

    def __init__(self, rowx, colx, xf_idx, frmla, calc_flags=0):
        self.rowx, self.colx, self.xf_idx = rowx, colx, xf_idx
        self.frmla, self.calc_flags = frmla, calc_flags

    def get_biff_data(self):
        """Return the data of a ``BIFFRecords.FormulaRecord`` built from ``frmla.rpn()``."""
        rpn_bytes = self.frmla.rpn()
        record = BIFFRecords.FormulaRecord(
            self.rowx, self.colx, self.xf_idx, rpn_bytes, self.calc_flags)
        return record.get()
# module-level function for *internal* use by the Row module
def _get_cells_biff_data_mul(rowx, cell_items):
# Return the BIFF data for all cell records in the row.
# Adjacent BLANK|RK records are combined into MUL(BLANK|RK) records.
pieces = []
nitems = len(cell_items)
i = 0
while i < nitems:
icolx, icell = cell_items[i]
if isinstance(icell, NumberCell):
isRK, value = icell.get_encoded_data()
if not isRK:
pieces.append(value) # pre-packed NUMBER record
i += 1
continue
muldata = [(value, icell.xf_idx)]
target = NumberCell
elif isinstance(icell, BlankCell):
muldata = [icell.xf_idx]
target = BlankCell
else:
pieces.append(icell.get_biff_data())
i += 1
continue
lastcolx = icolx
j = i
packed_record = ''
for j in xrange(i+1, nitems):
jcolx, jcell = cell_items[j]
if jcolx != lastcolx + 1:
nexti = j
break
if not isinstance(jcell, target):
nexti = j
break
if target == NumberCell:
isRK, value = jcell.get_encoded_data()
if not isRK:
packed_record = value
nexti = j + 1
break
muldata.append((value, jcell.xf_idx))
else:
muldata.append(jcell.xf_idx)
lastcolx = jcolx
else:
nexti = j + 1
if target == NumberCell:
if lastcolx == icolx:
# RK record
value, xf_idx = muldata[0]
pieces.append(pack('<5Hi', 0x027E, 10, rowx, icolx, xf_idx, value))
else:
# MULRK record
nc = lastcolx - icolx + 1
pieces.append(pack('<4H', 0x00BD, 6 * nc + 6, rowx, icolx))
pieces.append(''.join([pack('<Hi', xf_idx, value) for value, xf_idx in muldata]))
pieces.append(pack('<H', lastcolx))
else:
if lastcolx == icolx:
# BLANK record
xf_idx = muldata[0]
pieces.append(pack('<5H', 0x0201, 6, rowx, icolx, xf_idx))
else:
# MULBLANK record
nc = lastcolx - icolx + 1
pieces.append(pack('<4H', 0x00BE, 2 * nc + 6, rowx, icolx))
pieces.append(''.join([pack('<H', xf_idx) for xf_idx in muldata]))
pieces.append(pack('<H', lastcolx))
if packed_record:
pieces.append(packed_record)
i = nexti
return ''.join(pieces)
from struct import unpack, pack
from . import BIFFRecords
class StrCell(object):
    """Cell that refers to a string by its ``sst_idx`` index."""

    __slots__ = ["rowx", "colx", "xf_idx", "sst_idx"]

    def __init__(self, rowx, colx, xf_idx, sst_idx):
        self.rowx, self.colx = rowx, colx
        self.xf_idx, self.sst_idx = xf_idx, sst_idx

    def get_biff_data(self):
        """Return the packed record: id 0x00FD, length 10, row, col, xf index, sst index."""
        record_head = (0x00FD, 10)
        record_body = (self.rowx, self.colx, self.xf_idx, self.sst_idx)
        return pack('<5HL', *(record_head + record_body))
class BlankCell(object):
    """Cell with no value, carrying only a format (``xf_idx``)."""

    __slots__ = ["rowx", "colx", "xf_idx"]

    def __init__(self, rowx, colx, xf_idx):
        self.rowx, self.colx, self.xf_idx = rowx, colx, xf_idx

    def get_biff_data(self):
        """Return the packed record: id 0x0201, length 6, row, col, xf index."""
        fields = (0x0201, 6, self.rowx, self.colx, self.xf_idx)
        return pack('<5H', *fields)
class MulBlankCell(object):
    """Blank cells spanning columns ``colx1`` to ``colx2`` of one row, sharing one format."""

    __slots__ = ["rowx", "colx1", "colx2", "xf_idx"]

    def __init__(self, rowx, colx1, colx2, xf_idx):
        self.rowx, self.xf_idx = rowx, xf_idx
        self.colx1, self.colx2 = colx1, colx2

    def get_biff_data(self):
        """Return the data produced by ``BIFFRecords.MulBlankRecord`` for this span."""
        record = BIFFRecords.MulBlankRecord(
            self.rowx, self.colx1, self.colx2, self.xf_idx)
        return record.get()
class NumberCell(object):
    """Numeric cell; the value is stored as a float."""

    __slots__ = ["rowx", "colx", "xf_idx", "number"]

    def __init__(self, rowx, colx, xf_idx, number):
        self.rowx, self.colx, self.xf_idx = rowx, colx, xf_idx
        self.number = float(number)

    def get_encoded_data(self):
        """Encode the number as compactly as possible.

        Returns ``(1, rk_value)`` when the number fits an RK encoding, else
        ``(0, packed_record)`` where ``packed_record`` is a complete,
        already packed NUMBER record (id 0x0203).

        The four possible kinds of RK encoding are *not* mutually exclusive;
        the 30-bit integer variety picks up the most.  Only the two integer
        varieties are tried here (SJM 2007-10-01).
        """
        value = self.number

        # Variety 1: the number itself is a 30-bit *signed* integer.
        if -0x20000000 <= value < 0x20000000:
            as_int = int(value)
            if as_int == value:  # survives round-trip
                return 1, 2 | (as_int << 2)

        # Variety 2: the number times 100 is a 30-bit signed integer.
        scaled = value * 100
        if -0x20000000 <= scaled < 0x20000000:
            # Best candidate coded value ...
            candidate = int(round(scaled, 0))
            # ... accepted only if simulated decoding gives the number back.
            if candidate / 100.0 == value:
                return 1, 3 | (candidate << 2)

        # The two float-based RK varieties are deliberately not attempted:
        # the cost of the extra pack+unpack is not justified by the tiny yield.
        return 0, pack('<5Hd', 0x0203, 14, self.rowx, self.colx, self.xf_idx, value)

    def get_biff_data(self):
        """Return the packed RK record (id 0x27E) or the packed NUMBER record."""
        is_rk, payload = self.get_encoded_data()
        if not is_rk:
            return payload  # NUMBER record already packed
        return pack('<5Hi', 0x27E, 10, self.rowx, self.colx, self.xf_idx, payload)
class BooleanCell(object):
    """Cell holding a boolean value in ``number``."""

    __slots__ = ["rowx", "colx", "xf_idx", "number"]

    def __init__(self, rowx, colx, xf_idx, number):
        self.rowx, self.colx = rowx, colx
        self.xf_idx, self.number = xf_idx, number

    def get_biff_data(self):
        """Return the data of a ``BIFFRecords.BoolErrRecord`` built with a final argument of 0."""
        record = BIFFRecords.BoolErrRecord(
            self.rowx, self.colx, self.xf_idx, self.number, 0)
        return record.get()
# Maps both numeric error codes and error strings to the error code that is
# stored in the record.
error_code_map = {
    0x00: 0,  # Intersection of two cell ranges is empty
    0x07: 7,  # Division by zero
    0x0F: 15,  # Wrong type of operand
    0x17: 23,  # Illegal or deleted cell reference
    0x1D: 29,  # Wrong function or range name
    0x24: 36,  # Value range overflow
    0x2A: 42,  # Argument or function not available
    '#NULL!': 0,  # Intersection of two cell ranges is empty
    '#DIV/0!': 7,  # Division by zero
    # Was 36, which is the code for #NUM!; "wrong type of operand" is 0x0F.
    '#VALUE!': 15,  # Wrong type of operand
    '#REF!': 23,  # Illegal or deleted cell reference
    '#NAME?': 29,  # Wrong function or range name
    '#NUM!': 36,  # Value range overflow
    '#N/A': 42,  # Argument or function not available
    '#N/A!': 42,  # Historical spelling, kept for backward compatibility
}


class ErrorCell(object):
    """Cell holding an error value.

    ``error_string_or_code`` may be any key of ``error_code_map``: a numeric
    error code or an error string such as ``'#DIV/0!'``.  The mapped code is
    stored in ``number``.
    """

    __slots__ = ["rowx", "colx", "xf_idx", "number"]

    def __init__(self, rowx, colx, xf_idx, error_string_or_code):
        """Store the position and format and look up the error code.

        Raises ``Exception`` when ``error_string_or_code`` is not a known
        error string or code.
        """
        self.rowx = rowx
        self.colx = colx
        self.xf_idx = xf_idx
        try:
            self.number = error_code_map[error_string_or_code]
        except KeyError:
            # Wrap in a 1-tuple so a tuple argument cannot break the formatting.
            raise Exception('Illegal error value (%r)' % (error_string_or_code,))

    def get_biff_data(self):
        """Return the data of a ``BIFFRecords.BoolErrRecord`` built with a final argument of 1."""
        return BIFFRecords.BoolErrRecord(self.rowx,
            self.colx, self.xf_idx, self.number, 1).get()
class FormulaCell(object):
    """Cell holding a formula object (``frmla``) and its calculation flags."""

    __slots__ = ["rowx", "colx", "xf_idx", "frmla", "calc_flags"]

    def __init__(self, rowx, colx, xf_idx, frmla, calc_flags=0):
        self.rowx, self.colx, self.xf_idx = rowx, colx, xf_idx
        self.frmla, self.calc_flags = frmla, calc_flags

    def get_biff_data(self):
        """Return the data of a ``BIFFRecords.FormulaRecord`` built from ``frmla.rpn()``."""
        rpn_bytes = self.frmla.rpn()
        record = BIFFRecords.FormulaRecord(
            self.rowx, self.colx, self.xf_idx, rpn_bytes, self.calc_flags)
        return record.get()
# module-level function for *internal* use by the Row module
def _get_cells_biff_data_mul(rowx, cell_items):
# Return the BIFF data for all cell records in the row.
# Adjacent BLANK|RK records are combined into MUL(BLANK|RK) records.
pieces = []
nitems = len(cell_items)
i = 0
while i < nitems:
icolx, icell = cell_items[i]
if isinstance(icell, NumberCell):
isRK, value = icell.get_encoded_data()
if not isRK:
pieces.append(value) # pre-packed NUMBER record
i += 1
continue
muldata = [(value, icell.xf_idx)]
target = NumberCell
elif isinstance(icell, BlankCell):
muldata = [icell.xf_idx]
target = BlankCell
else:
pieces.append(icell.get_biff_data())
i += 1
continue
lastcolx = icolx
j = i
packed_record = b'' # (to_py3): 'b' binary data
for j in range(i+1, nitems):
jcolx, jcell = cell_items[j]
if jcolx != lastcolx + 1:
nexti = j
break
if not isinstance(jcell, target):
nexti = j
break
if target == NumberCell:
isRK, value = jcell.get_encoded_data()
if not isRK:
packed_record = value
nexti = j + 1
break
muldata.append((value, jcell.xf_idx))
else:
muldata.append(jcell.xf_idx)
lastcolx = jcolx
else:
nexti = j + 1
if target == NumberCell:
if lastcolx == icolx:
# RK record
value, xf_idx = muldata[0]
pieces.append(pack('<5Hi', 0x027E, 10, rowx, icolx, xf_idx, value))
else:
# MULRK record
nc = lastcolx - icolx + 1
pieces.append(pack('<4H', 0x00BD, 6 * nc + 6, rowx, icolx))
# (to_py3): 'b' binary data
pieces.append(b''.join([pack('<Hi', xf_idx, value) for value, xf_idx in muldata]))
pieces.append(pack('<H', lastcolx))
else:
if lastcolx == icolx:
# BLANK record
xf_idx = muldata[0]
pieces.append(pack('<5H', 0x0201, 6, rowx, icolx, xf_idx))
else:
# MULBLANK record
nc = lastcolx - icolx + 1
pieces.append(pack('<4H', 0x00BE, 2 * nc + 6, rowx, icolx))
# (to_py3): 'b' binary data
pieces.append(b''.join([pack('<H', xf_idx) for xf_idx in muldata]))
pieces.append(pack('<H', lastcolx))
if packed_record:
pieces.append(packed_record)
i = nexti
return b''.join(pieces) # (to_py3): 'b' binary data
+34 -34
View File
@@ -1,34 +1,34 @@
# -*- coding: windows-1252 -*-
from BIFFRecords import ColInfoRecord
class Column(object):
    """Formatting information for one worksheet column."""

    def __init__(self, colx, parent_sheet):
        """Create the column with index ``colx`` (an int in ``range(256)``).

        Raises ``ValueError`` for any other index.  The parent workbook is
        taken from ``parent_sheet.get_parent()``.
        """
        index_ok = isinstance(colx, int) and 0 <= colx <= 255
        if not index_ok:
            raise ValueError("column index (%r) not an int in range(256)" % colx)
        self._index = colx
        self._parent = parent_sheet
        self._parent_wb = parent_sheet.get_parent()
        self._xf_index = 0x0F
        self.width = 0x0B92
        self.hidden = self.level = self.collapse = 0

    def set_style(self, style):
        """Register ``style`` with the parent workbook and use the index it returns."""
        workbook = self._parent_wb
        self._xf_index = workbook.add_style(style)

    def width_in_pixels(self):
        """Return the column width converted to pixels (an approximation)."""
        approx = self.width * 0.0272 + 0.446
        return int(round(approx, 0))

    def get_biff_record(self):
        """Return the data of the ``ColInfoRecord`` for this single column.

        The option word holds ``hidden`` in bit 0, ``level`` in bits 8-10
        and ``collapse`` in bit 12.
        """
        hidden_bit = (self.hidden & 0x01) << 0
        level_bits = (self.level & 0x07) << 8
        collapse_bit = (self.collapse & 0x01) << 12
        options = hidden_bit | level_bits | collapse_bit
        record = ColInfoRecord(
            self._index, self._index, self.width, self._xf_index, options)
        return record.get()
# -*- coding: windows-1252 -*-
from .BIFFRecords import ColInfoRecord
class Column(object):
    """Formatting information for one worksheet column."""

    def __init__(self, colx, parent_sheet):
        """Create the column with index ``colx`` (an int in ``range(256)``).

        Raises ``ValueError`` for any other index.  The parent workbook is
        taken from ``parent_sheet.get_parent()``.
        """
        index_ok = isinstance(colx, int) and 0 <= colx <= 255
        if not index_ok:
            raise ValueError("column index (%r) not an int in range(256)" % colx)
        self._index = colx
        self._parent = parent_sheet
        self._parent_wb = parent_sheet.get_parent()
        self._xf_index = 0x0F
        self.width = 0x0B92
        self.hidden = self.level = self.collapse = 0

    def set_style(self, style):
        """Register ``style`` with the parent workbook and use the index it returns."""
        workbook = self._parent_wb
        self._xf_index = workbook.add_style(style)

    def width_in_pixels(self):
        """Return the column width converted to pixels (an approximation)."""
        approx = self.width * 0.0272 + 0.446
        return int(round(approx, 0))

    def get_biff_record(self):
        """Return the data of the ``ColInfoRecord`` for this single column.

        The option word holds ``hidden`` in bit 0, ``level`` in bits 8-10
        and ``collapse`` in bit 12.
        """
        hidden_bit = (self.hidden & 0x01) << 0
        level_bits = (self.level & 0x07) << 8
        collapse_bit = (self.collapse & 0x01) << 12
        options = hidden_bit | level_bits | collapse_bit
        record = ColInfoRecord(
            self._index, self._index, self.width, self._xf_index, options)
        return record.get()
File diff suppressed because it is too large Load Diff
+41 -43
View File
@@ -1,43 +1,41 @@
# -*- coding: windows-1252 -*-
import ExcelFormulaParser, ExcelFormulaLexer
import struct
from antlr import ANTLRException
class Formula(object):
    """An Excel formula parsed from its text form into an RPN token string.

    The text is tokenised by ``ExcelFormulaLexer.Lexer`` and parsed by
    ``ExcelFormulaParser.Parser``; the parser object keeps the RPN data and
    the sheet / external-call references collected while parsing.
    """

    # "__init__" must not be listed here: it is a method, not an instance
    # attribute, and Python 3 rejects a slot name that clashes with a class
    # variable (ValueError at class-creation time).
    __slots__ = ["__s", "__parser", "__sheet_refs", "__xcall_refs"]

    def __init__(self, s):
        """Parse formula text *s*.

        Raises:
            ExcelFormulaParser.FormulaParseException: if the parser raises an
                ``ANTLRException`` for *s*.
        """
        try:
            self.__s = s
            lexer = ExcelFormulaLexer.Lexer(s)
            self.__parser = ExcelFormulaParser.Parser(lexer)
            self.__parser.formula()
            self.__sheet_refs = self.__parser.sheet_references
            self.__xcall_refs = self.__parser.xcall_references
        except ANTLRException:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ExcelFormulaParser.FormulaParseException("can't parse formula " + s)

    def get_references(self):
        """Return the ``(sheet_references, xcall_references)`` pair."""
        return self.__sheet_refs, self.__xcall_refs

    def patch_references(self, patches):
        """Overwrite 2-byte fields of the RPN data.

        *patches* is an iterable of ``(offset, idx)`` pairs; the two bytes at
        *offset* are replaced by *idx* packed as a little-endian unsigned
        16-bit integer.
        """
        for offset, idx in patches:
            self.__parser.rpn = self.__parser.rpn[:offset] + struct.pack('<H', idx) + self.__parser.rpn[offset+2:]

    def text(self):
        """Return the original formula text."""
        return self.__s

    def rpn(self):
        '''
        Return the RPN data prefixed with its length.

        Offset    Size    Contents
        0         2       Size of the following formula data (sz)
        2         sz      Formula data (RPN token array)
        [2+sz]    var.    (optional) Additional data for specific tokens
        '''
        return struct.pack("<H", len(self.__parser.rpn)) + self.__parser.rpn
from . import ExcelFormulaParser, ExcelFormulaLexer
import struct
from .antlr import ANTLRException
class Formula(object):
    """An Excel formula parsed from its text form into an RPN token string.

    The text is tokenised by ``ExcelFormulaLexer.Lexer`` and parsed by
    ``ExcelFormulaParser.Parser``; the parser object keeps the RPN data and
    the sheet / external-call references collected while parsing.
    """

    # "__init__" must not be listed here: it is a method, not an instance
    # attribute, and Python 3 rejects a slot name that clashes with a class
    # variable (ValueError at class-creation time).
    __slots__ = ["__s", "__parser", "__sheet_refs", "__xcall_refs"]

    def __init__(self, s):
        """Parse formula text *s*.

        Raises:
            ExcelFormulaParser.FormulaParseException: if the parser raises an
                ``ANTLRException`` for *s*; the original exception is chained
                as the cause.
        """
        try:
            self.__s = s
            lexer = ExcelFormulaLexer.Lexer(s)
            self.__parser = ExcelFormulaParser.Parser(lexer)
            self.__parser.formula()
            self.__sheet_refs = self.__parser.sheet_references
            self.__xcall_refs = self.__parser.xcall_references
        except ANTLRException as e:
            raise ExcelFormulaParser.FormulaParseException("can't parse formula " + s) from e

    def get_references(self):
        """Return the ``(sheet_references, xcall_references)`` pair."""
        return self.__sheet_refs, self.__xcall_refs

    def patch_references(self, patches):
        """Overwrite 2-byte fields of the RPN data.

        *patches* is an iterable of ``(offset, idx)`` pairs; the two bytes at
        *offset* are replaced by *idx* packed as a little-endian unsigned
        16-bit integer.
        """
        for offset, idx in patches:
            self.__parser.rpn = self.__parser.rpn[:offset] + struct.pack('<H', idx) + self.__parser.rpn[offset+2:]

    def text(self):
        """Return the original formula text."""
        return self.__s

    def rpn(self):
        '''
        Return the RPN data prefixed with its length.

        Offset    Size    Contents
        0         2       Size of the following formula data (sz)
        2         sz      Formula data (RPN token array)
        [2+sz]    var.    (optional) Additional data for specific tokens
        '''
        return struct.pack("<H", len(self.__parser.rpn)) + self.__parser.rpn
+126 -128
View File
@@ -1,128 +1,126 @@
# -*- coding: windows-1252 -*-
import sys
from antlr import EOF, CommonToken as Tok, TokenStream, TokenStreamException
import struct
import ExcelFormulaParser
from re import compile as recompile, match, LOCALE, UNICODE, IGNORECASE, VERBOSE
# --- Regular-expression fragments for the multi-character tokens -----------
# Each fragment becomes one capturing group of the combined pattern built
# below, so the fragments themselves must contain no capturing groups.
# Integer constant: digits ending at a word boundary.
int_const_pattern = r"\d+\b"
# Floating-point constant (written for re.VERBOSE: whitespace and '#'
# comments inside the pattern are ignored by the regex engine).
flt_const_pattern = r"""
(?:
(?: \d* \. \d+ ) # .1 .12 .123 etc 9.1 etc 98.1 etc
|
(?: \d+ \. ) # 1. 12. 123. etc
)
# followed by optional exponent part
(?: [Ee] [+-]? \d+ ) ?
"""
# Double-quoted string constant; a doubled quote ("") is an embedded quote.
str_const_pattern = r'"(?:[^"]|"")*"'
#range2d_pattern = recompile(r"\$?[A-I]?[A-Z]\$?\d+:\$?[A-I]?[A-Z]\$?\d+"
# Cell reference in R1C1 notation (row and column numbers start at 1).
ref2d_r1c1_pattern = r"[Rr]0*[1-9][0-9]*[Cc]0*[1-9][0-9]*"
# Cell reference in A1 notation, with optional '$' before column and row.
ref2d_pattern = r"\$?[A-I]?[A-Z]\$?0*[1-9][0-9]*"
# Keywords that get their own token types.
true_pattern = r"TRUE\b"
false_pattern = r"FALSE\b"
if_pattern = r"IF\b"
choose_pattern = r"CHOOSE\b"
# Plain name: a word character followed by word characters or dots.
name_pattern = r"\w[\.\w]*"
quotename_pattern = r"'(?:[^']|'')*'" #### It's essential that this bracket be non-grouping.
# Two-character comparison operators.
ne_pattern = r"<>"
ge_pattern = r">="
le_pattern = r"<="
# (pattern, token type) pairs. The order is significant: the patterns are
# joined into one alternation and the regex engine takes the first
# alternative that matches, so e.g. the float pattern is listed before the
# integer pattern and the keywords before the generic name pattern.
pattern_type_tuples = (
(flt_const_pattern, ExcelFormulaParser.NUM_CONST),
(int_const_pattern, ExcelFormulaParser.INT_CONST),
(str_const_pattern, ExcelFormulaParser.STR_CONST),
# (range2d_pattern , ExcelFormulaParser.RANGE2D),
(ref2d_r1c1_pattern, ExcelFormulaParser.REF2D_R1C1),
(ref2d_pattern , ExcelFormulaParser.REF2D),
(true_pattern , ExcelFormulaParser.TRUE_CONST),
(false_pattern , ExcelFormulaParser.FALSE_CONST),
(if_pattern , ExcelFormulaParser.FUNC_IF),
(choose_pattern , ExcelFormulaParser.FUNC_CHOOSE),
(name_pattern , ExcelFormulaParser.NAME),
(quotename_pattern, ExcelFormulaParser.QUOTENAME),
(ne_pattern, ExcelFormulaParser.NE),
(ge_pattern, ExcelFormulaParser.GE),
(le_pattern, ExcelFormulaParser.LE),
)
# One combined pattern: "(p1)|(p2)|...". The index of the group that matched
# (match.lastindex) selects the token type from _toktype.
_re = recompile(
'(' + ')|('.join([i[0] for i in pattern_type_tuples]) + ')',
VERBOSE+LOCALE+IGNORECASE)
_toktype = [None] + [i[1] for i in pattern_type_tuples]
# need dummy at start because re.MatchObject.lastindex counts from 1
# Token types of the single-character tokens, tried by Lexer.nextToken only
# when the combined pattern above does not match.
single_char_lookup = {
'=': ExcelFormulaParser.EQ,
'<': ExcelFormulaParser.LT,
'>': ExcelFormulaParser.GT,
'+': ExcelFormulaParser.ADD,
'-': ExcelFormulaParser.SUB,
'*': ExcelFormulaParser.MUL,
'/': ExcelFormulaParser.DIV,
':': ExcelFormulaParser.COLON,
';': ExcelFormulaParser.SEMICOLON,
',': ExcelFormulaParser.COMMA,
'(': ExcelFormulaParser.LP,
')': ExcelFormulaParser.RP,
'&': ExcelFormulaParser.CONCAT,
'%': ExcelFormulaParser.PERCENT,
'^': ExcelFormulaParser.POWER,
'!': ExcelFormulaParser.BANG,
}
class Lexer(TokenStream):
    """Hand-written tokenizer for Excel formula text.

    ``nextToken`` returns one token per call. Multi-character tokens are
    recognised with the module-level combined regular expression ``_re``,
    single-character tokens with ``single_char_lookup``. Token columns are
    1-based.
    """

    def __init__(self, text):
        """Start tokenizing *text* at its first character."""
        self._text = text[:]
        self._pos = 0   # 0-based index of the next unread character
        self._line = 0

    def isEOF(self):
        """Return True when every character has been consumed."""
        return len(self._text) <= self._pos

    def curr_ch(self):
        """Return the character at the current position."""
        return self._text[self._pos]

    def next_ch(self, n = 1):
        """Advance the current position by *n* characters."""
        self._pos += n

    def is_whitespace(self):
        """Return True if the current character is whitespace."""
        return self.curr_ch() in " \t\n\r\f\v"

    def match_pattern(self):
        """Try to match a multi-character token at the current position.

        Returns the token and advances past it, or returns None (position
        unchanged) when no pattern matches.
        """
        m = _re.match(self._text, self._pos)
        if not m:
            return None
        self._pos = m.end(0)
        # m.lastindex is the number of the alternative that matched; it
        # indexes _toktype directly (entry 0 is a dummy).
        return Tok(type = _toktype[m.lastindex], text = m.group(0), col = m.start(0) + 1)

    def nextToken(self):
        """Return the next token, or an EOF token at the end of the text.

        Raises:
            TokenStreamException: if the current character starts no known
                token. The reported column is 1-based, like token columns.
        """
        # skip whitespace
        while not self.isEOF() and self.is_whitespace():
            self.next_ch()
        if self.isEOF():
            return Tok(type = EOF)
        # first, try to match token with 2 or more chars
        t = self.match_pattern()
        if t:
            return t
        # second, we want 1-char tokens
        te = self.curr_ch()
        try:
            ty = single_char_lookup[te]
        except KeyError:
            # self._pos is a 0-based index; add 1 so the message uses the
            # same 1-based column numbering as the tokens.
            raise TokenStreamException(
                "Unexpected char %r in column %u." % (te, self._pos + 1))
        self.next_ch()
        # After next_ch() the position equals the 1-based column of te.
        return Tok(type=ty, text=te, col=self._pos)
# Manual smoke test (Python 2 print statements): tokenize a sample string
# containing one example of most multi-character token kinds and print each
# token; a tokenizing error is printed instead of propagating.
if __name__ == '__main__':
try:
for t in Lexer(""" 1.23 456 "abcd" R2C2 a1 iv65536 true false if choose a_name 'qname' <> >= <= """):
print t
except TokenStreamException, e:
print "error:", e
import sys
from .antlr import EOF, CommonToken as Tok, TokenStream, TokenStreamException
import struct
from . import ExcelFormulaParser
from re import compile as recompile, match, LOCALE, UNICODE, IGNORECASE, VERBOSE
# --- Regular-expression fragments for the multi-character tokens -----------
# Each fragment becomes one capturing group of a combined pattern, so the
# fragments themselves must contain no capturing groups.
# Integer constant: digits ending at a word boundary.
int_const_pattern = r"\d+\b"
# Floating-point constant (written for re.VERBOSE: whitespace and '#'
# comments inside the pattern are ignored by the regex engine).
flt_const_pattern = r"""
(?:
(?: \d* \. \d+ ) # .1 .12 .123 etc 9.1 etc 98.1 etc
|
(?: \d+ \. ) # 1. 12. 123. etc
)
# followed by optional exponent part
(?: [Ee] [+-]? \d+ ) ?
"""
# Double-quoted string constant; a doubled quote ("") is an embedded quote.
str_const_pattern = r'"(?:[^"]|"")*"'
#range2d_pattern = recompile(r"\$?[A-I]?[A-Z]\$?\d+:\$?[A-I]?[A-Z]\$?\d+"
# Cell reference in R1C1 notation (row and column numbers start at 1).
ref2d_r1c1_pattern = r"[Rr]0*[1-9][0-9]*[Cc]0*[1-9][0-9]*"
# Cell reference in A1 notation, with optional '$' before column and row.
ref2d_pattern = r"\$?[A-I]?[A-Z]\$?0*[1-9][0-9]*"
# Keywords that get their own token types.
true_pattern = r"TRUE\b"
false_pattern = r"FALSE\b"
if_pattern = r"IF\b"
choose_pattern = r"CHOOSE\b"
# Plain name: a word character followed by word characters or dots.
name_pattern = r"\w[\.\w]*"
quotename_pattern = r"'(?:[^']|'')*'" #### It's essential that this bracket be non-grouping.
# Two-character comparison operators.
ne_pattern = r"<>"
ge_pattern = r">="
le_pattern = r"<="
# (pattern, token type) pairs. The order is significant: the patterns are
# joined into one alternation and the regex engine takes the first
# alternative that matches, so e.g. the float pattern is listed before the
# integer pattern and the keywords before the generic name pattern.
pattern_type_tuples = (
(flt_const_pattern, ExcelFormulaParser.NUM_CONST),
(int_const_pattern, ExcelFormulaParser.INT_CONST),
(str_const_pattern, ExcelFormulaParser.STR_CONST),
# (range2d_pattern , ExcelFormulaParser.RANGE2D),
(ref2d_r1c1_pattern, ExcelFormulaParser.REF2D_R1C1),
(ref2d_pattern , ExcelFormulaParser.REF2D),
(true_pattern , ExcelFormulaParser.TRUE_CONST),
(false_pattern , ExcelFormulaParser.FALSE_CONST),
(if_pattern , ExcelFormulaParser.FUNC_IF),
(choose_pattern , ExcelFormulaParser.FUNC_CHOOSE),
(name_pattern , ExcelFormulaParser.NAME),
(quotename_pattern, ExcelFormulaParser.QUOTENAME),
(ne_pattern, ExcelFormulaParser.NE),
(ge_pattern, ExcelFormulaParser.GE),
(le_pattern, ExcelFormulaParser.LE),
)
# One combined pattern: "(p1)|(p2)|...". The number of the group that matched
# (match.lastindex) selects the token type from _toktype.
# re.LOCALE is deliberately not used: on Python 3 it is only allowed with
# bytes patterns, and combining it with these str patterns raises ValueError
# when the module is imported.
_re = recompile(
    '(' + ')|('.join([i[0] for i in pattern_type_tuples]) + ')',
    VERBOSE+IGNORECASE)
# need dummy at start because match.lastindex counts from 1
_toktype = [None] + [i[1] for i in pattern_type_tuples]
# Token types of the single-character tokens, tried by Lexer.nextToken only
# when the combined multi-character pattern does not match.
single_char_lookup = {
'=': ExcelFormulaParser.EQ,
'<': ExcelFormulaParser.LT,
'>': ExcelFormulaParser.GT,
'+': ExcelFormulaParser.ADD,
'-': ExcelFormulaParser.SUB,
'*': ExcelFormulaParser.MUL,
'/': ExcelFormulaParser.DIV,
':': ExcelFormulaParser.COLON,
';': ExcelFormulaParser.SEMICOLON,
',': ExcelFormulaParser.COMMA,
'(': ExcelFormulaParser.LP,
')': ExcelFormulaParser.RP,
'&': ExcelFormulaParser.CONCAT,
'%': ExcelFormulaParser.PERCENT,
'^': ExcelFormulaParser.POWER,
'!': ExcelFormulaParser.BANG,
}
class Lexer(TokenStream):
    """Hand-written tokenizer for Excel formula text.

    ``nextToken`` returns one token per call. Multi-character tokens are
    recognised with the module-level combined regular expression ``_re``,
    single-character tokens with ``single_char_lookup``. Token columns are
    1-based.
    """

    def __init__(self, text):
        """Start tokenizing *text* at its first character."""
        self._text = text[:]
        self._pos = 0   # 0-based index of the next unread character
        self._line = 0

    def isEOF(self):
        """Return True when every character has been consumed."""
        return len(self._text) <= self._pos

    def curr_ch(self):
        """Return the character at the current position."""
        return self._text[self._pos]

    def next_ch(self, n = 1):
        """Advance the current position by *n* characters."""
        self._pos += n

    def is_whitespace(self):
        """Return True if the current character is whitespace."""
        return self.curr_ch() in " \t\n\r\f\v"

    def match_pattern(self):
        """Try to match a multi-character token at the current position.

        Returns the token and advances past it, or returns None (position
        unchanged) when no pattern matches.
        """
        m = _re.match(self._text, self._pos)
        if not m:
            return None
        self._pos = m.end(0)
        # m.lastindex is the number of the alternative that matched; it
        # indexes _toktype directly (entry 0 is a dummy).
        return Tok(type = _toktype[m.lastindex], text = m.group(0), col = m.start(0) + 1)

    def nextToken(self):
        """Return the next token, or an EOF token at the end of the text.

        Raises:
            TokenStreamException: if the current character starts no known
                token. The reported column is 1-based, like token columns.
        """
        # skip whitespace
        while not self.isEOF() and self.is_whitespace():
            self.next_ch()
        if self.isEOF():
            return Tok(type = EOF)
        # first, try to match token with 2 or more chars
        t = self.match_pattern()
        if t:
            return t
        # second, we want 1-char tokens
        te = self.curr_ch()
        try:
            ty = single_char_lookup[te]
        except KeyError:
            # self._pos is a 0-based index; add 1 so the message uses the
            # same 1-based column numbering as the tokens.
            raise TokenStreamException(
                "Unexpected char %r in column %u." % (te, self._pos + 1))
        self.next_ch()
        # After next_ch() the position equals the 1-based column of te.
        return Tok(type=ty, text=te, col=self._pos)
if __name__ == '__main__':
    # Manual smoke test: tokenize a sample containing one example of most
    # multi-character token kinds and print every token; a tokenizing error
    # is printed instead of propagating.
    try:
        sample_tokens = Lexer(""" 1.23 456 "abcd" R2C2 a1 iv65536 true false if choose a_name 'qname' <> >= <= """)
        for token in sample_tokens:
            print(token)
    except TokenStreamException as err:
        print("error:", err)
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+261 -261
View File
@@ -1,261 +1,261 @@
#!/usr/bin/env python
'''
The XF record is able to store explicit cell formatting attributes or the
attributes of a cell style. Explicit formatting includes the reference to
a cell style XF record. This allows a defined cell style to be extended with
some explicit attributes. The formatting attributes are divided into
6 groups:
Group Attributes
-------------------------------------
Number format Number format index (index to FORMAT record)
Font Font index (index to FONT record)
Alignment Horizontal and vertical alignment, text wrap, indentation,
orientation/rotation, text direction
Border Border line styles and colours
Background Background area style and colours
Protection Cell locked, formula hidden
For each group a flag in the cell XF record specifies whether to use the
attributes contained in that XF record or in the referenced style
XF record. In style XF records, these flags specify whether the attributes
will overwrite explicit cell formatting when the style is applied to
a cell. Changing a cell style (without applying this style to a cell) will
change all cells which already use that style and do not contain explicit
cell attributes for the changed style attributes. If a cell XF record does
not contain explicit attributes in a group (if the attribute group flag
is not set), it repeats the attributes of its style XF record.
'''
import BIFFRecords
class Font(object):
    """Font attributes of a cell style, serialised as a FONT record."""

    ESCAPEMENT_NONE = 0x00
    ESCAPEMENT_SUPERSCRIPT = 0x01
    ESCAPEMENT_SUBSCRIPT = 0x02

    UNDERLINE_NONE = 0x00
    UNDERLINE_SINGLE = 0x01
    UNDERLINE_SINGLE_ACC = 0x21
    UNDERLINE_DOUBLE = 0x02
    UNDERLINE_DOUBLE_ACC = 0x22

    FAMILY_NONE = 0x00
    FAMILY_ROMAN = 0x01
    FAMILY_SWISS = 0x02
    FAMILY_MODERN = 0x03
    FAMILY_SCRIPT = 0x04
    FAMILY_DECORATIVE = 0x05

    CHARSET_ANSI_LATIN = 0x00
    CHARSET_SYS_DEFAULT = 0x01
    CHARSET_SYMBOL = 0x02
    CHARSET_APPLE_ROMAN = 0x4D
    CHARSET_ANSI_JAP_SHIFT_JIS = 0x80
    CHARSET_ANSI_KOR_HANGUL = 0x81
    CHARSET_ANSI_KOR_JOHAB = 0x82
    CHARSET_ANSI_CHINESE_GBK = 0x86
    CHARSET_ANSI_CHINESE_BIG5 = 0x88
    CHARSET_ANSI_GREEK = 0xA1
    CHARSET_ANSI_TURKISH = 0xA2
    CHARSET_ANSI_VIETNAMESE = 0xA3
    CHARSET_ANSI_HEBREW = 0xB1
    CHARSET_ANSI_ARABIC = 0xB2
    CHARSET_ANSI_BALTIC = 0xBA
    CHARSET_ANSI_CYRILLIC = 0xCC
    CHARSET_ANSI_THAI = 0xDE
    CHARSET_ANSI_LATIN_II = 0xEE
    CHARSET_OEM_LATIN_I = 0xFF

    def __init__(self):
        """Create a font with the defaults: 10 pt Arial, no decoration."""
        # twip = 1/20 of a point = 1/1440 of a inch
        # usually resolution == 96 pixels per 1 inch
        # (rarely 120 pixels per 1 inch or another one)
        self.height = 0x00C8 # 200: this is font with height 10 points
        self.italic = False
        self.struck_out = False
        self.outline = False
        self.shadow = False
        self.colour_index = 0x7FFF
        self.bold = False
        self._weight = 0x0190 # 0x02BC gives bold font
        self.escapement = self.ESCAPEMENT_NONE
        self.underline = self.UNDERLINE_NONE
        self.family = self.FAMILY_NONE
        self.charset = self.CHARSET_SYS_DEFAULT
        self.name = 'Arial'

    def get_biff_record(self):
        """Return a ``BIFFRecords.FontRecord`` built from this font.

        The option flags are derived from ``bold``, ``italic``,
        ``underline``, ``struck_out``, ``outline`` and ``shadow``. A bold font
        is written with weight 0x02BC; otherwise ``_weight`` is used. The
        font object itself is not modified, so ``_search_key()`` gives the
        same result before and after this call and clearing ``bold`` later
        restores the normal weight.
        """
        options = 0x00
        if self.bold:
            options |= 0x01
        if self.italic:
            options |= 0x02
        if self.underline != self.UNDERLINE_NONE:
            options |= 0x04
        if self.struck_out:
            options |= 0x08
        if self.outline:
            options |= 0x010
        if self.shadow:
            options |= 0x020

        # Computed locally instead of being stored back into self._weight.
        if self.bold:
            weight = 0x02BC
        else:
            weight = self._weight

        return BIFFRecords.FontRecord(self.height, options, self.colour_index,
                                      weight, self.escapement,
                                      self.underline, self.family, self.charset,
                                      self.name)

    def _search_key(self):
        """Return a tuple of all attribute values, used to compare fonts."""
        return (
            self.height,
            self.italic,
            self.struck_out,
            self.outline,
            self.shadow,
            self.colour_index,
            self.bold,
            self._weight,
            self.escapement,
            self.underline,
            self.family,
            self.charset,
            self.name,
            )
class Alignment(object):
    """Alignment attributes of a cell style."""

    # Horizontal alignment
    HORZ_GENERAL = 0x00
    HORZ_LEFT = 0x01
    HORZ_CENTER = 0x02
    HORZ_RIGHT = 0x03
    HORZ_FILLED = 0x04
    HORZ_JUSTIFIED = 0x05 # BIFF4-BIFF8X
    HORZ_CENTER_ACROSS_SEL = 0x06 # Centred across selection (BIFF4-BIFF8X)
    HORZ_DISTRIBUTED = 0x07 # Distributed (BIFF8X)

    # Vertical alignment
    VERT_TOP = 0x00
    VERT_CENTER = 0x01
    VERT_BOTTOM = 0x02
    VERT_JUSTIFIED = 0x03 # Justified (BIFF5-BIFF8X)
    VERT_DISTRIBUTED = 0x04 # Distributed (BIFF8X)

    # Text direction
    DIRECTION_GENERAL = 0x00 # BIFF8X
    DIRECTION_LR = 0x01
    DIRECTION_RL = 0x02

    # Orientation / rotation
    ORIENTATION_NOT_ROTATED = 0x00
    ORIENTATION_STACKED = 0x01
    ORIENTATION_90_CC = 0x02
    ORIENTATION_90_CW = 0x03

    ROTATION_0_ANGLE = 0x00
    ROTATION_STACKED = 0xFF

    # Wrapping and shrinking
    WRAP_AT_RIGHT = 0x01
    NOT_WRAP_AT_RIGHT = 0x00

    SHRINK_TO_FIT = 0x01
    NOT_SHRINK_TO_FIT = 0x00

    def __init__(self):
        """Set every attribute to its default value."""
        defaults = (
            ("horz", self.HORZ_GENERAL),
            ("vert", self.VERT_BOTTOM),
            ("dire", self.DIRECTION_GENERAL),
            ("orie", self.ORIENTATION_NOT_ROTATED),
            ("rota", self.ROTATION_0_ANGLE),
            ("wrap", self.NOT_WRAP_AT_RIGHT),
            ("shri", self.NOT_SHRINK_TO_FIT),
            ("inde", 0),
            ("merg", 0),
        )
        for attr_name, value in defaults:
            setattr(self, attr_name, value)

    def _search_key(self):
        """Return a tuple of all attribute values, used to compare objects."""
        field_names = ("horz", "vert", "dire", "orie", "rota",
                       "wrap", "shri", "inde", "merg")
        return tuple([getattr(self, field) for field in field_names])
class Borders(object):
    """Border line styles and colours of a cell style."""

    # Line styles
    NO_LINE = 0x00
    THIN = 0x01
    MEDIUM = 0x02
    DASHED = 0x03
    DOTTED = 0x04
    THICK = 0x05
    DOUBLE = 0x06
    HAIR = 0x07
    #The following for BIFF8
    MEDIUM_DASHED = 0x08
    THIN_DASH_DOTTED = 0x09
    MEDIUM_DASH_DOTTED = 0x0A
    THIN_DASH_DOT_DOTTED = 0x0B
    MEDIUM_DASH_DOT_DOTTED = 0x0C
    SLANTED_MEDIUM_DASH_DOTTED = 0x0D

    # Diagonal-line flags
    NEED_DIAG1 = 0x01
    NEED_DIAG2 = 0x01
    NO_NEED_DIAG1 = 0x00
    NO_NEED_DIAG2 = 0x00

    def __init__(self):
        """Start with no border lines and colour index 0x40 on every edge."""
        for edge in ("left", "right", "top", "bottom", "diag"):
            setattr(self, edge, self.NO_LINE)
        for edge in ("left", "right", "top", "bottom", "diag"):
            setattr(self, edge + "_colour", 0x40)
        self.need_diag1 = self.NO_NEED_DIAG1
        self.need_diag2 = self.NO_NEED_DIAG2

    def _search_key(self):
        """Return a tuple of all attribute values, used to compare objects."""
        field_names = (
            "left", "right", "top", "bottom", "diag",
            "left_colour", "right_colour", "top_colour",
            "bottom_colour", "diag_colour",
            "need_diag1", "need_diag2",
        )
        return tuple([getattr(self, field) for field in field_names])
class Pattern(object):
    """Background fill pattern and its two colour indexes."""

    # patterns 0x00 - 0x12
    NO_PATTERN = 0x00
    SOLID_PATTERN = 0x01

    def __init__(self):
        """Default to no pattern, fore colour 0x40 and back colour 0x41."""
        self.pattern = self.NO_PATTERN
        self.pattern_fore_colour, self.pattern_back_colour = 0x40, 0x41

    def _search_key(self):
        """Return a tuple of all attribute values, used to compare objects."""
        key = (self.pattern, self.pattern_fore_colour, self.pattern_back_colour)
        return key
class Protection(object):
    """Cell protection flags: cell locked and formula hidden."""

    def __init__(self):
        """Default to a locked cell whose formula is not hidden."""
        self.cell_locked, self.formula_hidden = 1, 0

    def _search_key(self):
        """Return a tuple of both flags, used to compare objects."""
        key = (self.cell_locked, self.formula_hidden)
        return key
#!/usr/bin/env python
'''
The XF record is able to store explicit cell formatting attributes or the
attributes of a cell style. Explicit formatting includes the reference to
a cell style XF record. This allows a defined cell style to be extended with
some explicit attributes. The formatting attributes are divided into
6 groups:
Group Attributes
-------------------------------------
Number format Number format index (index to FORMAT record)
Font Font index (index to FONT record)
Alignment Horizontal and vertical alignment, text wrap, indentation,
orientation/rotation, text direction
Border Border line styles and colours
Background Background area style and colours
Protection Cell locked, formula hidden
For each group a flag in the cell XF record specifies whether to use the
attributes contained in that XF record or in the referenced style
XF record. In style XF records, these flags specify whether the attributes
will overwrite explicit cell formatting when the style is applied to
a cell. Changing a cell style (without applying this style to a cell) will
change all cells which already use that style and do not contain explicit
cell attributes for the changed style attributes. If a cell XF record does
not contain explicit attributes in a group (if the attribute group flag
is not set), it repeats the attributes of its style XF record.
'''
from . import BIFFRecords
class Font(object):
    """Font attributes of a cell style, serialised as a FONT record."""

    ESCAPEMENT_NONE = 0x00
    ESCAPEMENT_SUPERSCRIPT = 0x01
    ESCAPEMENT_SUBSCRIPT = 0x02

    UNDERLINE_NONE = 0x00
    UNDERLINE_SINGLE = 0x01
    UNDERLINE_SINGLE_ACC = 0x21
    UNDERLINE_DOUBLE = 0x02
    UNDERLINE_DOUBLE_ACC = 0x22

    FAMILY_NONE = 0x00
    FAMILY_ROMAN = 0x01
    FAMILY_SWISS = 0x02
    FAMILY_MODERN = 0x03
    FAMILY_SCRIPT = 0x04
    FAMILY_DECORATIVE = 0x05

    CHARSET_ANSI_LATIN = 0x00
    CHARSET_SYS_DEFAULT = 0x01
    CHARSET_SYMBOL = 0x02
    CHARSET_APPLE_ROMAN = 0x4D
    CHARSET_ANSI_JAP_SHIFT_JIS = 0x80
    CHARSET_ANSI_KOR_HANGUL = 0x81
    CHARSET_ANSI_KOR_JOHAB = 0x82
    CHARSET_ANSI_CHINESE_GBK = 0x86
    CHARSET_ANSI_CHINESE_BIG5 = 0x88
    CHARSET_ANSI_GREEK = 0xA1
    CHARSET_ANSI_TURKISH = 0xA2
    CHARSET_ANSI_VIETNAMESE = 0xA3
    CHARSET_ANSI_HEBREW = 0xB1
    CHARSET_ANSI_ARABIC = 0xB2
    CHARSET_ANSI_BALTIC = 0xBA
    CHARSET_ANSI_CYRILLIC = 0xCC
    CHARSET_ANSI_THAI = 0xDE
    CHARSET_ANSI_LATIN_II = 0xEE
    CHARSET_OEM_LATIN_I = 0xFF

    def __init__(self):
        """Create a font with the defaults: 10 pt Arial, no decoration."""
        # twip = 1/20 of a point = 1/1440 of a inch
        # usually resolution == 96 pixels per 1 inch
        # (rarely 120 pixels per 1 inch or another one)
        self.height = 0x00C8 # 200: this is font with height 10 points
        self.italic = False
        self.struck_out = False
        self.outline = False
        self.shadow = False
        self.colour_index = 0x7FFF
        self.bold = False
        self._weight = 0x0190 # 0x02BC gives bold font
        self.escapement = self.ESCAPEMENT_NONE
        self.underline = self.UNDERLINE_NONE
        self.family = self.FAMILY_NONE
        self.charset = self.CHARSET_SYS_DEFAULT
        self.name = b'Arial'

    def get_biff_record(self):
        """Return a ``BIFFRecords.FontRecord`` built from this font.

        The option flags are derived from ``bold``, ``italic``,
        ``underline``, ``struck_out``, ``outline`` and ``shadow``. A bold font
        is written with weight 0x02BC; otherwise ``_weight`` is used. The
        font object itself is not modified, so ``_search_key()`` gives the
        same result before and after this call and clearing ``bold`` later
        restores the normal weight.
        """
        options = 0x00
        if self.bold:
            options |= 0x01
        if self.italic:
            options |= 0x02
        if self.underline != self.UNDERLINE_NONE:
            options |= 0x04
        if self.struck_out:
            options |= 0x08
        if self.outline:
            options |= 0x010
        if self.shadow:
            options |= 0x020

        # Computed locally instead of being stored back into self._weight.
        if self.bold:
            weight = 0x02BC
        else:
            weight = self._weight

        return BIFFRecords.FontRecord(self.height, options, self.colour_index,
                                      weight, self.escapement,
                                      self.underline, self.family, self.charset,
                                      self.name)

    def _search_key(self):
        """Return a tuple of all attribute values, used to compare fonts."""
        return (
            self.height,
            self.italic,
            self.struck_out,
            self.outline,
            self.shadow,
            self.colour_index,
            self.bold,
            self._weight,
            self.escapement,
            self.underline,
            self.family,
            self.charset,
            self.name,
            )
class Alignment(object):
    """Alignment attributes of a cell style."""

    # Horizontal alignment
    HORZ_GENERAL = 0x00
    HORZ_LEFT = 0x01
    HORZ_CENTER = 0x02
    HORZ_RIGHT = 0x03
    HORZ_FILLED = 0x04
    HORZ_JUSTIFIED = 0x05 # BIFF4-BIFF8X
    HORZ_CENTER_ACROSS_SEL = 0x06 # Centred across selection (BIFF4-BIFF8X)
    HORZ_DISTRIBUTED = 0x07 # Distributed (BIFF8X)

    # Vertical alignment
    VERT_TOP = 0x00
    VERT_CENTER = 0x01
    VERT_BOTTOM = 0x02
    VERT_JUSTIFIED = 0x03 # Justified (BIFF5-BIFF8X)
    VERT_DISTRIBUTED = 0x04 # Distributed (BIFF8X)

    # Text direction
    DIRECTION_GENERAL = 0x00 # BIFF8X
    DIRECTION_LR = 0x01
    DIRECTION_RL = 0x02

    # Orientation / rotation
    ORIENTATION_NOT_ROTATED = 0x00
    ORIENTATION_STACKED = 0x01
    ORIENTATION_90_CC = 0x02
    ORIENTATION_90_CW = 0x03

    ROTATION_0_ANGLE = 0x00
    ROTATION_STACKED = 0xFF

    # Wrapping and shrinking
    WRAP_AT_RIGHT = 0x01
    NOT_WRAP_AT_RIGHT = 0x00

    SHRINK_TO_FIT = 0x01
    NOT_SHRINK_TO_FIT = 0x00

    def __init__(self):
        """Set every attribute to its default value."""
        defaults = (
            ("horz", self.HORZ_GENERAL),
            ("vert", self.VERT_BOTTOM),
            ("dire", self.DIRECTION_GENERAL),
            ("orie", self.ORIENTATION_NOT_ROTATED),
            ("rota", self.ROTATION_0_ANGLE),
            ("wrap", self.NOT_WRAP_AT_RIGHT),
            ("shri", self.NOT_SHRINK_TO_FIT),
            ("inde", 0),
            ("merg", 0),
        )
        for attr_name, value in defaults:
            setattr(self, attr_name, value)

    def _search_key(self):
        """Return a tuple of all attribute values, used to compare objects."""
        field_names = ("horz", "vert", "dire", "orie", "rota",
                       "wrap", "shri", "inde", "merg")
        return tuple([getattr(self, field) for field in field_names])
class Borders(object):
    """Border line styles and colours of a cell style."""

    # Line styles
    NO_LINE = 0x00
    THIN = 0x01
    MEDIUM = 0x02
    DASHED = 0x03
    DOTTED = 0x04
    THICK = 0x05
    DOUBLE = 0x06
    HAIR = 0x07
    #The following for BIFF8
    MEDIUM_DASHED = 0x08
    THIN_DASH_DOTTED = 0x09
    MEDIUM_DASH_DOTTED = 0x0A
    THIN_DASH_DOT_DOTTED = 0x0B
    MEDIUM_DASH_DOT_DOTTED = 0x0C
    SLANTED_MEDIUM_DASH_DOTTED = 0x0D

    # Diagonal-line flags
    NEED_DIAG1 = 0x01
    NEED_DIAG2 = 0x01
    NO_NEED_DIAG1 = 0x00
    NO_NEED_DIAG2 = 0x00

    def __init__(self):
        """Start with no border lines and colour index 0x40 on every edge."""
        for edge in ("left", "right", "top", "bottom", "diag"):
            setattr(self, edge, self.NO_LINE)
        for edge in ("left", "right", "top", "bottom", "diag"):
            setattr(self, edge + "_colour", 0x40)
        self.need_diag1 = self.NO_NEED_DIAG1
        self.need_diag2 = self.NO_NEED_DIAG2

    def _search_key(self):
        """Return a tuple of all attribute values, used to compare objects."""
        field_names = (
            "left", "right", "top", "bottom", "diag",
            "left_colour", "right_colour", "top_colour",
            "bottom_colour", "diag_colour",
            "need_diag1", "need_diag2",
        )
        return tuple([getattr(self, field) for field in field_names])
class Pattern(object):
    """Background fill pattern and its two colour indexes."""

    # patterns 0x00 - 0x12
    NO_PATTERN = 0x00
    SOLID_PATTERN = 0x01

    def __init__(self):
        """Default to no pattern, fore colour 0x40 and back colour 0x41."""
        self.pattern = self.NO_PATTERN
        self.pattern_fore_colour, self.pattern_back_colour = 0x40, 0x41

    def _search_key(self):
        """Return a tuple of all attribute values, used to compare objects."""
        key = (self.pattern, self.pattern_fore_colour, self.pattern_back_colour)
        return key
class Protection(object):
    """Cell protection flags: cell locked and formula hidden."""

    def __init__(self):
        """Default to a locked cell whose formula is not hidden."""
        self.cell_locked, self.formula_hidden = 1, 0

    def _search_key(self):
        """Return a tuple of both flags, used to compare objects."""
        key = (self.cell_locked, self.formula_hidden)
        return key
+253 -253
View File
@@ -1,253 +1,253 @@
# -*- coding: windows-1252 -*-
import BIFFRecords
import Style
from Cell import StrCell, BlankCell, NumberCell, FormulaCell, MulBlankCell, BooleanCell, ErrorCell, \
_get_cells_biff_data_mul
import ExcelFormula
import datetime as dt
try:
from decimal import Decimal
except ImportError:
# Python 2.3: decimal not supported; create dummy Decimal class
class Decimal(object):
pass
class Row(object):
    """One worksheet row: its cells and the settings written to the ROW record.

    Cells are kept per column index. The ``set_cell_*`` methods and ``write``
    build the matching cell object and register the style (and, for text,
    the string) with the parent workbook.
    """

    __slots__ = [# private variables
                 "__idx",
                 "__parent",
                 "__parent_wb",
                 "__cells",
                 "__min_col_idx",
                 "__max_col_idx",
                 "__xf_index",
                 "__has_default_xf_index",
                 "__height_in_pixels",
                 # public variables
                 "height",
                 "has_default_height",
                 "height_mismatch",
                 "level",
                 "collapse",
                 "hidden",
                 "space_above",
                 "space_below"]

    def __init__(self, rowx, parent_sheet):
        """Create row *rowx* of *parent_sheet*.

        Raises:
            ValueError: if *rowx* is not an ``int`` in ``range(65536)``.
        """
        if not (isinstance(rowx, int) and 0 <= rowx <= 65535):
            raise ValueError("row index (%r) not an int in range(65536)" % rowx)
        self.__idx = rowx
        self.__parent = parent_sheet
        self.__parent_wb = parent_sheet.get_parent()
        self.__cells = {}
        self.__min_col_idx = 0
        self.__max_col_idx = 0
        self.__xf_index = 0x0F
        self.__has_default_xf_index = 0
        self.__height_in_pixels = 0x11

        self.height = 0x00FF
        self.has_default_height = 0x00
        self.height_mismatch = 0
        self.level = 0
        self.collapse = 0
        self.hidden = 0
        self.space_above = 0
        self.space_below = 0

    def __adjust_height(self, style):
        """Grow the recorded pixel height if *style*'s font needs more room."""
        twips = style.font.height
        points = float(twips)/20.0
        # Cell height in pixels can be calculated by the following approx. formula:
        # cell height in pixels = font height in points * 83/50 + 2/5
        # It works when screen resolution is 96 dpi
        pix = int(round(points*83.0/50.0 + 2.0/5.0))
        if pix > self.__height_in_pixels:
            self.__height_in_pixels = pix

    def __adjust_bound_col_idx(self, *args):
        """Widen the row's and the sheet's used-column range to cover *args*.

        Raises:
            ValueError: if a column index is not an integral value in
                ``range(256)``.
        """
        sheet = self.__parent
        for arg in args:
            iarg = int(arg)
            if not ((0 <= iarg <= 255) and arg == iarg):
                raise ValueError("column index (%r) not an int in range(256)" % arg)
            if iarg < self.__min_col_idx:
                self.__min_col_idx = iarg
            if iarg > self.__max_col_idx:
                self.__max_col_idx = iarg
            if iarg < sheet.first_used_col:
                sheet.first_used_col = iarg
            if iarg > sheet.last_used_col:
                sheet.last_used_col = iarg

    def __excel_date_dt(self, date):
        """Convert a ``date``, ``datetime`` or ``time`` to an Excel serial number.

        Dates count days from 1899-12-31; a bare ``time`` yields only the
        fraction of a day. Microseconds are not taken into account.
        """
        if isinstance(date, dt.date) and (not isinstance(date, dt.datetime)):
            epoch = dt.date(1899, 12, 31)
        elif isinstance(date, dt.time):
            date = dt.datetime.combine(dt.datetime(1900, 1, 1), date)
            epoch = dt.datetime(1900, 1, 1, 0, 0, 0)
        else:
            epoch = dt.datetime(1899, 12, 31, 0, 0, 0)
        delta = date - epoch
        xldate = delta.days + float(delta.seconds) / (24*60*60)
        # Excel numbers a 29 Feb 1900 that never existed, so every serial
        # after day 59 is one higher than the real day count.
        if xldate > 59:
            xldate += 1
        return xldate

    def __register(self, style, *colxs):
        """Common prologue of the cell setters.

        Adjusts the row height for *style*, widens the column bounds to
        include *colxs* and returns the XF index the workbook assigns to
        *style*.
        """
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(*colxs)
        return self.__parent_wb.add_style(style)

    def get_height_in_pixels(self):
        """Return the row height in pixels derived from the styles used."""
        return self.__height_in_pixels

    def set_style(self, style):
        """Make *style* the row's default style."""
        self.__adjust_height(style)
        self.__xf_index = self.__parent_wb.add_style(style)
        self.__has_default_xf_index = 1

    def get_xf_index(self):
        """Return the XF index of the row's style."""
        return self.__xf_index

    def get_cells_count(self):
        """Return the number of column entries stored in the row."""
        return len(self.__cells)

    def get_min_col(self):
        """Return the smallest column index recorded for the row."""
        return self.__min_col_idx

    def get_max_col(self):
        """Return the largest column index recorded for the row."""
        return self.__max_col_idx

    def get_row_biff_data(self):
        """Return the data produced by ``BIFFRecords.RowRecord(...).get()``."""
        height_options = (self.height & 0x07FFF)
        height_options |= (self.has_default_height & 0x01) << 15

        options = (self.level & 0x07) << 0
        options |= (self.collapse & 0x01) << 4
        options |= (self.hidden & 0x01) << 5
        options |= (self.height_mismatch & 0x01) << 6
        options |= (self.__has_default_xf_index & 0x01) << 7
        # Bit 8 is set unconditionally.
        options |= (0x01 & 0x01) << 8
        options |= (self.__xf_index & 0x0FFF) << 16
        options |= (self.space_above & 1) << 28
        options |= (self.space_below & 1) << 29

        return BIFFRecords.RowRecord(self.__idx, self.__min_col_idx,
            self.__max_col_idx, height_options, options).get()

    def insert_cell(self, col_index, cell_obj):
        """Store *cell_obj* at *col_index*.

        If the column is already occupied, the sheet's ``_cell_overwrite_ok``
        flag decides whether this is allowed; when it is, a shared string
        referenced by the replaced cell is released from the workbook.

        Raises:
            Exception: if the column is occupied and overwriting is not
                allowed.
        """
        if col_index in self.__cells:
            if not self.__parent._cell_overwrite_ok:
                msg = "Attempt to overwrite cell: sheetname=%r rowx=%d colx=%d" \
                    % (self.__parent.name, self.__idx, col_index)
                raise Exception(msg)
            prev_cell_obj = self.__cells[col_index]
            sst_idx = getattr(prev_cell_obj, 'sst_idx', None)
            if sst_idx is not None:
                self.__parent_wb.del_str(sst_idx)
        self.__cells[col_index] = cell_obj

    def insert_mulcells(self, colx1, colx2, cell_obj):
        """Store *cell_obj* at *colx1* and mark *colx1+1 .. colx2* as covered.

        The covered columns get ``None`` placeholders, which
        ``get_cells_biff_data`` leaves out.
        """
        self.insert_cell(colx1, cell_obj)
        for col_index in xrange(colx1+1, colx2+1):
            self.insert_cell(col_index, None)

    def get_cells_biff_data(self):
        """Return the cell data of the row, with cells in column order."""
        cell_items = [item for item in self.__cells.iteritems() if item[1] is not None]
        cell_items.sort() # in column order
        return _get_cells_biff_data_mul(self.__idx, cell_items)

    def get_index(self):
        """Return the row index."""
        return self.__idx

    def set_cell_text(self, colx, value, style=Style.default_style):
        """Put a string cell at *colx*; the string is added to the workbook."""
        xf_index = self.__register(style, colx)
        self.insert_cell(colx, StrCell(self.__idx, colx, xf_index, self.__parent_wb.add_str(value)))

    def set_cell_blank(self, colx, style=Style.default_style):
        """Put a styled blank cell at *colx*."""
        xf_index = self.__register(style, colx)
        self.insert_cell(colx, BlankCell(self.__idx, colx, xf_index))

    def set_cell_mulblanks(self, first_colx, last_colx, style=Style.default_style):
        """Put one run of styled blank cells over *first_colx .. last_colx*."""
        assert 0 <= first_colx <= last_colx <= 255
        xf_index = self.__register(style, first_colx, last_colx)
        self.insert_mulcells(first_colx, last_colx, MulBlankCell(self.__idx, first_colx, last_colx, xf_index))

    def set_cell_number(self, colx, number, style=Style.default_style):
        """Put a numeric cell at *colx*."""
        xf_index = self.__register(style, colx)
        self.insert_cell(colx, NumberCell(self.__idx, colx, xf_index, number))

    def set_cell_date(self, colx, datetime_obj, style=Style.default_style):
        """Put a date/time at *colx*, stored as an Excel serial number."""
        xf_index = self.__register(style, colx)
        self.insert_cell(colx,
            NumberCell(self.__idx, colx, xf_index, self.__excel_date_dt(datetime_obj)))

    def set_cell_formula(self, colx, formula, style=Style.default_style, calc_flags=0):
        """Put a formula cell at *colx*.

        *calc_flags* is forwarded to ``FormulaCell`` (it used to be ignored
        and replaced by a hard-coded 0).
        """
        xf_index = self.__register(style, colx)
        self.__parent_wb.add_sheet_reference(formula)
        self.insert_cell(colx, FormulaCell(self.__idx, colx, xf_index, formula, calc_flags=calc_flags))

    def set_cell_boolean(self, colx, value, style=Style.default_style):
        """Put a boolean cell at *colx*; *value* is converted with ``bool``."""
        xf_index = self.__register(style, colx)
        self.insert_cell(colx, BooleanCell(self.__idx, colx, xf_index, bool(value)))

    def set_cell_error(self, colx, error_string_or_code, style=Style.default_style):
        """Put an error cell at *colx*."""
        xf_index = self.__register(style, colx)
        self.insert_cell(colx, ErrorCell(self.__idx, colx, xf_index, error_string_or_code))

    def write(self, col, label, style=Style.default_style):
        """Put *label* at column *col*, choosing the cell type from its type.

        Strings become string cells (blank cells when empty), booleans
        boolean cells, numbers and ``Decimal`` number cells, date/time
        objects number cells holding the Excel serial number, ``None`` a
        blank cell and ``ExcelFormula.Formula`` a formula cell.

        Raises:
            Exception: if the type of *label* is none of the above.
        """
        style_index = self.__register(style, col)
        if isinstance(label, basestring):
            if len(label) > 0:
                cell = StrCell(self.__idx, col, style_index, self.__parent_wb.add_str(label))
            else:
                cell = BlankCell(self.__idx, col, style_index)
        elif isinstance(label, bool): # bool is subclass of int; test bool first
            cell = BooleanCell(self.__idx, col, style_index, label)
        elif isinstance(label, (float, int, long, Decimal)):
            cell = NumberCell(self.__idx, col, style_index, label)
        elif isinstance(label, (dt.datetime, dt.date, dt.time)):
            date_number = self.__excel_date_dt(label)
            cell = NumberCell(self.__idx, col, style_index, date_number)
        elif label is None:
            cell = BlankCell(self.__idx, col, style_index)
        elif isinstance(label, ExcelFormula.Formula):
            self.__parent_wb.add_sheet_reference(label)
            cell = FormulaCell(self.__idx, col, style_index, label)
        else:
            raise Exception("Unexpected data type %r" % type(label))
        self.insert_cell(col, cell)

    write_blanks = set_cell_mulblanks
# -*- coding: windows-1252 -*-
from . import BIFFRecords
from . import Style
from .Cell import StrCell, BlankCell, NumberCell, FormulaCell, MulBlankCell, BooleanCell, ErrorCell, \
_get_cells_biff_data_mul
from . import ExcelFormula
import datetime as dt
try:
from decimal import Decimal
except ImportError:
# Python 2.3: decimal not supported; create dummy Decimal class
class Decimal(object):
pass
class Row(object):
    """One worksheet row.

    Stores the row's cells in a dict keyed by column index, tracks the
    smallest/largest column index used, and produces the BIFF data for
    the row record and for the cell records it contains.
    """
    __slots__ = [# private variables
                "__idx",
                "__parent",
                "__parent_wb",
                "__cells",
                "__min_col_idx",
                "__max_col_idx",
                "__xf_index",
                "__has_default_xf_index",
                "__height_in_pixels",
                # public variables
                "height",
                "has_default_height",
                "height_mismatch",
                "level",
                "collapse",
                "hidden",
                "space_above",
                "space_below"]

    def __init__(self, rowx, parent_sheet):
        """Create an empty row.

        rowx -- zero-based row index; must be an int with 0 <= rowx <= 65535.
        parent_sheet -- the owning sheet; its get_parent() result is kept
            as the parent workbook.

        Raises ValueError if rowx is not an int in range(65536).
        """
        if not (isinstance(rowx, int) and 0 <= rowx <= 65535):
            raise ValueError("row index (%r) not an int in range(65536)" % rowx)
        self.__idx = rowx
        self.__parent = parent_sheet
        self.__parent_wb = parent_sheet.get_parent()
        self.__cells = {}
        self.__min_col_idx = 0
        self.__max_col_idx = 0
        self.__xf_index = 0x0F
        self.__has_default_xf_index = 0
        self.__height_in_pixels = 0x11
        self.height = 0x00FF
        self.has_default_height = 0x00
        self.height_mismatch = 0
        self.level = 0
        self.collapse = 0
        self.hidden = 0
        self.space_above = 0
        self.space_below = 0

    def __adjust_height(self, style):
        """Grow the recorded pixel height if style's font needs more room.

        The height only ever increases; a smaller font leaves it unchanged.
        """
        twips = style.font.height
        points = float(twips)/20.0
        # Cell height in pixels can be calculated by following approx. formula:
        # cell height in pixels = font height in points * 83/50 + 2/5
        # It works when screen resolution is 96 dpi
        pix = int(round(points*83.0/50.0 + 2.0/5.0))
        if pix > self.__height_in_pixels:
            self.__height_in_pixels = pix

    def __adjust_bound_col_idx(self, *args):
        """Validate column indexes and widen the row/sheet column bounds.

        Each argument must be integral and in range(256); otherwise
        ValueError is raised. The row's min/max column index and the parent
        sheet's first_used_col/last_used_col are extended to include it.
        """
        for arg in args:
            iarg = int(arg)
            # "arg == iarg" rejects non-integral values such as 1.5
            if not ((0 <= iarg <= 255) and arg == iarg):
                raise ValueError("column index (%r) not an int in range(256)" % arg)
            sheet = self.__parent
            if iarg < self.__min_col_idx:
                self.__min_col_idx = iarg
            if iarg > self.__max_col_idx:
                self.__max_col_idx = iarg
            if iarg < sheet.first_used_col:
                sheet.first_used_col = iarg
            if iarg > sheet.last_used_col:
                sheet.last_used_col = iarg

    def __excel_date_dt(self, date):
        """Convert a date, datetime or time object to an Excel date number.

        Returns a float: whole days since the epoch plus the fraction of a
        day given by the seconds part of the difference.
        """
        if isinstance(date, dt.date) and (not isinstance(date, dt.datetime)):
            epoch = dt.date(1899, 12, 31)
        elif isinstance(date, dt.time):
            # A bare time is anchored on the epoch day, so only the
            # fractional-day part is non-zero.
            date = dt.datetime.combine(dt.datetime(1900, 1, 1), date)
            epoch = dt.datetime(1900, 1, 1, 0, 0, 0)
        else:
            epoch = dt.datetime(1899, 12, 31, 0, 0, 0)
        delta = date - epoch
        xldate = delta.days + float(delta.seconds) / (24*60*60)
        # Add a day for Excel's missing leap day in 1900
        if xldate > 59:
            xldate += 1
        return xldate

    def get_height_in_pixels(self):
        """Return the row height in pixels as tracked from the styles used."""
        return self.__height_in_pixels

    def set_style(self, style):
        """Set the row's style and flag that the row carries its own XF index."""
        self.__adjust_height(style)
        self.__xf_index = self.__parent_wb.add_style(style)
        self.__has_default_xf_index = 1

    def get_xf_index(self):
        """Return the row's XF index."""
        return self.__xf_index

    def get_cells_count(self):
        """Return the number of entries in the cell dict.

        Placeholder (None) entries created by insert_mulcells are counted.
        """
        return len(self.__cells)

    def get_min_col(self):
        """Return the smallest column index recorded for this row."""
        return self.__min_col_idx

    def get_max_col(self):
        """Return the largest column index recorded for this row."""
        return self.__max_col_idx

    def get_row_biff_data(self):
        """Return the BIFF row record for this row.

        Packs height/has_default_height into one field and the outline,
        visibility, XF-index and spacing attributes into an options
        bit-field, then delegates to BIFFRecords.RowRecord.
        """
        height_options = (self.height & 0x07FFF)
        height_options |= (self.has_default_height & 0x01) << 15

        options =  (self.level & 0x07) << 0
        options |= (self.collapse & 0x01) << 4
        options |= (self.hidden & 0x01) << 5
        options |= (self.height_mismatch & 0x01) << 6
        options |= (self.__has_default_xf_index & 0x01) << 7
        options |= (0x01 & 0x01) << 8  # bit 8 is always set
        options |= (self.__xf_index & 0x0FFF) << 16
        options |= (self.space_above & 1) << 28
        options |= (self.space_below & 1) << 29

        return BIFFRecords.RowRecord(self.__idx, self.__min_col_idx,
            self.__max_col_idx, height_options, options).get()

    def insert_cell(self, col_index, cell_obj):
        """Store cell_obj at col_index.

        If the column already holds an entry and the parent sheet's
        _cell_overwrite_ok is false, Exception is raised. When overwriting
        is allowed and the previous cell has an sst_idx, that string is
        released through the workbook's del_str().
        """
        if col_index in self.__cells:
            if not self.__parent._cell_overwrite_ok:
                msg = "Attempt to overwrite cell: sheetname=%r rowx=%d colx=%d" \
                    % (self.__parent.name, self.__idx, col_index)
                raise Exception(msg)
            prev_cell_obj = self.__cells[col_index]
            sst_idx = getattr(prev_cell_obj, 'sst_idx', None)
            if sst_idx is not None:
                self.__parent_wb.del_str(sst_idx)
        self.__cells[col_index] = cell_obj

    def insert_mulcells(self, colx1, colx2, cell_obj):
        """Store a multi-column cell.

        cell_obj goes at colx1; columns colx1+1 .. colx2 get a None
        placeholder so that later writes to them are seen as overwrites.
        """
        self.insert_cell(colx1, cell_obj)
        for col_index in range(colx1+1, colx2+1):
            self.insert_cell(col_index, None)

    def get_cells_biff_data(self):
        """Return the BIFF data for the row's cells, in column order.

        None placeholders are skipped.
        """
        cell_items = [item for item in self.__cells.items() if item[1] is not None]
        cell_items.sort() # in column order
        return _get_cells_biff_data_mul(self.__idx, cell_items)
        # previously:
        # return ''.join([cell.get_biff_data() for colx, cell in cell_items])

    def get_index(self):
        """Return the zero-based row index."""
        return self.__idx

    def set_cell_text(self, colx, value, style=Style.default_style):
        """Store value as a string cell at column colx."""
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(colx)
        xf_index = self.__parent_wb.add_style(style)
        self.insert_cell(colx, StrCell(self.__idx, colx, xf_index, self.__parent_wb.add_str(value)))

    def set_cell_blank(self, colx, style=Style.default_style):
        """Store a blank (style-only) cell at column colx."""
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(colx)
        xf_index = self.__parent_wb.add_style(style)
        self.insert_cell(colx, BlankCell(self.__idx, colx, xf_index))

    def set_cell_mulblanks(self, first_colx, last_colx, style=Style.default_style):
        """Store one run of blank cells covering first_colx..last_colx inclusive."""
        assert 0 <= first_colx <= last_colx <= 255
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(first_colx, last_colx)
        xf_index = self.__parent_wb.add_style(style)
        # ncols = last_colx - first_colx + 1
        self.insert_mulcells(first_colx, last_colx, MulBlankCell(self.__idx, first_colx, last_colx, xf_index))

    def set_cell_number(self, colx, number, style=Style.default_style):
        """Store number as a numeric cell at column colx."""
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(colx)
        xf_index = self.__parent_wb.add_style(style)
        self.insert_cell(colx, NumberCell(self.__idx, colx, xf_index, number))

    def set_cell_date(self, colx, datetime_obj, style=Style.default_style):
        """Store a date/datetime/time at column colx as an Excel date number."""
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(colx)
        xf_index = self.__parent_wb.add_style(style)
        self.insert_cell(colx,
            NumberCell(self.__idx, colx, xf_index, self.__excel_date_dt(datetime_obj)))

    def set_cell_formula(self, colx, formula, style=Style.default_style, calc_flags=0):
        """Store a formula cell at column colx.

        formula is registered with the workbook via add_sheet_reference();
        calc_flags is passed through to FormulaCell.
        """
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(colx)
        xf_index = self.__parent_wb.add_style(style)
        self.__parent_wb.add_sheet_reference(formula)
        # Forward the caller's calc_flags (it used to be hard-coded to 0).
        self.insert_cell(colx, FormulaCell(self.__idx, colx, xf_index, formula, calc_flags=calc_flags))

    def set_cell_boolean(self, colx, value, style=Style.default_style):
        """Store bool(value) as a boolean cell at column colx."""
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(colx)
        xf_index = self.__parent_wb.add_style(style)
        self.insert_cell(colx, BooleanCell(self.__idx, colx, xf_index, bool(value)))

    def set_cell_error(self, colx, error_string_or_code, style=Style.default_style):
        """Store an error cell at column colx."""
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(colx)
        xf_index = self.__parent_wb.add_style(style)
        self.insert_cell(colx, ErrorCell(self.__idx, colx, xf_index, error_string_or_code))

    def write(self, col, label, style=Style.default_style):
        """Store label at column col, choosing the cell type from its Python type.

        str            -> string cell (blank cell if the string is empty)
        bool           -> boolean cell
        float/int/Decimal -> number cell
        datetime/date/time -> number cell holding the Excel date number
        None           -> blank cell
        ExcelFormula.Formula -> formula cell

        Raises Exception for any other type.
        """
        self.__adjust_height(style)
        self.__adjust_bound_col_idx(col)
        style_index = self.__parent_wb.add_style(style)
        if isinstance(label, str):
            if len(label) > 0:
                self.insert_cell(col,
                    StrCell(self.__idx, col, style_index, self.__parent_wb.add_str(label))
                    )
            else:
                self.insert_cell(col, BlankCell(self.__idx, col, style_index))
        elif isinstance(label, bool): # bool is subclass of int; test bool first
            self.insert_cell(col, BooleanCell(self.__idx, col, style_index, label))
        elif isinstance(label, (float, int, Decimal)):
            self.insert_cell(col, NumberCell(self.__idx, col, style_index, label))
        elif isinstance(label, (dt.datetime, dt.date, dt.time)):
            date_number = self.__excel_date_dt(label)
            self.insert_cell(col, NumberCell(self.__idx, col, style_index, date_number))
        elif label is None:
            self.insert_cell(col, BlankCell(self.__idx, col, style_index))
        elif isinstance(label, ExcelFormula.Formula):
            self.__parent_wb.add_sheet_reference(label)
            self.insert_cell(col, FormulaCell(self.__idx, col, style_index, label))
        else:
            raise Exception("Unexpected data type %r" % type(label))

    # Alias kept for callers that use the write_* naming.
    write_blanks = set_cell_mulblanks
File diff suppressed because it is too large Load Diff
+79 -81
View File
@@ -1,81 +1,79 @@
# -*- coding: windows-1252 -*-
'''
From BIFF8 on, strings are always stored using UTF-16LE text encoding. The
character array is a sequence of 16-bit values. Additionally it is
possible to use a compressed format, which omits the high bytes of all
characters, if they are all zero.
The following tables describe the standard format of the entire string, but
in many records the strings differ from this format. This will be mentioned
separately. It is possible (but not required) to store Rich-Text formatting
information and Asian phonetic information inside a Unicode string. This
results in four different ways to store a string. The character array
is not zero-terminated.
The string consists of the character count (as usual an 8-bit value or
a 16-bit value), option flags, the character array and optional formatting
information. If the string is empty, sometimes the option flags field will
not occur. This is mentioned at the respective place.
Offset Size Contents
0 1 or 2 Length of the string (character count, ln)
1 or 2 1 Option flags:
Bit Mask Contents
0 01H Character compression (ccompr):
0 = Compressed (8-bit characters)
1 = Uncompressed (16-bit characters)
2 04H Asian phonetic settings (phonetic):
0 = Does not contain Asian phonetic settings
1 = Contains Asian phonetic settings
3 08H Rich-Text settings (richtext):
0 = Does not contain Rich-Text settings
1 = Contains Rich-Text settings
[2 or 3] 2 (optional, only if richtext=1) Number of Rich-Text formatting runs (rt)
[var.] 4 (optional, only if phonetic=1) Size of Asian phonetic settings block (in bytes, sz)
var. ln or
2·ln Character array (8-bit characters or 16-bit characters, dependent on ccompr)
[var.] 4·rt (optional, only if richtext=1) List of rt formatting runs
[var.] sz (optional, only if phonetic=1) Asian Phonetic Settings Block
'''
from struct import pack
def upack2(s, encoding='ascii'):
    """Pack a string as a BIFF8 unicode string with a 2-byte count field.

    s -- a unicode object, or a byte string that is decoded with `encoding`.
    encoding -- codec used only when `s` is not already unicode.

    Returns the packed string: little-endian 16-bit count, one option-flag
    byte (0 = 8-bit "compressed" characters, 1 = 16-bit characters) and
    the encoded characters.

    Raises Exception if the string is longer than 65535 characters.
    """
    # If not unicode, make it so.
    if isinstance(s, unicode):
        us = s
    else:
        us = unicode(s, encoding)
    # Limit is based on number of content characters
    # (not on number of bytes in packed result)
    len_us = len(us)
    if len_us > 65535:
        raise Exception('String longer than 65535 characters')
    try:
        encs = us.encode('latin1')
        # Success here means all chars are in U+0000 to U+00FF
        # inclusive, meaning that we can use "compressed format".
        flag = 0
        n_items = len_us
    except UnicodeEncodeError:
        encs = us.encode('utf_16_le')
        flag = 1
        # The count field is in 16-bit units. A character outside the BMP
        # is a surrogate pair (2 units) but len(us) counts it as 1 on a
        # wide-unicode build, so derive the count from the encoded data.
        n_items = len(encs) // 2
        if n_items > 65535:
            raise Exception('String longer than 65535 characters')
    return pack('<HB', n_items, flag) + encs
def upack1(s, encoding='ascii'):
    """Pack a string as a BIFF8 unicode string with a 1-byte count field.

    Same as upack2(), but with a one-byte length field.

    s -- a unicode object, or a byte string that is decoded with `encoding`.
    encoding -- codec used only when `s` is not already unicode.

    Raises Exception if the string is longer than 255 characters.
    """
    if isinstance(s, unicode):
        us = s
    else:
        us = unicode(s, encoding)
    len_us = len(us)
    if len_us > 255:
        raise Exception('String longer than 255 characters')
    try:
        encs = us.encode('latin1')
        flag = 0
        n_items = len_us
    except UnicodeEncodeError:
        encs = us.encode('utf_16_le')
        flag = 1
        # Count 16-bit units, not code points: a character outside the BMP
        # takes two units but len(us) may report one.
        n_items = len(encs) // 2
        if n_items > 255:
            raise Exception('String longer than 255 characters')
    return pack('<BB', n_items, flag) + encs
'''
From BIFF8 on, strings are always stored using UTF-16LE text encoding. The
character array is a sequence of 16-bit values. Additionally it is
possible to use a compressed format, which omits the high bytes of all
characters, if they are all zero.
The following tables describe the standard format of the entire string, but
in many records the strings differ from this format. This will be mentioned
separately. It is possible (but not required) to store Rich-Text formatting
information and Asian phonetic information inside a Unicode string. This
results in four different ways to store a string. The character array
is not zero-terminated.
The string consists of the character count (as usual an 8-bit value or
a 16-bit value), option flags, the character array and optional formatting
information. If the string is empty, sometimes the option flags field will
not occur. This is mentioned at the respective place.
Offset Size Contents
0 1 or 2 Length of the string (character count, ln)
1 or 2 1 Option flags:
Bit Mask Contents
0 01H Character compression (ccompr):
0 = Compressed (8-bit characters)
1 = Uncompressed (16-bit characters)
2 04H Asian phonetic settings (phonetic):
0 = Does not contain Asian phonetic settings
1 = Contains Asian phonetic settings
3 08H Rich-Text settings (richtext):
0 = Does not contain Rich-Text settings
1 = Contains Rich-Text settings
[2 or 3] 2 (optional, only if richtext=1) Number of Rich-Text formatting runs (rt)
[var.] 4 (optional, only if phonetic=1) Size of Asian phonetic settings block (in bytes, sz)
var. ln or
2·ln Character array (8-bit characters or 16-bit characters, dependent on ccompr)
[var.] 4·rt (optional, only if richtext=1) List of rt formatting runs
[var.] sz (optional, only if phonetic=1) Asian Phonetic Settings Block
'''
from struct import pack
def upack2(s, encoding='ascii'):
    """Pack a string as a BIFF8 unicode string with a 2-byte count field.

    s -- a str, or a bytes-like object that is decoded with `encoding`.
    encoding -- codec used only when `s` is not already a str.

    Returns bytes: little-endian 16-bit count, one option-flag byte
    (0 = 8-bit "compressed" characters, 1 = 16-bit characters) and the
    encoded characters.

    Raises Exception if the string is longer than 65535 characters.
    """
    # If not unicode, make it so.
    if isinstance(s, str):
        us = s
    else:
        us = str(s, encoding)
    # Limit is based on number of content characters
    # (not on number of bytes in packed result)
    len_us = len(us)
    if len_us > 65535:
        raise Exception('String longer than 65535 characters')
    try:
        encs = us.encode('latin1')
        # Success here means all chars are in U+0000 to U+00FF
        # inclusive, meaning that we can use "compressed format".
        flag = 0
        n_items = len_us
    except UnicodeEncodeError:
        encs = us.encode('utf_16_le')
        flag = 1
        # The count field is in 16-bit units. A character outside the BMP
        # is encoded as a surrogate pair (2 units) while len(us) counts it
        # once, so derive the count from the encoded data.
        n_items = len(encs) // 2
        if n_items > 65535:
            raise Exception('String longer than 65535 characters')
    return pack('<HB', n_items, flag) + encs
def upack1(s, encoding='ascii'):
    """Pack a string as a BIFF8 unicode string with a 1-byte count field.

    Same as upack2(), but with a one-byte length field.

    s -- a str, or a bytes-like object that is decoded with `encoding`.
    encoding -- codec used only when `s` is not already a str.

    Raises Exception if the string is longer than 255 characters.
    """
    if isinstance(s, str):
        us = s
    else:
        us = str(s, encoding)
    len_us = len(us)
    if len_us > 255:
        raise Exception('String longer than 255 characters')
    try:
        encs = us.encode('latin1')
        flag = 0
        n_items = len_us
    except UnicodeEncodeError:
        encs = us.encode('utf_16_le')
        flag = 1
        # Count 16-bit units, not code points: a character outside the BMP
        # takes two units but len(us) reports one.
        n_items = len(encs) // 2
        if n_items > 255:
            raise Exception('String longer than 255 characters')
    return pack('<BB', n_items, flag) + encs
+196 -196
View File
@@ -1,196 +1,196 @@
# pyXLWriter: A library for generating Excel Spreadsheets
# Copyright (c) 2004 Evgeny Filatov <fufff@users.sourceforge.net>
# Copyright (c) 2002-2004 John McNamara (Perl Spreadsheet::WriteExcel)
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# This library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
# General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#----------------------------------------------------------------------------
# This module was written/ported from PERL Spreadsheet::WriteExcel module
# The author of the PERL Spreadsheet::WriteExcel module is John McNamara
# <jmcnamara@cpan.org>
#----------------------------------------------------------------------------
# See the README.txt distributed with pyXLWriter for more details.
# Portions are (C) Roman V. Kiseliov, 2005
# Utilities for work with reference to cells and with sheetnames
__rev_id__ = """$Id: Utils.py 3844 2009-05-20 01:02:54Z sjmachin $"""
import re
from struct import pack
from ExcelMagic import MAX_ROW, MAX_COL
_re_cell_ex = re.compile(r"(\$?)([A-I]?[A-Z])(\$?)(\d+)", re.IGNORECASE)
_re_row_range = re.compile(r"\$?(\d+):\$?(\d+)")
_re_col_range = re.compile(r"\$?([A-I]?[A-Z]):\$?([A-I]?[A-Z])", re.IGNORECASE)
_re_cell_range = re.compile(r"\$?([A-I]?[A-Z]\$?\d+):\$?([A-I]?[A-Z]\$?\d+)", re.IGNORECASE)
_re_cell_ref = re.compile(r"\$?([A-I]?[A-Z]\$?\d+)", re.IGNORECASE)
def col_by_name(colname):
    """Convert a column name made of upper-case letters to a zero-based index.

    'A' -> 0, 'Z' -> 25, 'AA' -> 26, 'IV' -> 255. The name is read as a
    bijective base-26 number; callers are expected to upper-case it first.
    An empty name yields -1.
    """
    col = 0
    # Walk the letters most-significant first instead of indexing backwards
    # while tracking a separate power of 26.
    for ch in colname:
        col = col * 26 + (ord(ch) - ord('A') + 1)
    return col - 1
def cell_to_rowcol(cell):
    """Parse an A1-style cell reference, keeping the '$' (absolute) markers.

    Returns a 4-tuple: zero-based row, zero-based column, and two booleans
    telling whether the row and the column were written with a '$'.

    Raises Exception if `cell` does not start with a valid reference.
    """
    match = _re_cell_ex.match(cell)
    if match is None:
        raise Exception("Ill-formed single_cell reference: %s" % cell)
    col_dollar, col_letters, row_dollar, row_digits = match.groups()
    row_index = int(row_digits) - 1
    col_index = col_by_name(col_letters.upper())
    return row_index, col_index, bool(row_dollar), bool(col_dollar)
def cell_to_rowcol2(cell):
    """Parse an A1-style cell reference, ignoring any '$' markers.

    Returns a 2-tuple: zero-based row, zero-based column.

    Raises Exception if `cell` does not start with a valid reference.
    """
    match = _re_cell_ex.match(cell)
    if match is None:
        raise Exception("Error in cell format")
    # Groups are: column '$', column letters, row '$', row digits.
    col_letters = match.group(2)
    row_digits = match.group(4)
    row_index = int(row_digits) - 1
    # The column letters are a base-26 number.
    col_index = col_by_name(col_letters.upper())
    return row_index, col_index
def rowcol_to_cell(row, col, row_abs=False, col_abs=False):
    """Build an A1-style cell reference from zero-based row/col indexes.

    row_abs / col_abs -- when true, a '$' is put in front of the row
    number / column letters.
    """
    assert 0 <= row < MAX_ROW # MAX_ROW counts from 1
    assert 0 <= col < MAX_COL # MAX_COL counts from 1
    high, low = divmod(col, 26)
    markers = ('', '$')
    col_marker = markers[bool(col_abs)]
    row_marker = markers[bool(row_abs)]
    letters = chr(ord('A') + low)
    if high > 0:
        # Columns past 'Z' get a leading, more significant letter.
        letters = chr(ord('A') + high - 1) + letters
    # Zero index to 1-index
    return col_marker + letters + row_marker + str(row + 1)
def rowcol_pair_to_cellrange(row1, col1, row2, col2,
                             row1_abs=False, col1_abs=False, row2_abs=False, col2_abs=False):
    """Build an 'A1:B2' style range string from two (row, col) corners.

    The first corner must not lie below or to the right of the second.
    The *_abs flags add '$' markers to the corresponding parts.

    Returns: cell range string
    """
    assert row1 <= row2
    assert col1 <= col2
    first_corner = rowcol_to_cell(row1, col1, row1_abs, col1_abs)
    second_corner = rowcol_to_cell(row2, col2, row2_abs, col2_abs)
    return first_corner + ":" + second_corner
def cellrange_to_rowcol_pair(cellrange):
    """Convert cell range string in A1 notation to numeric row/col
    pair.

    Accepted forms, tried in this order:
      row range    '1:3'   -> col1 is 0 and col2 is -1
      column range 'B:G'   -> row1 is 0 and row2 is -1
      cell range   'A1:B7'
      single cell  'A1'    -> both corners are the same cell

    Returns: row1, col1, row2, col2 (all zero-based)

    Raises Exception if the string matches none of the forms.
    """
    cellrange = cellrange.upper()
    # Convert a row range: '1:3'
    res = _re_row_range.match(cellrange)
    if res:
        row1 = int(res.group(1)) - 1
        col1 = 0
        row2 = int(res.group(2)) - 1
        col2 = -1
        return row1, col1, row2, col2
    # Convert a column range: 'A:A' or 'B:G'.
    # A range such as A:A is equivalent to A1:A16384, so add rows as required
    res = _re_col_range.match(cellrange)
    if res:
        col1 = col_by_name(res.group(1).upper())
        row1 = 0
        col2 = col_by_name(res.group(2).upper())
        row2 = -1
        return row1, col1, row2, col2
    # Convert a cell range: 'A1:B7'
    res = _re_cell_range.match(cellrange)
    if res:
        row1, col1 = cell_to_rowcol2(res.group(1))
        row2, col2 = cell_to_rowcol2(res.group(2))
        return row1, col1, row2, col2
    # Convert a cell reference: 'A1' or 'AD2000'
    res = _re_cell_ref.match(cellrange)
    if res:
        row1, col1 = cell_to_rowcol2(res.group(1))
        return row1, col1, row1, col1
    # Report the offending input; the name `cell` used here before was
    # undefined and turned this into a NameError.
    raise Exception("Unknown cell reference %s" % (cellrange))
def cell_to_packed_rowcol(cell):
    """Pack row and column into the required 4 byte format.

    Returns (row, col) where col additionally carries bit 15 when the row
    is relative (no '$') and bit 14 when the column is relative.

    Raises Exception if the column or row is outside the allowed range.
    """
    row, col, row_abs, col_abs = cell_to_rowcol(cell)
    if col >= MAX_COL:
        raise Exception("Column %s greater than IV in formula" % cell)
    if row >= MAX_ROW: # this for BIFF8. for BIFF7 available 2^14
        raise Exception("Row %s greater than %d in formula" % (cell, MAX_ROW))
    row_relative_bit = int(not row_abs) << 15
    col_relative_bit = int(not col_abs) << 14
    return row, col | row_relative_bit | col_relative_bit
# === sheetname functions ===
def valid_sheet_name(sheet_name):
    """Return True if sheet_name is acceptable as a worksheet name.

    A name is rejected when it is empty, starts with an apostrophe, is
    longer than 31 characters, or contains any of [ ] : \\ ? / * or NUL.
    """
    if sheet_name == u"":
        return False
    if sheet_name[0] == u"'":
        return False
    if len(sheet_name) > 31:
        return False
    forbidden = u"[]:\\?/*\x00"
    for ch in sheet_name:
        if ch in forbidden:
            return False
    return True
def quote_sheet_name(unquoted_sheet_name):
    """Wrap a worksheet name in apostrophes, doubling any apostrophes inside.

    Raises Exception if the name fails valid_sheet_name().
    """
    if valid_sheet_name(unquoted_sheet_name):
        escaped = unquoted_sheet_name.replace(u"'", u"''")
        return u"'" + escaped + u"'"
    raise Exception(
        'attempt to quote an invalid worksheet name %r' % unquoted_sheet_name)
# pyXLWriter: A library for generating Excel Spreadsheets
# Copyright (c) 2004 Evgeny Filatov <fufff@users.sourceforge.net>
# Copyright (c) 2002-2004 John McNamara (Perl Spreadsheet::WriteExcel)
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# This library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
# General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#----------------------------------------------------------------------------
# This module was written/ported from PERL Spreadsheet::WriteExcel module
# The author of the PERL Spreadsheet::WriteExcel module is John McNamara
# <jmcnamara@cpan.org>
#----------------------------------------------------------------------------
# See the README.txt distributed with pyXLWriter for more details.
# Portions are (C) Roman V. Kiseliov, 2005
# Utilities for work with reference to cells and with sheetnames
__rev_id__ = """$Id: Utils.py 3844 2009-05-20 01:02:54Z sjmachin $"""
import re
from struct import pack
from .ExcelMagic import MAX_ROW, MAX_COL
_re_cell_ex = re.compile(r"(\$?)([A-I]?[A-Z])(\$?)(\d+)", re.IGNORECASE)
_re_row_range = re.compile(r"\$?(\d+):\$?(\d+)")
_re_col_range = re.compile(r"\$?([A-I]?[A-Z]):\$?([A-I]?[A-Z])", re.IGNORECASE)
_re_cell_range = re.compile(r"\$?([A-I]?[A-Z]\$?\d+):\$?([A-I]?[A-Z]\$?\d+)", re.IGNORECASE)
_re_cell_ref = re.compile(r"\$?([A-I]?[A-Z]\$?\d+)", re.IGNORECASE)
def col_by_name(colname):
    """Convert a column name made of upper-case letters to a zero-based index.

    'A' -> 0, 'Z' -> 25, 'AA' -> 26, 'IV' -> 255. The name is read as a
    bijective base-26 number; callers are expected to upper-case it first.
    An empty name yields -1.
    """
    col = 0
    # Walk the letters most-significant first instead of indexing backwards
    # while tracking a separate power of 26.
    for ch in colname:
        col = col * 26 + (ord(ch) - ord('A') + 1)
    return col - 1
def cell_to_rowcol(cell):
    """Parse an A1-style cell reference, keeping the '$' (absolute) markers.

    Returns a 4-tuple: zero-based row, zero-based column, and two booleans
    telling whether the row and the column were written with a '$'.

    Raises Exception if `cell` does not start with a valid reference.
    """
    match = _re_cell_ex.match(cell)
    if match is None:
        raise Exception("Ill-formed single_cell reference: %s" % cell)
    col_dollar, col_letters, row_dollar, row_digits = match.groups()
    row_index = int(row_digits) - 1
    col_index = col_by_name(col_letters.upper())
    return row_index, col_index, bool(row_dollar), bool(col_dollar)
def cell_to_rowcol2(cell):
    """Parse an A1-style cell reference, ignoring any '$' markers.

    Returns a 2-tuple: zero-based row, zero-based column.

    Raises Exception if `cell` does not start with a valid reference.
    """
    match = _re_cell_ex.match(cell)
    if match is None:
        raise Exception("Error in cell format")
    # Groups are: column '$', column letters, row '$', row digits.
    col_letters = match.group(2)
    row_digits = match.group(4)
    row_index = int(row_digits) - 1
    # The column letters are a base-26 number.
    col_index = col_by_name(col_letters.upper())
    return row_index, col_index
def rowcol_to_cell(row, col, row_abs=False, col_abs=False):
    """Build an A1-style cell reference from zero-based row/col indexes.

    row_abs / col_abs -- when true, a '$' is put in front of the row
    number / column letters.
    """
    assert 0 <= row < MAX_ROW # MAX_ROW counts from 1
    assert 0 <= col < MAX_COL # MAX_COL counts from 1
    high, low = divmod(col, 26)
    col_marker = '$' if col_abs else ''
    row_marker = '$' if row_abs else ''
    # The more significant letter only exists for columns past 'Z'.
    leading = chr(ord('A') + high - 1) if high > 0 else ""
    trailing = chr(ord('A') + low)
    # Zero index to 1-index
    return col_marker + leading + trailing + row_marker + str(row + 1)
def rowcol_pair_to_cellrange(row1, col1, row2, col2,
                             row1_abs=False, col1_abs=False, row2_abs=False, col2_abs=False):
    """Build an 'A1:B2' style range string from two (row, col) corners.

    The first corner must not lie below or to the right of the second.
    The *_abs flags add '$' markers to the corresponding parts.

    Returns: cell range string
    """
    assert row1 <= row2
    assert col1 <= col2
    first_corner = rowcol_to_cell(row1, col1, row1_abs, col1_abs)
    second_corner = rowcol_to_cell(row2, col2, row2_abs, col2_abs)
    return first_corner + ":" + second_corner
def cellrange_to_rowcol_pair(cellrange):
    """Convert cell range string in A1 notation to numeric row/col
    pair.

    Accepted forms, tried in this order:
      row range    '1:3'   -> col1 is 0 and col2 is -1
      column range 'B:G'   -> row1 is 0 and row2 is -1
      cell range   'A1:B7'
      single cell  'A1'    -> both corners are the same cell

    Returns: row1, col1, row2, col2 (all zero-based)

    Raises Exception if the string matches none of the forms.
    """
    cellrange = cellrange.upper()
    # Convert a row range: '1:3'
    res = _re_row_range.match(cellrange)
    if res:
        row1 = int(res.group(1)) - 1
        col1 = 0
        row2 = int(res.group(2)) - 1
        col2 = -1
        return row1, col1, row2, col2
    # Convert a column range: 'A:A' or 'B:G'.
    # A range such as A:A is equivalent to A1:A16384, so add rows as required
    res = _re_col_range.match(cellrange)
    if res:
        col1 = col_by_name(res.group(1).upper())
        row1 = 0
        col2 = col_by_name(res.group(2).upper())
        row2 = -1
        return row1, col1, row2, col2
    # Convert a cell range: 'A1:B7'
    res = _re_cell_range.match(cellrange)
    if res:
        row1, col1 = cell_to_rowcol2(res.group(1))
        row2, col2 = cell_to_rowcol2(res.group(2))
        return row1, col1, row2, col2
    # Convert a cell reference: 'A1' or 'AD2000'
    res = _re_cell_ref.match(cellrange)
    if res:
        row1, col1 = cell_to_rowcol2(res.group(1))
        return row1, col1, row1, col1
    # Report the offending input; the name `cell` used here before was
    # undefined and turned this into a NameError.
    raise Exception("Unknown cell reference %s" % (cellrange))
def cell_to_packed_rowcol(cell):
    """Pack row and column into the required 4 byte format.

    Returns (row, col) where col additionally carries bit 15 when the row
    is relative (no '$') and bit 14 when the column is relative.

    Raises Exception if the column or row is outside the allowed range.
    """
    row, col, row_abs, col_abs = cell_to_rowcol(cell)
    if col >= MAX_COL:
        raise Exception("Column %s greater than IV in formula" % cell)
    if row >= MAX_ROW: # this for BIFF8. for BIFF7 available 2^14
        raise Exception("Row %s greater than %d in formula" % (cell, MAX_ROW))
    row_relative_bit = int(not row_abs) << 15
    col_relative_bit = int(not col_abs) << 14
    return row, col | row_relative_bit | col_relative_bit
# === sheetname functions ===
def valid_sheet_name(sheet_name):
    """Return True if sheet_name is acceptable as a worksheet name.

    A name is rejected when it is empty, starts with an apostrophe, is
    longer than 31 characters, or contains any of [ ] : \\ ? / * or NUL.
    """
    if sheet_name == "":
        return False
    if sheet_name[0] == "'":
        return False
    if len(sheet_name) > 31:
        return False
    forbidden = "[]:\\?/*\x00"
    return all(ch not in forbidden for ch in sheet_name)
def quote_sheet_name(unquoted_sheet_name):
    """Wrap a worksheet name in apostrophes, doubling any apostrophes inside.

    Raises Exception if the name fails valid_sheet_name().
    """
    if valid_sheet_name(unquoted_sheet_name):
        escaped = unquoted_sheet_name.replace("'", "''")
        return "'" + escaped + "'"
    raise Exception(
        'attempt to quote an invalid worksheet name %r' % unquoted_sheet_name)
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+9 -16
View File
@@ -1,16 +1,9 @@
# -*- coding: windows-1252 -*-
__VERSION__ = '0.7.2'
import sys
if sys.version_info[:2] < (2, 3):
print >> sys.stderr, "Sorry, xlwt requires Python 2.3 or later"
sys.exit(1)
from Workbook import Workbook
from Worksheet import Worksheet
from Row import Row
from Column import Column
from Formatting import Font, Alignment, Borders, Pattern, Protection
from Style import XFStyle, easyxf
from ExcelFormula import *
import sys
from .Workbook import Workbook
from .Worksheet import Worksheet
from .Row import Row
from .Column import Column
from .Formatting import Font, Alignment, Borders, Pattern, Protection
from .Style import XFStyle, easyxf
from .ExcelFormula import *
+2868 -2874
View File
File diff suppressed because it is too large Load Diff
+19 -22
View File
@@ -1,21 +1,22 @@
from error import *
from .error import *
from tokens import *
from events import *
from nodes import *
from .tokens import *
from .events import *
from .nodes import *
from loader import *
from dumper import *
from .loader import *
from .dumper import *
__version__ = '3.09'
try:
from cyaml import *
from .cyaml import *
__with_libyaml__ = True
except ImportError:
__with_libyaml__ = False
import io
def scan(stream, Loader=Loader):
"""
Scan a YAML stream and produce scanning tokens.
@@ -91,8 +92,7 @@ def emit(events, stream=None, Dumper=Dumper,
"""
getvalue = None
if stream is None:
from StringIO import StringIO
stream = StringIO()
stream = io.StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
@@ -104,7 +104,7 @@ def emit(events, stream=None, Dumper=Dumper,
def serialize_all(nodes, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding='utf-8', explicit_start=None, explicit_end=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of representation trees into a YAML stream.
@@ -113,10 +113,9 @@ def serialize_all(nodes, stream=None, Dumper=Dumper,
getvalue = None
if stream is None:
if encoding is None:
from StringIO import StringIO
stream = io.StringIO()
else:
from cStringIO import StringIO
stream = StringIO()
stream = io.BytesIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
@@ -140,7 +139,7 @@ def dump_all(documents, stream=None, Dumper=Dumper,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding='utf-8', explicit_start=None, explicit_end=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of Python objects into a YAML stream.
@@ -149,10 +148,9 @@ def dump_all(documents, stream=None, Dumper=Dumper,
getvalue = None
if stream is None:
if encoding is None:
from StringIO import StringIO
stream = io.StringIO()
else:
from cStringIO import StringIO
stream = StringIO()
stream = io.BytesIO()
getvalue = stream.getvalue
dumper = Dumper(stream, default_style=default_style,
default_flow_style=default_flow_style,
@@ -256,13 +254,12 @@ class YAMLObjectMetaclass(type):
cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml)
cls.yaml_dumper.add_representer(cls, cls.to_yaml)
class YAMLObject(object):
class YAMLObject(metaclass=YAMLObjectMetaclass):
"""
An object that can dump itself to a YAML stream
and load itself from a YAML stream.
"""
__metaclass__ = YAMLObjectMetaclass
__slots__ = () # no direct instantiation, so allow immutable subclasses
yaml_loader = Loader
@@ -271,18 +268,18 @@ class YAMLObject(object):
yaml_tag = None
yaml_flow_style = None
@classmethod
def from_yaml(cls, loader, node):
"""
Convert a representation node to a Python object.
"""
return loader.construct_yaml_object(node, cls)
from_yaml = classmethod(from_yaml)
@classmethod
def to_yaml(cls, dumper, data):
"""
Convert a Python object to a representation node.
"""
return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
flow_style=cls.yaml_flow_style)
to_yaml = classmethod(to_yaml)
+9 -9
View File
@@ -1,14 +1,14 @@
__all__ = ['Composer', 'ComposerError']
from error import MarkedYAMLError
from events import *
from nodes import *
from .error import MarkedYAMLError
from .events import *
from .nodes import *
class ComposerError(MarkedYAMLError):
pass
class Composer(object):
class Composer:
def __init__(self):
self.anchors = {}
@@ -66,14 +66,14 @@ class Composer(object):
anchor = event.anchor
if anchor not in self.anchors:
raise ComposerError(None, None, "found undefined alias %r"
% anchor.encode('utf-8'), event.start_mark)
% anchor, event.start_mark)
return self.anchors[anchor]
event = self.peek_event()
anchor = event.anchor
if anchor is not None:
if anchor in self.anchors:
raise ComposerError("found duplicate anchor %r; first occurence"
% anchor.encode('utf-8'), self.anchors[anchor].start_mark,
% anchor, self.anchors[anchor].start_mark,
"second occurence", event.start_mark)
self.descend_resolver(parent, index)
if self.check_event(ScalarEvent):
@@ -88,7 +88,7 @@ class Composer(object):
def compose_scalar_node(self, anchor):
event = self.get_event()
tag = event.tag
if tag is None or tag == u'!':
if tag is None or tag == '!':
tag = self.resolve(ScalarNode, event.value, event.implicit)
node = ScalarNode(tag, event.value,
event.start_mark, event.end_mark, style=event.style)
@@ -99,7 +99,7 @@ class Composer(object):
def compose_sequence_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == u'!':
if tag is None or tag == '!':
tag = self.resolve(SequenceNode, None, start_event.implicit)
node = SequenceNode(tag, [],
start_event.start_mark, None,
@@ -117,7 +117,7 @@ class Composer(object):
def compose_mapping_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == u'!':
if tag is None or tag == '!':
tag = self.resolve(MappingNode, None, start_event.implicit)
node = MappingNode(tag, [],
start_event.start_mark, None,
+100 -98
View File
@@ -2,22 +2,15 @@
__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor',
'ConstructorError']
from error import *
from nodes import *
from .error import *
from .nodes import *
import datetime
try:
set
except NameError:
from sets import Set as set
import binascii, re, sys, types
import collections, datetime, base64, binascii, re, sys, types
class ConstructorError(MarkedYAMLError):
pass
class BaseConstructor(object):
class BaseConstructor:
yaml_constructors = {}
yaml_multi_constructors = {}
@@ -95,7 +88,7 @@ class BaseConstructor(object):
data = constructor(self, tag_suffix, node)
if isinstance(data, types.GeneratorType):
generator = data
data = generator.next()
data = next(generator)
if self.deep_construct:
for dummy in generator:
pass
@@ -130,11 +123,9 @@ class BaseConstructor(object):
mapping = {}
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
try:
hash(key)
except TypeError, exc:
if not isinstance(key, collections.Hashable):
raise ConstructorError("while constructing a mapping", node.start_mark,
"found unacceptable key (%s)" % exc, key_node.start_mark)
"found unhashable key", key_node.start_mark)
value = self.construct_object(value_node, deep=deep)
mapping[key] = value
return mapping
@@ -151,33 +142,33 @@ class BaseConstructor(object):
pairs.append((key, value))
return pairs
@classmethod
def add_constructor(cls, tag, constructor):
if not 'yaml_constructors' in cls.__dict__:
cls.yaml_constructors = cls.yaml_constructors.copy()
cls.yaml_constructors[tag] = constructor
add_constructor = classmethod(add_constructor)
@classmethod
def add_multi_constructor(cls, tag_prefix, multi_constructor):
if not 'yaml_multi_constructors' in cls.__dict__:
cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
cls.yaml_multi_constructors[tag_prefix] = multi_constructor
add_multi_constructor = classmethod(add_multi_constructor)
class SafeConstructor(BaseConstructor):
def construct_scalar(self, node):
if isinstance(node, MappingNode):
for key_node, value_node in node.value:
if key_node.tag == u'tag:yaml.org,2002:value':
if key_node.tag == 'tag:yaml.org,2002:value':
return self.construct_scalar(value_node)
return BaseConstructor.construct_scalar(self, node)
return super().construct_scalar(node)
def flatten_mapping(self, node):
merge = []
index = 0
while index < len(node.value):
key_node, value_node = node.value[index]
if key_node.tag == u'tag:yaml.org,2002:merge':
if key_node.tag == 'tag:yaml.org,2002:merge':
del node.value[index]
if isinstance(value_node, MappingNode):
self.flatten_mapping(value_node)
@@ -199,8 +190,8 @@ class SafeConstructor(BaseConstructor):
raise ConstructorError("while constructing a mapping", node.start_mark,
"expected a mapping or list of mappings for merging, but found %s"
% value_node.id, value_node.start_mark)
elif key_node.tag == u'tag:yaml.org,2002:value':
key_node.tag = u'tag:yaml.org,2002:str'
elif key_node.tag == 'tag:yaml.org,2002:value':
key_node.tag = 'tag:yaml.org,2002:str'
index += 1
else:
index += 1
@@ -210,19 +201,19 @@ class SafeConstructor(BaseConstructor):
def construct_mapping(self, node, deep=False):
    """Construct a mapping after flattening it with ``flatten_mapping``.

    Only MappingNode instances are flattened; the node is then handed to
    ``BaseConstructor.construct_mapping`` either way.
    """
    if isinstance(node, MappingNode):
        self.flatten_mapping(node)
    return BaseConstructor.construct_mapping(self, node, deep=deep)
def construct_yaml_null(self, node):
    """Return None for ``node``.

    ``construct_scalar`` is still called on the node; its result is
    discarded.
    """
    _ignored = self.construct_scalar(node)
    return None
# Lower-case boolean spellings mapped to Python booleans.
# construct_yaml_bool indexes this table with ``value.lower()``.
bool_values = {
    'yes': True,
    'no': False,
    'true': True,
    'false': False,
    'on': True,
    'off': False,
}
def construct_yaml_bool(self, node):
@@ -230,7 +221,7 @@ class SafeConstructor(BaseConstructor):
return self.bool_values[value.lower()]
def construct_yaml_int(self, node):
value = str(self.construct_scalar(node))
value = self.construct_scalar(node)
value = value.replace('_', '')
sign = +1
if value[0] == '-':
@@ -263,7 +254,7 @@ class SafeConstructor(BaseConstructor):
nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99).
def construct_yaml_float(self, node):
value = str(self.construct_scalar(node))
value = self.construct_scalar(node)
value = value.replace('_', '').lower()
sign = +1
if value[0] == '-':
@@ -287,15 +278,23 @@ class SafeConstructor(BaseConstructor):
return sign*float(value)
def construct_yaml_binary(self, node):
    """Decode the base64 text of ``node`` and return it as bytes.

    Raises:
        ConstructorError: if the scalar contains non-ASCII characters or
            is not valid base64.
    """
    try:
        value = self.construct_scalar(node).encode('ascii')
    except UnicodeEncodeError as exc:
        raise ConstructorError(None, None,
                "failed to convert base64 data into ascii: %s" % exc,
                node.start_mark)
    try:
        # decodestring is only used when this base64 module has no
        # decodebytes.
        if hasattr(base64, 'decodebytes'):
            return base64.decodebytes(value)
        else:
            return base64.decodestring(value)
    except binascii.Error as exc:
        raise ConstructorError(None, None,
                "failed to decode base64 data: %s" % exc, node.start_mark)
timestamp_regexp = re.compile(
ur'''^(?P<year>[0-9][0-9][0-9][0-9])
r'''^(?P<year>[0-9][0-9][0-9][0-9])
-(?P<month>[0-9][0-9]?)
-(?P<day>[0-9][0-9]?)
(?:(?:[Tt]|[ \t]+)
@@ -386,11 +385,7 @@ class SafeConstructor(BaseConstructor):
data.update(value)
def construct_yaml_str(self, node):
    """Return the scalar of ``node`` as produced by ``construct_scalar``.

    The value is no longer passed through ``.encode('ascii')``, so ASCII
    and non-ASCII scalars come back as the same type.
    """
    return self.construct_scalar(node)
def construct_yaml_seq(self, node):
data = []
@@ -415,55 +410,55 @@ class SafeConstructor(BaseConstructor):
def construct_undefined(self, node):
    """Always raise ConstructorError, reporting ``node.tag`` and its mark."""
    raise ConstructorError(None, None,
            "could not determine a constructor for the tag %r" % node.tag,
            node.start_mark)
# Register the SafeConstructor handlers for the standard YAML tags.
# Each call takes exactly one tag and one constructor.

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:null',
        SafeConstructor.construct_yaml_null)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:bool',
        SafeConstructor.construct_yaml_bool)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:int',
        SafeConstructor.construct_yaml_int)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:float',
        SafeConstructor.construct_yaml_float)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:binary',
        SafeConstructor.construct_yaml_binary)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:timestamp',
        SafeConstructor.construct_yaml_timestamp)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:omap',
        SafeConstructor.construct_yaml_omap)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:pairs',
        SafeConstructor.construct_yaml_pairs)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:set',
        SafeConstructor.construct_yaml_set)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:str',
        SafeConstructor.construct_yaml_str)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:seq',
        SafeConstructor.construct_yaml_seq)

SafeConstructor.add_constructor(
        'tag:yaml.org,2002:map',
        SafeConstructor.construct_yaml_map)
SafeConstructor.add_constructor(None,
@@ -472,13 +467,29 @@ SafeConstructor.add_constructor(None,
class Constructor(SafeConstructor):
def construct_python_str(self, node):
    """Return the scalar of ``node`` as text.

    The scalar is returned as-is; the former ``.encode('utf-8')`` step
    produced bytes instead of str.
    """
    return self.construct_scalar(node)
def construct_python_unicode(self, node):
    """Return whatever ``construct_scalar`` yields for ``node``."""
    text = self.construct_scalar(node)
    return text
def construct_python_bytes(self, node):
    """Decode the base64 scalar of ``node`` into bytes.

    Raises:
        ConstructorError: when the scalar is not pure ASCII or cannot be
            base64-decoded.
    """
    try:
        encoded = self.construct_scalar(node).encode('ascii')
    except UnicodeEncodeError as exc:
        raise ConstructorError(None, None,
                "failed to convert base64 data into ascii: %s" % exc,
                node.start_mark)
    try:
        # Fall back to decodestring only when decodebytes is missing.
        if not hasattr(base64, 'decodebytes'):
            return base64.decodestring(encoded)
        return base64.decodebytes(encoded)
    except binascii.Error as exc:
        raise ConstructorError(None, None,
                "failed to decode base64 data: %s" % exc, node.start_mark)
def construct_python_long(self, node):
    """Return the integer built by ``construct_yaml_int`` for ``node``.

    The result is no longer wrapped in ``long()``, a builtin that does not
    exist on Python 3.
    """
    return self.construct_yaml_int(node)
def construct_python_complex(self, node):
    """Build a complex number from the scalar text of ``node``."""
    text = self.construct_scalar(node)
    return complex(text)
@@ -492,54 +503,46 @@ class Constructor(SafeConstructor):
"expected non-empty name appended to the tag", mark)
try:
__import__(name)
except ImportError, exc:
except ImportError as exc:
raise ConstructorError("while constructing a Python module", mark,
"cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark)
"cannot find module %r (%s)" % (name, exc), mark)
return sys.modules[name]
def find_python_name(self, name, mark):
    """Resolve a dotted ``name`` to the Python object it refers to.

    Everything before the last dot is imported as a module and the last
    component is looked up on it.  A name without a dot is looked up in
    the ``builtins`` module.

    Raises:
        ConstructorError: if ``name`` is empty, the module cannot be
            imported, or the module has no such attribute.
    """
    if not name:
        raise ConstructorError("while constructing a Python object", mark,
                "expected non-empty name appended to the tag", mark)
    if '.' in name:
        module_name, object_name = name.rsplit('.', 1)
    else:
        module_name = 'builtins'
        object_name = name
    # NOTE(security): this imports whatever module the tag names, which
    # runs that module's top-level code.  Not safe for untrusted documents.
    try:
        __import__(module_name)
    except ImportError as exc:
        raise ConstructorError("while constructing a Python object", mark,
                "cannot find module %r (%s)" % (module_name, exc), mark)
    module = sys.modules[module_name]
    if not hasattr(module, object_name):
        raise ConstructorError("while constructing a Python object", mark,
                "cannot find %r in the module %r"
                % (object_name, module.__name__), mark)
    return getattr(module, object_name)
def construct_python_name(self, suffix, node):
    """Resolve the tag ``suffix`` to a Python object via ``find_python_name``.

    Raises:
        ConstructorError: if the node carries a non-empty scalar value.
    """
    value = self.construct_scalar(node)
    if value:
        raise ConstructorError("while constructing a Python name", node.start_mark,
                "expected the empty value, but found %r" % value, node.start_mark)
    return self.find_python_name(suffix, node.start_mark)
def construct_python_module(self, suffix, node):
    """Resolve the tag ``suffix`` to a module via ``find_python_module``.

    Raises:
        ConstructorError: if the node carries a non-empty scalar value.
    """
    value = self.construct_scalar(node)
    if value:
        raise ConstructorError("while constructing a Python module", node.start_mark,
                "expected the empty value, but found %r" % value, node.start_mark)
    return self.find_python_module(suffix, node.start_mark)
# Empty placeholder class.  make_python_instance compares candidates with
# type(self.classobj) and, on a match, creates a classobj instance and
# reassigns its __class__.
class classobj: pass
def make_python_instance(self, suffix, node,
args=None, kwds=None, newobj=False):
if not args:
@@ -547,12 +550,7 @@ class Constructor(SafeConstructor):
if not kwds:
kwds = {}
cls = self.find_python_name(suffix, node.start_mark)
if newobj and isinstance(cls, type(self.classobj)) \
and not args and not kwds:
instance = self.classobj()
instance.__class__ = cls
return instance
elif newobj and isinstance(cls, type):
if newobj and isinstance(cls, type):
return cls.__new__(cls, *args, **kwds)
else:
return cls(*args, **kwds)
@@ -619,66 +617,70 @@ class Constructor(SafeConstructor):
return self.construct_python_object_apply(suffix, node, newobj=True)
# Register the Constructor handlers for the ``python/...`` tags.
# Each call takes exactly one tag (or tag prefix) and one constructor.

Constructor.add_constructor(
    'tag:yaml.org,2002:python/none',
    Constructor.construct_yaml_null)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/bool',
    Constructor.construct_yaml_bool)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/str',
    Constructor.construct_python_str)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/unicode',
    Constructor.construct_python_unicode)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/bytes',
    Constructor.construct_python_bytes)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/int',
    Constructor.construct_yaml_int)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/long',
    Constructor.construct_python_long)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/float',
    Constructor.construct_yaml_float)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/complex',
    Constructor.construct_python_complex)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/list',
    Constructor.construct_yaml_seq)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/tuple',
    Constructor.construct_python_tuple)

Constructor.add_constructor(
    'tag:yaml.org,2002:python/dict',
    Constructor.construct_yaml_map)

# Multi-constructors are keyed by tag prefix; the rest of the tag is
# passed to the handler as ``suffix``.
Constructor.add_multi_constructor(
    'tag:yaml.org,2002:python/name:',
    Constructor.construct_python_name)

Constructor.add_multi_constructor(
    'tag:yaml.org,2002:python/module:',
    Constructor.construct_python_module)

Constructor.add_multi_constructor(
    'tag:yaml.org,2002:python/object:',
    Constructor.construct_python_object)

Constructor.add_multi_constructor(
    'tag:yaml.org,2002:python/object/apply:',
    Constructor.construct_python_object_apply)

Constructor.add_multi_constructor(
    'tag:yaml.org,2002:python/object/new:',
    Constructor.construct_python_object_new)
+4 -4
View File
@@ -4,12 +4,12 @@ __all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader',
from _yaml import CParser, CEmitter
from constructor import *
from .constructor import *
from serializer import *
from representer import *
from .serializer import *
from .representer import *
from resolver import *
from .resolver import *
class CBaseLoader(CParser, BaseConstructor, BaseResolver):
+4 -4
View File
@@ -1,10 +1,10 @@
__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
from emitter import *
from serializer import *
from representer import *
from resolver import *
from .emitter import *
from .serializer import *
from .representer import *
from .resolver import *
class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
+155 -158
View File
@@ -8,13 +8,13 @@
__all__ = ['Emitter', 'EmitterError']
from error import YAMLError
from events import *
from .error import YAMLError
from .events import *
class EmitterError(YAMLError):
    """Error raised by the emitter, e.g. for invalid anchors or tag handles."""
class ScalarAnalysis(object):
class ScalarAnalysis:
def __init__(self, scalar, empty, multiline,
allow_flow_plain, allow_block_plain,
allow_single_quoted, allow_double_quoted,
@@ -28,11 +28,11 @@ class ScalarAnalysis(object):
self.allow_double_quoted = allow_double_quoted
self.allow_block = allow_block
class Emitter(object):
class Emitter:
# Maps a tag prefix to the handle written in its place.
# expect_document_start copies this into self.tag_prefixes for each document.
DEFAULT_TAG_PREFIXES = {
    '!' : '!',
    'tag:yaml.org,2002:' : '!!',
}
def __init__(self, stream, canonical=None, indent=None, width=None,
@@ -88,8 +88,8 @@ class Emitter(object):
self.best_width = 80
if width and width > self.best_indent*2:
self.best_width = width
self.best_line_break = u'\n'
if line_break in [u'\r', u'\n', u'\r\n']:
self.best_line_break = '\n'
if line_break in ['\r', '\n', '\r\n']:
self.best_line_break = line_break
# Tag prefixes.
@@ -154,7 +154,7 @@ class Emitter(object):
def expect_stream_start(self):
if isinstance(self.event, StreamStartEvent):
if self.event.encoding and not getattr(self.stream, 'encoding', None):
if self.event.encoding and not hasattr(self.stream, 'encoding'):
self.encoding = self.event.encoding
self.write_stream_start()
self.state = self.expect_first_document_start
@@ -173,15 +173,14 @@ class Emitter(object):
def expect_document_start(self, first=False):
if isinstance(self.event, DocumentStartEvent):
if (self.event.version or self.event.tags) and self.open_ended:
self.write_indicator(u'...', True)
self.write_indicator('...', True)
self.write_indent()
if self.event.version:
version_text = self.prepare_version(self.event.version)
self.write_version_directive(version_text)
self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
if self.event.tags:
handles = self.event.tags.keys()
handles.sort()
handles = sorted(self.event.tags.keys())
for handle in handles:
prefix = self.event.tags[handle]
self.tag_prefixes[prefix] = handle
@@ -193,13 +192,13 @@ class Emitter(object):
and not self.check_empty_document())
if not implicit:
self.write_indent()
self.write_indicator(u'---', True)
self.write_indicator('---', True)
if self.canonical:
self.write_indent()
self.state = self.expect_document_root
elif isinstance(self.event, StreamEndEvent):
if self.open_ended:
self.write_indicator(u'...', True)
self.write_indicator('...', True)
self.write_indent()
self.write_stream_end()
self.state = self.expect_nothing
@@ -211,7 +210,7 @@ class Emitter(object):
if isinstance(self.event, DocumentEndEvent):
self.write_indent()
if self.event.explicit:
self.write_indicator(u'...', True)
self.write_indicator('...', True)
self.write_indent()
self.flush_stream()
self.state = self.expect_document_start
@@ -234,7 +233,7 @@ class Emitter(object):
if isinstance(self.event, AliasEvent):
self.expect_alias()
elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
self.process_anchor(u'&')
self.process_anchor('&')
self.process_tag()
if isinstance(self.event, ScalarEvent):
self.expect_scalar()
@@ -256,7 +255,7 @@ class Emitter(object):
def expect_alias(self):
    """Emit the current alias event and return to the previous state.

    Raises:
        EmitterError: if the event has no anchor.
    """
    if self.event.anchor is None:
        raise EmitterError("anchor is not specified for alias")
    # Processed once; a second call would repeat the anchor handling.
    self.process_anchor('*')
    self.state = self.states.pop()
def expect_scalar(self):
@@ -268,7 +267,7 @@ class Emitter(object):
# Flow sequence handlers.
def expect_flow_sequence(self):
    """Open a flow sequence: write ``[``, go one flow level deeper, indent."""
    self.write_indicator('[', True, whitespace=True)
    self.flow_level += 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_sequence_item
@@ -277,7 +276,7 @@ class Emitter(object):
if isinstance(self.event, SequenceEndEvent):
self.indent = self.indents.pop()
self.flow_level -= 1
self.write_indicator(u']', False)
self.write_indicator(']', False)
self.state = self.states.pop()
else:
if self.canonical or self.column > self.best_width:
@@ -290,12 +289,12 @@ class Emitter(object):
self.indent = self.indents.pop()
self.flow_level -= 1
if self.canonical:
self.write_indicator(u',', False)
self.write_indicator(',', False)
self.write_indent()
self.write_indicator(u']', False)
self.write_indicator(']', False)
self.state = self.states.pop()
else:
self.write_indicator(u',', False)
self.write_indicator(',', False)
if self.canonical or self.column > self.best_width:
self.write_indent()
self.states.append(self.expect_flow_sequence_item)
@@ -304,7 +303,7 @@ class Emitter(object):
# Flow mapping handlers.
def expect_flow_mapping(self):
    """Open a flow mapping: write ``{``, go one flow level deeper, indent."""
    self.write_indicator('{', True, whitespace=True)
    self.flow_level += 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_mapping_key
@@ -313,7 +312,7 @@ class Emitter(object):
if isinstance(self.event, MappingEndEvent):
self.indent = self.indents.pop()
self.flow_level -= 1
self.write_indicator(u'}', False)
self.write_indicator('}', False)
self.state = self.states.pop()
else:
if self.canonical or self.column > self.best_width:
@@ -322,7 +321,7 @@ class Emitter(object):
self.states.append(self.expect_flow_mapping_simple_value)
self.expect_node(mapping=True, simple_key=True)
else:
self.write_indicator(u'?', True)
self.write_indicator('?', True)
self.states.append(self.expect_flow_mapping_value)
self.expect_node(mapping=True)
@@ -331,31 +330,31 @@ class Emitter(object):
self.indent = self.indents.pop()
self.flow_level -= 1
if self.canonical:
self.write_indicator(u',', False)
self.write_indicator(',', False)
self.write_indent()
self.write_indicator(u'}', False)
self.write_indicator('}', False)
self.state = self.states.pop()
else:
self.write_indicator(u',', False)
self.write_indicator(',', False)
if self.canonical or self.column > self.best_width:
self.write_indent()
if not self.canonical and self.check_simple_key():
self.states.append(self.expect_flow_mapping_simple_value)
self.expect_node(mapping=True, simple_key=True)
else:
self.write_indicator(u'?', True)
self.write_indicator('?', True)
self.states.append(self.expect_flow_mapping_value)
self.expect_node(mapping=True)
def expect_flow_mapping_simple_value(self):
    """Write ``:`` after a simple flow-mapping key, then emit the value node."""
    self.write_indicator(':', False)
    # After the value, continue with the next flow-mapping key.
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)
def expect_flow_mapping_value(self):
    """Write ``:`` for a non-simple flow-mapping key, then emit the value node.

    A fresh indent is written first in canonical mode or when the current
    column is past ``best_width``.
    """
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.write_indicator(':', True)
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)
@@ -375,7 +374,7 @@ class Emitter(object):
self.state = self.states.pop()
else:
self.write_indent()
self.write_indicator(u'-', True, indention=True)
self.write_indicator('-', True, indention=True)
self.states.append(self.expect_block_sequence_item)
self.expect_node(sequence=True)
@@ -398,18 +397,18 @@ class Emitter(object):
self.states.append(self.expect_block_mapping_simple_value)
self.expect_node(mapping=True, simple_key=True)
else:
self.write_indicator(u'?', True, indention=True)
self.write_indicator('?', True, indention=True)
self.states.append(self.expect_block_mapping_value)
self.expect_node(mapping=True)
def expect_block_mapping_simple_value(self):
    """Write ``:`` after a simple block-mapping key, then emit the value node."""
    self.write_indicator(':', False)
    # After the value, continue with the next block-mapping key.
    self.states.append(self.expect_block_mapping_key)
    self.expect_node(mapping=True)
def expect_block_mapping_value(self):
    """Write an indented ``:`` for a non-simple block-mapping key, then emit
    the value node."""
    self.write_indent()
    self.write_indicator(':', True, indention=True)
    self.states.append(self.expect_block_mapping_key)
    self.expect_node(mapping=True)
@@ -428,7 +427,7 @@ class Emitter(object):
return False
event = self.events[0]
return (isinstance(event, ScalarEvent) and event.anchor is None
and event.tag is None and event.implicit and event.value == u'')
and event.tag is None and event.implicit and event.value == '')
def check_simple_key(self):
length = 0
@@ -473,7 +472,7 @@ class Emitter(object):
self.prepared_tag = None
return
if self.event.implicit[0] and tag is None:
tag = u'!'
tag = '!'
self.prepared_tag = None
else:
if (not self.canonical or tag is None) and self.event.implicit:
@@ -536,19 +535,18 @@ class Emitter(object):
major, minor = version
if major != 1:
raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
return u'%d.%d' % (major, minor)
return '%d.%d' % (major, minor)
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    A handle must start and end with ``!``; the characters in between may
    only be ASCII letters, ASCII digits, ``-`` or ``_``.

    Raises:
        EmitterError: if the handle is empty or breaks those rules.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if handle[0] != '!' or handle[-1] != '!':
        raise EmitterError("tag handle must start and end with '!': %r" % handle)
    for ch in handle[1:-1]:
        if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
                or ch in '-_'):
            raise EmitterError("invalid character %r in the tag handle: %r"
                    % (ch, handle))
    return handle
def prepare_tag_prefix(self, prefix):
@@ -556,12 +554,12 @@ class Emitter(object):
raise EmitterError("tag prefix must not be empty")
chunks = []
start = end = 0
if prefix[0] == u'!':
if prefix[0] == '!':
end = 1
while end < len(prefix):
ch = prefix[end]
if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-;/?!:@&=+$,_.~*\'()[]':
if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-;/?!:@&=+$,_.~*\'()[]':
end += 1
else:
if start < end:
@@ -569,32 +567,31 @@ class Emitter(object):
start = end = end+1
data = ch.encode('utf-8')
for ch in data:
chunks.append(u'%%%02X' % ord(ch))
chunks.append('%%%02X' % ord(ch))
if start < end:
chunks.append(prefix[start:end])
return u''.join(chunks)
return ''.join(chunks)
def prepare_tag(self, tag):
if not tag:
raise EmitterError("tag must not be empty")
if tag == u'!':
if tag == '!':
return tag
handle = None
suffix = tag
prefixes = self.tag_prefixes.keys()
prefixes.sort()
prefixes = sorted(self.tag_prefixes.keys())
for prefix in prefixes:
if tag.startswith(prefix) \
and (prefix == u'!' or len(prefix) < len(tag)):
and (prefix == '!' or len(prefix) < len(tag)):
handle = self.tag_prefixes[prefix]
suffix = tag[len(prefix):]
chunks = []
start = end = 0
while end < len(suffix):
ch = suffix[end]
if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-;/?:@&=+$,_.~*\'()[]' \
or (ch == u'!' and handle != u'!'):
if '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-;/?:@&=+$,_.~*\'()[]' \
or (ch == '!' and handle != '!'):
end += 1
else:
if start < end:
@@ -602,23 +599,23 @@ class Emitter(object):
start = end = end+1
data = ch.encode('utf-8')
for ch in data:
chunks.append(u'%%%02X' % ord(ch))
chunks.append('%%%02X' % ord(ch))
if start < end:
chunks.append(suffix[start:end])
suffix_text = u''.join(chunks)
suffix_text = ''.join(chunks)
if handle:
return u'%s%s' % (handle, suffix_text)
return '%s%s' % (handle, suffix_text)
else:
return u'!<%s>' % suffix_text
return '!<%s>' % suffix_text
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    Only ASCII letters, ASCII digits, ``-`` and ``_`` are accepted.

    Raises:
        EmitterError: if the anchor is empty or contains another character.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    for ch in anchor:
        if not ('0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
                or ch in '-_'):
            raise EmitterError("invalid character %r in the anchor: %r"
                    % (ch, anchor))
    return anchor
def analyze_scalar(self, scalar):
@@ -645,7 +642,7 @@ class Emitter(object):
space_break = False
# Check document indicators.
if scalar.startswith(u'---') or scalar.startswith(u'...'):
if scalar.startswith('---') or scalar.startswith('...'):
block_indicators = True
flow_indicators = True
@@ -654,7 +651,7 @@ class Emitter(object):
# Last character or followed by a whitespace.
followed_by_whitespace = (len(scalar) == 1 or
scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
scalar[1] in '\0 \t\r\n\x85\u2028\u2029')
# The previous character is a space.
previous_space = False
@@ -669,34 +666,34 @@ class Emitter(object):
# Check for indicators.
if index == 0:
# Leading indicators are special characters.
if ch in u'#,[]{}&*!|>\'\"%@`':
if ch in '#,[]{}&*!|>\'\"%@`':
flow_indicators = True
block_indicators = True
if ch in u'?:':
if ch in '?:':
flow_indicators = True
if followed_by_whitespace:
block_indicators = True
if ch == u'-' and followed_by_whitespace:
if ch == '-' and followed_by_whitespace:
flow_indicators = True
block_indicators = True
else:
# Some indicators cannot appear within a scalar as well.
if ch in u',?[]{}':
if ch in ',?[]{}':
flow_indicators = True
if ch == u':':
if ch == ':':
flow_indicators = True
if followed_by_whitespace:
block_indicators = True
if ch == u'#' and preceeded_by_whitespace:
if ch == '#' and preceeded_by_whitespace:
flow_indicators = True
block_indicators = True
# Check for line breaks, special, and unicode characters.
if ch in u'\n\x85\u2028\u2029':
if ch in '\n\x85\u2028\u2029':
line_breaks = True
if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
if not (ch == '\n' or '\x20' <= ch <= '\x7E'):
if (ch == '\x85' or '\xA0' <= ch <= '\uD7FF'
or '\uE000' <= ch <= '\uFFFD') and ch != '\uFEFF':
unicode_characters = True
if not self.allow_unicode:
special_characters = True
@@ -704,7 +701,7 @@ class Emitter(object):
special_characters = True
# Detect important whitespace combinations.
if ch == u' ':
if ch == ' ':
if index == 0:
leading_space = True
if index == len(scalar)-1:
@@ -713,7 +710,7 @@ class Emitter(object):
break_space = True
previous_space = True
previous_break = False
elif ch in u'\n\x85\u2028\u2029':
elif ch in '\n\x85\u2028\u2029':
if index == 0:
leading_break = True
if index == len(scalar)-1:
@@ -728,9 +725,9 @@ class Emitter(object):
# Prepare for the next character.
index += 1
preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029')
preceeded_by_whitespace = (ch in '\0 \t\r\n\x85\u2028\u2029')
followed_by_whitespace = (index+1 >= len(scalar) or
scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
scalar[index+1] in '\0 \t\r\n\x85\u2028\u2029')
# Let's decide what styles are allowed.
allow_flow_plain = True
@@ -789,7 +786,7 @@ class Emitter(object):
def write_stream_start(self):
    """Write a byte order mark when the output encoding is a UTF-16 variant."""
    # Write BOM if needed.  It is written exactly once.
    if self.encoding and self.encoding.startswith('utf-16'):
        self.stream.write('\uFEFF'.encode(self.encoding))
def write_stream_end(self):
    """Finish the stream by calling ``flush_stream``."""
    flush = self.flush_stream
    flush()
@@ -799,7 +796,7 @@ class Emitter(object):
if self.whitespace or not need_whitespace:
data = indicator
else:
data = u' '+indicator
data = ' '+indicator
self.whitespace = whitespace
self.indention = self.indention and indention
self.column += len(data)
@@ -815,7 +812,7 @@ class Emitter(object):
self.write_line_break()
if self.column < indent:
self.whitespace = True
data = u' '*(indent-self.column)
data = ' '*(indent-self.column)
self.column = indent
if self.encoding:
data = data.encode(self.encoding)
@@ -833,14 +830,14 @@ class Emitter(object):
self.stream.write(data)
def write_version_directive(self, version_text):
    """Write a ``%YAML <version_text>`` directive followed by a line break.

    The text is encoded with ``self.encoding`` first when one is set.
    """
    data = '%%YAML %s' % version_text
    if self.encoding:
        data = data.encode(self.encoding)
    self.stream.write(data)
    self.write_line_break()
def write_tag_directive(self, handle_text, prefix_text):
data = u'%%TAG %s %s' % (handle_text, prefix_text)
data = '%%TAG %s %s' % (handle_text, prefix_text)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
@@ -849,7 +846,7 @@ class Emitter(object):
# Scalar streams.
def write_single_quoted(self, text, split=True):
self.write_indicator(u'\'', True)
self.write_indicator('\'', True)
spaces = False
breaks = False
start = end = 0
@@ -858,7 +855,7 @@ class Emitter(object):
if end < len(text):
ch = text[end]
if spaces:
if ch is None or ch != u' ':
if ch is None or ch != ' ':
if start+1 == end and self.column > self.best_width and split \
and start != 0 and end != len(text):
self.write_indent()
@@ -870,18 +867,18 @@ class Emitter(object):
self.stream.write(data)
start = end
elif breaks:
if ch is None or ch not in u'\n\x85\u2028\u2029':
if text[start] == u'\n':
if ch is None or ch not in '\n\x85\u2028\u2029':
if text[start] == '\n':
self.write_line_break()
for br in text[start:end]:
if br == u'\n':
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
self.write_indent()
start = end
else:
if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
if ch is None or ch in ' \n\x85\u2028\u2029' or ch == '\'':
if start < end:
data = text[start:end]
self.column += len(data)
@@ -889,49 +886,49 @@ class Emitter(object):
data = data.encode(self.encoding)
self.stream.write(data)
start = end
if ch == u'\'':
data = u'\'\''
if ch == '\'':
data = '\'\''
self.column += 2
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end + 1
if ch is not None:
spaces = (ch == u' ')
breaks = (ch in u'\n\x85\u2028\u2029')
spaces = (ch == ' ')
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
self.write_indicator(u'\'', False)
self.write_indicator('\'', False)
# Maps a character to the letter that write_double_quoted writes after a
# backslash in its place.
ESCAPE_REPLACEMENTS = {
    '\0':       '0',
    '\x07':     'a',
    '\x08':     'b',
    '\x09':     't',
    '\x0A':     'n',
    '\x0B':     'v',
    '\x0C':     'f',
    '\x0D':     'r',
    '\x1B':     'e',
    '\"':       '\"',
    '\\':       '\\',
    '\x85':     'N',
    '\xA0':     '_',
    '\u2028':   'L',
    '\u2029':   'P',
}
def write_double_quoted(self, text, split=True):
self.write_indicator(u'"', True)
self.write_indicator('"', True)
start = end = 0
while end <= len(text):
ch = None
if end < len(text):
ch = text[end]
if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
or not (u'\x20' <= ch <= u'\x7E'
if ch is None or ch in '"\\\x85\u2028\u2029\uFEFF' \
or not ('\x20' <= ch <= '\x7E'
or (self.allow_unicode
and (u'\xA0' <= ch <= u'\uD7FF'
or u'\uE000' <= ch <= u'\uFFFD'))):
and ('\xA0' <= ch <= '\uD7FF'
or '\uE000' <= ch <= '\uFFFD'))):
if start < end:
data = text[start:end]
self.column += len(data)
@@ -941,21 +938,21 @@ class Emitter(object):
start = end
if ch is not None:
if ch in self.ESCAPE_REPLACEMENTS:
data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
elif ch <= u'\xFF':
data = u'\\x%02X' % ord(ch)
elif ch <= u'\uFFFF':
data = u'\\u%04X' % ord(ch)
data = '\\'+self.ESCAPE_REPLACEMENTS[ch]
elif ch <= '\xFF':
data = '\\x%02X' % ord(ch)
elif ch <= '\uFFFF':
data = '\\u%04X' % ord(ch)
else:
data = u'\\U%08X' % ord(ch)
data = '\\U%08X' % ord(ch)
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
start = end+1
if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \
if 0 < end < len(text)-1 and (ch == ' ' or start >= end) \
and self.column+(end-start) > self.best_width and split:
data = text[start:end]+u'\\'
data = text[start:end]+'\\'
if start < end:
start = end
self.column += len(data)
@@ -965,30 +962,30 @@ class Emitter(object):
self.write_indent()
self.whitespace = False
self.indention = False
if text[start] == u' ':
data = u'\\'
if text[start] == ' ':
data = '\\'
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
self.stream.write(data)
end += 1
self.write_indicator(u'"', False)
self.write_indicator('"', False)
def determine_block_hints(self, text):
    """Return the hint string appended to a block scalar indicator.

    The result is built from up to two parts:
      * ``self.best_indent`` as digits, when ``text`` starts with a space
        or a line break;
      * ``-`` when ``text`` does not end with a line break, or ``+`` when
        it ends with a line break that is the only character or follows
        another line break.
    An empty ``text`` yields an empty string.
    """
    hints = ''
    if text:
        if text[0] in ' \n\x85\u2028\u2029':
            hints += str(self.best_indent)
        if text[-1] not in '\n\x85\u2028\u2029':
            hints += '-'
        elif len(text) == 1 or text[-2] in '\n\x85\u2028\u2029':
            hints += '+'
    return hints
def write_folded(self, text):
hints = self.determine_block_hints(text)
self.write_indicator(u'>'+hints, True)
if hints[-1:] == u'+':
self.write_indicator('>'+hints, True)
if hints[-1:] == '+':
self.open_ended = True
self.write_line_break()
leading_space = True
@@ -1000,13 +997,13 @@ class Emitter(object):
if end < len(text):
ch = text[end]
if breaks:
if ch is None or ch not in u'\n\x85\u2028\u2029':
if not leading_space and ch is not None and ch != u' ' \
and text[start] == u'\n':
if ch is None or ch not in '\n\x85\u2028\u2029':
if not leading_space and ch is not None and ch != ' ' \
and text[start] == '\n':
self.write_line_break()
leading_space = (ch == u' ')
leading_space = (ch == ' ')
for br in text[start:end]:
if br == u'\n':
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
@@ -1014,7 +1011,7 @@ class Emitter(object):
self.write_indent()
start = end
elif spaces:
if ch != u' ':
if ch != ' ':
if start+1 == end and self.column > self.best_width:
self.write_indent()
else:
@@ -1025,7 +1022,7 @@ class Emitter(object):
self.stream.write(data)
start = end
else:
if ch is None or ch in u' \n\x85\u2028\u2029':
if ch is None or ch in ' \n\x85\u2028\u2029':
data = text[start:end]
self.column += len(data)
if self.encoding:
@@ -1035,14 +1032,14 @@ class Emitter(object):
self.write_line_break()
start = end
if ch is not None:
breaks = (ch in u'\n\x85\u2028\u2029')
spaces = (ch == u' ')
breaks = (ch in '\n\x85\u2028\u2029')
spaces = (ch == ' ')
end += 1
def write_literal(self, text):
hints = self.determine_block_hints(text)
self.write_indicator(u'|'+hints, True)
if hints[-1:] == u'+':
self.write_indicator('|'+hints, True)
if hints[-1:] == '+':
self.open_ended = True
self.write_line_break()
breaks = True
@@ -1052,9 +1049,9 @@ class Emitter(object):
if end < len(text):
ch = text[end]
if breaks:
if ch is None or ch not in u'\n\x85\u2028\u2029':
if ch is None or ch not in '\n\x85\u2028\u2029':
for br in text[start:end]:
if br == u'\n':
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
@@ -1062,7 +1059,7 @@ class Emitter(object):
self.write_indent()
start = end
else:
if ch is None or ch in u'\n\x85\u2028\u2029':
if ch is None or ch in '\n\x85\u2028\u2029':
data = text[start:end]
if self.encoding:
data = data.encode(self.encoding)
@@ -1071,7 +1068,7 @@ class Emitter(object):
self.write_line_break()
start = end
if ch is not None:
breaks = (ch in u'\n\x85\u2028\u2029')
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
def write_plain(self, text, split=True):
@@ -1080,7 +1077,7 @@ class Emitter(object):
if not text:
return
if not self.whitespace:
data = u' '
data = ' '
self.column += len(data)
if self.encoding:
data = data.encode(self.encoding)
@@ -1095,7 +1092,7 @@ class Emitter(object):
if end < len(text):
ch = text[end]
if spaces:
if ch != u' ':
if ch != ' ':
if start+1 == end and self.column > self.best_width and split:
self.write_indent()
self.whitespace = False
@@ -1108,11 +1105,11 @@ class Emitter(object):
self.stream.write(data)
start = end
elif breaks:
if ch not in u'\n\x85\u2028\u2029':
if text[start] == u'\n':
if ch not in '\n\x85\u2028\u2029':
if text[start] == '\n':
self.write_line_break()
for br in text[start:end]:
if br == u'\n':
if br == '\n':
self.write_line_break()
else:
self.write_line_break(br)
@@ -1121,7 +1118,7 @@ class Emitter(object):
self.indention = False
start = end
else:
if ch is None or ch in u' \n\x85\u2028\u2029':
if ch is None or ch in ' \n\x85\u2028\u2029':
data = text[start:end]
self.column += len(data)
if self.encoding:
@@ -1129,7 +1126,7 @@ class Emitter(object):
self.stream.write(data)
start = end
if ch is not None:
spaces = (ch == u' ')
breaks = (ch in u'\n\x85\u2028\u2029')
spaces = (ch == ' ')
breaks = (ch in '\n\x85\u2028\u2029')
end += 1
+4 -4
View File
@@ -1,7 +1,7 @@
__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
class Mark(object):
class Mark:
def __init__(self, name, index, line, column, buffer, pointer):
self.name = name
@@ -16,7 +16,7 @@ class Mark(object):
return None
head = ''
start = self.pointer
while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029':
while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029':
start -= 1
if self.pointer-start > max_length/2-1:
head = ' ... '
@@ -24,13 +24,13 @@ class Mark(object):
break
tail = ''
end = self.pointer
while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029':
while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029':
end += 1
if end-self.pointer > max_length/2-1:
tail = ' ... '
end -= 5
break
snippet = self.buffer[start:end].encode('utf-8')
snippet = self.buffer[start:end]
return ' '*indent + head + snippet + tail + '\n' \
+ ' '*(indent+self.pointer-start+len(head)) + '^'
+6 -6
View File
@@ -1,12 +1,12 @@
__all__ = ['BaseLoader', 'SafeLoader', 'Loader']
from reader import *
from scanner import *
from parser import *
from composer import *
from constructor import *
from resolver import *
from .reader import *
from .scanner import *
from .parser import *
from .composer import *
from .constructor import *
from .resolver import *
class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):
+16 -16
View File
@@ -61,21 +61,21 @@
__all__ = ['Parser', 'ParserError']
from error import MarkedYAMLError
from tokens import *
from events import *
from scanner import *
from .error import MarkedYAMLError
from .tokens import *
from .events import *
from .scanner import *
class ParserError(MarkedYAMLError):
# Error type raised by the Parser code in this file (e.g. for duplicate
# YAML directives or undefined tag handles); it adds nothing of its own
# to MarkedYAMLError.
pass
class Parser(object):
class Parser:
# Since writing a recursive-descent parser is a straightforward task, we
# do not give many comments here.
DEFAULT_TAGS = {
u'!': u'!',
u'!!': u'tag:yaml.org,2002:',
'!': '!',
'!!': 'tag:yaml.org,2002:',
}
def __init__(self):
@@ -214,7 +214,7 @@ class Parser(object):
self.tag_handles = {}
while self.check_token(DirectiveToken):
token = self.get_token()
if token.name == u'YAML':
if token.name == 'YAML':
if self.yaml_version is not None:
raise ParserError(None, None,
"found duplicate YAML directive", token.start_mark)
@@ -224,11 +224,11 @@ class Parser(object):
"found incompatible YAML document (version 1.* is required)",
token.start_mark)
self.yaml_version = token.value
elif token.name == u'TAG':
elif token.name == 'TAG':
handle, prefix = token.value
if handle in self.tag_handles:
raise ParserError(None, None,
"duplicate tag handle %r" % handle.encode('utf-8'),
"duplicate tag handle %r" % handle,
token.start_mark)
self.tag_handles[handle] = prefix
if self.tag_handles:
@@ -298,19 +298,19 @@ class Parser(object):
if handle is not None:
if handle not in self.tag_handles:
raise ParserError("while parsing a node", start_mark,
"found undefined tag handle %r" % handle.encode('utf-8'),
"found undefined tag handle %r" % handle,
tag_mark)
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
#if tag == u'!':
#if tag == '!':
# raise ParserError("while parsing a node", start_mark,
# "found non-specific tag '!'", tag_mark,
# "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
if start_mark is None:
start_mark = end_mark = self.peek_token().start_mark
event = None
implicit = (tag is None or tag == u'!')
implicit = (tag is None or tag == '!')
if indentless_sequence and self.check_token(BlockEntryToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
@@ -320,7 +320,7 @@ class Parser(object):
if self.check_token(ScalarToken):
token = self.get_token()
end_mark = token.end_mark
if (token.plain and tag is None) or tag == u'!':
if (token.plain and tag is None) or tag == '!':
implicit = (True, False)
elif tag is None:
implicit = (False, True)
@@ -352,7 +352,7 @@ class Parser(object):
elif anchor is not None or tag is not None:
# Empty scalars are allowed even if a tag or an anchor is
# specified.
event = ScalarEvent(anchor, tag, (implicit, False), u'',
event = ScalarEvent(anchor, tag, (implicit, False), '',
start_mark, end_mark)
self.state = self.states.pop()
else:
@@ -580,5 +580,5 @@ class Parser(object):
return self.process_empty_scalar(self.peek_token().start_mark)
def process_empty_scalar(self, mark):
# Return a ScalarEvent with an empty-string value, no anchor, no tag and
# implicit flags (True, False); `mark` is passed as both the start and the
# end mark. The first return below is the pre-change line (u''), the
# second its post-change replacement ('').
return ScalarEvent(None, None, (True, False), u'', mark, mark)
return ScalarEvent(None, None, (True, False), '', mark, mark)
+28 -61
View File
@@ -17,45 +17,10 @@
__all__ = ['Reader', 'ReaderError']
from error import YAMLError, Mark
from .error import YAMLError, Mark
import codecs, re
# Unfortunately, the codec functions in Python 2.3 do not support the
# `finish` argument, so we have to write our own wrappers.
# The probe call below raises TypeError when the third positional argument
# is not accepted; in that case the three fallback wrappers are defined.
# NOTE(review): the post-change determine_encoding lines in this listing
# use codecs.utf_*_decode directly, so this whole compatibility block
# appears to be removed by the change - confirm against the full diff.
try:
codecs.utf_8_decode('', 'strict', False)
from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
except TypeError:
# UTF-16 fallbacks: unless `finish` is set, drop a trailing odd byte before
# decoding.
def utf_16_le_decode(data, errors, finish=False):
if not finish and len(data) % 2 == 1:
data = data[:-1]
return codecs.utf_16_le_decode(data, errors)
def utf_16_be_decode(data, errors, finish=False):
if not finish and len(data) % 2 == 1:
data = data[:-1]
return codecs.utf_16_be_decode(data, errors)
def utf_8_decode(data, errors, finish=False):
if not finish:
# We are trying to remove a possible incomplete multibyte character
# from the suffix of the data.
# The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
# All further bytes are in the range 0x80 to 0xbf.
# UTF-8 encoded UCS characters may be up to six bytes long.
count = 0
while count < 5 and count < len(data) \
and '\x80' <= data[-count-1] <= '\xBF':
# NOTE(review): `count` is decremented here, yet the guard (count < 5)
# and the index data[-count-1] read as if it were meant to count trailing
# continuation bytes upward - looks like `count += 1` was intended; confirm.
count -= 1
if count < 5 and count < len(data) \
and '\xC0' <= data[-count-1] <= '\xFD':
data = data[:-count-1]
return codecs.utf_8_decode(data, errors)
class ReaderError(YAMLError):
def __init__(self, name, position, character, encoding, reason):
@@ -66,7 +31,7 @@ class ReaderError(YAMLError):
self.reason = reason
def __str__(self):
if isinstance(self.character, str):
if isinstance(self.character, bytes):
return "'%s' codec can't decode byte #x%02x: %s\n" \
" in \"%s\", position %d" \
% (self.encoding, ord(self.character), self.reason,
@@ -79,13 +44,13 @@ class ReaderError(YAMLError):
class Reader(object):
# Reader:
# - determines the data encoding and converts it to unicode,
# - determines the data encoding and converts it to a unicode string,
# - checks if characters are in allowed range,
# - adds '\0' to the end.
# Reader accepts
# - a `bytes` object,
# - a `str` object,
# - a `unicode` object,
# - a file-like object with its `read` method returning `str`,
# - a file-like object with its `read` method returning `unicode`.
@@ -96,7 +61,7 @@ class Reader(object):
self.stream = None
self.stream_pointer = 0
self.eof = True
self.buffer = u''
self.buffer = ''
self.pointer = 0
self.raw_buffer = None
self.raw_decode = None
@@ -104,19 +69,19 @@ class Reader(object):
self.index = 0
self.line = 0
self.column = 0
if isinstance(stream, unicode):
if isinstance(stream, str):
self.name = "<unicode string>"
self.check_printable(stream)
self.buffer = stream+u'\0'
elif isinstance(stream, str):
self.name = "<string>"
self.buffer = stream+'\0'
elif isinstance(stream, bytes):
self.name = "<byte string>"
self.raw_buffer = stream
self.determine_encoding()
else:
self.stream = stream
self.name = getattr(stream, 'name', "<file>")
self.eof = False
self.raw_buffer = ''
self.raw_buffer = None
self.determine_encoding()
def peek(self, index=0):
@@ -138,11 +103,11 @@ class Reader(object):
ch = self.buffer[self.pointer]
self.pointer += 1
self.index += 1
if ch in u'\n\x85\u2028\u2029' \
or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
if ch in '\n\x85\u2028\u2029' \
or (ch == '\r' and self.buffer[self.pointer] != '\n'):
self.line += 1
self.column = 0
elif ch != u'\uFEFF':
elif ch != '\uFEFF':
self.column += 1
length -= 1
@@ -155,21 +120,21 @@ class Reader(object):
None, None)
def determine_encoding(self):
# Choose the decoder for raw input and start filling the text buffer:
#   1. call update_raw() until end of input or until at least two items
#      are buffered (the post-change guard also tolerates raw_buffer
#      being None);
#   2. for byte input, select the decoder from the leading BOM - UTF-16 LE,
#      UTF-16 BE, otherwise UTF-8 - and record it in self.raw_decode and
#      self.encoding;
#   3. call self.update(1).
# NOTE(review): pre-change lines (unicode check, module-level utf_*_decode
# names) are shown next to their post-change replacements (bytes check,
# codecs.utf_*_decode).
while not self.eof and len(self.raw_buffer) < 2:
while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
self.update_raw()
if not isinstance(self.raw_buffer, unicode):
if isinstance(self.raw_buffer, bytes):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = utf_16_le_decode
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = utf_16_be_decode
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = utf_8_decode
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
@@ -190,8 +155,8 @@ class Reader(object):
try:
data, converted = self.raw_decode(self.raw_buffer,
'strict', self.eof)
except UnicodeDecodeError, exc:
character = exc.object[exc.start]
except UnicodeDecodeError as exc:
character = self.raw_buffer[exc.start]
if self.stream is not None:
position = self.stream_pointer-len(self.raw_buffer)+exc.start
else:
@@ -205,16 +170,18 @@ class Reader(object):
self.buffer += data
self.raw_buffer = self.raw_buffer[converted:]
if self.eof:
self.buffer += u'\0'
self.buffer += '\0'
self.raw_buffer = None
break
# Read up to `size` items from self.stream into self.raw_buffer, advance
# self.stream_pointer by the amount read, and set self.eof once read()
# returns nothing.
# NOTE(review): two versions are interleaved below. The pre-change version
# (default size 1024) appends only when data is non-empty; the post-change
# version (default size 4096) first initialises raw_buffer when it is None.
def update_raw(self, size=1024):
def update_raw(self, size=4096):
data = self.stream.read(size)
if data:
self.raw_buffer += data
self.stream_pointer += len(data)
if self.raw_buffer is None:
self.raw_buffer = data
else:
self.raw_buffer += data
self.stream_pointer += len(data)
if not data:
self.eof = True
#try:
+56 -171
View File
@@ -2,22 +2,15 @@
__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
'RepresenterError']
from error import *
from nodes import *
from .error import *
from .nodes import *
import datetime
try:
set
except NameError:
from sets import Set as set
import sys, copy_reg, types
import datetime, sys, copyreg, types, base64
class RepresenterError(YAMLError):
# Error type for the representer module (exported via __all__); it adds
# nothing of its own to YAMLError.
pass
class BaseRepresenter(object):
class BaseRepresenter:
yaml_representers = {}
yaml_multi_representers = {}
@@ -36,12 +29,6 @@ class BaseRepresenter(object):
self.object_keeper = []
self.alias_key = None
def get_classobj_bases(self, cls):
# Return a list holding `cls` followed by all of its base classes,
# gathered by recursing over cls.__bases__ in order. No de-duplication is
# done, so a class reachable through several bases appears more than once.
# NOTE(review): appears to be pre-change code that this change removes
# (its only visible caller is the types.InstanceType branch) - confirm.
bases = [cls]
for base in cls.__bases__:
bases.extend(self.get_classobj_bases(base))
return bases
def represent_data(self, data):
if self.ignore_aliases(data):
self.alias_key = None
@@ -56,8 +43,6 @@ class BaseRepresenter(object):
#self.represented_objects[alias_key] = None
self.object_keeper.append(data)
data_types = type(data).__mro__
if type(data) is types.InstanceType:
data_types = self.get_classobj_bases(data.__class__)+list(data_types)
if data_types[0] in self.yaml_representers:
node = self.yaml_representers[data_types[0]](self, data)
else:
@@ -71,22 +56,22 @@ class BaseRepresenter(object):
elif None in self.yaml_representers:
node = self.yaml_representers[None](self, data)
else:
node = ScalarNode(None, unicode(data))
node = ScalarNode(None, str(data))
#if alias_key is not None:
# self.represented_objects[alias_key] = node
return node
# Register `representer` as the handler for objects whose type is exactly
# `data_type` (represent_data looks handlers up by type and calls them as
# representer(self, data)).
# If this class has no yaml_representers table of its own yet, the inherited
# table is copied first so the registration does not leak into base classes.
# NOTE(review): the post-change @classmethod decorator and the pre-change
# trailing classmethod(...) rebinding are both shown.
@classmethod
def add_representer(cls, data_type, representer):
if not 'yaml_representers' in cls.__dict__:
cls.yaml_representers = cls.yaml_representers.copy()
cls.yaml_representers[data_type] = representer
add_representer = classmethod(add_representer)
# Register `representer` under `data_type` in the class-level
# yaml_multi_representers table.
# If this class has no yaml_multi_representers table of its own yet, the
# inherited table is copied first so base classes are left untouched.
# NOTE(review): the post-change @classmethod decorator and the pre-change
# trailing classmethod(...) rebinding are both shown.
@classmethod
def add_multi_representer(cls, data_type, representer):
if not 'yaml_multi_representers' in cls.__dict__:
cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
cls.yaml_multi_representers[data_type] = representer
add_multi_representer = classmethod(add_multi_representer)
def represent_scalar(self, tag, value, style=None):
if style is None:
@@ -121,8 +106,11 @@ class BaseRepresenter(object):
self.represented_objects[self.alias_key] = node
best_style = True
if hasattr(mapping, 'items'):
mapping = mapping.items()
mapping.sort()
mapping = list(mapping.items())
try:
mapping = sorted(mapping)
except TypeError:
pass
for item_key, item_value in mapping:
node_key = self.represent_data(item_key)
node_value = self.represent_data(item_value)
@@ -146,44 +134,31 @@ class SafeRepresenter(BaseRepresenter):
def ignore_aliases(self, data):
# Return True for values that should not get an alias key: None, the empty
# tuple, and instances of the listed scalar types. Any other value falls
# off the end (implicit None). represent_data resets self.alias_key to None
# when this returns a true value.
# The pre-change isinstance line lists `unicode`; the post-change line
# lists `bytes` instead.
if data in [None, ()]:
return True
if isinstance(data, (str, unicode, bool, int, float)):
if isinstance(data, (str, bytes, bool, int, float)):
return True
def represent_none(self, data):
# Emit the scalar 'null' tagged 'tag:yaml.org,2002:null'; `data` itself is
# not inspected. The two-line return is the pre-change form, the one-line
# return its post-change replacement.
return self.represent_scalar(u'tag:yaml.org,2002:null',
u'null')
return self.represent_scalar('tag:yaml.org,2002:null', 'null')
def represent_str(self, data):
# Pre-change body (everything up to the first return): try to decode
# `data` as ASCII, then as UTF-8, and emit it tagged
# 'tag:yaml.org,2002:str'; if both decodes fail, base64-encode it and emit
# it tagged 'tag:yaml.org,2002:binary' with style '|'.
# Post-change body (the final return only): emit `data` unchanged as a
# 'tag:yaml.org,2002:str' scalar.
tag = None
style = None
try:
data = unicode(data, 'ascii')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
try:
data = unicode(data, 'utf-8')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
data = data.encode('base64')
tag = u'tag:yaml.org,2002:binary'
style = '|'
return self.represent_scalar(tag, data, style=style)
return self.represent_scalar('tag:yaml.org,2002:str', data)
def represent_unicode(self, data):
# Emit `data` unchanged as a 'tag:yaml.org,2002:str' scalar.
# NOTE(review): only a pre-change (u'') form is shown, and the `unicode`
# registration is replaced by a `bytes` one further down - this method
# appears to be removed by the change; confirm.
return self.represent_scalar(u'tag:yaml.org,2002:str', data)
def represent_binary(self, data):
# Base64-encode `data`, decode the result to an ASCII string, and emit it
# as a 'tag:yaml.org,2002:binary' scalar with style '|'.
# base64.encodebytes is used when the module provides it; otherwise the
# code falls back to base64.encodestring.
if hasattr(base64, 'encodebytes'):
data = base64.encodebytes(data).decode('ascii')
else:
data = base64.encodestring(data).decode('ascii')
return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|')
def represent_bool(self, data):
# Emit 'true' for a truthy `data` and 'false' otherwise, tagged
# 'tag:yaml.org,2002:bool'. Each pre-change line (u'' literal) is shown
# with its post-change replacement ('' literal).
if data:
value = u'true'
value = 'true'
else:
value = u'false'
return self.represent_scalar(u'tag:yaml.org,2002:bool', value)
value = 'false'
return self.represent_scalar('tag:yaml.org,2002:bool', value)
def represent_int(self, data):
# Emit the text form of `data` as a 'tag:yaml.org,2002:int' scalar.
# NOTE(review): the first return is the pre-change line (unicode(data));
# the last line of this block is its post-change replacement (str(data)).
# The represent_long method in between is shown only in pre-change form and
# its `long` registration is dropped further down - it appears to be
# removed by the change; confirm.
return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
def represent_long(self, data):
return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
return self.represent_scalar('tag:yaml.org,2002:int', str(data))
inf_value = 1e300
while repr(inf_value) != repr(inf_value*inf_value):
@@ -191,13 +166,13 @@ class SafeRepresenter(BaseRepresenter):
def represent_float(self, data):
if data != data or (data == 0.0 and data == 1.0):
value = u'.nan'
value = '.nan'
elif data == self.inf_value:
value = u'.inf'
value = '.inf'
elif data == -self.inf_value:
value = u'-.inf'
value = '-.inf'
else:
value = unicode(repr(data)).lower()
value = repr(data).lower()
# Note that in some cases `repr(data)` represents a float number
# without the decimal parts. For instance:
# >>> repr(1e17)
@@ -205,9 +180,9 @@ class SafeRepresenter(BaseRepresenter):
# Unfortunately, this is not a valid float representation according
# to the definition of the `!!float` tag. We fix this by adding
# '.0' before the 'e' symbol.
if u'.' not in value and u'e' in value:
value = value.replace(u'e', u'.0e', 1)
return self.represent_scalar(u'tag:yaml.org,2002:float', value)
if '.' not in value and 'e' in value:
value = value.replace('e', '.0e', 1)
return self.represent_scalar('tag:yaml.org,2002:float', value)
def represent_list(self, data):
#pairs = (len(data) > 0 and isinstance(data, list))
@@ -217,7 +192,7 @@ class SafeRepresenter(BaseRepresenter):
# pairs = False
# break
#if not pairs:
return self.represent_sequence(u'tag:yaml.org,2002:seq', data)
return self.represent_sequence('tag:yaml.org,2002:seq', data)
#value = []
#for item_key, item_value in data:
# value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
@@ -225,21 +200,21 @@ class SafeRepresenter(BaseRepresenter):
#return SequenceNode(u'tag:yaml.org,2002:pairs', value)
def represent_dict(self, data):
# Represent `data` as a mapping node tagged 'tag:yaml.org,2002:map'
# (pre-change line first, post-change replacement second).
return self.represent_mapping(u'tag:yaml.org,2002:map', data)
return self.represent_mapping('tag:yaml.org,2002:map', data)
def represent_set(self, data):
# Represent a set as a mapping tagged 'tag:yaml.org,2002:set': every
# element of `data` becomes a key whose value is None.
# (Pre-change return first, post-change replacement second.)
value = {}
for key in data:
value[key] = None
return self.represent_mapping(u'tag:yaml.org,2002:set', value)
return self.represent_mapping('tag:yaml.org,2002:set', value)
def represent_date(self, data):
# Emit data.isoformat() as a 'tag:yaml.org,2002:timestamp' scalar.
# The pre-change lines wrap the value in unicode(); the post-change lines
# use the isoformat() result directly.
value = unicode(data.isoformat())
return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
value = data.isoformat()
return self.represent_scalar('tag:yaml.org,2002:timestamp', value)
def represent_datetime(self, data):
# Emit data.isoformat(' ') - ISO format with a space as the separator
# argument - as a 'tag:yaml.org,2002:timestamp' scalar.
# The pre-change lines wrap the value in unicode(); the post-change lines
# use the isoformat() result directly.
value = unicode(data.isoformat(' '))
return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
value = data.isoformat(' ')
return self.represent_scalar('tag:yaml.org,2002:timestamp', value)
def represent_yaml_object(self, tag, data, cls, flow_style=None):
if hasattr(data, '__getstate__'):
@@ -257,8 +232,8 @@ SafeRepresenter.add_representer(type(None),
SafeRepresenter.add_representer(str,
SafeRepresenter.represent_str)
SafeRepresenter.add_representer(unicode,
SafeRepresenter.represent_unicode)
SafeRepresenter.add_representer(bytes,
SafeRepresenter.represent_binary)
SafeRepresenter.add_representer(bool,
SafeRepresenter.represent_bool)
@@ -266,9 +241,6 @@ SafeRepresenter.add_representer(bool,
SafeRepresenter.add_representer(int,
SafeRepresenter.represent_int)
SafeRepresenter.add_representer(long,
SafeRepresenter.represent_long)
SafeRepresenter.add_representer(float,
SafeRepresenter.represent_float)
@@ -295,99 +267,27 @@ SafeRepresenter.add_representer(None,
class Representer(SafeRepresenter):
def represent_str(self, data):
# Representer override of represent_str: ASCII-decodable data is tagged
# 'tag:yaml.org,2002:str'; data that only decodes as UTF-8 is tagged
# 'tag:yaml.org,2002:python/str'; anything else is base64-encoded and
# tagged 'tag:yaml.org,2002:binary' with style '|'.
# NOTE(review): only a pre-change form is shown and the matching
# Representer.add_representer(str, ...) registration is dropped further
# down - this override appears to be removed by the change; confirm.
tag = None
style = None
try:
data = unicode(data, 'ascii')
tag = u'tag:yaml.org,2002:str'
except UnicodeDecodeError:
try:
data = unicode(data, 'utf-8')
tag = u'tag:yaml.org,2002:python/str'
except UnicodeDecodeError:
data = data.encode('base64')
tag = u'tag:yaml.org,2002:binary'
style = '|'
return self.represent_scalar(tag, data, style=style)
def represent_unicode(self, data):
# Representer override of represent_unicode: data that can be encoded as
# ASCII is tagged 'tag:yaml.org,2002:python/unicode'; data that cannot is
# tagged 'tag:yaml.org,2002:str'. The value itself is emitted unchanged.
# NOTE(review): only a pre-change form is shown - appears to be removed by
# the change; confirm.
tag = None
try:
data.encode('ascii')
tag = u'tag:yaml.org,2002:python/unicode'
except UnicodeEncodeError:
tag = u'tag:yaml.org,2002:str'
return self.represent_scalar(tag, data)
def represent_long(self, data):
# Emit unicode(data) tagged 'tag:yaml.org,2002:int', unless int(data)
# yields a different object than `data`, in which case the tag is
# 'tag:yaml.org,2002:python/long'.
# NOTE(review): only a pre-change form is shown - appears to be removed by
# the change; confirm.
tag = u'tag:yaml.org,2002:int'
if int(data) is not data:
tag = u'tag:yaml.org,2002:python/long'
return self.represent_scalar(tag, unicode(data))
def represent_complex(self, data):
# Format a complex number and emit it tagged
# 'tag:yaml.org,2002:python/complex':
#   imag == 0.0 -> repr of the real part only;
#   real == 0.0 -> repr of the imaginary part followed by 'j';
#   imag > 0    -> '<real>+<imag>j';
#   otherwise   -> '<real><imag>j' (no '+' inserted; presumably the repr
#                  of a negative imaginary part supplies its own '-').
# Each pre-change line (u'' literal) is shown with its post-change
# replacement ('' literal).
if data.imag == 0.0:
data = u'%r' % data.real
data = '%r' % data.real
elif data.real == 0.0:
data = u'%rj' % data.imag
data = '%rj' % data.imag
elif data.imag > 0:
data = u'%r+%rj' % (data.real, data.imag)
data = '%r+%rj' % (data.real, data.imag)
else:
data = u'%r%rj' % (data.real, data.imag)
return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data)
data = '%r%rj' % (data.real, data.imag)
return self.represent_scalar('tag:yaml.org,2002:python/complex', data)
def represent_tuple(self, data):
# Represent `data` as a sequence node tagged
# 'tag:yaml.org,2002:python/tuple' (pre-change line first, post-change
# replacement second).
return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data)
return self.represent_sequence('tag:yaml.org,2002:python/tuple', data)
def represent_name(self, data):
# Emit an empty-valued scalar whose tag is
# 'tag:yaml.org,2002:python/name:' followed by
# '<data.__module__>.<data.__name__>'.
# Each pre-change line (u'' literal) is shown with its post-change
# replacement ('' literal).
name = u'%s.%s' % (data.__module__, data.__name__)
return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'')
name = '%s.%s' % (data.__module__, data.__name__)
return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '')
def represent_module(self, data):
# Emit an empty-valued scalar whose tag is
# 'tag:yaml.org,2002:python/module:' followed by data.__name__.
# NOTE(review): the pre-change argument line follows the call opener
# directly; its post-change replacement is the LAST line of this block,
# after the pre-change-only represent_instance method shown in between.
return self.represent_scalar(
u'tag:yaml.org,2002:python/module:'+data.__name__, u'')
def represent_instance(self, data):
# NOTE(review): only a pre-change form of this method is shown and the
# types.InstanceType registration is dropped further down - it appears to
# be removed by the change; confirm.
# For instances of classic classes, we use __getinitargs__ and
# __getstate__ to serialize the data.
# If data.__getinitargs__ exists, the object must be reconstructed by
# calling cls(*args), where args is a tuple returned by
# __getinitargs__. Otherwise, the cls.__init__ method should never be
# called and the class instance is created by instantiating a trivial
# class and assigning to the instance's __class__ variable.
# If data.__getstate__ exists, it returns the state of the object.
# Otherwise, the state of the object is data.__dict__.
# We produce either a !!python/object or !!python/object/new node.
# If data.__getinitargs__ does not exist and state is a dictionary, we
# produce a !!python/object node. Otherwise we produce a
# !!python/object/new node.
cls = data.__class__
class_name = u'%s.%s' % (cls.__module__, cls.__name__)
args = None
state = None
if hasattr(data, '__getinitargs__'):
args = list(data.__getinitargs__())
if hasattr(data, '__getstate__'):
state = data.__getstate__()
else:
state = data.__dict__
if args is None and isinstance(state, dict):
return self.represent_mapping(
u'tag:yaml.org,2002:python/object:'+class_name, state)
if isinstance(state, dict) and not state:
return self.represent_sequence(
u'tag:yaml.org,2002:python/object/new:'+class_name, args)
value = {}
if args:
value['args'] = args
value['state'] = state
return self.represent_mapping(
u'tag:yaml.org,2002:python/object/new:'+class_name, value)
# Post-change argument line of represent_module's represent_scalar( call:
'tag:yaml.org,2002:python/module:'+data.__name__, '')
def represent_object(self, data):
# We use __reduce__ API to save the data. data.__reduce__ returns
@@ -407,8 +307,8 @@ class Representer(SafeRepresenter):
# !!python/object/apply node.
cls = type(data)
if cls in copy_reg.dispatch_table:
reduce = copy_reg.dispatch_table[cls](data)
if cls in copyreg.dispatch_table:
reduce = copyreg.dispatch_table[cls](data)
elif hasattr(data, '__reduce_ex__'):
reduce = data.__reduce_ex__(2)
elif hasattr(data, '__reduce__'):
@@ -427,16 +327,16 @@ class Representer(SafeRepresenter):
if function.__name__ == '__newobj__':
function = args[0]
args = args[1:]
tag = u'tag:yaml.org,2002:python/object/new:'
tag = 'tag:yaml.org,2002:python/object/new:'
newobj = True
else:
tag = u'tag:yaml.org,2002:python/object/apply:'
tag = 'tag:yaml.org,2002:python/object/apply:'
newobj = False
function_name = u'%s.%s' % (function.__module__, function.__name__)
function_name = '%s.%s' % (function.__module__, function.__name__)
if not args and not listitems and not dictitems \
and isinstance(state, dict) and newobj:
return self.represent_mapping(
u'tag:yaml.org,2002:python/object:'+function_name, state)
'tag:yaml.org,2002:python/object:'+function_name, state)
if not listitems and not dictitems \
and isinstance(state, dict) and not state:
return self.represent_sequence(tag+function_name, args)
@@ -451,15 +351,6 @@ class Representer(SafeRepresenter):
value['dictitems'] = dictitems
return self.represent_mapping(tag+function_name, value)
Representer.add_representer(str,
Representer.represent_str)
Representer.add_representer(unicode,
Representer.represent_unicode)
Representer.add_representer(long,
Representer.represent_long)
Representer.add_representer(complex,
Representer.represent_complex)
@@ -469,9 +360,6 @@ Representer.add_representer(tuple,
Representer.add_representer(type,
Representer.represent_name)
Representer.add_representer(types.ClassType,
Representer.represent_name)
Representer.add_representer(types.FunctionType,
Representer.represent_name)
@@ -481,9 +369,6 @@ Representer.add_representer(types.BuiltinFunctionType,
Representer.add_representer(types.ModuleType,
Representer.represent_module)
Representer.add_multi_representer(types.InstanceType,
Representer.represent_instance)
Representer.add_multi_representer(object,
Representer.represent_object)
+38 -38
View File
@@ -1,19 +1,19 @@
__all__ = ['BaseResolver', 'Resolver']
from error import *
from nodes import *
from .error import *
from .nodes import *
import re
class ResolverError(YAMLError):
# Error type raised by the resolver code in this file (e.g. for an invalid
# node checker, index checker or node kind passed to add_path_resolver);
# it adds nothing of its own to YAMLError.
pass
class BaseResolver(object):
class BaseResolver:
DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str'
DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'
yaml_implicit_resolvers = {}
yaml_path_resolvers = {}
@@ -22,6 +22,7 @@ class BaseResolver(object):
self.resolver_exact_paths = []
self.resolver_prefix_paths = []
@classmethod
def add_implicit_resolver(cls, tag, regexp, first):
if not 'yaml_implicit_resolvers' in cls.__dict__:
cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
@@ -29,8 +30,8 @@ class BaseResolver(object):
first = [None]
for ch in first:
cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
add_implicit_resolver = classmethod(add_implicit_resolver)
@classmethod
def add_path_resolver(cls, tag, path, kind=None):
# Note: `add_path_resolver` is experimental. The API could be changed.
# `new_path` is a pattern that is matched against the path from the
@@ -66,10 +67,10 @@ class BaseResolver(object):
elif node_check is dict:
node_check = MappingNode
elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
and not isinstance(node_check, basestring) \
and not isinstance(node_check, str) \
and node_check is not None:
raise ResolverError("Invalid node checker: %s" % node_check)
if not isinstance(index_check, (basestring, int)) \
if not isinstance(index_check, (str, int)) \
and index_check is not None:
raise ResolverError("Invalid index checker: %s" % index_check)
new_path.append((node_check, index_check))
@@ -83,7 +84,6 @@ class BaseResolver(object):
and kind is not None:
raise ResolverError("Invalid node kind: %s" % kind)
cls.yaml_path_resolvers[tuple(new_path), kind] = tag
add_path_resolver = classmethod(add_path_resolver)
def descend_resolver(self, current_node, current_index):
if not self.yaml_path_resolvers:
@@ -117,7 +117,7 @@ class BaseResolver(object):
def check_resolver_prefix(self, depth, path, kind,
current_node, current_index):
node_check, index_check = path[depth-1]
if isinstance(node_check, basestring):
if isinstance(node_check, str):
if current_node.tag != node_check:
return
elif node_check is not None:
@@ -128,7 +128,7 @@ class BaseResolver(object):
if (index_check is False or index_check is None) \
and current_index is None:
return
if isinstance(index_check, basestring):
if isinstance(index_check, str):
if not (isinstance(current_index, ScalarNode)
and index_check == current_index.value):
return
@@ -139,8 +139,8 @@ class BaseResolver(object):
def resolve(self, kind, value, implicit):
if kind is ScalarNode and implicit[0]:
if value == u'':
resolvers = self.yaml_implicit_resolvers.get(u'', [])
if value == '':
resolvers = self.yaml_implicit_resolvers.get('', [])
else:
resolvers = self.yaml_implicit_resolvers.get(value[0], [])
resolvers += self.yaml_implicit_resolvers.get(None, [])
@@ -165,60 +165,60 @@ class Resolver(BaseResolver):
pass
# Implicit-resolver registrations for Resolver. Each call passes
# (tag, regexp, first): add_implicit_resolver files the (tag, regexp) pair
# under every character listed in `first`, and resolve() picks candidates by
# the first character of a scalar's value ('' for an empty value).
# Registered tags, in order: bool, float, int, merge ('<<'), null,
# timestamp, value ('='), and yaml ('!', '&', '*').
# NOTE(review): within each call the pre-change argument lines (u'' / ur''
# literals) are shown together with their post-change replacements
# ('' / r'' literals), and the continuation lines of the multi-line
# patterns are shared by both versions. Leading whitespace inside the
# verbose (re.X) patterns is not preserved in this listing.
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:bool',
re.compile(ur'''^(?:yes|Yes|YES|no|No|NO
'tag:yaml.org,2002:bool',
re.compile(r'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list(u'yYnNtTfFoO'))
list('yYnNtTfFoO'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:float',
re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
'tag:yaml.org,2002:float',
re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
|\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
|[-+]?\.(?:inf|Inf|INF)
|\.(?:nan|NaN|NAN))$''', re.X),
list(u'-+0123456789.'))
list('-+0123456789.'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:int',
re.compile(ur'''^(?:[-+]?0b[0-1_]+
'tag:yaml.org,2002:int',
re.compile(r'''^(?:[-+]?0b[0-1_]+
|[-+]?0[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
list(u'-+0123456789'))
list('-+0123456789'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:merge',
re.compile(ur'^(?:<<)$'),
[u'<'])
'tag:yaml.org,2002:merge',
re.compile(r'^(?:<<)$'),
['<'])
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:null',
re.compile(ur'''^(?: ~
'tag:yaml.org,2002:null',
re.compile(r'''^(?: ~
|null|Null|NULL
| )$''', re.X),
[u'~', u'n', u'N', u''])
['~', 'n', 'N', ''])
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:timestamp',
re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
'tag:yaml.org,2002:timestamp',
re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
list(u'0123456789'))
list('0123456789'))
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:value',
re.compile(ur'^(?:=)$'),
[u'='])
'tag:yaml.org,2002:value',
re.compile(r'^(?:=)$'),
['='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
Resolver.add_implicit_resolver(
u'tag:yaml.org,2002:yaml',
re.compile(ur'^(?:!|&|\*)$'),
list(u'!&*'))
'tag:yaml.org,2002:yaml',
re.compile(r'^(?:!|&|\*)$'),
list('!&*'))
+195 -204
View File
@@ -26,13 +26,13 @@
__all__ = ['Scanner', 'ScannerError']
from error import MarkedYAMLError
from tokens import *
from .error import MarkedYAMLError
from .tokens import *
class ScannerError(MarkedYAMLError):
pass
class SimpleKey(object):
class SimpleKey:
# See below simple keys treatment.
def __init__(self, token_number, required, index, line, column, mark):
@@ -43,7 +43,7 @@ class SimpleKey(object):
self.column = column
self.mark = mark
class Scanner(object):
class Scanner:
def __init__(self):
"""Initialize the scanner."""
@@ -166,85 +166,85 @@ class Scanner(object):
ch = self.peek()
# Is it the end of stream?
if ch == u'\0':
if ch == '\0':
return self.fetch_stream_end()
# Is it a directive?
if ch == u'%' and self.check_directive():
if ch == '%' and self.check_directive():
return self.fetch_directive()
# Is it the document start?
if ch == u'-' and self.check_document_start():
if ch == '-' and self.check_document_start():
return self.fetch_document_start()
# Is it the document end?
if ch == u'.' and self.check_document_end():
if ch == '.' and self.check_document_end():
return self.fetch_document_end()
# TODO: support for BOM within a stream.
#if ch == u'\uFEFF':
#if ch == '\uFEFF':
# return self.fetch_bom() <-- issue BOMToken
# Note: the order of the following checks is NOT significant.
# Is it the flow sequence start indicator?
if ch == u'[':
if ch == '[':
return self.fetch_flow_sequence_start()
# Is it the flow mapping start indicator?
if ch == u'{':
if ch == '{':
return self.fetch_flow_mapping_start()
# Is it the flow sequence end indicator?
if ch == u']':
if ch == ']':
return self.fetch_flow_sequence_end()
# Is it the flow mapping end indicator?
if ch == u'}':
if ch == '}':
return self.fetch_flow_mapping_end()
# Is it the flow entry indicator?
if ch == u',':
if ch == ',':
return self.fetch_flow_entry()
# Is it the block entry indicator?
if ch == u'-' and self.check_block_entry():
if ch == '-' and self.check_block_entry():
return self.fetch_block_entry()
# Is it the key indicator?
if ch == u'?' and self.check_key():
if ch == '?' and self.check_key():
return self.fetch_key()
# Is it the value indicator?
if ch == u':' and self.check_value():
if ch == ':' and self.check_value():
return self.fetch_value()
# Is it an alias?
if ch == u'*':
if ch == '*':
return self.fetch_alias()
# Is it an anchor?
if ch == u'&':
if ch == '&':
return self.fetch_anchor()
# Is it a tag?
if ch == u'!':
if ch == '!':
return self.fetch_tag()
# Is it a literal scalar?
if ch == u'|' and not self.flow_level:
if ch == '|' and not self.flow_level:
return self.fetch_literal()
# Is it a folded scalar?
if ch == u'>' and not self.flow_level:
if ch == '>' and not self.flow_level:
return self.fetch_folded()
# Is it a single quoted scalar?
if ch == u'\'':
if ch == '\'':
return self.fetch_single()
# Is it a double quoted scalar?
if ch == u'\"':
if ch == '\"':
return self.fetch_double()
# It must be a plain scalar then.
@@ -253,8 +253,8 @@ class Scanner(object):
# No? It's an error. Let's produce a nice error message.
raise ScannerError("while scanning for the next token", None,
"found character %r that cannot start any token"
% ch.encode('utf-8'), self.get_mark())
"found character %r that cannot start any token" % ch,
self.get_mark())
# Simple keys treatment.
@@ -280,7 +280,7 @@ class Scanner(object):
# - should be no longer than 1024 characters.
# Disabling this procedure will allow simple keys of any length and
# height (may cause problems if indentation is broken though).
for level in self.possible_simple_keys.keys():
for level in list(self.possible_simple_keys):
key = self.possible_simple_keys[level]
if key.line != self.line \
or self.index-key.index > 1024:
@@ -692,22 +692,22 @@ class Scanner(object):
# DOCUMENT-START: ^ '---' (' '|'\n')
if self.column == 0:
if self.prefix(3) == u'---' \
and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
if self.prefix(3) == '---' \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
return True
def check_document_end(self):
# DOCUMENT-END: ^ '...' (' '|'\n')
if self.column == 0:
if self.prefix(3) == u'...' \
and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
if self.prefix(3) == '...' \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
return True
def check_block_entry(self):
# BLOCK-ENTRY: '-' (' '|'\n')
return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
def check_key(self):
@@ -717,7 +717,7 @@ class Scanner(object):
# KEY(block context): '?' (' '|'\n')
else:
return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
def check_value(self):
@@ -727,7 +727,7 @@ class Scanner(object):
# VALUE(block context): ':' (' '|'\n')
else:
return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
def check_plain(self):
@@ -744,9 +744,9 @@ class Scanner(object):
# '-' character) because we want the flow context to be space
# independent.
ch = self.peek()
return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \
or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'
and (ch == u'-' or (not self.flow_level and ch in u'?:')))
return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \
or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029'
and (ch == '-' or (not self.flow_level and ch in '?:')))
# Scanners.
@@ -770,14 +770,14 @@ class Scanner(object):
# `unwind_indent` before issuing BLOCK-END.
# Scanners for block, flow, and plain scalars need to be modified.
if self.index == 0 and self.peek() == u'\uFEFF':
if self.index == 0 and self.peek() == '\uFEFF':
self.forward()
found = False
while not found:
while self.peek() == u' ':
while self.peek() == ' ':
self.forward()
if self.peek() == u'#':
while self.peek() not in u'\0\r\n\x85\u2028\u2029':
if self.peek() == '#':
while self.peek() not in '\0\r\n\x85\u2028\u2029':
self.forward()
if self.scan_line_break():
if not self.flow_level:
@@ -791,15 +791,15 @@ class Scanner(object):
self.forward()
name = self.scan_directive_name(start_mark)
value = None
if name == u'YAML':
if name == 'YAML':
value = self.scan_yaml_directive_value(start_mark)
end_mark = self.get_mark()
elif name == u'TAG':
elif name == 'TAG':
value = self.scan_tag_directive_value(start_mark)
end_mark = self.get_mark()
else:
end_mark = self.get_mark()
while self.peek() not in u'\0\r\n\x85\u2028\u2029':
while self.peek() not in '\0\r\n\x85\u2028\u2029':
self.forward()
self.scan_directive_ignored_line(start_mark)
return DirectiveToken(name, value, start_mark, end_mark)
@@ -808,51 +808,48 @@ class Scanner(object):
# See the specification for details.
length = 0
ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-_':
while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_':
length += 1
ch = self.peek(length)
if not length:
raise ScannerError("while scanning a directive", start_mark,
"expected alphabetic or numeric character, but found %r"
% ch.encode('utf-8'), self.get_mark())
% ch, self.get_mark())
value = self.prefix(length)
self.forward(length)
ch = self.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected alphabetic or numeric character, but found %r"
% ch.encode('utf-8'), self.get_mark())
% ch, self.get_mark())
return value
def scan_yaml_directive_value(self, start_mark):
# See the specification for details.
while self.peek() == u' ':
while self.peek() == ' ':
self.forward()
major = self.scan_yaml_directive_number(start_mark)
if self.peek() != '.':
raise ScannerError("while scanning a directive", start_mark,
"expected a digit or '.', but found %r"
% self.peek().encode('utf-8'),
"expected a digit or '.', but found %r" % self.peek(),
self.get_mark())
self.forward()
minor = self.scan_yaml_directive_number(start_mark)
if self.peek() not in u'\0 \r\n\x85\u2028\u2029':
if self.peek() not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected a digit or ' ', but found %r"
% self.peek().encode('utf-8'),
"expected a digit or ' ', but found %r" % self.peek(),
self.get_mark())
return (major, minor)
def scan_yaml_directive_number(self, start_mark):
# See the specification for details.
ch = self.peek()
if not (u'0' <= ch <= u'9'):
if not ('0' <= ch <= '9'):
raise ScannerError("while scanning a directive", start_mark,
"expected a digit, but found %r" % ch.encode('utf-8'),
self.get_mark())
"expected a digit, but found %r" % ch, self.get_mark())
length = 0
while u'0' <= self.peek(length) <= u'9':
while '0' <= self.peek(length) <= '9':
length += 1
value = int(self.prefix(length))
self.forward(length)
@@ -860,10 +857,10 @@ class Scanner(object):
def scan_tag_directive_value(self, start_mark):
# See the specification for details.
while self.peek() == u' ':
while self.peek() == ' ':
self.forward()
handle = self.scan_tag_directive_handle(start_mark)
while self.peek() == u' ':
while self.peek() == ' ':
self.forward()
prefix = self.scan_tag_directive_prefix(start_mark)
return (handle, prefix)
@@ -872,34 +869,32 @@ class Scanner(object):
# See the specification for details.
value = self.scan_tag_handle('directive', start_mark)
ch = self.peek()
if ch != u' ':
if ch != ' ':
raise ScannerError("while scanning a directive", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
self.get_mark())
"expected ' ', but found %r" % ch, self.get_mark())
return value
def scan_tag_directive_prefix(self, start_mark):
# See the specification for details.
value = self.scan_tag_uri('directive', start_mark)
ch = self.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
self.get_mark())
"expected ' ', but found %r" % ch, self.get_mark())
return value
def scan_directive_ignored_line(self, start_mark):
# See the specification for details.
while self.peek() == u' ':
while self.peek() == ' ':
self.forward()
if self.peek() == u'#':
while self.peek() not in u'\0\r\n\x85\u2028\u2029':
if self.peek() == '#':
while self.peek() not in '\0\r\n\x85\u2028\u2029':
self.forward()
ch = self.peek()
if ch not in u'\0\r\n\x85\u2028\u2029':
if ch not in '\0\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a directive", start_mark,
"expected a comment or a line break, but found %r"
% ch.encode('utf-8'), self.get_mark())
% ch, self.get_mark())
self.scan_line_break()
def scan_anchor(self, TokenClass):
@@ -913,28 +908,28 @@ class Scanner(object):
# Therefore we restrict aliases to numbers and ASCII letters.
start_mark = self.get_mark()
indicator = self.peek()
if indicator == u'*':
if indicator == '*':
name = 'alias'
else:
name = 'anchor'
self.forward()
length = 0
ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-_':
while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_':
length += 1
ch = self.peek(length)
if not length:
raise ScannerError("while scanning an %s" % name, start_mark,
"expected alphabetic or numeric character, but found %r"
% ch.encode('utf-8'), self.get_mark())
% ch, self.get_mark())
value = self.prefix(length)
self.forward(length)
ch = self.peek()
if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
raise ScannerError("while scanning an %s" % name, start_mark,
"expected alphabetic or numeric character, but found %r"
% ch.encode('utf-8'), self.get_mark())
% ch, self.get_mark())
end_mark = self.get_mark()
return TokenClass(value, start_mark, end_mark)
@@ -942,40 +937,39 @@ class Scanner(object):
# See the specification for details.
start_mark = self.get_mark()
ch = self.peek(1)
if ch == u'<':
if ch == '<':
handle = None
self.forward(2)
suffix = self.scan_tag_uri('tag', start_mark)
if self.peek() != u'>':
if self.peek() != '>':
raise ScannerError("while parsing a tag", start_mark,
"expected '>', but found %r" % self.peek().encode('utf-8'),
"expected '>', but found %r" % self.peek(),
self.get_mark())
self.forward()
elif ch in u'\0 \t\r\n\x85\u2028\u2029':
elif ch in '\0 \t\r\n\x85\u2028\u2029':
handle = None
suffix = u'!'
suffix = '!'
self.forward()
else:
length = 1
use_handle = False
while ch not in u'\0 \r\n\x85\u2028\u2029':
if ch == u'!':
while ch not in '\0 \r\n\x85\u2028\u2029':
if ch == '!':
use_handle = True
break
length += 1
ch = self.peek(length)
handle = u'!'
handle = '!'
if use_handle:
handle = self.scan_tag_handle('tag', start_mark)
else:
handle = u'!'
handle = '!'
self.forward()
suffix = self.scan_tag_uri('tag', start_mark)
ch = self.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a tag", start_mark,
"expected ' ', but found %r" % ch.encode('utf-8'),
self.get_mark())
"expected ' ', but found %r" % ch, self.get_mark())
value = (handle, suffix)
end_mark = self.get_mark()
return TagToken(value, start_mark, end_mark)
@@ -1006,39 +1000,39 @@ class Scanner(object):
else:
indent = min_indent+increment-1
breaks, end_mark = self.scan_block_scalar_breaks(indent)
line_break = u''
line_break = ''
# Scan the inner part of the block scalar.
while self.column == indent and self.peek() != u'\0':
while self.column == indent and self.peek() != '\0':
chunks.extend(breaks)
leading_non_space = self.peek() not in u' \t'
leading_non_space = self.peek() not in ' \t'
length = 0
while self.peek(length) not in u'\0\r\n\x85\u2028\u2029':
while self.peek(length) not in '\0\r\n\x85\u2028\u2029':
length += 1
chunks.append(self.prefix(length))
self.forward(length)
line_break = self.scan_line_break()
breaks, end_mark = self.scan_block_scalar_breaks(indent)
if self.column == indent and self.peek() != u'\0':
if self.column == indent and self.peek() != '\0':
# Unfortunately, folding rules are ambiguous.
#
# This is the folding according to the specification:
if folded and line_break == u'\n' \
and leading_non_space and self.peek() not in u' \t':
if folded and line_break == '\n' \
and leading_non_space and self.peek() not in ' \t':
if not breaks:
chunks.append(u' ')
chunks.append(' ')
else:
chunks.append(line_break)
# This is Clark Evans's interpretation (also in the spec
# examples):
#
#if folded and line_break == u'\n':
#if folded and line_break == '\n':
# if not breaks:
# if self.peek() not in ' \t':
# chunks.append(u' ')
# chunks.append(' ')
# else:
# chunks.append(line_break)
#else:
@@ -1053,7 +1047,7 @@ class Scanner(object):
chunks.extend(breaks)
# We are done.
return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
return ScalarToken(''.join(chunks), False, start_mark, end_mark,
style)
def scan_block_scalar_indicators(self, start_mark):
@@ -1061,21 +1055,21 @@ class Scanner(object):
chomping = None
increment = None
ch = self.peek()
if ch in u'+-':
if ch in '+-':
if ch == '+':
chomping = True
else:
chomping = False
self.forward()
ch = self.peek()
if ch in u'0123456789':
if ch in '0123456789':
increment = int(ch)
if increment == 0:
raise ScannerError("while scanning a block scalar", start_mark,
"expected indentation indicator in the range 1-9, but found 0",
self.get_mark())
self.forward()
elif ch in u'0123456789':
elif ch in '0123456789':
increment = int(ch)
if increment == 0:
raise ScannerError("while scanning a block scalar", start_mark,
@@ -1083,31 +1077,31 @@ class Scanner(object):
self.get_mark())
self.forward()
ch = self.peek()
if ch in u'+-':
if ch in '+-':
if ch == '+':
chomping = True
else:
chomping = False
self.forward()
ch = self.peek()
if ch not in u'\0 \r\n\x85\u2028\u2029':
if ch not in '\0 \r\n\x85\u2028\u2029':
raise ScannerError("while scanning a block scalar", start_mark,
"expected chomping or indentation indicators, but found %r"
% ch.encode('utf-8'), self.get_mark())
% ch, self.get_mark())
return chomping, increment
def scan_block_scalar_ignored_line(self, start_mark):
# See the specification for details.
while self.peek() == u' ':
while self.peek() == ' ':
self.forward()
if self.peek() == u'#':
while self.peek() not in u'\0\r\n\x85\u2028\u2029':
if self.peek() == '#':
while self.peek() not in '\0\r\n\x85\u2028\u2029':
self.forward()
ch = self.peek()
if ch not in u'\0\r\n\x85\u2028\u2029':
if ch not in '\0\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a block scalar", start_mark,
"expected a comment or a line break, but found %r"
% ch.encode('utf-8'), self.get_mark())
"expected a comment or a line break, but found %r" % ch,
self.get_mark())
self.scan_line_break()
def scan_block_scalar_indentation(self):
@@ -1115,8 +1109,8 @@ class Scanner(object):
chunks = []
max_indent = 0
end_mark = self.get_mark()
while self.peek() in u' \r\n\x85\u2028\u2029':
if self.peek() != u' ':
while self.peek() in ' \r\n\x85\u2028\u2029':
if self.peek() != ' ':
chunks.append(self.scan_line_break())
end_mark = self.get_mark()
else:
@@ -1129,12 +1123,12 @@ class Scanner(object):
# See the specification for details.
chunks = []
end_mark = self.get_mark()
while self.column < indent and self.peek() == u' ':
while self.column < indent and self.peek() == ' ':
self.forward()
while self.peek() in u'\r\n\x85\u2028\u2029':
while self.peek() in '\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
end_mark = self.get_mark()
while self.column < indent and self.peek() == u' ':
while self.column < indent and self.peek() == ' ':
self.forward()
return chunks, end_mark
@@ -1159,33 +1153,33 @@ class Scanner(object):
chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
self.forward()
end_mark = self.get_mark()
return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
return ScalarToken(''.join(chunks), False, start_mark, end_mark,
style)
ESCAPE_REPLACEMENTS = {
u'0': u'\0',
u'a': u'\x07',
u'b': u'\x08',
u't': u'\x09',
u'\t': u'\x09',
u'n': u'\x0A',
u'v': u'\x0B',
u'f': u'\x0C',
u'r': u'\x0D',
u'e': u'\x1B',
u' ': u'\x20',
u'\"': u'\"',
u'\\': u'\\',
u'N': u'\x85',
u'_': u'\xA0',
u'L': u'\u2028',
u'P': u'\u2029',
'0': '\0',
'a': '\x07',
'b': '\x08',
't': '\x09',
'\t': '\x09',
'n': '\x0A',
'v': '\x0B',
'f': '\x0C',
'r': '\x0D',
'e': '\x1B',
' ': '\x20',
'\"': '\"',
'\\': '\\',
'N': '\x85',
'_': '\xA0',
'L': '\u2028',
'P': '\u2029',
}
ESCAPE_CODES = {
u'x': 2,
u'u': 4,
u'U': 8,
'x': 2,
'u': 4,
'U': 8,
}
def scan_flow_scalar_non_spaces(self, double, start_mark):
@@ -1193,19 +1187,19 @@ class Scanner(object):
chunks = []
while True:
length = 0
while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029':
length += 1
if length:
chunks.append(self.prefix(length))
self.forward(length)
ch = self.peek()
if not double and ch == u'\'' and self.peek(1) == u'\'':
chunks.append(u'\'')
if not double and ch == '\'' and self.peek(1) == '\'':
chunks.append('\'')
self.forward(2)
elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
elif (double and ch == '\'') or (not double and ch in '\"\\'):
chunks.append(ch)
self.forward()
elif double and ch == u'\\':
elif double and ch == '\\':
self.forward()
ch = self.peek()
if ch in self.ESCAPE_REPLACEMENTS:
@@ -1215,19 +1209,19 @@ class Scanner(object):
length = self.ESCAPE_CODES[ch]
self.forward()
for k in range(length):
if self.peek(k) not in u'0123456789ABCDEFabcdef':
if self.peek(k) not in '0123456789ABCDEFabcdef':
raise ScannerError("while scanning a double-quoted scalar", start_mark,
"expected escape sequence of %d hexdecimal numbers, but found %r" %
(length, self.peek(k).encode('utf-8')), self.get_mark())
(length, self.peek(k)), self.get_mark())
code = int(self.prefix(length), 16)
chunks.append(unichr(code))
chunks.append(chr(code))
self.forward(length)
elif ch in u'\r\n\x85\u2028\u2029':
elif ch in '\r\n\x85\u2028\u2029':
self.scan_line_break()
chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
else:
raise ScannerError("while scanning a double-quoted scalar", start_mark,
"found unknown escape character %r" % ch.encode('utf-8'), self.get_mark())
"found unknown escape character %r" % ch, self.get_mark())
else:
return chunks
@@ -1235,21 +1229,21 @@ class Scanner(object):
# See the specification for details.
chunks = []
length = 0
while self.peek(length) in u' \t':
while self.peek(length) in ' \t':
length += 1
whitespaces = self.prefix(length)
self.forward(length)
ch = self.peek()
if ch == u'\0':
if ch == '\0':
raise ScannerError("while scanning a quoted scalar", start_mark,
"found unexpected end of stream", self.get_mark())
elif ch in u'\r\n\x85\u2028\u2029':
elif ch in '\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
breaks = self.scan_flow_scalar_breaks(double, start_mark)
if line_break != u'\n':
if line_break != '\n':
chunks.append(line_break)
elif not breaks:
chunks.append(u' ')
chunks.append(' ')
chunks.extend(breaks)
else:
chunks.append(whitespaces)
@@ -1262,13 +1256,13 @@ class Scanner(object):
# Instead of checking indentation, we check for document
# separators.
prefix = self.prefix(3)
if (prefix == u'---' or prefix == u'...') \
and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
if (prefix == '---' or prefix == '...') \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
raise ScannerError("while scanning a quoted scalar", start_mark,
"found unexpected document separator", self.get_mark())
while self.peek() in u' \t':
while self.peek() in ' \t':
self.forward()
if self.peek() in u'\r\n\x85\u2028\u2029':
if self.peek() in '\r\n\x85\u2028\u2029':
chunks.append(self.scan_line_break())
else:
return chunks
@@ -1290,19 +1284,19 @@ class Scanner(object):
spaces = []
while True:
length = 0
if self.peek() == u'#':
if self.peek() == '#':
break
while True:
ch = self.peek(length)
if ch in u'\0 \t\r\n\x85\u2028\u2029' \
or (not self.flow_level and ch == u':' and
self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \
or (self.flow_level and ch in u',:?[]{}'):
if ch in '\0 \t\r\n\x85\u2028\u2029' \
or (not self.flow_level and ch == ':' and
self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029') \
or (self.flow_level and ch in ',:?[]{}'):
break
length += 1
# It's not clear what we should do with ':' in the flow context.
if (self.flow_level and ch == u':'
and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'):
if (self.flow_level and ch == ':'
and self.peek(length+1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}'):
self.forward(length)
raise ScannerError("while scanning a plain scalar", start_mark,
"found unexpected ':'", self.get_mark(),
@@ -1315,10 +1309,10 @@ class Scanner(object):
self.forward(length)
end_mark = self.get_mark()
spaces = self.scan_plain_spaces(indent, start_mark)
if not spaces or self.peek() == u'#' \
if not spaces or self.peek() == '#' \
or (not self.flow_level and self.column < indent):
break
return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
return ScalarToken(''.join(chunks), True, start_mark, end_mark)
def scan_plain_spaces(self, indent, start_mark):
# See the specification for details.
@@ -1326,32 +1320,32 @@ class Scanner(object):
# We just forbid them completely. Do not use tabs in YAML!
chunks = []
length = 0
while self.peek(length) in u' ':
while self.peek(length) in ' ':
length += 1
whitespaces = self.prefix(length)
self.forward(length)
ch = self.peek()
if ch in u'\r\n\x85\u2028\u2029':
if ch in '\r\n\x85\u2028\u2029':
line_break = self.scan_line_break()
self.allow_simple_key = True
prefix = self.prefix(3)
if (prefix == u'---' or prefix == u'...') \
and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
if (prefix == '---' or prefix == '...') \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
return
breaks = []
while self.peek() in u' \r\n\x85\u2028\u2029':
while self.peek() in ' \r\n\x85\u2028\u2029':
if self.peek() == ' ':
self.forward()
else:
breaks.append(self.scan_line_break())
prefix = self.prefix(3)
if (prefix == u'---' or prefix == u'...') \
and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
if (prefix == '---' or prefix == '...') \
and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
return
if line_break != u'\n':
if line_break != '\n':
chunks.append(line_break)
elif not breaks:
chunks.append(u' ')
chunks.append(' ')
chunks.extend(breaks)
elif whitespaces:
chunks.append(whitespaces)
@@ -1362,22 +1356,20 @@ class Scanner(object):
# For some strange reasons, the specification does not allow '_' in
# tag handles. I have allowed it anyway.
ch = self.peek()
if ch != u'!':
if ch != '!':
raise ScannerError("while scanning a %s" % name, start_mark,
"expected '!', but found %r" % ch.encode('utf-8'),
self.get_mark())
"expected '!', but found %r" % ch, self.get_mark())
length = 1
ch = self.peek(length)
if ch != u' ':
while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-_':
if ch != ' ':
while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-_':
length += 1
ch = self.peek(length)
if ch != u'!':
if ch != '!':
self.forward(length)
raise ScannerError("while scanning a %s" % name, start_mark,
"expected '!', but found %r" % ch.encode('utf-8'),
self.get_mark())
"expected '!', but found %r" % ch, self.get_mark())
length += 1
value = self.prefix(length)
self.forward(length)
@@ -1389,9 +1381,9 @@ class Scanner(object):
chunks = []
length = 0
ch = self.peek(length)
while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
or ch in u'-;/?:@&=+$,_.!~*\'()[]%':
if ch == u'%':
while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' \
or ch in '-;/?:@&=+$,_.!~*\'()[]%':
if ch == '%':
chunks.append(self.prefix(length))
self.forward(length)
length = 0
@@ -1405,26 +1397,25 @@ class Scanner(object):
length = 0
if not chunks:
raise ScannerError("while parsing a %s" % name, start_mark,
"expected URI, but found %r" % ch.encode('utf-8'),
self.get_mark())
return u''.join(chunks)
"expected URI, but found %r" % ch, self.get_mark())
return ''.join(chunks)
def scan_uri_escapes(self, name, start_mark):
# See the specification for details.
bytes = []
codes = []
mark = self.get_mark()
while self.peek() == u'%':
while self.peek() == '%':
self.forward()
for k in range(2):
if self.peek(k) not in u'0123456789ABCDEFabcdef':
if self.peek(k) not in '0123456789ABCDEFabcdef':
raise ScannerError("while scanning a %s" % name, start_mark,
"expected URI escape sequence of 2 hexdecimal numbers, but found %r" %
(self.peek(k).encode('utf-8')), self.get_mark())
bytes.append(chr(int(self.prefix(2), 16)))
"expected URI escape sequence of 2 hexdecimal numbers, but found %r"
% self.peek(k), self.get_mark())
codes.append(int(self.prefix(2), 16))
self.forward(2)
try:
value = unicode(''.join(bytes), 'utf-8')
except UnicodeDecodeError, exc:
value = bytes(codes).decode('utf-8')
except UnicodeDecodeError as exc:
raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
return value
@@ -1438,16 +1429,16 @@ class Scanner(object):
# '\u2029 : '\u2029'
# default : ''
ch = self.peek()
if ch in u'\r\n\x85':
if self.prefix(2) == u'\r\n':
if ch in '\r\n\x85':
if self.prefix(2) == '\r\n':
self.forward(2)
else:
self.forward()
return u'\n'
elif ch in u'\u2028\u2029':
return '\n'
elif ch in '\u2028\u2029':
self.forward()
return ch
return u''
return ''
#try:
# import psyco
+5 -5
View File
@@ -1,16 +1,16 @@
__all__ = ['Serializer', 'SerializerError']
from error import YAMLError
from events import *
from nodes import *
from .error import YAMLError
from .events import *
from .nodes import *
class SerializerError(YAMLError):
pass
class Serializer(object):
class Serializer:
ANCHOR_TEMPLATE = u'id%03d'
ANCHOR_TEMPLATE = 'id%03d'
def __init__(self, encoding=None,
explicit_start=None, explicit_end=None, version=None, tags=None):