Compare commits

...

55 Commits

Author SHA1 Message Date
Kenneth Reitz e42d215833 sphinx version # fix. 2011-03-24 06:09:15 -04:00
Kenneth Reitz 1a5e2ecb33 release date bump 2011-03-24 05:57:22 -04:00
Kenneth Reitz e1bf189847 setup.py says 25,26,27,30,31,32 2011-03-23 05:47:49 -04:00
Kenneth Reitz 0785328e21 updated HISTORY 2011-03-23 04:53:24 -04:00
Kenneth Reitz 6ba0cc9af3 3.1, 3.2 support. 2011-03-23 04:49:07 -04:00
Kenneth Reitz 36876205e7 3.2 compatibility 2011-03-23 04:48:58 -04:00
Kenneth Reitz 1b97b7191e pre-version bump 2011-03-23 04:24:25 -04:00
Kenneth Reitz 8b575df419 version check in setup.py fix 2011-03-23 04:24:12 -04:00
Kenneth Reitz 6a3928759a 25 support 2011-03-23 04:13:49 -04:00
Kenneth Reitz 63348d883b fix imports for 2x 2011-03-23 04:04:03 -04:00
Kenneth Reitz 5dce600969 xls import fix 2011-03-23 04:00:05 -04:00
Kenneth Reitz 0913b54f47 this isn't an apple 2011-03-23 03:56:07 -04:00
Kenneth Reitz c5bbc74b96 import magic 2011-03-23 03:55:23 -04:00
Kenneth Reitz 7f5342a1b8 no need for simplejson anymore 2011-03-23 02:27:40 -04:00
Kenneth Reitz d42f9bc10f python3 tox config for jenkins 2011-03-23 02:23:43 -04:00
Kenneth Reitz c6565c9e29 2.5 compatible version checking 2011-03-23 02:22:10 -04:00
Kenneth Reitz 1a9343750e Merge branch 'feature/3.x' 2011-03-23 02:08:40 -04:00
Kenneth Reitz 8a393214c8 add tox tests for 3.x 2011-03-23 02:08:04 -04:00
Kenneth Reitz b8ed741a36 Same codebase for 2.x and 3.x! 2011-03-23 02:07:39 -04:00
Kenneth Reitz cddbd78a61 autoload 3 modules if using 3 2011-03-23 02:02:59 -04:00
Kenneth Reitz b113f49ce6 python3 AND python2 packages. 2011-03-23 01:59:19 -04:00
Kenneth Reitz 1429b9f8c4 xlwrt3 if python 3 2011-03-23 01:56:57 -04:00
Kenneth Reitz 42700f98a5 check python version 2011-03-23 01:47:10 -04:00
Kenneth Reitz 0e56db632a xlwt3 package 2011-03-23 01:47:01 -04:00
Kenneth Reitz b07512071e cyaml fixes 2011-03-23 01:38:31 -04:00
Kenneth Reitz e4881809d6 BytesIO in xls for 3.x 2011-03-23 01:38:20 -04:00
Kenneth Reitz 54ab300d2d markup changes for 3.x 2011-03-23 01:37:33 -04:00
Kenneth Reitz 4368d64317 xlwrt cleanups 2011-03-23 01:37:14 -04:00
Kenneth Reitz 117344de14 2to3! 2011-03-23 01:13:16 -04:00
Kenneth Reitz 58bc1c7dcf add xlwt 3.x 2011-03-23 01:13:03 -04:00
Kenneth Reitz 4c8b5e72e3 remove 2.x xlwt 2011-03-23 01:12:48 -04:00
Kenneth Reitz b900236157 testing whitespace cleanup 2011-03-23 00:50:27 -04:00
Kenneth Reitz dc14a16e04 added formatter tests 2011-03-23 00:49:32 -04:00
Kenneth Reitz 2d2ac9b708 col not key 2011-03-23 00:49:25 -04:00
Kenneth Reitz 1efcb7a63d Merge branch 'feature/formatters' 2011-03-23 00:39:16 -04:00
Kenneth Reitz 65c73dfc42 do some internal validation for adding formatters 2011-03-23 00:38:45 -04:00
Kenneth Reitz 3803a7a21b formatter execution in place upon export 2011-03-23 00:32:56 -04:00
Kenneth Reitz 8b5b29fc90 added Dataset.add_formatter 2011-03-23 00:20:39 -04:00
Kenneth Reitz e8ba765426 tablib.helpers is not longer needed 🍰 2011-03-23 00:03:33 -04:00
Kenneth Reitz 57001a5465 Merge https://github.com/mwhooker/tablib into develop 2011-03-22 23:57:42 -04:00
Matthew Hooker c8493ff047 fixes issue #38: python 2.5 support 2011-03-22 17:16:30 -04:00
Kenneth Reitz 03914323c2 Merge https://github.com/playpauseandstop/tablib into develop 2011-03-01 17:06:15 -05:00
Igor Davydenko 2f331cee8e Fix #24, add support of spaces in CSV files. 2011-03-01 19:36:19 +02:00
Kenneth Reitz e1734f2315 added __docformat__ 2011-02-21 14:07:42 -05:00
Kenneth Reitz 22cddbcd63 well then 2011-02-21 02:55:24 -05:00
Kenneth Reitz 76f09cd3b3 Added release number to documentation. 2011-02-21 02:37:53 -05:00
Kenneth Reitz d11c09febe Added License text. 2011-02-21 02:37:29 -05:00
Kenneth Reitz 9ab277a468 docs: Python 2.5 is supported now. 2011-02-21 02:32:20 -05:00
Kenneth Reitz 23c1831144 Added HACKING file. 2011-02-21 02:15:00 -05:00
Kenneth Reitz 1cf9bd14b4 ci.kennethreitz.com now 2011-02-21 02:12:41 -05:00
Kenneth Reitz c2331f7a23 Updated pythons list. 2011-02-21 02:11:59 -05:00
Kenneth Reitz bccf0d1ba1 no more test suite. 2011-02-18 04:37:00 -05:00
Kenneth Reitz c219972ccd added pypy location to tox config 2011-02-18 04:28:50 -05:00
Kenneth Reitz 52e9d44739 typo 2011-02-18 03:42:54 -05:00
Kenneth Reitz e94ecd8472 Small doc updates 2011-02-18 03:41:54 -05:00
63 changed files with 18807 additions and 251 deletions
+14
View File
@@ -0,0 +1,14 @@
Where possible, please follow PEP8 with regard to coding style. Sometimes the line
length restriction is too hard to follow, so don't bend over backwards there.
Triple-quotes should always be """, single quotes are ' unless using "
would result in less escaping within the string.
All modules, functions, and methods should be well documented reStructuredText for
Sphinx AutoDoc.
All functionality should be available in pure Python. Optional C (via Cython)
implementations may be written for performance reasons, but should never
replace the Python implementation.
Lastly, don't take yourself too seriously :)
+16 -7
View File
@@ -1,13 +1,22 @@
History
-------
0.9.5 (2011-03-24)
++++++++++++++++++
* Python 3.1, Python 3.2 Support (same code base!)
* Formatter callback support
* Various bug fixes
0.9.4 (2011-02-18)
++++++++++++++++++
* Python 2.5 Support!
* Tox Testing for 2.5, 2.6, 2.7
* AnyJSON Integrated
* OrderedDict support ?
* OrderedDict support
* Caved to community pressure (spaces)
@@ -22,7 +31,7 @@ History
0.9.2 (2010-11-17)
++++++++++++++++++
* Tanspose method added to Datasets.
* Transpose method added to Datasets.
* New frozen top row in Excel output.
* Pickling support for Datasets and Rows.
* Support for row/column stacking.
@@ -39,7 +48,7 @@ History
* Massive documentation update!
* Tablib.org!
* Row taggins and Dataset filtering!
* Row tagging and Dataset filtering!
* Column insert/delete support
* Column append API change (header required)
* Internal Changes (Row object and use thereof)
@@ -49,13 +58,13 @@ History
++++++++++++++++++
* New import system. All dependencies attempt to load from site-packages,
then fallback on vendorized modules.
then fallback on tenderized modules.
0.8.4 (2010-10-04)
++++++++++++++++++
* Upated XLS output: Only wrap if '\\n' in cell.
* Updated XLS output: Only wrap if '\\n' in cell.
0.8.3 (2010-10-04)
@@ -90,13 +99,13 @@ History
++++++++++++++++++
* Reverting methods back to properties.
* Windows bug compenated in documentation.
* Windows bug compensated in documentation.
0.7.0 (2010-09-20)
++++++++++++++++++
* Renamed DataBook Databook for consistiency.
* Renamed DataBook Databook for consistency.
* Export properties changed to methods (XLS filename / StringIO bug).
* Optional Dataset.xls(path='filename') support (for writing on windows).
* Added utf-8 on the worksheet level.
+2 -4
View File
@@ -141,10 +141,8 @@ If you'd like to contribute, simply fork `the repository`_, commit your changes
Roadmap
-------
- Release CLI Interface
- Auto-detect import format
- Add possible other exports (SQL?)
- Ability to assign types to rows (set, regex=, &c.)
- Python 2.4, 3.0, 3.1, 3.2 Support
- Tablib.ext namespace
.. _`the repository`: http://github.com/kennethreitz/tablib
.. _AUTHORS: http://github.com/kennethreitz/tablib/blob/master/AUTHORS
+1 -1
View File
@@ -48,7 +48,7 @@ copyright = u'2011, Kenneth Reitz. Styles (modified) © Armin Ronacher'
# built documents.
#
# The short X.Y version.
version = tablib.core.__version__
version = '0.9.5'
# The full version, including alpha/beta/rc tags.
release = version
+2 -2
View File
@@ -87,7 +87,7 @@ Adding New Formats
Tablib welcomes new format additions! Format suggestions include:
* Tab Seperated Values
* Tab Separated Values
* MySQL Dump
* HTML Table
@@ -178,7 +178,7 @@ Every commit made to the **develop** branch is automatically tested and inspecte
Anyone may view the build status and history at any time.
http://git.kennethreitz.com/ci/
http://ci.kennethreitz.com/
If you are trustworthy and plan to contribute to tablib on a regular basis, please contact `Kenneth Reitz`_ to get an account on the Hudson Server.
+4 -2
View File
@@ -3,8 +3,10 @@
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Tablib: Pythonic Tabular Data
=============================
Tablib: Pythonic Tabular Datasets
=================================
Release |version|.
.. Contents:
..
+1 -1
View File
@@ -55,7 +55,7 @@ However, if performance is important to you (and it should be), you can install
$ pip install PyYAML
If you're using Python 2.5 (currently unsupported), you should also install the **simplejson** module. If you're using Python 2.6+, the built-in **json** module is already optimized and in use. ::
If you're using Python 2.5, you should also install the **simplejson** module (pip will do this for you). If you're using Python 2.6+, the built-in **json** module is already optimized and in use. ::
$ pip install simplejson
+31 -2
View File
@@ -36,6 +36,31 @@ Tablib is released under terms of `The MIT License`_.
.. _`The MIT License`: http://www.opensource.org/licenses/mit-license.php
.. _license:
Tablib License
--------------
Copyright (c) 2011 Kenneth Reitz.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
.. _pythonsupport:
@@ -44,8 +69,12 @@ Pythons Supported
At this time, the following Python platforms are officially supported:
* Python 2.6
* Python 2.7
* cPython 2.5
* cPython 2.6
* cPython 2.7
* cPython 3.1
* cPython 3.2
* PyPy-c 1.4
Support for other Pythons will be rolled out soon.
+5 -4
View File
@@ -17,12 +17,12 @@ if sys.argv[-1] == "publish":
required = []
if sys.version_info < (2,6):
if sys.version_info[:2] < (2,6):
required.append('simplejson')
setup(
name='tablib',
version='0.9.4',
version='0.9.5',
description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)',
long_description=open('README.rst').read() + '\n\n' +
open('HISTORY.rst').read(),
@@ -46,7 +46,8 @@ setup(
'Programming Language :: Python :: 2.5',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
# 'Programming Language :: Python :: 3.0',
# 'Programming Language :: Python :: 3.1',
'Programming Language :: Python :: 3.0',
'Programming Language :: Python :: 3.1',
'Programming Language :: Python :: 3.2',
),
)
+12 -4
View File
@@ -1,8 +1,16 @@
""" Tablib.
"""
from tablib.core import (
Databook, Dataset, detect, import_set,
InvalidDatasetType, InvalidDimensions, UnsupportedFormat
)
import sys
if sys.version_info[0:1] > (2, 5):
from tablib.core import (
Databook, Dataset, detect, import_set,
InvalidDatasetType, InvalidDimensions, UnsupportedFormat
)
else:
from tablib.core25 import (
Databook, Dataset, detect, import_set,
InvalidDatasetType, InvalidDimensions, UnsupportedFormat
)
+66 -16
View File
@@ -3,7 +3,7 @@
tablib.core
~~~~~~~~~~~
This module implements the central tablib objects.
This module implements the central Tablib objects.
:copyright: (c) 2011 by Kenneth Reitz.
:license: MIT, see LICENSE for more details.
@@ -13,6 +13,7 @@ from copy import copy
from operator import itemgetter
from tablib import formats
import collections
try:
from collections import OrderedDict
@@ -26,6 +27,7 @@ __build__ = 0x000904
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2011 Kenneth Reitz'
__docformat__ = 'restructuredtext'
class Row(object):
@@ -62,7 +64,7 @@ class Row(object):
return {slot: [getattr(self, slot) for slot in self.__slots__]}
def __setstate__(self, state):
for (k, v) in state.items(): setattr(self, k, v)
for (k, v) in list(state.items()): setattr(self, k, v)
def append(self, value):
self._row.append(value)
@@ -88,7 +90,7 @@ class Row(object):
if tag == None:
return False
elif isinstance(tag, basestring):
elif isinstance(tag, str):
return (tag in self.tags)
else:
return bool(len(set(tag) & set(self.tags)))
@@ -136,6 +138,9 @@ class Dataset(object):
# ('title', index) tuples
self._separators = []
# (column, callback) tuples
self._formatters = []
try:
self.headers = kwargs['headers']
@@ -155,7 +160,7 @@ class Dataset(object):
def __getitem__(self, key):
if isinstance(key, basestring):
if isinstance(key, str):
if key in self.headers:
pos = self.headers.index(key) # get 'key' index from each data
return [row[pos] for row in self._data]
@@ -175,7 +180,7 @@ class Dataset(object):
def __delitem__(self, key):
if isinstance(key, basestring):
if isinstance(key, str):
if key in self.headers:
@@ -236,13 +241,29 @@ class Dataset(object):
def _package(self, dicts=True):
"""Packages Dataset into lists of dictionaries for transmission."""
_data = list(self._data)
# Execute formatters
if self._formatters:
for row_i, row in enumerate(_data):
for col, callback in self._formatters:
try:
if col is None:
for j, c in enumerate(row):
_data[row_i][j] = callback(c)
else:
_data[row_i][col] = callback(row[col])
except IndexError:
raise InvalidDatasetIndex
if self.headers:
if dicts:
data = [OrderedDict(zip(self.headers, data_row)) for data_row in self ._data]
data = [OrderedDict(list(zip(self.headers, data_row))) for data_row in _data]
else:
data = [list(self.headers)] + list(self._data)
data = [list(self.headers)] + list(_data)
else:
data = [list(row) for row in self._data]
data = [list(row) for row in _data]
return data
@@ -257,8 +278,8 @@ class Dataset(object):
else:
header = []
if len(col) == 1 and callable(col[0]):
col = map(col[0], self._data)
if len(col) == 1 and isinstance(col[0], collections.Callable):
col = list(map(col[0], self._data))
col = tuple(header + col)
return col
@@ -347,9 +368,9 @@ class Dataset(object):
# if list of objects
elif isinstance(pickle[0], dict):
self.wipe()
self.headers = pickle[0].keys()
self.headers = list(pickle[0].keys())
for row in pickle:
self.append(Row(row.values()))
self.append(Row(list(row.values())))
else:
raise UnsupportedFormat
@@ -385,6 +406,7 @@ class Dataset(object):
"""
pass
@property
def tsv():
"""A TSV representation of the :class:`Dataset` object. The top row will contain
@@ -469,6 +491,29 @@ class Dataset(object):
self.insert_separator(index, text)
def add_formatter(self, col, handler):
"""Adds a :ref:`formatter` to the :class:`Dataset`.
.. versionadded:: 0.9.5
:param col: column to. Accepts index int or header str.
:param handler: reference to callback function to execute
against each cell value.
"""
if isinstance(col, str):
if col in self.headers:
col = self.headers.index(col) # get 'key' index from each data
else:
raise KeyError
if not col > self.width:
self._formatters.append((col, handler))
else:
raise InvalidDatasetIndex
return True
def insert(self, index, row=None, col=None, header=None, tags=list()):
"""Inserts a row or column to the :class:`Dataset` at the given index.
@@ -504,8 +549,8 @@ class Dataset(object):
col = list(col)
# Callable Columns...
if len(col) == 1 and callable(col[0]):
col = map(col[0], self._data)
if len(col) == 1 and isinstance(col[0], collections.Callable):
col = list(map(col[0], self._data))
col = self._clean_col(col)
self._validate(col=col)
@@ -543,7 +588,7 @@ class Dataset(object):
Returns a new :class:`Dataset` instance where columns have been
sorted."""
if isinstance(col, basestring):
if isinstance(col, str):
if not self.headers:
raise HeadersNeeded
@@ -658,12 +703,14 @@ class Dataset(object):
return _dset
def wipe(self):
"""Removes all content and headers from the :class:`Dataset` object."""
self._data = list()
self.__headers = None
class Databook(object):
"""A book of :class:`Dataset` objects.
"""
@@ -748,7 +795,7 @@ def import_set(stream):
format.import_set(data, stream)
return data
except AttributeError, e:
except AttributeError as e:
return None
@@ -758,6 +805,9 @@ class InvalidDatasetType(Exception):
class InvalidDimensions(Exception):
"Invalid size"
class InvalidDatasetIndex(Exception):
"Outside of Dataset size"
class HeadersNeeded(Exception):
"Header parameter must be given when appending a column in this Dataset."
+818
View File
@@ -0,0 +1,818 @@
# -*- coding: utf-8 -*-
u"""
tablib.core
~~~~~~~~~~~
This module implements the central Tablib objects.
:copyright: (c) 2011 by Kenneth Reitz.
:license: MIT, see LICENSE for more details.
"""
from copy import copy
from operator import itemgetter
from tablib import formats
import collections
from itertools import izip
from itertools import imap
try:
from collections import OrderedDict
except ImportError:
from tablib.packages.ordereddict import OrderedDict
__title__ = u'tablib'
__version__ = u'0.9.4'
__build__ = 0x000904
__author__ = u'Kenneth Reitz'
__license__ = u'MIT'
__copyright__ = u'Copyright 2011 Kenneth Reitz'
__docformat__ = u'restructuredtext'
class Row(object):
u"""Internal Row object. Mainly used for filtering."""
__slots__ = [u'tuple', u'_row', u'tags']
def __init__(self, row=list(), tags=list()):
self._row = list(row)
self.tags = list(tags)
def __iter__(self):
return (col for col in self._row)
def __len__(self):
return len(self._row)
def __repr__(self):
return repr(self._row)
def __getslice__(self, i, j):
return self._row[i,j]
def __getitem__(self, i):
return self._row[i]
def __setitem__(self, i, value):
self._row[i] = value
def __delitem__(self, i):
del self._row[i]
def __getstate__(self):
return {slot: [getattr(self, slot) for slot in self.__slots__]}
def __setstate__(self, state):
for (k, v) in list(state.items()): setattr(self, k, v)
def append(self, value):
self._row.append(value)
def insert(self, index, value):
self._row.insert(index, value)
def __contains__(self, item):
return (item in self._row)
@property
def tuple(self):
u'''Tuple representation of :class:`Row`.'''
return tuple(self._row)
@property
def list(self):
u'''List representation of :class:`Row`.'''
return list(self._row)
def has_tag(self, tag):
u"""Returns true if current row contains tag."""
if tag == None:
return False
elif isinstance(tag, basestring):
return (tag in self.tags)
else:
return bool(len(set(tag) & set(self.tags)))
class Dataset(object):
u"""The :class:`Dataset` object is the heart of Tablib. It provides all core
functionality.
Usually you create a :class:`Dataset` instance in your main module, and append
rows and columns as you collect data. ::
data = tablib.Dataset()
data.headers = ('name', 'age')
for (name, age) in some_collector():
data.append((name, age))
You can also set rows and headers upon instantiation. This is useful if dealing
with dozens or hundres of :class:`Dataset` objects. ::
headers = ('first_name', 'last_name')
data = [('John', 'Adams'), ('George', 'Washington')]
data = tablib.Dataset(*data, headers=headers)
:param \*args: (optional) list of rows to populate Dataset
:param headers: (optional) list strings for Dataset header row
.. admonition:: Format Attributes Definition
If you look at the code, the various output/import formats are not
defined within the :class:`Dataset` object. To add support for a new format, see
:ref:`Adding New Formats <newformats>`.
"""
def __init__(self, *args, **kwargs):
self._data = list(Row(arg) for arg in args)
self.__headers = None
# ('title', index) tuples
self._separators = []
# (column, callback) tuples
self._formatters = []
try:
self.headers = kwargs[u'headers']
except KeyError:
self.headers = None
try:
self.title = kwargs[u'title']
except KeyError:
self.title = None
self._register_formats()
def __len__(self):
return self.height
def __getitem__(self, key):
if isinstance(key, basestring):
if key in self.headers:
pos = self.headers.index(key) # get 'key' index from each data
return [row[pos] for row in self._data]
else:
raise KeyError
else:
_results = self._data[key]
if isinstance(_results, Row):
return _results.tuple
else:
return [result.tuple for result in _results]
def __setitem__(self, key, value):
self._validate(value)
self._data[key] = Row(value)
def __delitem__(self, key):
if isinstance(key, basestring):
if key in self.headers:
pos = self.headers.index(key)
del self.headers[pos]
for i, row in enumerate(self._data):
del row[pos]
self._data[i] = row
else:
raise KeyError
else:
del self._data[key]
def __repr__(self):
try:
return u'<%s dataset>' % (self.title.lower())
except AttributeError:
return u'<dataset object>'
@classmethod
def _register_formats(cls):
u"""Adds format properties."""
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set))
except AttributeError:
setattr(cls, fmt.title, property(fmt.export_set))
except AttributeError:
pass
def _validate(self, row=None, col=None, safety=False):
u"""Assures size of every row in dataset is of proper proportions."""
if row:
is_valid = (len(row) == self.width) if self.width else True
elif col:
if len(col) < 1:
is_valid = True
else:
is_valid = (len(col) == self.height) if self.height else True
else:
is_valid = all((len(x) == self.width for x in self._data))
if is_valid:
return True
else:
if not safety:
raise InvalidDimensions
return False
def _package(self, dicts=True):
u"""Packages Dataset into lists of dictionaries for transmission."""
_data = list(self._data)
# Execute formatters
if self._formatters:
for row_i, row in enumerate(_data):
for col, callback in self._formatters:
try:
if col is None:
for j, c in enumerate(row):
_data[row_i][j] = callback(c)
else:
_data[row_i][col] = callback(row[col])
except IndexError:
raise InvalidDatasetIndex
if self.headers:
if dicts:
data = [OrderedDict(list(izip(self.headers, data_row))) for data_row in _data]
else:
data = [list(self.headers)] + list(_data)
else:
data = [list(row) for row in _data]
return data
def _clean_col(self, col):
u"""Prepares the given column for insert/append."""
col = list(col)
if self.headers:
header = [col.pop(0)]
else:
header = []
if len(col) == 1 and hasattr(col[0], '__call__'):
col = list(imap(col[0], self._data))
col = tuple(header + col)
return col
@property
def height(self):
u"""The number of rows currently in the :class:`Dataset`.
Cannot be directly modified.
"""
return len(self._data)
@property
def width(self):
u"""The number of columns currently in the :class:`Dataset`.
Cannot be directly modified.
"""
try:
return len(self._data[0])
except IndexError:
try:
return len(self.headers)
except TypeError:
return 0
def _get_headers(self):
u"""An *optional* list of strings to be used for header rows and attribute names.
This must be set manually. The given list length must equal :class:`Dataset.width`.
"""
return self.__headers
def _set_headers(self, collection):
u"""Validating headers setter."""
self._validate(collection)
if collection:
try:
self.__headers = list(collection)
except TypeError:
raise TypeError
else:
self.__headers = None
headers = property(_get_headers, _set_headers)
def _get_dict(self):
u"""A native Python representation of the :class:`Dataset` object. If headers have
been set, a list of Python dictionaries will be returned. If no headers have been set,
a list of tuples (rows) will be returned instead.
A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
data = tablib.Dataset()
data.json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
"""
return self._package()
def _set_dict(self, pickle):
u"""A native Python representation of the Dataset object. If headers have been
set, a list of Python dictionaries will be returned. If no headers have been
set, a list of tuples (rows) will be returned instead.
A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. ::
data = tablib.Dataset()
data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
"""
if not len(pickle):
return
# if list of rows
if isinstance(pickle[0], list):
self.wipe()
for row in pickle:
self.append(Row(row))
# if list of objects
elif isinstance(pickle[0], dict):
self.wipe()
self.headers = list(pickle[0].keys())
for row in pickle:
self.append(Row(list(row.values())))
else:
raise UnsupportedFormat
dict = property(_get_dict, _set_dict)
@property
def xls():
u"""An Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`seperators`. Cannot be set.
.. admonition:: Binary Warning
:class:`Dataset.xls` contains binary data, so make sure to write in binary mode::
with open('output.xls', 'wb') as f:
f.write(data.xls)'
"""
pass
@property
def csv():
u"""A CSV representation of the :class:`Dataset` object. The top row will contain
headers, if they have been set. Otherwise, the top row will contain
the first row of the dataset.
A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. ::
data = tablib.Dataset()
data.csv = 'age, first_name, last_name\\n90, John, Adams'
Import assumes (for now) that headers exist.
"""
pass
@property
def tsv():
u"""A TSV representation of the :class:`Dataset` object. The top row will contain
headers, if they have been set. Otherwise, the top row will contain
the first row of the dataset.
A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. ::
data = tablib.Dataset()
data.tsv = 'age\tfirst_name\tlast_name\\n90\tJohn\tAdams'
Import assumes (for now) that headers exist.
"""
@property
def yaml():
u"""A YAML representation of the :class:`Dataset` object. If headers have been
set, a YAML list of objects will be returned. If no headers have
been set, a YAML list of lists (rows) will be returned instead.
A dataset object can also be imported by setting the :class:`Dataset.json` attribute: ::
data = tablib.Dataset()
data.yaml = '- {age: 90, first_name: John, last_name: Adams}'
Import assumes (for now) that headers exist.
"""
pass
@property
def json():
u"""A JSON representation of the :class:`Dataset` object. If headers have been
set, a JSON list of objects will be returned. If no headers have
been set, a JSON list of lists (rows) will be returned instead.
A dataset object can also be imported by setting the :class:`Dataset.json` attribute: ::
data = tablib.Dataset()
data.json = '[{age: 90, first_name: "John", liast_name: "Adams"}]'
Import assumes (for now) that headers exist.
"""
@property
def html():
u"""A HTML table representation of the :class:`Dataset` object. If
headers have been set, they will be used as table headers.
..notice:: This method can be used for export only.
"""
pass
def append(self, row=None, col=None, header=None, tags=list()):
u"""Adds a row or column to the :class:`Dataset`.
Usage is :class:`Dataset.insert` for documentation.
"""
if row is not None:
self.insert(self.height, row=row, tags=tags)
elif col is not None:
self.insert(self.width, col=col, header=header)
def insert_separator(self, index, text=u'-'):
u"""Adds a separator to :class:`Dataset` at given index."""
sep = (index, text)
self._separators.append(sep)
def append_separator(self, text=u'-'):
u"""Adds a :ref:`seperator <seperators>` to the :class:`Dataset`."""
# change offsets if headers are or aren't defined
if not self.headers:
index = self.height if self.height else 0
else:
index = (self.height + 1) if self.height else 1
self.insert_separator(index, text)
def add_formatter(self, col, handler):
u"""Adds a :ref:`formatter` to the :class:`Dataset`.
.. versionadded:: 0.9.5
:param col: column to. Accepts index int or header str.
:param handler: reference to callback function to execute
against each cell value.
"""
if isinstance(col, basestring):
if col in self.headers:
col = self.headers.index(col) # get 'key' index from each data
else:
raise KeyError
if not col > self.width:
self._formatters.append((col, handler))
else:
raise InvalidDatasetIndex
return True
def insert(self, index, row=None, col=None, header=None, tags=list()):
u"""Inserts a row or column to the :class:`Dataset` at the given index.
Rows and columns inserted must be the correct size (height or width).
The default behaviour is to insert the given row to the :class:`Dataset`
object at the given index. If the ``col`` parameter is given, however,
a new column will be insert to the :class:`Dataset` object instead.
You can also insert a column of a single callable object, which will
add a new column with the return values of the callable each as an
item in the column. ::
data.append(col=random.randint)
See :ref:`dyncols` for an in-depth example.
.. versionchanged:: 0.9.0
If inserting a column, and :class:`Dataset.headers` is set, the
header attribute must be set, and will be considered the header for
that row.
.. versionadded:: 0.9.0
If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
This gives you the ability to :class:`filter <Dataset.filter>` your
:class:`Dataset` later.
"""
if row:
self._validate(row)
self._data.insert(index, Row(row, tags=tags))
elif col:
col = list(col)
# Callable Columns...
if len(col) == 1 and hasattr(col[0], '__call__'):
col = list(imap(col[0], self._data))
col = self._clean_col(col)
self._validate(col=col)
if self.headers:
# pop the first item off, add to headers
if not header:
raise HeadersNeeded()
self.headers.insert(index, header)
if self.height and self.width:
for i, row in enumerate(self._data):
row.insert(index, col[i])
self._data[i] = row
else:
self._data = [Row([row]) for row in col]
def filter(self, tag):
u"""Returns a new instance of the :class:`Dataset`, excluding any rows
that do not contain the given :ref:`tags <tags>`.
"""
_dset = copy(self)
_dset._data = [row for row in _dset._data if row.has_tag(tag)]
return _dset
def sort(self, col, reverse=False):
u"""Sort a :class:`Dataset` by a specific column, given string (for
header) or integer (for column index). The order can be reversed by
setting ``reverse`` to ``True``.
Returns a new :class:`Dataset` instance where columns have been
sorted."""
if isinstance(col, basestring):
if not self.headers:
raise HeadersNeeded
_sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
_dset = Dataset(headers=self.headers)
for item in _sorted:
row = [item[key] for key in self.headers]
_dset.append(row=row)
else:
if self.headers:
col = self.headers[col]
_sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
_dset = Dataset(headers=self.headers)
for item in _sorted:
if self.headers:
row = [item[key] for key in self.headers]
else:
row = item
_dset.append(row=row)
return _dset
def transpose(self):
u"""Transpose a :class:`Dataset`, turning rows into columns and vice
versa, returning a new ``Dataset`` instance. The first row of the
original instance becomes the new header row."""
# Don't transpose if there is no data
if not self:
return
_dset = Dataset()
# The first element of the headers stays in the headers,
# it is our "hinge" on which we rotate the data
new_headers = [self.headers[0]] + self[self.headers[0]]
_dset.headers = new_headers
for column in self.headers:
if column == self.headers[0]:
# It's in the headers, so skip it
continue
# Adding the column name as now they're a regular column
row_data = [column] + self[column]
row_data = Row(row_data)
_dset.append(row=row_data)
return _dset
def stack_rows(self, other):
u"""Stack two :class:`Dataset` instances together by
joining at the row level, and return new combined
``Dataset`` instance."""
if not isinstance(other, Dataset):
return
if self.width != other.width:
raise InvalidDimensions
# Copy the source data
_dset = copy(self)
rows_to_stack = [row for row in _dset._data]
other_rows = [row for row in other._data]
rows_to_stack.extend(other_rows)
_dset._data = rows_to_stack
return _dset
def stack_columns(self, other):
u"""Stack two :class:`Dataset` instances together by
joining at the column level, and return a new
combined ``Dataset`` instance. If either ``Dataset``
has headers set, than the other must as well."""
if not isinstance(other, Dataset):
return
if self.headers or other.headers:
if not self.headers or not other.headers:
raise HeadersNeeded
if self.height != other.height:
raise InvalidDimensions
try:
new_headers = self.headers + other.headers
except TypeError:
new_headers = None
_dset = Dataset()
for column in self.headers:
_dset.append(col=self[column])
for column in other.headers:
_dset.append(col=other[column])
_dset.headers = new_headers
return _dset
def wipe(self):
u"""Removes all content and headers from the :class:`Dataset` object."""
self._data = list()
self.__headers = None
class Databook(object):
u"""A book of :class:`Dataset` objects.
"""
def __init__(self, sets=None):
if sets is None:
self._datasets = list()
else:
self._datasets = sets
self._register_formats()
def __repr__(self):
try:
return u'<%s databook>' % (self.title.lower())
except AttributeError:
return u'<databook object>'
def wipe(self):
u"""Removes all :class:`Dataset` objects from the :class:`Databook`."""
self._datasets = []
@classmethod
def _register_formats(cls):
u"""Adds format properties."""
for fmt in formats.available:
try:
try:
setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book))
except AttributeError:
setattr(cls, fmt.title, property(fmt.export_book))
except AttributeError:
pass
def add_sheet(self, dataset):
u"""Adds given :class:`Dataset` to the :class:`Databook`."""
if type(dataset) is Dataset:
self._datasets.append(dataset)
else:
raise InvalidDatasetType
def _package(self):
u"""Packages :class:`Databook` for delivery."""
collector = []
for dset in self._datasets:
collector.append(OrderedDict(
title = dset.title,
data = dset.dict
))
return collector
@property
def size(self):
u"""The number of the :class:`Dataset` objects within :class:`Databook`."""
return len(self._datasets)
def detect(stream):
u"""Return (format, stream) of given stream."""
for fmt in formats.available:
try:
if fmt.detect(stream):
return (fmt, stream)
except AttributeError:
pass
return (None, stream)
def import_set(stream):
u"""Return dataset of given stream."""
(format, stream) = detect(stream)
try:
data = Dataset()
format.import_set(data, stream)
return data
except AttributeError, e:
return None
class InvalidDatasetType(Exception):
u"Only Datasets can be added to a DataBook"
class InvalidDimensions(Exception):
u"Invalid size"
class InvalidDatasetIndex(Exception):
u"Outside of Dataset size"
class HeadersNeeded(Exception):
u"Header parameter must be given when appending a column in this Dataset."
class UnsupportedFormat(NotImplementedError):
u"Format is not supported"
+6 -6
View File
@@ -3,11 +3,11 @@
""" Tablib - formats
"""
import _csv as csv
import _json as json
import _xls as xls
import _yaml as yaml
import _tsv as tsv
import _html as html
from . import _csv as csv
from . import _json as json
from . import _xls as xls
from . import _yaml as yaml
from . import _tsv as tsv
from . import _html as html
available = (json, xls, yaml, csv, tsv, html)
+9 -3
View File
@@ -3,7 +3,13 @@
""" Tablib - CSV Support.
"""
import cStringIO
import sys
if sys.version_info[0] > 2:
from io import StringIO
else:
from cStringIO import StringIO
import csv
import os
@@ -17,7 +23,7 @@ extentions = ('csv',)
def export_set(dataset):
"""Returns CSV representation of Dataset."""
stream = cStringIO.StringIO()
stream = StringIO()
_csv = csv.writer(stream)
for row in dataset._package(dicts=False):
@@ -31,7 +37,7 @@ def import_set(dset, in_stream, headers=True):
dset.wipe()
rows = csv.reader(in_stream.split())
rows = csv.reader(in_stream.splitlines())
for i, row in enumerate(rows):
if (i == 0) and (headers):
+9 -2
View File
@@ -3,9 +3,16 @@
""" Tablib - HTML export support.
"""
from StringIO import StringIO
import sys
if sys.version_info[0] > 2:
from io import StringIO
from tablib.packages import markup3 as markup
else:
from cStringIO import StringIO
from tablib.packages import markup
from tablib.packages import markup
import tablib
BOOK_ENDINGS = 'h3'
+8 -3
View File
@@ -3,8 +3,13 @@
""" Tablib - JSON Support
"""
import tablib.core
from tablib.packages import anyjson
import tablib
import sys
if sys.version_info[:2] > (2, 5):
from tablib.packages import anyjson
else:
from tablib.packages import anyjson25 as anyjson
@@ -34,7 +39,7 @@ def import_book(dbook, in_stream):
dbook.wipe()
for sheet in anyjson.deserialize(in_stream):
data = tablib.core.Dataset()
data = tablib.Dataset()
data.title = sheet['title']
data.dict = sheet['data']
dbook.add_sheet(data)
+7 -2
View File
@@ -3,7 +3,12 @@
""" Tablib - TSV (Tab Separated Values) Support.
"""
import cStringIO
import sys
if sys.version_info[0] > 2:
from io import StringIO
else:
from cStringIO import StringIO
import csv
import os
@@ -17,7 +22,7 @@ extentions = ('tsv',)
def export_set(dataset):
"""Returns a TSV representation of Dataset."""
stream = cStringIO.StringIO()
stream = StringIO()
_tsv = csv.writer(stream, delimiter='\t')
for row in dataset._package(dicts=False):
+12 -7
View File
@@ -3,14 +3,19 @@
""" Tablib - XLS Support.
"""
import cStringIO
import sys
try:
import xlwt
except ImportError:
if sys.version_info[0] > 2:
from io import BytesIO
import tablib.packages.xlwt3 as xlwt
else:
from cStringIO import StringIO as BytesIO
import tablib.packages.xlwt as xlwt
title = 'xls'
extentions = ('xls',)
@@ -23,11 +28,11 @@ def export_set(dataset):
"""Returns XLS representation of Dataset."""
wb = xlwt.Workbook(encoding='utf8')
ws = wb.add_sheet(dataset.title if dataset.title else 'Tabbed Dataset')
ws = wb.add_sheet(dataset.title if dataset.title else 'Tablib Dataset')
dset_sheet(dataset, ws)
stream = cStringIO.StringIO()
stream = BytesIO()
wb.save(stream)
return stream.getvalue()
@@ -43,7 +48,7 @@ def export_book(databook):
dset_sheet(dset, ws)
stream = cStringIO.StringIO()
stream = BytesIO()
wb.save(stream)
return stream.getvalue()
+8 -2
View File
@@ -3,10 +3,16 @@
""" Tablib - YAML Support.
"""
import sys
try:
import yaml
except ImportError:
import tablib.packages.yaml as yaml
if sys.version_info[0] > 2:
import tablib.packages.yaml3 as yaml
else:
import tablib.packages.yaml as yaml
import tablib
@@ -40,7 +46,7 @@ def import_book(dbook, in_stream):
dbook.wipe()
for sheet in yaml.load(in_stream):
data = tablib.core.Dataset()
data = tablib.Dataset()
data.title = sheet['title']
data.dict = sheet['data']
dbook.add_sheet(data)
-37
View File
@@ -1,37 +0,0 @@
# -*- coding: utf-8 -*-
""" Tablib - General Helpers.
"""
import sys
class Struct(object):
"""Your attributes are belong to us."""
def __init__(self, **entries):
self.__dict__.update(entries)
def __getitem__(self, key):
return getattr(self, key, None)
def dictionary(self):
"""Returns dictionary representation of object."""
return self.__dict__
def items(self):
"""Returns items within object."""
return self.__dict__.items()
def keys(self):
"""Returns keys within object."""
return self.__dict__.keys()
def piped():
"""Returns piped input via stdin, else False."""
with sys.stdin as stdin:
# TTY is only way to detect if stdin contains data
return stdin.read() if not stdin.isatty() else None
+5 -5
View File
@@ -53,7 +53,7 @@ class _JsonImplementation(object):
"""Incapsulates a JSON implementation"""
def __init__(self, modspec):
modinfo = dict(zip(_fields, modspec))
modinfo = dict(list(zip(_fields, modspec)))
# No try block. We want importerror to end up at caller
module = self._attempt_load(modinfo["modname"])
@@ -64,9 +64,9 @@ class _JsonImplementation(object):
self._encode_error = modinfo["encerror"]
self._decode_error = modinfo["decerror"]
if isinstance(modinfo["encerror"], basestring):
if isinstance(modinfo["encerror"], str):
self._encode_error = getattr(module, modinfo["encerror"])
if isinstance(modinfo["decerror"], basestring):
if isinstance(modinfo["decerror"], str):
self._decode_error = getattr(module, modinfo["decerror"])
self.name = modinfo["modname"]
@@ -82,7 +82,7 @@ class _JsonImplementation(object):
TypeError if the object could not be serialized."""
try:
return self._encode(data)
except self._encode_error, exc:
except self._encode_error as exc:
raise TypeError(*exc.args)
def deserialize(self, s):
@@ -90,7 +90,7 @@ class _JsonImplementation(object):
ValueError if the string vould not be parsed."""
try:
return self._decode(s)
except self._decode_error, exc:
except self._decode_error as exc:
raise ValueError(*exc.args)
+118
View File
@@ -0,0 +1,118 @@
u"""
Wraps the best available JSON implementation available in a common interface
"""
__version__ = u"0.2.0"
__author__ = u"Rune Halvorsen <runefh@gmail.com>"
__homepage__ = u"http://bitbucket.org/runeh/anyjson/"
__docformat__ = u"restructuredtext"
u"""
.. function:: serialize(obj)
Serialize the object to JSON.
.. function:: deserialize(str)
Deserialize JSON-encoded object to a Python object.
.. function:: force_implementation(name)
Load a specific json module. This is useful for testing and not much else
.. attribute:: implementation
The json implementation object. This is probably not useful to you,
except to get the name of the implementation in use. The name is
available through `implementation.name`.
"""
import sys
from itertools import izip
implementation = None
u"""
.. data:: _modules
List of known json modules, and the names of their serialize/unserialize
methods, as well as the exception they throw. Exception can be either
an exception class or a string.
"""
_modules = [(u"cjson", u"encode", u"EncodeError", u"decode", u"DecodeError"),
(u"jsonlib2", u"write", u"WriteError", u"read", u"ReadError"),
(u"jsonlib", u"write", u"WriteError", u"read", u"ReadError"),
(u"simplejson", u"dumps", TypeError, u"loads", ValueError),
(u"json", u"dumps", TypeError, u"loads", ValueError),
(u"django.utils.simplejson", u"dumps", TypeError, u"loads",
ValueError)]
_fields = (u"modname", u"encoder", u"encerror", u"decoder", u"decerror")
class _JsonImplementation(object):
u"""Incapsulates a JSON implementation"""
def __init__(self, modspec):
modinfo = dict(list(izip(_fields, modspec)))
# No try block. We want importerror to end up at caller
module = self._attempt_load(modinfo[u"modname"])
self.implementation = modinfo[u"modname"]
self._encode = getattr(module, modinfo[u"encoder"])
self._decode = getattr(module, modinfo[u"decoder"])
self._encode_error = modinfo[u"encerror"]
self._decode_error = modinfo[u"decerror"]
if isinstance(modinfo[u"encerror"], unicode):
self._encode_error = getattr(module, modinfo[u"encerror"])
if isinstance(modinfo[u"decerror"], unicode):
self._decode_error = getattr(module, modinfo[u"decerror"])
self.name = modinfo[u"modname"]
def _attempt_load(self, modname):
u"""Attempt to load module name modname, returning it on success,
throwing ImportError if module couldn't be imported"""
__import__(modname)
return sys.modules[modname]
def serialize(self, data):
u"""Serialize the datastructure to json. Returns a string. Raises
TypeError if the object could not be serialized."""
try:
return self._encode(data)
except self._encode_error, exc:
raise TypeError(*exc.args)
def deserialize(self, s):
u"""deserialize the string to python data types. Raises
ValueError if the string vould not be parsed."""
try:
return self._decode(s)
except self._decode_error, exc:
raise ValueError(*exc.args)
def force_implementation(modname):
u"""Forces anyjson to use a specific json module if it's available"""
global implementation
for name, spec in [(e[0], e) for e in _modules]:
if name == modname:
implementation = _JsonImplementation(spec)
return
raise ImportError(u"No module named: %s" % modname)
for modspec in _modules:
try:
implementation = _JsonImplementation(modspec)
break
except ImportError:
pass
else:
raise ImportError(u"No supported JSON module found")
serialize = lambda value: implementation.serialize(value)
deserialize = lambda value: implementation.deserialize(value)
+484
View File
@@ -0,0 +1,484 @@
# This code is in the public domain, it comes
# with absolutely no warranty and you can do
# absolutely whatever you want with it.
__date__ = '17 May 2007'
__version__ = '1.7'
__doc__= """
This is markup.py - a Python module that attempts to
make it easier to generate HTML/XML from a Python program
in an intuitive, lightweight, customizable and pythonic way.
The code is in the public domain.
Version: %s as of %s.
Documentation and further info is at http://markup.sourceforge.net/
Please send bug reports, feature requests, enhancement
ideas or questions to nogradi at gmail dot com.
Installation: drop markup.py somewhere into your Python path.
""" % ( __version__, __date__ )
import string
class element:
"""This class handles the addition of a new element."""
def __init__( self, tag, case='lower', parent=None ):
self.parent = parent
if case == 'lower':
self.tag = tag.lower( )
else:
self.tag = tag.upper( )
def __call__( self, *args, **kwargs ):
if len( args ) > 1:
raise ArgumentError( self.tag )
# if class_ was defined in parent it should be added to every element
if self.parent is not None and self.parent.class_ is not None:
if 'class_' not in kwargs:
kwargs['class_'] = self.parent.class_
if self.parent is None and len( args ) == 1:
x = [ self.render( self.tag, False, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ]
return '\n'.join( x )
elif self.parent is None and len( args ) == 0:
x = [ self.render( self.tag, True, myarg, mydict ) for myarg, mydict in _argsdicts( args, kwargs ) ]
return '\n'.join( x )
if self.tag in self.parent.twotags:
for myarg, mydict in _argsdicts( args, kwargs ):
self.render( self.tag, False, myarg, mydict )
elif self.tag in self.parent.onetags:
if len( args ) == 0:
for myarg, mydict in _argsdicts( args, kwargs ):
self.render( self.tag, True, myarg, mydict ) # here myarg is always None, because len( args ) = 0
else:
raise ClosingError( self.tag )
elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
raise DeprecationError( self.tag )
else:
raise InvalidElementError( self.tag, self.parent.mode )
def render( self, tag, single, between, kwargs ):
"""Append the actual tags to content."""
out = "<%s" % tag
for key, value in kwargs.items( ):
if value is not None: # when value is None that means stuff like <... checked>
key = key.strip('_') # strip this so class_ will mean class, etc.
if key == 'http_equiv': # special cases, maybe change _ to - overall?
key = 'http-equiv'
elif key == 'accept_charset':
key = 'accept-charset'
out = "%s %s=\"%s\"" % ( out, key, escape( value ) )
else:
out = "%s %s" % ( out, key )
if between is not None:
out = "%s>%s</%s>" % ( out, between, tag )
else:
if single:
out = "%s />" % out
else:
out = "%s>" % out
if self.parent is not None:
self.parent.content.append( out )
else:
return out
def close( self ):
"""Append a closing tag unless element has only opening tag."""
if self.tag in self.parent.twotags:
self.parent.content.append( "</%s>" % self.tag )
elif self.tag in self.parent.onetags:
raise ClosingError( self.tag )
elif self.parent.mode == 'strict_html' and self.tag in self.parent.deptags:
raise DeprecationError( self.tag )
def open( self, **kwargs ):
"""Append an opening tag."""
if self.tag in self.parent.twotags or self.tag in self.parent.onetags:
self.render( self.tag, False, None, kwargs )
elif self.mode == 'strict_html' and self.tag in self.parent.deptags:
raise DeprecationError( self.tag )
class page:
"""This is our main class representing a document. Elements are added
as attributes of an instance of this class."""
def __init__( self, mode='strict_html', case='lower', onetags=None, twotags=None, separator='\n', class_=None ):
"""Stuff that effects the whole document.
mode -- 'strict_html' for HTML 4.01 (default)
'html' alias for 'strict_html'
'loose_html' to allow some deprecated elements
'xml' to allow arbitrary elements
case -- 'lower' element names will be printed in lower case (default)
'upper' they will be printed in upper case
onetags -- list or tuple of valid elements with opening tags only
twotags -- list or tuple of valid elements with both opening and closing tags
these two keyword arguments may be used to select
the set of valid elements in 'xml' mode
invalid elements will raise appropriate exceptions
separator -- string to place between added elements, defaults to newline
class_ -- a class that will be added to every element if defined"""
valid_onetags = [ "AREA", "BASE", "BR", "COL", "FRAME", "HR", "IMG", "INPUT", "LINK", "META", "PARAM" ]
valid_twotags = [ "A", "ABBR", "ACRONYM", "ADDRESS", "B", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BUTTON",
"CAPTION", "CITE", "CODE", "COLGROUP", "DD", "DEL", "DFN", "DIV", "DL", "DT", "EM", "FIELDSET",
"FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HTML", "I", "IFRAME", "INS",
"KBD", "LABEL", "LEGEND", "LI", "MAP", "NOFRAMES", "NOSCRIPT", "OBJECT", "OL", "OPTGROUP",
"OPTION", "P", "PRE", "Q", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRONG", "STYLE",
"SUB", "SUP", "TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR",
"TT", "UL", "VAR" ]
deprecated_onetags = [ "BASEFONT", "ISINDEX" ]
deprecated_twotags = [ "APPLET", "CENTER", "DIR", "FONT", "MENU", "S", "STRIKE", "U" ]
self.header = [ ]
self.content = [ ]
self.footer = [ ]
self.case = case
self.separator = separator
# init( ) sets it to True so we know that </body></html> has to be printed at the end
self._full = False
self.class_= class_
if mode == 'strict_html' or mode == 'html':
self.onetags = valid_onetags
self.onetags += list(map( str.lower, self.onetags ))
self.twotags = valid_twotags
self.twotags += list(map( str.lower, self.twotags ))
self.deptags = deprecated_onetags + deprecated_twotags
self.deptags += list(map( str.lower, self.deptags ))
self.mode = 'strict_html'
elif mode == 'loose_html':
self.onetags = valid_onetags + deprecated_onetags
self.onetags += list(map( str.lower, self.onetags ))
self.twotags = valid_twotags + deprecated_twotags
self.twotags += list(map( str.lower, self.twotags ))
self.mode = mode
elif mode == 'xml':
if onetags and twotags:
self.onetags = onetags
self.twotags = twotags
elif ( onetags and not twotags ) or ( twotags and not onetags ):
raise CustomizationError( )
else:
self.onetags = russell( )
self.twotags = russell( )
self.mode = mode
else:
raise ModeError( mode )
def __getattr__( self, attr ):
if attr.startswith("__") and attr.endswith("__"):
raise AttributeError(attr)
return element( attr, case=self.case, parent=self )
def __str__( self ):
if self._full and ( self.mode == 'strict_html' or self.mode == 'loose_html' ):
end = [ '</body>', '</html>' ]
else:
end = [ ]
return self.separator.join( self.header + self.content + self.footer + end )
def __call__( self, escape=False ):
"""Return the document as a string.
escape -- False print normally
True replace < and > by &lt; and &gt;
the default escape sequences in most browsers"""
if escape:
return _escape( self.__str__( ) )
else:
return self.__str__( )
def add( self, text ):
"""This is an alias to addcontent."""
self.addcontent( text )
def addfooter( self, text ):
"""Add some text to the bottom of the document"""
self.footer.append( text )
def addheader( self, text ):
"""Add some text to the top of the document"""
self.header.append( text )
def addcontent( self, text ):
"""Add some text to the main part of the document"""
self.content.append( text )
def init( self, lang='en', css=None, metainfo=None, title=None, header=None,
footer=None, charset=None, encoding=None, doctype=None, bodyattrs=None, script=None ):
"""This method is used for complete documents with appropriate
doctype, encoding, title, etc information. For an HTML/XML snippet
omit this method.
lang -- language, usually a two character string, will appear
as <html lang='en'> in html mode (ignored in xml mode)
css -- Cascading Style Sheet filename as a string or a list of
strings for multiple css files (ignored in xml mode)
metainfo -- a dictionary in the form { 'name':'content' } to be inserted
into meta element(s) as <meta name='name' content='content'>
(ignored in xml mode)
bodyattrs --a dictionary in the form { 'key':'value', ... } which will be added
as attributes of the <body> element as <body key='value' ... >
(ignored in xml mode)
script -- dictionary containing src:type pairs, <script type='text/type' src=src></script>
title -- the title of the document as a string to be inserted into
a title element as <title>my title</title> (ignored in xml mode)
header -- some text to be inserted right after the <body> element
(ignored in xml mode)
footer -- some text to be inserted right before the </body> element
(ignored in xml mode)
charset -- a string defining the character set, will be inserted into a
<meta http-equiv='Content-Type' content='text/html; charset=myset'>
element (ignored in xml mode)
encoding -- a string defining the encoding, will be put into to first line of
the document as <?xml version='1.0' encoding='myencoding' ?> in
xml mode (ignored in html mode)
doctype -- the document type string, defaults to
<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>
in html mode (ignored in xml mode)"""
self._full = True
if self.mode == 'strict_html' or self.mode == 'loose_html':
if doctype is None:
doctype = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN'>"
self.header.append( doctype )
self.html( lang=lang )
self.head( )
if charset is not None:
self.meta( http_equiv='Content-Type', content="text/html; charset=%s" % charset )
if metainfo is not None:
self.metainfo( metainfo )
if css is not None:
self.css( css )
if title is not None:
self.title( title )
if script is not None:
self.scripts( script )
self.head.close()
if bodyattrs is not None:
self.body( **bodyattrs )
else:
self.body( )
if header is not None:
self.content.append( header )
if footer is not None:
self.footer.append( footer )
elif self.mode == 'xml':
if doctype is None:
if encoding is not None:
doctype = "<?xml version='1.0' encoding='%s' ?>" % encoding
else:
doctype = "<?xml version='1.0' ?>"
self.header.append( doctype )
def css( self, filelist ):
"""This convenience function is only useful for html.
It adds css stylesheet(s) to the document via the <link> element."""
if isinstance( filelist, str ):
self.link( href=filelist, rel='stylesheet', type='text/css', media='all' )
else:
for file in filelist:
self.link( href=file, rel='stylesheet', type='text/css', media='all' )
def metainfo( self, mydict ):
"""This convenience function is only useful for html.
It adds meta information via the <meta> element, the argument is
a dictionary of the form { 'name':'content' }."""
if isinstance( mydict, dict ):
for name, content in mydict.items( ):
self.meta( name=name, content=content )
else:
raise TypeError("Metainfo should be called with a dictionary argument of name:content pairs.")
def scripts( self, mydict ):
"""Only useful in html, mydict is dictionary of src:type pairs will
be rendered as <script type='text/type' src=src></script>"""
if isinstance( mydict, dict ):
for src, type in mydict.items( ):
self.script( '', src=src, type='text/%s' % type )
else:
raise TypeError("Script should be given a dictionary of src:type pairs.")
class _oneliner:
"""An instance of oneliner returns a string corresponding to one element.
This class can be used to write 'oneliners' that return a string
immediately so there is no need to instantiate the page class."""
def __init__( self, case='lower' ):
self.case = case
def __getattr__( self, attr ):
if attr.startswith("__") and attr.endswith("__"):
raise AttributeError(attr)
return element( attr, case=self.case, parent=None )
oneliner = _oneliner( case='lower' )
upper_oneliner = _oneliner( case='upper' )
def _argsdicts( args, mydict ):
"""A utility generator that pads argument list and dictionary values, will only be called with len( args ) = 0, 1."""
if len( args ) == 0:
args = None,
elif len( args ) == 1:
args = _totuple( args[0] )
else:
raise Exception("We should have never gotten here.")
mykeys = list(mydict.keys( ))
myvalues = list(map( _totuple, list(mydict.values( )) ))
maxlength = max( list(map( len, [ args ] + myvalues )) )
for i in range( maxlength ):
thisdict = { }
for key, value in zip( mykeys, myvalues ):
try:
thisdict[ key ] = value[i]
except IndexError:
thisdict[ key ] = value[-1]
try:
thisarg = args[i]
except IndexError:
thisarg = args[-1]
yield thisarg, thisdict
def _totuple( x ):
"""Utility stuff to convert string, int, float, None or anything to a usable tuple."""
if isinstance( x, str ):
out = x,
elif isinstance( x, ( int, float ) ):
out = str( x ),
elif x is None:
out = None,
else:
out = tuple( x )
return out
def escape( text, newline=False ):
"""Escape special html characters."""
if isinstance( text, str ):
if '&' in text:
text = text.replace( '&', '&amp;' )
if '>' in text:
text = text.replace( '>', '&gt;' )
if '<' in text:
text = text.replace( '<', '&lt;' )
if '\"' in text:
text = text.replace( '\"', '&quot;' )
if '\'' in text:
text = text.replace( '\'', '&quot;' )
if newline:
if '\n' in text:
text = text.replace( '\n', '<br>' )
return text
_escape = escape
def unescape( text ):
"""Inverse of escape."""
if isinstance( text, str ):
if '&amp;' in text:
text = text.replace( '&amp;', '&' )
if '&gt;' in text:
text = text.replace( '&gt;', '>' )
if '&lt;' in text:
text = text.replace( '&lt;', '<' )
if '&quot;' in text:
text = text.replace( '&quot;', '\"' )
return text
class dummy:
"""A dummy class for attaching attributes."""
pass
doctype = dummy( )
doctype.frameset = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Frameset//EN' 'http://www.w3.org/TR/html4/frameset.dtd'>"
doctype.strict = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01//EN' 'http://www.w3.org/TR/html4/strict.dtd'>"
doctype.loose = "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01 Transitional//EN' 'http://www.w3.org/TR/html4/loose.dtd'>"
class russell:
"""A dummy class that contains anything."""
def __contains__( self, item ):
return True
class MarkupError( Exception ):
"""All our exceptions subclass this."""
def __str__( self ):
return self.message
class ClosingError( MarkupError ):
def __init__( self, tag ):
self.message = "The element '%s' does not accept non-keyword arguments (has no closing tag)." % tag
class OpeningError( MarkupError ):
def __init__( self, tag ):
self.message = "The element '%s' can not be opened." % tag
class ArgumentError( MarkupError ):
def __init__( self, tag ):
self.message = "The element '%s' was called with more than one non-keyword argument." % tag
class InvalidElementError( MarkupError ):
def __init__( self, tag, mode ):
self.message = "The element '%s' is not valid for your mode '%s'." % ( tag, mode )
class DeprecationError( MarkupError ):
def __init__( self, tag ):
self.message = "The element '%s' is deprecated, instantiate markup.page with mode='loose_html' to allow it." % tag
class ModeError( MarkupError ):
def __init__( self, mode ):
self.message = "Mode '%s' is invalid, possible values: strict_html, loose_html, xml." % mode
class CustomizationError( MarkupError ):
def __init__( self ):
self.message = "If you customize the allowed elements, you must define both types 'onetags' and 'twotags'."
if __name__ == '__main__':
print(__doc__)
+127 -127
View File
@@ -1,127 +1,127 @@
# Copyright (c) 2009 Raymond Hettinger
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
from UserDict import DictMixin
class OrderedDict(dict, DictMixin):
def __init__(self, *args, **kwds):
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
self.__end
except AttributeError:
self.clear()
self.update(*args, **kwds)
def clear(self):
self.__end = end = []
end += [None, end, end] # sentinel node for doubly linked list
self.__map = {} # key --> [key, prev, next]
dict.clear(self)
def __setitem__(self, key, value):
if key not in self:
end = self.__end
curr = end[1]
curr[2] = end[1] = self.__map[key] = [key, curr, end]
dict.__setitem__(self, key, value)
def __delitem__(self, key):
dict.__delitem__(self, key)
key, prev, next = self.__map.pop(key)
prev[2] = next
next[1] = prev
def __iter__(self):
end = self.__end
curr = end[2]
while curr is not end:
yield curr[0]
curr = curr[2]
def __reversed__(self):
end = self.__end
curr = end[1]
while curr is not end:
yield curr[0]
curr = curr[1]
def popitem(self, last=True):
if not self:
raise KeyError('dictionary is empty')
if last:
key = reversed(self).next()
else:
key = iter(self).next()
value = self.pop(key)
return key, value
def __reduce__(self):
items = [[k, self[k]] for k in self]
tmp = self.__map, self.__end
del self.__map, self.__end
inst_dict = vars(self).copy()
self.__map, self.__end = tmp
if inst_dict:
return (self.__class__, (items,), inst_dict)
return self.__class__, (items,)
def keys(self):
return list(self)
setdefault = DictMixin.setdefault
update = DictMixin.update
pop = DictMixin.pop
values = DictMixin.values
items = DictMixin.items
iterkeys = DictMixin.iterkeys
itervalues = DictMixin.itervalues
iteritems = DictMixin.iteritems
def __repr__(self):
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, self.items())
def copy(self):
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
def __eq__(self, other):
if isinstance(other, OrderedDict):
if len(self) != len(other):
return False
for p, q in zip(self.items(), other.items()):
if p != q:
return False
return True
return dict.__eq__(self, other)
def __ne__(self, other):
return not self == other
# Copyright (c) 2009 Raymond Hettinger
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
from UserDict import DictMixin
class OrderedDict(dict, DictMixin):
def __init__(self, *args, **kwds):
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
self.__end
except AttributeError:
self.clear()
self.update(*args, **kwds)
def clear(self):
self.__end = end = []
end += [None, end, end] # sentinel node for doubly linked list
self.__map = {} # key --> [key, prev, next]
dict.clear(self)
def __setitem__(self, key, value):
if key not in self:
end = self.__end
curr = end[1]
curr[2] = end[1] = self.__map[key] = [key, curr, end]
dict.__setitem__(self, key, value)
def __delitem__(self, key):
dict.__delitem__(self, key)
key, prev, next = self.__map.pop(key)
prev[2] = next
next[1] = prev
def __iter__(self):
end = self.__end
curr = end[2]
while curr is not end:
yield curr[0]
curr = curr[2]
def __reversed__(self):
end = self.__end
curr = end[1]
while curr is not end:
yield curr[0]
curr = curr[1]
def popitem(self, last=True):
if not self:
raise KeyError('dictionary is empty')
if last:
key = next(reversed(self))
else:
key = next(iter(self))
value = self.pop(key)
return key, value
def __reduce__(self):
items = [[k, self[k]] for k in self]
tmp = self.__map, self.__end
del self.__map, self.__end
inst_dict = vars(self).copy()
self.__map, self.__end = tmp
if inst_dict:
return (self.__class__, (items,), inst_dict)
return self.__class__, (items,)
def keys(self):
return list(self)
setdefault = DictMixin.setdefault
update = DictMixin.update
pop = DictMixin.pop
values = DictMixin.values
items = DictMixin.items
iterkeys = DictMixin.iterkeys
itervalues = DictMixin.itervalues
iteritems = DictMixin.iteritems
def __repr__(self):
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, list(self.items()))
def copy(self):
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
def __eq__(self, other):
if isinstance(other, OrderedDict):
if len(self) != len(other):
return False
for p, q in zip(list(self.items()), list(other.items())):
if p != q:
return False
return True
return dict.__eq__(self, other)
def __ne__(self, other):
return not self == other
-3
View File
@@ -3,9 +3,6 @@
__VERSION__ = '0.7.2'
import sys
if sys.version_info[:2] < (2, 3):
print >> sys.stderr, "Sorry, xlwt requires Python 2.3 or later"
sys.exit(1)
from Workbook import Workbook
from Worksheet import Worksheet
File diff suppressed because it is too large Load Diff
+258
View File
@@ -0,0 +1,258 @@
# Portions are Copyright (C) 2005 Roman V. Kiseliov
# Portions are Copyright (c) 2004 Evgeny Filatov <fufff@users.sourceforge.net>
# Portions are Copyright (c) 2002-2004 John McNamara (Perl Spreadsheet::WriteExcel)
from .BIFFRecords import BiffRecord
from struct import *
def _size_col(sheet, col):
return sheet.col_width(col)
def _size_row(sheet, row):
return sheet.row_height(row)
def _position_image(sheet, row_start, col_start, x1, y1, width, height):
"""Calculate the vertices that define the position of the image as required by
the OBJ record.
+------------+------------+
| A | B |
+-----+------------+------------+
| |(x1,y1) | |
| 1 |(A1)._______|______ |
| | | | |
| | | | |
+-----+----| BITMAP |-----+
| | | | |
| 2 | |______________. |
| | | (B2)|
| | | (x2,y2)|
+---- +------------+------------+
Example of a bitmap that covers some of the area from cell A1 to cell B2.
Based on the width and height of the bitmap we need to calculate 8 vars:
col_start, row_start, col_end, row_end, x1, y1, x2, y2.
The width and height of the cells are also variable and have to be taken into
account.
The values of col_start and row_start are passed in from the calling
function. The values of col_end and row_end are calculated by subtracting
the width and height of the bitmap from the width and height of the
underlying cells.
The vertices are expressed as a percentage of the underlying cell width as
follows (rhs values are in pixels):
x1 = X / W *1024
y1 = Y / H *256
x2 = (X-1) / W *1024
y2 = (Y-1) / H *256
Where: X is distance from the left side of the underlying cell
Y is distance from the top of the underlying cell
W is the width of the cell
H is the height of the cell
Note: the SDK incorrectly states that the height should be expressed as a
percentage of 1024.
col_start - Col containing upper left corner of object
row_start - Row containing top left corner of object
x1 - Distance to left side of object
y1 - Distance to top of object
width - Width of image frame
height - Height of image frame
"""
# Adjust start column for offsets that are greater than the col width
while x1 >= _size_col(sheet, col_start):
x1 -= _size_col(sheet, col_start)
col_start += 1
# Adjust start row for offsets that are greater than the row height
while y1 >= _size_row(sheet, row_start):
y1 -= _size_row(sheet, row_start)
row_start += 1
# Initialise end cell to the same as the start cell
row_end = row_start # Row containing bottom right corner of object
col_end = col_start # Col containing lower right corner of object
width = width + x1 - 1
height = height + y1 - 1
# Subtract the underlying cell widths to find the end cell of the image
while (width >= _size_col(sheet, col_end)):
width -= _size_col(sheet, col_end)
col_end += 1
# Subtract the underlying cell heights to find the end cell of the image
while (height >= _size_row(sheet, row_end)):
height -= _size_row(sheet, row_end)
row_end += 1
# Bitmap isn't allowed to start or finish in a hidden cell, i.e. a cell
# with zero height or width.
if ((_size_col(sheet, col_start) == 0) or (_size_col(sheet, col_end) == 0)
or (_size_row(sheet, row_start) == 0) or (_size_row(sheet, row_end) == 0)):
return
# Convert the pixel values to the percentage value expected by Excel
x1 = int(float(x1) / _size_col(sheet, col_start) * 1024)
y1 = int(float(y1) / _size_row(sheet, row_start) * 256)
# Distance to right side of object
x2 = int(float(width) / _size_col(sheet, col_end) * 1024)
# Distance to bottom of object
y2 = int(float(height) / _size_row(sheet, row_end) * 256)
return (col_start, x1, row_start, y1, col_end, x2, row_end, y2)
class ObjBmpRecord(BiffRecord):
_REC_ID = 0x005D # Record identifier
def __init__(self, row, col, sheet, im_data_bmp, x, y, scale_x, scale_y):
# Scale the frame of the image.
width = im_data_bmp.width * scale_x
height = im_data_bmp.height * scale_y
# Calculate the vertices of the image and write the OBJ record
coordinates = _position_image(sheet, row, col, x, y, width, height)
# print coordinates
col_start, x1, row_start, y1, col_end, x2, row_end, y2 = coordinates
"""Store the OBJ record that precedes an IMDATA record. This could be generalise
to support other Excel objects.
"""
cObj = 0x0001 # Count of objects in file (set to 1)
OT = 0x0008 # Object type. 8 = Picture
id = 0x0001 # Object ID
grbit = 0x0614 # Option flags
colL = col_start # Col containing upper left corner of object
dxL = x1 # Distance from left side of cell
rwT = row_start # Row containing top left corner of object
dyT = y1 # Distance from top of cell
colR = col_end # Col containing lower right corner of object
dxR = x2 # Distance from right of cell
rwB = row_end # Row containing bottom right corner of object
dyB = y2 # Distance from bottom of cell
cbMacro = 0x0000 # Length of FMLA structure
Reserved1 = 0x0000 # Reserved
Reserved2 = 0x0000 # Reserved
icvBack = 0x09 # Background colour
icvFore = 0x09 # Foreground colour
fls = 0x00 # Fill pattern
fAuto = 0x00 # Automatic fill
icv = 0x08 # Line colour
lns = 0xff # Line style
lnw = 0x01 # Line weight
fAutoB = 0x00 # Automatic border
frs = 0x0000 # Frame style
cf = 0x0009 # Image format, 9 = bitmap
Reserved3 = 0x0000 # Reserved
cbPictFmla = 0x0000 # Length of FMLA structure
Reserved4 = 0x0000 # Reserved
grbit2 = 0x0001 # Option flags
Reserved5 = 0x0000 # Reserved
data = pack("<L", cObj)
data += pack("<H", OT)
data += pack("<H", id)
data += pack("<H", grbit)
data += pack("<H", colL)
data += pack("<H", dxL)
data += pack("<H", rwT)
data += pack("<H", dyT)
data += pack("<H", colR)
data += pack("<H", dxR)
data += pack("<H", rwB)
data += pack("<H", dyB)
data += pack("<H", cbMacro)
data += pack("<L", Reserved1)
data += pack("<H", Reserved2)
data += pack("<B", icvBack)
data += pack("<B", icvFore)
data += pack("<B", fls)
data += pack("<B", fAuto)
data += pack("<B", icv)
data += pack("<B", lns)
data += pack("<B", lnw)
data += pack("<B", fAutoB)
data += pack("<H", frs)
data += pack("<L", cf)
data += pack("<H", Reserved3)
data += pack("<H", cbPictFmla)
data += pack("<H", Reserved4)
data += pack("<H", grbit2)
data += pack("<L", Reserved5)
self._rec_data = data
def _process_bitmap(bitmap):
"""Convert a 24 bit bitmap into the modified internal format used by Windows.
This is described in BITMAPCOREHEADER and BITMAPCOREINFO structures in the
MSDN library.
"""
# Open file and binmode the data in case the platform needs it.
fh = open(bitmap, 'rb')
try:
# Slurp the file into a string.
data = fh.read()
finally:
fh.close()
# Check that the file is big enough to be a bitmap.
if len(data) <= 0x36:
raise Exception("bitmap doesn't contain enough data.")
# The first 2 bytes are used to identify the bitmap.
if (data[:2] != b"BM"):
raise Exception("bitmap doesn't appear to to be a valid bitmap image.")
# Remove bitmap data: ID.
data = data[2:]
# Read and remove the bitmap size. This is more reliable than reading
# the data size at offset 0x22.
#
size = unpack("<L", data[:4])[0]
size -= 0x36 # Subtract size of bitmap header.
size += 0x0C # Add size of BIFF header.
data = data[4:]
# Remove bitmap data: reserved, offset, header length.
data = data[12:]
# Read and remove the bitmap width and height. Verify the sizes.
width, height = unpack("<LL", data[:8])
data = data[8:]
if (width > 0xFFFF):
raise Exception("bitmap: largest image width supported is 65k.")
if (height > 0xFFFF):
raise Exception("bitmap: largest image height supported is 65k.")
# Read and remove the bitmap planes and bpp data. Verify them.
planes, bitcount = unpack("<HH", data[:4])
data = data[4:]
if (bitcount != 24):
raise Exception("bitmap isn't a 24bit true color bitmap.")
if (planes != 1):
raise Exception("bitmap: only 1 plane supported in bitmap image.")
# Read and remove the bitmap compression. Verify compression.
compression = unpack("<L", data[:4])[0]
data = data[4:]
if (compression != 0):
raise Exception("bitmap: compression not supported in bitmap image.")
# Remove bitmap data: data size, hres, vres, colours, imp. colours.
data = data[20:]
# Add the BITMAPCOREHEADER data
header = pack("<LHHHH", 0x000c, width, height, 0x01, 0x18)
data = header + data
return (width, height, size, data)
class ImDataBmpRecord(BiffRecord):
_REC_ID = 0x007F
def __init__(self, filename):
"""Insert a 24bit bitmap image in a worksheet. The main record required is
IMDATA but it must be proceeded by a OBJ record to define its position.
"""
BiffRecord.__init__(self)
self.width, self.height, self.size, data = _process_bitmap(filename)
# Write the IMDATA record to store the bitmap data
cf = 0x09
env = 0x01
lcb = self.size
self._rec_data = pack("<HHL", cf, env, lcb) + data
+233
View File
@@ -0,0 +1,233 @@
from struct import unpack, pack
from . import BIFFRecords
class StrCell(object):
__slots__ = ["rowx", "colx", "xf_idx", "sst_idx"]
def __init__(self, rowx, colx, xf_idx, sst_idx):
self.rowx = rowx
self.colx = colx
self.xf_idx = xf_idx
self.sst_idx = sst_idx
def get_biff_data(self):
return pack('<5HL', 0x00FD, 10, self.rowx, self.colx, self.xf_idx, self.sst_idx)
class BlankCell(object):
__slots__ = ["rowx", "colx", "xf_idx"]
def __init__(self, rowx, colx, xf_idx):
self.rowx = rowx
self.colx = colx
self.xf_idx = xf_idx
def get_biff_data(self):
return pack('<5H', 0x0201, 6, self.rowx, self.colx, self.xf_idx)
class MulBlankCell(object):
__slots__ = ["rowx", "colx1", "colx2", "xf_idx"]
def __init__(self, rowx, colx1, colx2, xf_idx):
self.rowx = rowx
self.colx1 = colx1
self.colx2 = colx2
self.xf_idx = xf_idx
def get_biff_data(self):
return BIFFRecords.MulBlankRecord(self.rowx,
self.colx1, self.colx2, self.xf_idx).get()
class NumberCell(object):
__slots__ = ["rowx", "colx", "xf_idx", "number"]
def __init__(self, rowx, colx, xf_idx, number):
self.rowx = rowx
self.colx = colx
self.xf_idx = xf_idx
self.number = float(number)
def get_encoded_data(self):
rk_encoded = 0
num = self.number
# The four possible kinds of RK encoding are *not* mutually exclusive.
# The 30-bit integer variety picks up the most.
# In the code below, the four varieties are checked in descending order
# of bangs per buck, or not at all.
# SJM 2007-10-01
if -0x20000000 <= num < 0x20000000: # fits in 30-bit *signed* int
inum = int(num)
if inum == num: # survives round-trip
rk_encoded = 2 | (inum << 2)
return 1, rk_encoded
temp = num * 100
if -0x20000000 <= temp < 0x20000000:
# That was step 1: the coded value will fit in
# a 30-bit signed integer.
itemp = int(round(temp, 0))
# That was step 2: "itemp" is the best candidate coded value.
# Now for step 3: simulate the decoding,
# to check for round-trip correctness.
if itemp / 100.0 == num:
rk_encoded = 3 | (itemp << 2)
return 1, rk_encoded
if 0: # Cost of extra pack+unpack not justified by tiny yield.
packed = pack('<d', num)
w01, w23 = unpack('<2i', packed)
if not w01 and not(w23 & 3):
return 1, w23
packed100 = pack('<d', temp)
w01, w23 = unpack('<2i', packed100)
if not w01 and not(w23 & 3):
return 1, w23 | 1
return 0, pack('<5Hd', 0x0203, 14, self.rowx, self.colx, self.xf_idx, num)
def get_biff_data(self):
isRK, value = self.get_encoded_data()
if isRK:
return pack('<5Hi', 0x27E, 10, self.rowx, self.colx, self.xf_idx, value)
return value # NUMBER record already packed
class BooleanCell(object):
__slots__ = ["rowx", "colx", "xf_idx", "number"]
def __init__(self, rowx, colx, xf_idx, number):
self.rowx = rowx
self.colx = colx
self.xf_idx = xf_idx
self.number = number
def get_biff_data(self):
return BIFFRecords.BoolErrRecord(self.rowx,
self.colx, self.xf_idx, self.number, 0).get()
error_code_map = {
0x00: 0, # Intersection of two cell ranges is empty
0x07: 7, # Division by zero
0x0F: 15, # Wrong type of operand
0x17: 23, # Illegal or deleted cell reference
0x1D: 29, # Wrong function or range name
0x24: 36, # Value range overflow
0x2A: 42, # Argument or function not available
'#NULL!' : 0, # Intersection of two cell ranges is empty
'#DIV/0!': 7, # Division by zero
'#VALUE!': 36, # Wrong type of operand
'#REF!' : 23, # Illegal or deleted cell reference
'#NAME?' : 29, # Wrong function or range name
'#NUM!' : 36, # Value range overflow
'#N/A!' : 42, # Argument or function not available
}
class ErrorCell(object):
__slots__ = ["rowx", "colx", "xf_idx", "number"]
def __init__(self, rowx, colx, xf_idx, error_string_or_code):
self.rowx = rowx
self.colx = colx
self.xf_idx = xf_idx
try:
self.number = error_code_map[error_string_or_code]
except KeyError:
raise Exception('Illegal error value (%r)' % error_string_or_code)
def get_biff_data(self):
return BIFFRecords.BoolErrRecord(self.rowx,
self.colx, self.xf_idx, self.number, 1).get()
class FormulaCell(object):
__slots__ = ["rowx", "colx", "xf_idx", "frmla", "calc_flags"]
def __init__(self, rowx, colx, xf_idx, frmla, calc_flags=0):
self.rowx = rowx
self.colx = colx
self.xf_idx = xf_idx
self.frmla = frmla
self.calc_flags = calc_flags
def get_biff_data(self):
return BIFFRecords.FormulaRecord(self.rowx,
self.colx, self.xf_idx, self.frmla.rpn(), self.calc_flags).get()
# module-level function for *internal* use by the Row module
def _get_cells_biff_data_mul(rowx, cell_items):
# Return the BIFF data for all cell records in the row.
# Adjacent BLANK|RK records are combined into MUL(BLANK|RK) records.
pieces = []
nitems = len(cell_items)
i = 0
while i < nitems:
icolx, icell = cell_items[i]
if isinstance(icell, NumberCell):
isRK, value = icell.get_encoded_data()
if not isRK:
pieces.append(value) # pre-packed NUMBER record
i += 1
continue
muldata = [(value, icell.xf_idx)]
target = NumberCell
elif isinstance(icell, BlankCell):
muldata = [icell.xf_idx]
target = BlankCell
else:
pieces.append(icell.get_biff_data())
i += 1
continue
lastcolx = icolx
j = i
packed_record = b'' # (to_py3): 'b' binary data
for j in range(i+1, nitems):
jcolx, jcell = cell_items[j]
if jcolx != lastcolx + 1:
nexti = j
break
if not isinstance(jcell, target):
nexti = j
break
if target == NumberCell:
isRK, value = jcell.get_encoded_data()
if not isRK:
packed_record = value
nexti = j + 1
break
muldata.append((value, jcell.xf_idx))
else:
muldata.append(jcell.xf_idx)
lastcolx = jcolx
else:
nexti = j + 1
if target == NumberCell:
if lastcolx == icolx:
# RK record
value, xf_idx = muldata[0]
pieces.append(pack('<5Hi', 0x027E, 10, rowx, icolx, xf_idx, value))
else:
# MULRK record
nc = lastcolx - icolx + 1
pieces.append(pack('<4H', 0x00BD, 6 * nc + 6, rowx, icolx))
# (to_py3): 'b' binary data
pieces.append(b''.join([pack('<Hi', xf_idx, value) for value, xf_idx in muldata]))
pieces.append(pack('<H', lastcolx))
else:
if lastcolx == icolx:
# BLANK record
xf_idx = muldata[0]
pieces.append(pack('<5H', 0x0201, 6, rowx, icolx, xf_idx))
else:
# MULBLANK record
nc = lastcolx - icolx + 1
pieces.append(pack('<4H', 0x00BE, 2 * nc + 6, rowx, icolx))
# (to_py3): 'b' binary data
pieces.append(b''.join([pack('<H', xf_idx) for xf_idx in muldata]))
pieces.append(pack('<H', lastcolx))
if packed_record:
pieces.append(packed_record)
i = nexti
return b''.join(pieces) # (to_py3): 'b' binary data
+34
View File
@@ -0,0 +1,34 @@
# -*- coding: windows-1252 -*-
from .BIFFRecords import ColInfoRecord
class Column(object):
def __init__(self, colx, parent_sheet):
if not(isinstance(colx, int) and 0 <= colx <= 255):
raise ValueError("column index (%r) not an int in range(256)" % colx)
self._index = colx
self._parent = parent_sheet
self._parent_wb = parent_sheet.get_parent()
self._xf_index = 0x0F
self.width = 0x0B92
self.hidden = 0
self.level = 0
self.collapse = 0
def set_style(self, style):
self._xf_index = self._parent_wb.add_style(style)
def width_in_pixels(self):
# *** Approximation ****
return int(round(self.width * 0.0272 + 0.446, 0))
def get_biff_record(self):
options = (self.hidden & 0x01) << 0
options |= (self.level & 0x07) << 8
options |= (self.collapse & 0x01) << 12
return ColInfoRecord(self._index, self._index, self.width, self._xf_index, options).get()
+516
View File
@@ -0,0 +1,516 @@
import sys
import struct
ENCODING = 'utf-16-le'
class Reader:
def __init__(self, filename, dump = False):
self.dump = dump
self.STREAMS = {}
doc = file(filename, 'rb').read()
self.header, self.data = doc[0:512], doc[512:]
del doc
self.__build_header()
self.__build_MSAT()
self.__build_SAT()
self.__build_directory()
self.__build_short_sectors_data()
if len(self.short_sectors_data) > 0:
self.__build_SSAT()
else:
if self.dump and (self.total_ssat_sectors != 0 or self.ssat_start_sid != -2):
print('NOTE: header says that must be', self.total_ssat_sectors, 'short sectors')
print('NOTE: starting at', self.ssat_start_sid, 'sector')
print('NOTE: but file does not contains data in short sectors')
self.ssat_start_sid = -2
self.total_ssat_sectors = 0
self.SSAT = [-2]
for dentry in self.dir_entry_list[1:]:
(did,
sz, name,
t, c,
did_left, did_right, did_root,
dentry_start_sid,
stream_size
) = dentry
stream_data = b''
if stream_size > 0:
if stream_size >= self.min_stream_size:
args = (self.data, self.SAT, dentry_start_sid, self.sect_size)
else:
args = (self.short_sectors_data, self.SSAT, dentry_start_sid, self.short_sect_size)
stream_data = self.get_stream_data(*args)
if name != b'':
# BAD IDEA: names may be equal. NEED use full paths...
self.STREAMS[name] = stream_data
def __build_header(self):
self.doc_magic = self.header[0:8]
if self.doc_magic != b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1':
raise Exception('Not an OLE file.')
self.file_uid = self.header[8:24]
self.rev_num = self.header[24:26]
self.ver_num = self.header[26:28]
self.byte_order = self.header[28:30]
self.log2_sect_size, = struct.unpack('<H', self.header[30:32])
self.log2_short_sect_size, = struct.unpack('<H', self.header[32:34])
self.total_sat_sectors, = struct.unpack('<L', self.header[44:48])
self.dir_start_sid, = struct.unpack('<l', self.header[48:52])
self.min_stream_size, = struct.unpack('<L', self.header[56:60])
self.ssat_start_sid, = struct.unpack('<l', self.header[60:64])
self.total_ssat_sectors, = struct.unpack('<L', self.header[64:68])
self.msat_start_sid, = struct.unpack('<l', self.header[68:72])
self.total_msat_sectors, = struct.unpack('<L', self.header[72:76])
self.sect_size = 1 << self.log2_sect_size
self.short_sect_size = 1 << self.log2_short_sect_size
if self.dump:
print('file magic: ')
print_bin_data(self.doc_magic)
print('file uid: ')
print_bin_data(self.file_uid)
print('revision number: ')
print_bin_data(self.rev_num)
print('version number: ')
print_bin_data(self.ver_num)
print('byte order: ')
print_bin_data(self.byte_order)
print('sector size :', hex(self.sect_size), self.sect_size)
#print 'total sectors in file :', hex(self.total_sectors), self.total_sectors
print('short sector size :', hex(self.short_sect_size), self.short_sect_size)
print('Total number of sectors used for the SAT :', hex(self.total_sat_sectors), self.total_sat_sectors)
print('SID of first sector of the directory stream:', hex(self.dir_start_sid), self.dir_start_sid)
print('Minimum size of a standard stream :', hex(self.min_stream_size), self.min_stream_size)
print('SID of first sector of the SSAT :', hex(self.ssat_start_sid), self.ssat_start_sid)
print('Total number of sectors used for the SSAT :', hex(self.total_ssat_sectors), self.total_ssat_sectors)
print('SID of first additional sector of the MSAT :', hex(self.msat_start_sid), self.msat_start_sid)
print('Total number of sectors used for the MSAT :', hex(self.total_msat_sectors), self.total_msat_sectors)
def __build_MSAT(self):
self.MSAT = list(struct.unpack('<109l', self.header[76:]))
next = self.msat_start_sid
while next > 0:
msat_sector = struct.unpack('<128l', self.data[next*self.sect_size:(next+1)*self.sect_size])
self.MSAT.extend(msat_sector[:127])
next = msat_sector[-1]
if self.dump:
print('MSAT (header part): \n', self.MSAT[:109])
print('additional MSAT sectors: \n', self.MSAT[109:])
def __build_SAT(self):
sat_stream = b''.join([self.data[i*self.sect_size:(i+1)*self.sect_size] for i in self.MSAT if i >= 0])
sat_sids_count = len(sat_stream) >> 2
self.SAT = struct.unpack('<%dl' % sat_sids_count, sat_stream) # SIDs tuple
if self.dump:
print('SAT sid count:\n', sat_sids_count)
print('SAT content:\n', self.SAT)
def __build_SSAT(self):
ssat_stream = self.get_stream_data(self.data, self.SAT, self.ssat_start_sid, self.sect_size)
ssids_count = len(ssat_stream) >> 2
self.SSAT = struct.unpack('<%dl' % ssids_count, ssat_stream)
if self.dump:
print('SSID count:', ssids_count)
print('SSAT content:\n', self.SSAT)
def __build_directory(self):
dir_stream = self.get_stream_data(self.data, self.SAT, self.dir_start_sid, self.sect_size)
self.dir_entry_list = []
i = 0
while i < len(dir_stream):
dentry = dir_stream[i:i+128] # 128 -- dir entry size
i += 128
did = len(self.dir_entry_list)
sz, = struct.unpack('<H', dentry[64:66])
if sz > 0 :
name = dentry[0:sz-2].decode('utf_16_le', 'replace')
else:
name = b''
t, = struct.unpack('B', dentry[66])
c, = struct.unpack('B', dentry[67])
did_left , = struct.unpack('<l', dentry[68:72])
did_right , = struct.unpack('<l', dentry[72:76])
did_root , = struct.unpack('<l', dentry[76:80])
dentry_start_sid , = struct.unpack('<l', dentry[116:120])
stream_size , = struct.unpack('<L', dentry[120:124])
self.dir_entry_list.extend([(did, sz, name, t, c,
did_left, did_right, did_root,
dentry_start_sid, stream_size)])
if self.dump:
dentry_types = {
0x00: 'Empty',
0x01: 'User storage',
0x02: 'User stream',
0x03: 'LockBytes',
0x04: 'Property',
0x05: 'Root storage'
}
node_colours = {
0x00: 'Red',
0x01: 'Black'
}
print('total directory entries:', len(self.dir_entry_list))
for dentry in self.dir_entry_list:
(did, sz, name, t, c,
did_left, did_right, did_root,
dentry_start_sid, stream_size) = dentry
print('DID', did)
print('Size of the used area of the character buffer of the name:', sz)
print('dir entry name:', repr(name))
print('type of entry:', t, dentry_types[t])
print('entry colour:', c, node_colours[c])
print('left child DID :', did_left)
print('right child DID:', did_right)
print('root DID :', did_root)
print('start SID :', dentry_start_sid)
print('stream size :', stream_size)
if stream_size == 0:
print('stream is empty')
elif stream_size >= self.min_stream_size:
print('stream stored as normal stream')
else:
print('stream stored as short-stream')
def __build_short_sectors_data(self):
(did, sz, name, t, c,
did_left, did_right, did_root,
dentry_start_sid, stream_size) = self.dir_entry_list[0]
assert t == 0x05 # Short-Stream Container Stream (SSCS) resides in Root Storage
if stream_size == 0:
self.short_sectors_data = b''
else:
self.short_sectors_data = self.get_stream_data(self.data, self.SAT, dentry_start_sid, self.sect_size)
def get_stream_data(self, data, SAT, start_sid, sect_size):
sid = start_sid
chunks = [(sid, sid)]
stream_data = b''
while SAT[sid] >= 0:
next_in_chain = SAT[sid]
last_chunk_start, last_chunk_finish = chunks[-1]
if next_in_chain == last_chunk_finish + 1:
chunks[-1] = last_chunk_start, next_in_chain
else:
chunks.extend([(next_in_chain, next_in_chain)])
sid = next_in_chain
for s, f in chunks:
stream_data += data[s*sect_size:(f+1)*sect_size]
#print chunks
return stream_data
def print_bin_data(data):
i = 0
while i < len(data):
j = 0
while (i < len(data)) and (j < 16):
c = b'0x%02X' % ord(data[i])
sys.stdout.write(c)
sys.stdout.write(' ')
i += 1
j += 1
print()
if i == 0:
print('<NO DATA>')
# This implementation writes only 'Root Entry', 'Workbook' streams
# and 2 empty streams for aligning directory stream on sector boundary
#
# LAYOUT:
# 0 header
# 76 MSAT (1st part: 109 SID)
# 512 workbook stream
# ... additional MSAT sectors if streams' size > about 7 Mb == (109*512 * 128)
# ... SAT
# ... directory stream
#
# NOTE: this layout is "ad hoc". It can be more general. RTFM
class XlsDoc:
SECTOR_SIZE = 0x0200
MIN_LIMIT = 0x1000
SID_FREE_SECTOR = -1
SID_END_OF_CHAIN = -2
SID_USED_BY_SAT = -3
SID_USED_BY_MSAT = -4
def __init__(self):
self.book_stream_sect = []
self.dir_stream = b''
self.dir_stream_sect = []
self.packed_SAT = b''
self.SAT_sect = []
self.packed_MSAT_1st = b''
self.packed_MSAT_2nd = b''
self.MSAT_sect_2nd = []
self.header = b''
def __build_directory(self): # align on sector boundary
self.dir_stream = b''
#(to_py3): replaced = b'\x00'.join(b'Root Entry\x00') + b'\x00'
dentry_name = 'Root Entry\x00'.encode(ENCODING)
dentry_name_sz = len(dentry_name)
dentry_name_pad = b'\x00'*(64 - dentry_name_sz)
dentry_type = 0x05 # root storage
dentry_colour = 0x01 # black
dentry_did_left = -1
dentry_did_right = -1
dentry_did_root = 1
dentry_start_sid = -2
dentry_stream_sz = 0
self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
dentry_name + dentry_name_pad,
dentry_name_sz,
dentry_type,
dentry_colour,
dentry_did_left,
dentry_did_right,
dentry_did_root,
0, 0, 0, 0, 0, 0, 0, 0, 0,
dentry_start_sid,
dentry_stream_sz,
0
)
# (to_py3): replaced = b'\x00'.join(b'Workbook\x00') + b'\x00'
dentry_name = 'Workbook\x00'.encode(ENCODING)
dentry_name_sz = len(dentry_name)
dentry_name_pad = b'\x00'*(64 - dentry_name_sz)
dentry_type = 0x02 # user stream
dentry_colour = 0x01 # black
dentry_did_left = -1
dentry_did_right = -1
dentry_did_root = -1
dentry_start_sid = 0
dentry_stream_sz = self.book_stream_len
self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
dentry_name + dentry_name_pad,
dentry_name_sz,
dentry_type,
dentry_colour,
dentry_did_left,
dentry_did_right,
dentry_did_root,
0, 0, 0, 0, 0, 0, 0, 0, 0,
dentry_start_sid,
dentry_stream_sz,
0
)
# padding
dentry_name = b''
dentry_name_sz = len(dentry_name)
dentry_name_pad = b'\x00'*(64 - dentry_name_sz)
dentry_type = 0x00 # empty
dentry_colour = 0x01 # black
dentry_did_left = -1
dentry_did_right = -1
dentry_did_root = -1
dentry_start_sid = -2
dentry_stream_sz = 0
self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
dentry_name + dentry_name_pad,
dentry_name_sz,
dentry_type,
dentry_colour,
dentry_did_left,
dentry_did_right,
dentry_did_root,
0, 0, 0, 0, 0, 0, 0, 0, 0,
dentry_start_sid,
dentry_stream_sz,
0
) * 2
def __build_sat(self):
# Build SAT
book_sect_count = self.book_stream_len >> 9
dir_sect_count = len(self.dir_stream) >> 9
total_sect_count = book_sect_count + dir_sect_count
SAT_sect_count = 0
MSAT_sect_count = 0
SAT_sect_count_limit = 109
while total_sect_count > 128*SAT_sect_count or SAT_sect_count > SAT_sect_count_limit:
SAT_sect_count += 1
total_sect_count += 1
if SAT_sect_count > SAT_sect_count_limit:
MSAT_sect_count += 1
total_sect_count += 1
SAT_sect_count_limit += 127
SAT = [self.SID_FREE_SECTOR]*128*SAT_sect_count
sect = 0
while sect < book_sect_count - 1:
self.book_stream_sect.append(sect)
SAT[sect] = sect + 1
sect += 1
self.book_stream_sect.append(sect)
SAT[sect] = self.SID_END_OF_CHAIN
sect += 1
while sect < book_sect_count + MSAT_sect_count:
self.MSAT_sect_2nd.append(sect)
SAT[sect] = self.SID_USED_BY_MSAT
sect += 1
while sect < book_sect_count + MSAT_sect_count + SAT_sect_count:
self.SAT_sect.append(sect)
SAT[sect] = self.SID_USED_BY_SAT
sect += 1
while sect < book_sect_count + MSAT_sect_count + SAT_sect_count + dir_sect_count - 1:
self.dir_stream_sect.append(sect)
SAT[sect] = sect + 1
sect += 1
self.dir_stream_sect.append(sect)
SAT[sect] = self.SID_END_OF_CHAIN
sect += 1
self.packed_SAT = struct.pack('<%dl' % (SAT_sect_count*128), *SAT)
MSAT_1st = [self.SID_FREE_SECTOR]*109
for i, SAT_sect_num in zip(list(range(0, 109)), self.SAT_sect):
MSAT_1st[i] = SAT_sect_num
self.packed_MSAT_1st = struct.pack('<109l', *MSAT_1st)
MSAT_2nd = [self.SID_FREE_SECTOR]*128*MSAT_sect_count
if MSAT_sect_count > 0:
MSAT_2nd[- 1] = self.SID_END_OF_CHAIN
i = 109
msat_sect = 0
sid_num = 0
while i < SAT_sect_count:
if (sid_num + 1) % 128 == 0:
#print 'link: ',
msat_sect += 1
if msat_sect < len(self.MSAT_sect_2nd):
MSAT_2nd[sid_num] = self.MSAT_sect_2nd[msat_sect]
else:
#print 'sid: ',
MSAT_2nd[sid_num] = self.SAT_sect[i]
i += 1
#print sid_num, MSAT_2nd[sid_num]
sid_num += 1
self.packed_MSAT_2nd = struct.pack('<%dl' % (MSAT_sect_count*128), *MSAT_2nd)
#print vars()
#print zip(range(0, sect), SAT)
#print self.book_stream_sect
#print self.MSAT_sect_2nd
#print MSAT_2nd
#print self.SAT_sect
#print self.dir_stream_sect
def __build_header(self):
doc_magic = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1'
file_uid = b'\x00'*16
rev_num = b'\x3E\x00'
ver_num = b'\x03\x00'
byte_order = b'\xFE\xFF'
log_sect_size = struct.pack('<H', 9)
log_short_sect_size = struct.pack('<H', 6)
not_used0 = b'\x00'*10
total_sat_sectors = struct.pack('<L', len(self.SAT_sect))
dir_start_sid = struct.pack('<l', self.dir_stream_sect[0])
not_used1 = b'\x00'*4
min_stream_size = struct.pack('<L', 0x1000)
ssat_start_sid = struct.pack('<l', -2)
total_ssat_sectors = struct.pack('<L', 0)
if len(self.MSAT_sect_2nd) == 0:
msat_start_sid = struct.pack('<l', -2)
else:
msat_start_sid = struct.pack('<l', self.MSAT_sect_2nd[0])
total_msat_sectors = struct.pack('<L', len(self.MSAT_sect_2nd))
self.header = b''.join([ doc_magic,
file_uid,
rev_num,
ver_num,
byte_order,
log_sect_size,
log_short_sect_size,
not_used0,
total_sat_sectors,
dir_start_sid,
not_used1,
min_stream_size,
ssat_start_sid,
total_ssat_sectors,
msat_start_sid,
total_msat_sectors
])
def save(self, file_name_or_filelike_obj, stream):
# 1. Align stream on 0x1000 boundary (and therefore on sector boundary)
padding = b'\x00' * (0x1000 - (len(stream) % 0x1000))
self.book_stream_len = len(stream) + len(padding)
self.__build_directory()
self.__build_sat()
self.__build_header()
f = file_name_or_filelike_obj
we_own_it = not hasattr(f, 'write')
if we_own_it:
f = open(file_name_or_filelike_obj, 'wb')
f.write(self.header)
f.write(self.packed_MSAT_1st)
f.write(stream)
f.write(padding)
f.write(self.packed_MSAT_2nd)
f.write(self.packed_SAT)
f.write(self.dir_stream)
if we_own_it:
f.close()
+41
View File
@@ -0,0 +1,41 @@
from . import ExcelFormulaParser, ExcelFormulaLexer
import struct
from .antlr import ANTLRException
class Formula(object):
__slots__ = ["__init__", "__s", "__parser", "__sheet_refs", "__xcall_refs"]
def __init__(self, s):
try:
self.__s = s
lexer = ExcelFormulaLexer.Lexer(s)
self.__parser = ExcelFormulaParser.Parser(lexer)
self.__parser.formula()
self.__sheet_refs = self.__parser.sheet_references
self.__xcall_refs = self.__parser.xcall_references
except ANTLRException as e:
# print e
raise ExcelFormulaParser.FormulaParseException("can't parse formula " + s)
def get_references(self):
return self.__sheet_refs, self.__xcall_refs
def patch_references(self, patches):
for offset, idx in patches:
self.__parser.rpn = self.__parser.rpn[:offset] + struct.pack('<H', idx) + self.__parser.rpn[offset+2:]
def text(self):
return self.__s
def rpn(self):
'''
Offset Size Contents
0 2 Size of the following formula data (sz)
2 sz Formula data (RPN token array)
[2+sz] var. (optional) Additional data for specific tokens
'''
return struct.pack("<H", len(self.__parser.rpn)) + self.__parser.rpn
+126
View File
@@ -0,0 +1,126 @@
import sys
from .antlr import EOF, CommonToken as Tok, TokenStream, TokenStreamException
import struct
from . import ExcelFormulaParser
from re import compile as recompile, match, LOCALE, UNICODE, IGNORECASE, VERBOSE
int_const_pattern = r"\d+\b"
flt_const_pattern = r"""
(?:
(?: \d* \. \d+ ) # .1 .12 .123 etc 9.1 etc 98.1 etc
|
(?: \d+ \. ) # 1. 12. 123. etc
)
# followed by optional exponent part
(?: [Ee] [+-]? \d+ ) ?
"""
str_const_pattern = r'"(?:[^"]|"")*"'
#range2d_pattern = recompile(r"\$?[A-I]?[A-Z]\$?\d+:\$?[A-I]?[A-Z]\$?\d+"
ref2d_r1c1_pattern = r"[Rr]0*[1-9][0-9]*[Cc]0*[1-9][0-9]*"
ref2d_pattern = r"\$?[A-I]?[A-Z]\$?0*[1-9][0-9]*"
true_pattern = r"TRUE\b"
false_pattern = r"FALSE\b"
if_pattern = r"IF\b"
choose_pattern = r"CHOOSE\b"
name_pattern = r"\w[\.\w]*"
quotename_pattern = r"'(?:[^']|'')*'" #### It's essential that this bracket be non-grouping.
ne_pattern = r"<>"
ge_pattern = r">="
le_pattern = r"<="
pattern_type_tuples = (
(flt_const_pattern, ExcelFormulaParser.NUM_CONST),
(int_const_pattern, ExcelFormulaParser.INT_CONST),
(str_const_pattern, ExcelFormulaParser.STR_CONST),
# (range2d_pattern , ExcelFormulaParser.RANGE2D),
(ref2d_r1c1_pattern, ExcelFormulaParser.REF2D_R1C1),
(ref2d_pattern , ExcelFormulaParser.REF2D),
(true_pattern , ExcelFormulaParser.TRUE_CONST),
(false_pattern , ExcelFormulaParser.FALSE_CONST),
(if_pattern , ExcelFormulaParser.FUNC_IF),
(choose_pattern , ExcelFormulaParser.FUNC_CHOOSE),
(name_pattern , ExcelFormulaParser.NAME),
(quotename_pattern, ExcelFormulaParser.QUOTENAME),
(ne_pattern, ExcelFormulaParser.NE),
(ge_pattern, ExcelFormulaParser.GE),
(le_pattern, ExcelFormulaParser.LE),
)
_re = recompile(
'(' + ')|('.join([i[0] for i in pattern_type_tuples]) + ')',
VERBOSE+LOCALE+IGNORECASE)
_toktype = [None] + [i[1] for i in pattern_type_tuples]
# need dummy at start because re.MatchObject.lastindex counts from 1
single_char_lookup = {
'=': ExcelFormulaParser.EQ,
'<': ExcelFormulaParser.LT,
'>': ExcelFormulaParser.GT,
'+': ExcelFormulaParser.ADD,
'-': ExcelFormulaParser.SUB,
'*': ExcelFormulaParser.MUL,
'/': ExcelFormulaParser.DIV,
':': ExcelFormulaParser.COLON,
';': ExcelFormulaParser.SEMICOLON,
',': ExcelFormulaParser.COMMA,
'(': ExcelFormulaParser.LP,
')': ExcelFormulaParser.RP,
'&': ExcelFormulaParser.CONCAT,
'%': ExcelFormulaParser.PERCENT,
'^': ExcelFormulaParser.POWER,
'!': ExcelFormulaParser.BANG,
}
class Lexer(TokenStream):
def __init__(self, text):
self._text = text[:]
self._pos = 0
self._line = 0
def isEOF(self):
return len(self._text) <= self._pos
def curr_ch(self):
return self._text[self._pos]
def next_ch(self, n = 1):
self._pos += n
def is_whitespace(self):
return self.curr_ch() in " \t\n\r\f\v"
def match_pattern(self):
m = _re.match(self._text, self._pos)
if not m:
return None
self._pos = m.end(0)
return Tok(type = _toktype[m.lastindex], text = m.group(0), col = m.start(0) + 1)
def nextToken(self):
# skip whitespace
while not self.isEOF() and self.is_whitespace():
self.next_ch()
if self.isEOF():
return Tok(type = EOF)
# first, try to match token with 2 or more chars
t = self.match_pattern()
if t:
return t
# second, we want 1-char tokens
te = self.curr_ch()
try:
ty = single_char_lookup[te]
except KeyError:
raise TokenStreamException(
"Unexpected char %r in column %u." % (self.curr_ch(), self._pos))
self.next_ch()
return Tok(type=ty, text=te, col=self._pos)
if __name__ == '__main__':
try:
for t in Lexer(""" 1.23 456 "abcd" R2C2 a1 iv65536 true false if choose a_name 'qname' <> >= <= """):
print(t)
except TokenStreamException as e:
print("error:", e)
+659
View File
@@ -0,0 +1,659 @@
### $ANTLR 2.7.7 (20060930): "xlwt/excel-formula.g" -> "ExcelFormulaParser.py"$
### import antlr and other modules ..
import sys
from . import antlr
### header action >>>
import struct
from . import Utils
from .UnicodeUtils import upack1
from .ExcelMagic import *
_RVAdelta = {"R": 0, "V": 0x20, "A": 0x40}
_RVAdeltaRef = {"R": 0, "V": 0x20, "A": 0x40, "D": 0x20}
_RVAdeltaArea = {"R": 0, "V": 0x20, "A": 0x40, "D": 0}
class FormulaParseException(Exception):
"""
An exception indicating that a Formula could not be successfully parsed.
"""
### header action <<<
### preamble action>>>
### preamble action <<<
### import antlr.Token
from .antlr import Token
### >>>The Known Token Types <<<
SKIP = antlr.SKIP
INVALID_TYPE = antlr.INVALID_TYPE
EOF_TYPE = antlr.EOF_TYPE
EOF = antlr.EOF
NULL_TREE_LOOKAHEAD = antlr.NULL_TREE_LOOKAHEAD
MIN_USER_TYPE = antlr.MIN_USER_TYPE
TRUE_CONST = 4
FALSE_CONST = 5
STR_CONST = 6
NUM_CONST = 7
INT_CONST = 8
FUNC_IF = 9
FUNC_CHOOSE = 10
NAME = 11
QUOTENAME = 12
EQ = 13
NE = 14
GT = 15
LT = 16
GE = 17
LE = 18
ADD = 19
SUB = 20
MUL = 21
DIV = 22
POWER = 23
PERCENT = 24
LP = 25
RP = 26
LB = 27
RB = 28
COLON = 29
COMMA = 30
SEMICOLON = 31
REF2D = 32
REF2D_R1C1 = 33
BANG = 34
CONCAT = 35
class Parser(antlr.LLkParser):
### user action >>>
### user action <<<
def __init__(self, *args, **kwargs):
antlr.LLkParser.__init__(self, *args, **kwargs)
self.tokenNames = _tokenNames
### __init__ header action >>>
self.rpn = b""
self.sheet_references = []
self.xcall_references = []
### __init__ header action <<<
def formula(self):
self.expr("V")
def expr(self, arg_type):
self.prec0_expr(arg_type)
while True:
if ((self.LA(1) >= EQ and self.LA(1) <= LE)):
pass
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [EQ]:
pass
self.match(EQ)
op = struct.pack('B', ptgEQ)
elif la1 and la1 in [NE]:
pass
self.match(NE)
op = struct.pack('B', ptgNE)
elif la1 and la1 in [GT]:
pass
self.match(GT)
op = struct.pack('B', ptgGT)
elif la1 and la1 in [LT]:
pass
self.match(LT)
op = struct.pack('B', ptgLT)
elif la1 and la1 in [GE]:
pass
self.match(GE)
op = struct.pack('B', ptgGE)
elif la1 and la1 in [LE]:
pass
self.match(LE)
op = struct.pack('B', ptgLE)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
self.prec0_expr(arg_type)
self.rpn += op
else:
break
def prec0_expr(self,
arg_type
):
pass
self.prec1_expr(arg_type)
while True:
if (self.LA(1)==CONCAT):
pass
pass
self.match(CONCAT)
op = struct.pack('B', ptgConcat)
self.prec1_expr(arg_type)
self.rpn += op
else:
break
def prec1_expr(self,
arg_type
):
pass
self.prec2_expr(arg_type)
while True:
if (self.LA(1)==ADD or self.LA(1)==SUB):
pass
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [ADD]:
pass
self.match(ADD)
op = struct.pack('B', ptgAdd)
elif la1 and la1 in [SUB]:
pass
self.match(SUB)
op = struct.pack('B', ptgSub)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
self.prec2_expr(arg_type)
self.rpn += op;
# print "**prec1_expr4 %s" % arg_type
else:
break
def prec2_expr(self,
arg_type
):
pass
self.prec3_expr(arg_type)
while True:
if (self.LA(1)==MUL or self.LA(1)==DIV):
pass
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [MUL]:
pass
self.match(MUL)
op = struct.pack('B', ptgMul)
elif la1 and la1 in [DIV]:
pass
self.match(DIV)
op = struct.pack('B', ptgDiv)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
self.prec3_expr(arg_type)
self.rpn += op
else:
break
def prec3_expr(self,
arg_type
):
pass
self.prec4_expr(arg_type)
while True:
if (self.LA(1)==POWER):
pass
pass
self.match(POWER)
op = struct.pack('B', ptgPower)
self.prec4_expr(arg_type)
self.rpn += op
else:
break
def prec4_expr(self,
arg_type
):
pass
self.prec5_expr(arg_type)
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [PERCENT]:
pass
self.match(PERCENT)
self.rpn += struct.pack('B', ptgPercent)
elif la1 and la1 in [EOF,EQ,NE,GT,LT,GE,LE,ADD,SUB,MUL,DIV,POWER,RP,COMMA,SEMICOLON,CONCAT]:
pass
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
def prec5_expr(self,
arg_type
):
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [TRUE_CONST,FALSE_CONST,STR_CONST,NUM_CONST,INT_CONST,FUNC_IF,FUNC_CHOOSE,NAME,QUOTENAME,LP,REF2D]:
pass
self.primary(arg_type)
elif la1 and la1 in [SUB]:
pass
self.match(SUB)
self.primary(arg_type)
self.rpn += struct.pack('B', ptgUminus)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
def primary(self,
arg_type
):
str_tok = None
int_tok = None
num_tok = None
ref2d_tok = None
ref2d1_tok = None
ref2d2_tok = None
ref3d_ref2d = None
ref3d_ref2d2 = None
name_tok = None
func_tok = None
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [TRUE_CONST]:
pass
self.match(TRUE_CONST)
self.rpn += struct.pack("2B", ptgBool, 1)
elif la1 and la1 in [FALSE_CONST]:
pass
self.match(FALSE_CONST)
self.rpn += struct.pack("2B", ptgBool, 0)
elif la1 and la1 in [STR_CONST]:
pass
str_tok = self.LT(1)
self.match(STR_CONST)
self.rpn += struct.pack("B", ptgStr) + upack1(str_tok.text[1:-1].replace("\"\"", "\""))
elif la1 and la1 in [NUM_CONST]:
pass
num_tok = self.LT(1)
self.match(NUM_CONST)
self.rpn += struct.pack("<Bd", ptgNum, float(num_tok.text))
elif la1 and la1 in [FUNC_IF]:
pass
self.match(FUNC_IF)
self.match(LP)
self.expr("V")
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [SEMICOLON]:
pass
self.match(SEMICOLON)
elif la1 and la1 in [COMMA]:
pass
self.match(COMMA)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
self.rpn += struct.pack("<BBH", ptgAttr, 0x02, 0) # tAttrIf
pos0 = len(self.rpn) - 2
self.expr(arg_type)
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [SEMICOLON]:
pass
self.match(SEMICOLON)
elif la1 and la1 in [COMMA]:
pass
self.match(COMMA)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
self.rpn += struct.pack("<BBH", ptgAttr, 0x08, 0) # tAttrSkip
pos1 = len(self.rpn) - 2
self.rpn = self.rpn[:pos0] + struct.pack("<H", pos1-pos0) + self.rpn[pos0+2:]
self.expr(arg_type)
self.match(RP)
self.rpn += struct.pack("<BBH", ptgAttr, 0x08, 3) # tAttrSkip
self.rpn += struct.pack("<BBH", ptgFuncVarR, 3, 1) # 3 = nargs, 1 = IF func
pos2 = len(self.rpn)
self.rpn = self.rpn[:pos1] + struct.pack("<H", pos2-(pos1+2)-1) + self.rpn[pos1+2:]
elif la1 and la1 in [FUNC_CHOOSE]:
pass
self.match(FUNC_CHOOSE)
arg_type = b"R"
rpn_chunks = []
self.match(LP)
self.expr("V")
rpn_start = len(self.rpn)
ref_markers = [len(self.sheet_references)]
while True:
if (self.LA(1)==COMMA or self.LA(1)==SEMICOLON):
pass
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [SEMICOLON]:
pass
self.match(SEMICOLON)
elif la1 and la1 in [COMMA]:
pass
self.match(COMMA)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
mark = len(self.rpn)
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [TRUE_CONST,FALSE_CONST,STR_CONST,NUM_CONST,INT_CONST,FUNC_IF,FUNC_CHOOSE,NAME,QUOTENAME,SUB,LP,REF2D]:
pass
self.expr(arg_type)
elif la1 and la1 in [RP,COMMA,SEMICOLON]:
pass
self.rpn += struct.pack("B", ptgMissArg)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
rpn_chunks.append(self.rpn[mark:])
ref_markers.append(len(self.sheet_references))
else:
break
self.match(RP)
self.rpn = self.rpn[:rpn_start]
nc = len(rpn_chunks)
chunklens = [len(chunk) for chunk in rpn_chunks]
skiplens = [0] * nc
skiplens[-1] = 3
for ic in range(nc-1, 0, -1):
skiplens[ic-1] = skiplens[ic] + chunklens[ic] + 4
jump_pos = [2 * nc + 2]
for ic in range(nc):
jump_pos.append(jump_pos[-1] + chunklens[ic] + 4)
chunk_shift = 2 * nc + 6 # size of tAttrChoose
for ic in range(nc):
for refx in range(ref_markers[ic], ref_markers[ic+1]):
ref = self.sheet_references[refx]
self.sheet_references[refx] = (ref[0], ref[1], ref[2] + chunk_shift)
chunk_shift += 4 # size of tAttrSkip
choose_rpn = []
choose_rpn.append(struct.pack("<BBH", ptgAttr, 0x04, nc)) # 0x04 is tAttrChoose
choose_rpn.append(struct.pack("<%dH" % (nc+1), *jump_pos))
for ic in range(nc):
choose_rpn.append(rpn_chunks[ic])
choose_rpn.append(struct.pack("<BBH", ptgAttr, 0x08, skiplens[ic])) # 0x08 is tAttrSkip
choose_rpn.append(struct.pack("<BBH", ptgFuncVarV, nc+1, 100)) # 100 is CHOOSE fn
self.rpn += b"".join(choose_rpn)
elif la1 and la1 in [LP]:
pass
self.match(LP)
self.expr(arg_type)
self.match(RP)
self.rpn += struct.pack("B", ptgParen)
else:
if (self.LA(1)==INT_CONST) and (_tokenSet_0.member(self.LA(2))):
pass
int_tok = self.LT(1)
self.match(INT_CONST)
# print "**int_const", int_tok.text
int_value = int(int_tok.text)
if int_value <= 65535:
self.rpn += struct.pack("<BH", ptgInt, int_value)
else:
self.rpn += struct.pack("<Bd", ptgNum, float(int_value))
elif (self.LA(1)==REF2D) and (_tokenSet_0.member(self.LA(2))):
pass
ref2d_tok = self.LT(1)
self.match(REF2D)
# print "**ref2d %s %s" % (ref2d_tok.text, arg_type)
r, c = Utils.cell_to_packed_rowcol(ref2d_tok.text)
ptg = ptgRefR + _RVAdeltaRef[arg_type]
self.rpn += struct.pack("<B2H", ptg, r, c)
elif (self.LA(1)==REF2D) and (self.LA(2)==COLON):
pass
ref2d1_tok = self.LT(1)
self.match(REF2D)
self.match(COLON)
ref2d2_tok = self.LT(1)
self.match(REF2D)
r1, c1 = Utils.cell_to_packed_rowcol(ref2d1_tok.text)
r2, c2 = Utils.cell_to_packed_rowcol(ref2d2_tok.text)
ptg = ptgAreaR + _RVAdeltaArea[arg_type]
self.rpn += struct.pack("<B4H", ptg, r1, r2, c1, c2)
elif (self.LA(1)==INT_CONST or self.LA(1)==NAME or self.LA(1)==QUOTENAME) and (self.LA(2)==COLON or self.LA(2)==BANG):
pass
sheet1=self.sheet()
sheet2 = sheet1
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [COLON]:
pass
self.match(COLON)
sheet2=self.sheet()
elif la1 and la1 in [BANG]:
pass
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
self.match(BANG)
ref3d_ref2d = self.LT(1)
self.match(REF2D)
ptg = ptgRef3dR + _RVAdeltaRef[arg_type]
rpn_ref2d = b""
r1, c1 = Utils.cell_to_packed_rowcol(ref3d_ref2d.text)
rpn_ref2d = struct.pack("<3H", 0x0000, r1, c1)
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [COLON]:
pass
self.match(COLON)
ref3d_ref2d2 = self.LT(1)
self.match(REF2D)
ptg = ptgArea3dR + _RVAdeltaArea[arg_type]
r2, c2 = Utils.cell_to_packed_rowcol(ref3d_ref2d2.text)
rpn_ref2d = struct.pack("<5H", 0x0000, r1, r2, c1, c2)
elif la1 and la1 in [EOF,EQ,NE,GT,LT,GE,LE,ADD,SUB,MUL,DIV,POWER,PERCENT,RP,COMMA,SEMICOLON,CONCAT]:
pass
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
self.rpn += struct.pack("<B", ptg)
self.sheet_references.append((sheet1, sheet2, len(self.rpn)))
self.rpn += rpn_ref2d
elif (self.LA(1)==NAME) and (_tokenSet_0.member(self.LA(2))):
name_tok = self.LT(1)
self.match(NAME)
raise Exception("[formula] found unexpected NAME token (%r)" % name_tok.txt)
# #### TODO: handle references to defined names here
elif (self.LA(1)==NAME) and (self.LA(2)==LP):
func_tok = self.LT(1)
self.match(NAME)
func_toku = func_tok.text.upper()
if func_toku in all_funcs_by_name:
(opcode,
min_argc,
max_argc,
func_type,
arg_type_str) = all_funcs_by_name[func_toku]
arg_type_list = list(arg_type_str)
else:
raise Exception("[formula] unknown function (%s)" % func_tok.text)
# print "**func_tok1 %s %s" % (func_toku, func_type)
xcall = opcode < 0
if xcall:
# The name of the add-in function is passed as the 1st arg
# of the hidden XCALL function
self.xcall_references.append((func_toku, len(self.rpn) + 1))
self.rpn += struct.pack("<BHHH",
ptgNameXR,
0xadde, # ##PATCHME## index to REF entry in EXTERNSHEET record
0xefbe, # ##PATCHME## one-based index to EXTERNNAME record
0x0000) # unused
self.match(LP)
arg_count=self.expr_list(arg_type_list, min_argc, max_argc)
self.match(RP)
if arg_count > max_argc or arg_count < min_argc:
raise Exception("%d parameters for function: %s" % (arg_count, func_tok.text))
if xcall:
func_ptg = ptgFuncVarR + _RVAdelta[func_type]
self.rpn += struct.pack("<2BH", func_ptg, arg_count + 1, 255) # 255 is magic XCALL function
elif min_argc == max_argc:
func_ptg = ptgFuncR + _RVAdelta[func_type]
self.rpn += struct.pack("<BH", func_ptg, opcode)
elif arg_count == 1 and func_tok.text.upper() == "SUM":
self.rpn += struct.pack("<BBH", ptgAttr, 0x10, 0) # tAttrSum
else:
func_ptg = ptgFuncVarR + _RVAdelta[func_type]
self.rpn += struct.pack("<2BH", func_ptg, arg_count, opcode)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
def sheet(self):
ref = None
sheet_ref_name = None
sheet_ref_int = None
sheet_ref_quote = None
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [NAME]:
sheet_ref_name = self.LT(1)
self.match(NAME)
ref = sheet_ref_name.text
elif la1 and la1 in [INT_CONST]:
sheet_ref_int = self.LT(1)
self.match(INT_CONST)
ref = sheet_ref_int.text
elif la1 and la1 in [QUOTENAME]:
sheet_ref_quote = self.LT(1)
self.match(QUOTENAME)
ref = sheet_ref_quote.text[1:-1].replace("''", "'")
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
return ref
def expr_list(self,
arg_type_list, min_argc, max_argc
):
arg_cnt = None
arg_cnt = 0
arg_type = arg_type_list[arg_cnt]
# print "**expr_list1[%d] req=%s" % (arg_cnt, arg_type)
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [TRUE_CONST,FALSE_CONST,STR_CONST,NUM_CONST,INT_CONST,FUNC_IF,FUNC_CHOOSE,NAME,QUOTENAME,SUB,LP,REF2D]:
pass
self.expr(arg_type)
arg_cnt += 1
while True:
if (self.LA(1)==COMMA or self.LA(1)==SEMICOLON):
pass
if arg_cnt < len(arg_type_list):
arg_type = arg_type_list[arg_cnt]
else:
arg_type = arg_type_list[-1]
if arg_type == "+":
arg_type = arg_type_list[-2]
# print "**expr_list2[%d] req=%s" % (arg_cnt, arg_type)
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [SEMICOLON]:
pass
self.match(SEMICOLON)
elif la1 and la1 in [COMMA]:
pass
self.match(COMMA)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
la1 = self.LA(1)
if False:
pass
elif la1 and la1 in [TRUE_CONST,FALSE_CONST,STR_CONST,NUM_CONST,INT_CONST,FUNC_IF,FUNC_CHOOSE,NAME,QUOTENAME,SUB,LP,REF2D]:
pass
self.expr(arg_type)
elif la1 and la1 in [RP,COMMA,SEMICOLON]:
pass
self.rpn += struct.pack("B", ptgMissArg)
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
arg_cnt += 1
else:
break
elif la1 and la1 in [RP]:
pass
else:
raise antlr.NoViableAltException(self.LT(1), self.getFilename())
return arg_cnt
_tokenNames = [
"<0>",
"EOF",
"<2>",
"NULL_TREE_LOOKAHEAD",
"TRUE_CONST",
"FALSE_CONST",
"STR_CONST",
"NUM_CONST",
"INT_CONST",
"FUNC_IF",
"FUNC_CHOOSE",
"NAME",
"QUOTENAME",
"EQ",
"NE",
"GT",
"LT",
"GE",
"LE",
"ADD",
"SUB",
"MUL",
"DIV",
"POWER",
"PERCENT",
"LP",
"RP",
"LB",
"RB",
"COLON",
"COMMA",
"SEMICOLON",
"REF2D",
"REF2D_R1C1",
"BANG",
"CONCAT"
]
### generate bit set
def mk_tokenSet_0():
### var1
data = [ 37681618946, 0]
return data
_tokenSet_0 = antlr.BitSet(mk_tokenSet_0())
+859
View File
@@ -0,0 +1,859 @@
""" lots of Excel Magic Numbers """
# Boundaries BIFF8+
MAX_ROW = 65536
MAX_COL = 256
biff_records = {
0x0000: "DIMENSIONS",
0x0001: "BLANK",
0x0002: "INTEGER",
0x0003: "NUMBER",
0x0004: "LABEL",
0x0005: "BOOLERR",
0x0006: "FORMULA",
0x0007: "STRING",
0x0008: "ROW",
0x0009: "BOF",
0x000A: "EOF",
0x000B: "INDEX",
0x000C: "CALCCOUNT",
0x000D: "CALCMODE",
0x000E: "PRECISION",
0x000F: "REFMODE",
0x0010: "DELTA",
0x0011: "ITERATION",
0x0012: "PROTECT",
0x0013: "PASSWORD",
0x0014: "HEADER",
0x0015: "FOOTER",
0x0016: "EXTERNCOUNT",
0x0017: "EXTERNSHEET",
0x0018: "NAME",
0x0019: "WINDOWPROTECT",
0x001A: "VERTICALPAGEBREAKS",
0x001B: "HORIZONTALPAGEBREAKS",
0x001C: "NOTE",
0x001D: "SELECTION",
0x001E: "FORMAT",
0x001F: "FORMATCOUNT",
0x0020: "COLUMNDEFAULT",
0x0021: "ARRAY",
0x0022: "1904",
0x0023: "EXTERNNAME",
0x0024: "COLWIDTH",
0x0025: "DEFAULTROWHEIGHT",
0x0026: "LEFTMARGIN",
0x0027: "RIGHTMARGIN",
0x0028: "TOPMARGIN",
0x0029: "BOTTOMMARGIN",
0x002A: "PRINTHEADERS",
0x002B: "PRINTGRIDLINES",
0x002F: "FILEPASS",
0x0031: "FONT",
0x0036: "TABLE",
0x003C: "CONTINUE",
0x003D: "WINDOW1",
0x003E: "WINDOW2",
0x0040: "BACKUP",
0x0041: "PANE",
0x0042: "CODEPAGE",
0x0043: "XF",
0x0044: "IXFE",
0x0045: "EFONT",
0x004D: "PLS",
0x0050: "DCON",
0x0051: "DCONREF",
0x0053: "DCONNAME",
0x0055: "DEFCOLWIDTH",
0x0056: "BUILTINFMTCNT",
0x0059: "XCT",
0x005A: "CRN",
0x005B: "FILESHARING",
0x005C: "WRITEACCESS",
0x005D: "OBJ",
0x005E: "UNCALCED",
0x005F: "SAFERECALC",
0x0060: "TEMPLATE",
0x0063: "OBJPROTECT",
0x007D: "COLINFO",
0x007E: "RK",
0x007F: "IMDATA",
0x0080: "GUTS",
0x0081: "WSBOOL",
0x0082: "GRIDSET",
0x0083: "HCENTER",
0x0084: "VCENTER",
0x0085: "BOUNDSHEET",
0x0086: "WRITEPROT",
0x0087: "ADDIN",
0x0088: "EDG",
0x0089: "PUB",
0x008C: "COUNTRY",
0x008D: "HIDEOBJ",
0x008E: "BUNDLESOFFSET",
0x008F: "BUNDLEHEADER",
0x0090: "SORT",
0x0091: "SUB",
0x0092: "PALETTE",
0x0093: "STYLE",
0x0094: "LHRECORD",
0x0095: "LHNGRAPH",
0x0096: "SOUND",
0x0098: "LPR",
0x0099: "STANDARDWIDTH",
0x009A: "FNGROUPNAME",
0x009B: "FILTERMODE",
0x009C: "FNGROUPCOUNT",
0x009D: "AUTOFILTERINFO",
0x009E: "AUTOFILTER",
0x00A0: "SCL",
0x00A1: "SETUP",
0x00A9: "COORDLIST",
0x00AB: "GCW",
0x00AE: "SCENMAN",
0x00AF: "SCENARIO",
0x00B0: "SXVIEW",
0x00B1: "SXVD",
0x00B2: "SXVI",
0x00B4: "SXIVD",
0x00B5: "SXLI",
0x00B6: "SXPI",
0x00B8: "DOCROUTE",
0x00B9: "RECIPNAME",
0x00BC: "SHRFMLA",
0x00BD: "MULRK",
0x00BE: "MULBLANK",
0x00C1: "MMS",
0x00C2: "ADDMENU",
0x00C3: "DELMENU",
0x00C5: "SXDI",
0x00C6: "SXDB",
0x00C7: "SXFIELD",
0x00C8: "SXINDEXLIST",
0x00C9: "SXDOUBLE",
0x00CD: "SXSTRING",
0x00CE: "SXDATETIME",
0x00D0: "SXTBL",
0x00D1: "SXTBRGITEM",
0x00D2: "SXTBPG",
0x00D3: "OBPROJ",
0x00D5: "SXIDSTM",
0x00D6: "RSTRING",
0x00D7: "DBCELL",
0x00DA: "BOOKBOOL",
0x00DC: "SXEXT|PARAMQRY",
0x00DD: "SCENPROTECT",
0x00DE: "OLESIZE",
0x00DF: "UDDESC",
0x00E0: "XF",
0x00E1: "INTERFACEHDR",
0x00E2: "INTERFACEEND",
0x00E3: "SXVS",
0x00E5: "MERGEDCELLS",
0x00E9: "BITMAP",
0x00EB: "MSODRAWINGGROUP",
0x00EC: "MSODRAWING",
0x00ED: "MSODRAWINGSELECTION",
0x00F0: "SXRULE",
0x00F1: "SXEX",
0x00F2: "SXFILT",
0x00F6: "SXNAME",
0x00F7: "SXSELECT",
0x00F8: "SXPAIR",
0x00F9: "SXFMLA",
0x00FB: "SXFORMAT",
0x00FC: "SST",
0x00FD: "LABELSST",
0x00FF: "EXTSST",
0x0100: "SXVDEX",
0x0103: "SXFORMULA",
0x0122: "SXDBEX",
0x0137: "CHTRINSERT",
0x0138: "CHTRINFO",
0x013B: "CHTRCELLCONTENT",
0x013D: "TABID",
0x0140: "CHTRMOVERANGE",
0x014D: "CHTRINSERTTAB",
0x015F: "LABELRANGES",
0x0160: "USESELFS",
0x0161: "DSF",
0x0162: "XL5MODIFY",
0x0196: "CHTRHEADER",
0x01A9: "USERBVIEW",
0x01AA: "USERSVIEWBEGIN",
0x01AB: "USERSVIEWEND",
0x01AD: "QSI",
0x01AE: "SUPBOOK",
0x01AF: "PROT4REV",
0x01B0: "CONDFMT",
0x01B1: "CF",
0x01B2: "DVAL",
0x01B5: "DCONBIN",
0x01B6: "TXO",
0x01B7: "REFRESHALL",
0x01B8: "HLINK",
0x01BA: "CODENAME",
0x01BB: "SXFDBTYPE",
0x01BC: "PROT4REVPASS",
0x01BE: "DV",
0x01C0: "XL9FILE",
0x01C1: "RECALCID",
0x0200: "DIMENSIONS",
0x0201: "BLANK",
0x0203: "NUMBER",
0x0204: "LABEL",
0x0205: "BOOLERR",
0x0206: "FORMULA",
0x0207: "STRING",
0x0208: "ROW",
0x0209: "BOF",
0x020B: "INDEX",
0x0218: "NAME",
0x0221: "ARRAY",
0x0223: "EXTERNNAME",
0x0225: "DEFAULTROWHEIGHT",
0x0231: "FONT",
0x0236: "TABLE",
0x023E: "WINDOW2",
0x0243: "XF",
0x027E: "RK",
0x0293: "STYLE",
0x0406: "FORMULA",
0x0409: "BOF",
0x041E: "FORMAT",
0x0443: "XF",
0x04BC: "SHRFMLA",
0x0800: "SCREENTIP",
0x0803: "WEBQRYSETTINGS",
0x0804: "WEBQRYTABLES",
0x0809: "BOF",
0x0862: "SHEETLAYOUT",
0x0867: "SHEETPROTECTION",
0x1001: "UNITS",
0x1002: "ChartChart",
0x1003: "ChartSeries",
0x1006: "ChartDataformat",
0x1007: "ChartLineformat",
0x1009: "ChartMarkerformat",
0x100A: "ChartAreaformat",
0x100B: "ChartPieformat",
0x100C: "ChartAttachedlabel",
0x100D: "ChartSeriestext",
0x1014: "ChartChartformat",
0x1015: "ChartLegend",
0x1016: "ChartSerieslist",
0x1017: "ChartBar",
0x1018: "ChartLine",
0x1019: "ChartPie",
0x101A: "ChartArea",
0x101B: "ChartScatter",
0x101C: "ChartChartline",
0x101D: "ChartAxis",
0x101E: "ChartTick",
0x101F: "ChartValuerange",
0x1020: "ChartCatserrange",
0x1021: "ChartAxislineformat",
0x1022: "ChartFormatlink",
0x1024: "ChartDefaulttext",
0x1025: "ChartText",
0x1026: "ChartFontx",
0x1027: "ChartObjectLink",
0x1032: "ChartFrame",
0x1033: "BEGIN",
0x1034: "END",
0x1035: "ChartPlotarea",
0x103A: "Chart3D",
0x103C: "ChartPicf",
0x103D: "ChartDropbar",
0x103E: "ChartRadar",
0x103F: "ChartSurface",
0x1040: "ChartRadararea",
0x1041: "ChartAxisparent",
0x1043: "ChartLegendxn",
0x1044: "ChartShtprops",
0x1045: "ChartSertocrt",
0x1046: "ChartAxesused",
0x1048: "ChartSbaseref",
0x104A: "ChartSerparent",
0x104B: "ChartSerauxtrend",
0x104E: "ChartIfmt",
0x104F: "ChartPos",
0x1050: "ChartAlruns",
0x1051: "ChartAI",
0x105B: "ChartSerauxerrbar",
0x105D: "ChartSerfmt",
0x105F: "Chart3DDataFormat",
0x1060: "ChartFbi",
0x1061: "ChartBoppop",
0x1062: "ChartAxcext",
0x1063: "ChartDat",
0x1064: "ChartPlotgrowth",
0x1065: "ChartSiindex",
0x1066: "ChartGelframe",
0x1067: "ChartBoppcustom",
0xFFFF: ""
}
all_funcs_by_name = {
# Includes Analysis ToolPak aka ATP aka add-in aka xcall functions,
# distinguished by -ve opcode.
# name: (opcode, min # args, max # args, func return type, func arg types)
# + in func arg types means more of the same.
'ABS' : ( 24, 1, 1, 'V', 'V'),
'ACCRINT' : ( -1, 6, 7, 'V', 'VVVVVVV'),
'ACCRINTM' : ( -1, 3, 5, 'V', 'VVVVV'),
'ACOS' : ( 99, 1, 1, 'V', 'V'),
'ACOSH' : (233, 1, 1, 'V', 'V'),
'ADDRESS' : (219, 2, 5, 'V', 'VVVVV'),
'AMORDEGRC' : ( -1, 7, 7, 'V', 'VVVVVVV'),
'AMORLINC' : ( -1, 7, 7, 'V', 'VVVVVVV'),
'AND' : ( 36, 1, 30, 'V', 'D+'),
'AREAS' : ( 75, 1, 1, 'V', 'R'),
'ASC' : (214, 1, 1, 'V', 'V'),
'ASIN' : ( 98, 1, 1, 'V', 'V'),
'ASINH' : (232, 1, 1, 'V', 'V'),
'ATAN' : ( 18, 1, 1, 'V', 'V'),
'ATAN2' : ( 97, 2, 2, 'V', 'VV'),
'ATANH' : (234, 1, 1, 'V', 'V'),
'AVEDEV' : (269, 1, 30, 'V', 'D+'),
'AVERAGE' : ( 5, 1, 30, 'V', 'D+'),
'AVERAGEA' : (361, 1, 30, 'V', 'D+'),
'BAHTTEXT' : (368, 1, 1, 'V', 'V'),
'BESSELI' : ( -1, 2, 2, 'V', 'VV'),
'BESSELJ' : ( -1, 2, 2, 'V', 'VV'),
'BESSELK' : ( -1, 2, 2, 'V', 'VV'),
'BESSELY' : ( -1, 2, 2, 'V', 'VV'),
'BETADIST' : (270, 3, 5, 'V', 'VVVVV'),
'BETAINV' : (272, 3, 5, 'V', 'VVVVV'),
'BIN2DEC' : ( -1, 1, 1, 'V', 'V'),
'BIN2HEX' : ( -1, 1, 2, 'V', 'VV'),
'BIN2OCT' : ( -1, 1, 2, 'V', 'VV'),
'BINOMDIST' : (273, 4, 4, 'V', 'VVVV'),
'CEILING' : (288, 2, 2, 'V', 'VV'),
'CELL' : (125, 1, 2, 'V', 'VR'),
'CHAR' : (111, 1, 1, 'V', 'V'),
'CHIDIST' : (274, 2, 2, 'V', 'VV'),
'CHIINV' : (275, 2, 2, 'V', 'VV'),
'CHITEST' : (306, 2, 2, 'V', 'AA'),
'CHOOSE' : (100, 2, 30, 'R', 'VR+'),
'CLEAN' : (162, 1, 1, 'V', 'V'),
'CODE' : (121, 1, 1, 'V', 'V'),
'COLUMN' : ( 9, 0, 1, 'V', 'R'),
'COLUMNS' : ( 77, 1, 1, 'V', 'R'),
'COMBIN' : (276, 2, 2, 'V', 'VV'),
'COMPLEX' : ( -1, 2, 3, 'V', 'VVV'),
'CONCATENATE' : (336, 1, 30, 'V', 'V+'),
'CONFIDENCE' : (277, 3, 3, 'V', 'VVV'),
'CONVERT' : ( -1, 3, 3, 'V', 'VVV'),
'CORREL' : (307, 2, 2, 'V', 'AA'),
'COS' : ( 16, 1, 1, 'V', 'V'),
'COSH' : (230, 1, 1, 'V', 'V'),
'COUNT' : ( 0, 1, 30, 'V', 'D+'),
'COUNTA' : (169, 1, 30, 'V', 'D+'),
'COUNTBLANK' : (347, 1, 1, 'V', 'R'),
'COUNTIF' : (346, 2, 2, 'V', 'RV'),
'COUPDAYBS' : ( -1, 3, 5, 'V', 'VVVVV'),
'COUPDAYS' : ( -1, 3, 5, 'V', 'VVVVV'),
'COUPDAYSNC' : ( -1, 3, 5, 'V', 'VVVVV'),
'COUPNCD' : ( -1, 3, 5, 'V', 'VVVVV'),
'COUPNUM' : ( -1, 3, 5, 'V', 'VVVVV'),
'COUPPCD' : ( -1, 3, 5, 'V', 'VVVVV'),
'COVAR' : (308, 2, 2, 'V', 'AA'),
'CRITBINOM' : (278, 3, 3, 'V', 'VVV'),
'CUMIPMT' : ( -1, 6, 6, 'V', 'VVVVVV'),
'CUMPRINC' : ( -1, 6, 6, 'V', 'VVVVVV'),
'DATE' : ( 65, 3, 3, 'V', 'VVV'),
'DATEDIF' : (351, 3, 3, 'V', 'VVV'),
'DATEVALUE' : (140, 1, 1, 'V', 'V'),
'DAVERAGE' : ( 42, 3, 3, 'V', 'RRR'),
'DAY' : ( 67, 1, 1, 'V', 'V'),
'DAYS360' : (220, 2, 3, 'V', 'VVV'),
'DB' : (247, 4, 5, 'V', 'VVVVV'),
'DBCS' : (215, 1, 1, 'V', 'V'),
'DCOUNT' : ( 40, 3, 3, 'V', 'RRR'),
'DCOUNTA' : (199, 3, 3, 'V', 'RRR'),
'DDB' : (144, 4, 5, 'V', 'VVVVV'),
'DEC2BIN' : ( -1, 1, 2, 'V', 'VV'),
'DEC2HEX' : ( -1, 1, 2, 'V', 'VV'),
'DEC2OCT' : ( -1, 1, 2, 'V', 'VV'),
'DEGREES' : (343, 1, 1, 'V', 'V'),
'DELTA' : ( -1, 1, 2, 'V', 'VV'),
'DEVSQ' : (318, 1, 30, 'V', 'D+'),
'DGET' : (235, 3, 3, 'V', 'RRR'),
'DISC' : ( -1, 4, 5, 'V', 'VVVVV'),
'DMAX' : ( 44, 3, 3, 'V', 'RRR'),
'DMIN' : ( 43, 3, 3, 'V', 'RRR'),
'DOLLAR' : ( 13, 1, 2, 'V', 'VV'),
'DOLLARDE' : ( -1, 2, 2, 'V', 'VV'),
'DOLLARFR' : ( -1, 2, 2, 'V', 'VV'),
'DPRODUCT' : (189, 3, 3, 'V', 'RRR'),
'DSTDEV' : ( 45, 3, 3, 'V', 'RRR'),
'DSTDEVP' : (195, 3, 3, 'V', 'RRR'),
'DSUM' : ( 41, 3, 3, 'V', 'RRR'),
'DURATION' : ( -1, 5, 6, 'V', 'VVVVVV'),
'DVAR' : ( 47, 3, 3, 'V', 'RRR'),
'DVARP' : (196, 3, 3, 'V', 'RRR'),
'EDATE' : ( -1, 2, 2, 'V', 'VV'),
'EFFECT' : ( -1, 2, 2, 'V', 'VV'),
'EOMONTH' : ( -1, 1, 2, 'V', 'VV'),
'ERF' : ( -1, 1, 2, 'V', 'VV'),
'ERFC' : ( -1, 1, 1, 'V', 'V'),
'ERROR.TYPE' : (261, 1, 1, 'V', 'V'),
'EVEN' : (279, 1, 1, 'V', 'V'),
'EXACT' : (117, 2, 2, 'V', 'VV'),
'EXP' : ( 21, 1, 1, 'V', 'V'),
'EXPONDIST' : (280, 3, 3, 'V', 'VVV'),
'FACT' : (184, 1, 1, 'V', 'V'),
'FACTDOUBLE' : ( -1, 1, 1, 'V', 'V'),
'FALSE' : ( 35, 0, 0, 'V', '-'),
'FDIST' : (281, 3, 3, 'V', 'VVV'),
'FIND' : (124, 2, 3, 'V', 'VVV'),
'FINDB' : (205, 2, 3, 'V', 'VVV'),
'FINV' : (282, 3, 3, 'V', 'VVV'),
'FISHER' : (283, 1, 1, 'V', 'V'),
'FISHERINV' : (284, 1, 1, 'V', 'V'),
'FIXED' : ( 14, 2, 3, 'V', 'VVV'),
'FLOOR' : (285, 2, 2, 'V', 'VV'),
'FORECAST' : (309, 3, 3, 'V', 'VAA'),
'FREQUENCY' : (252, 2, 2, 'A', 'RR'),
'FTEST' : (310, 2, 2, 'V', 'AA'),
'FV' : ( 57, 3, 5, 'V', 'VVVVV'),
'FVSCHEDULE' : ( -1, 2, 2, 'V', 'VA'),
'GAMMADIST' : (286, 4, 4, 'V', 'VVVV'),
'GAMMAINV' : (287, 3, 3, 'V', 'VVV'),
'GAMMALN' : (271, 1, 1, 'V', 'V'),
'GCD' : ( -1, 1, 29, 'V', 'V+'),
'GEOMEAN' : (319, 1, 30, 'V', 'D+'),
'GESTEP' : ( -1, 1, 2, 'V', 'VV'),
'GETPIVOTDATA': (358, 2, 30, 'A', 'VAV+'),
'GROWTH' : ( 52, 1, 4, 'A', 'RRRV'),
'HARMEAN' : (320, 1, 30, 'V', 'D+'),
'HEX2BIN' : ( -1, 1, 2, 'V', 'VV'),
'HEX2DEC' : ( -1, 1, 1, 'V', 'V'),
'HEX2OCT' : ( -1, 1, 2, 'V', 'VV'),
'HLOOKUP' : (101, 3, 4, 'V', 'VRRV'),
'HOUR' : ( 71, 1, 1, 'V', 'V'),
'HYPERLINK' : (359, 1, 2, 'V', 'VV'),
'HYPGEOMDIST' : (289, 4, 4, 'V', 'VVVV'),
'IF' : ( 1, 2, 3, 'R', 'VRR'),
'IMABS' : ( -1, 1, 1, 'V', 'V'),
'IMAGINARY' : ( -1, 1, 1, 'V', 'V'),
'IMARGUMENT' : ( -1, 1, 1, 'V', 'V'),
'IMCONJUGATE' : ( -1, 1, 1, 'V', 'V'),
'IMCOS' : ( -1, 1, 1, 'V', 'V'),
'IMDIV' : ( -1, 2, 2, 'V', 'VV'),
'IMEXP' : ( -1, 1, 1, 'V', 'V'),
'IMLN' : ( -1, 1, 1, 'V', 'V'),
'IMLOG10' : ( -1, 1, 1, 'V', 'V'),
'IMLOG2' : ( -1, 1, 1, 'V', 'V'),
'IMPOWER' : ( -1, 2, 2, 'V', 'VV'),
'IMPRODUCT' : ( -1, 2, 2, 'V', 'VV'),
'IMREAL' : ( -1, 1, 1, 'V', 'V'),
'IMSIN' : ( -1, 1, 1, 'V', 'V'),
'IMSQRT' : ( -1, 1, 1, 'V', 'V'),
'IMSUB' : ( -1, 2, 2, 'V', 'VV'),
'IMSUM' : ( -1, 1, 29, 'V', 'V+'),
'INDEX' : ( 29, 2, 4, 'R', 'RVVV'),
'INDIRECT' : (148, 1, 2, 'R', 'VV'),
'INFO' : (244, 1, 1, 'V', 'V'),
'INT' : ( 25, 1, 1, 'V', 'V'),
'INTERCEPT' : (311, 2, 2, 'V', 'AA'),
'INTRATE' : ( -1, 4, 5, 'V', 'VVVVV'),
'IPMT' : (167, 4, 6, 'V', 'VVVVVV'),
'IRR' : ( 62, 1, 2, 'V', 'RV'),
'ISBLANK' : (129, 1, 1, 'V', 'V'),
'ISERR' : (126, 1, 1, 'V', 'V'),
'ISERROR' : ( 3, 1, 1, 'V', 'V'),
'ISEVEN' : ( -1, 1, 1, 'V', 'V'),
'ISLOGICAL' : (198, 1, 1, 'V', 'V'),
'ISNA' : ( 2, 1, 1, 'V', 'V'),
'ISNONTEXT' : (190, 1, 1, 'V', 'V'),
'ISNUMBER' : (128, 1, 1, 'V', 'V'),
'ISODD' : ( -1, 1, 1, 'V', 'V'),
'ISPMT' : (350, 4, 4, 'V', 'VVVV'),
'ISREF' : (105, 1, 1, 'V', 'R'),
'ISTEXT' : (127, 1, 1, 'V', 'V'),
'KURT' : (322, 1, 30, 'V', 'D+'),
'LARGE' : (325, 2, 2, 'V', 'RV'),
'LCM' : ( -1, 1, 29, 'V', 'V+'),
'LEFT' : (115, 1, 2, 'V', 'VV'),
'LEFTB' : (208, 1, 2, 'V', 'VV'),
'LEN' : ( 32, 1, 1, 'V', 'V'),
'LENB' : (211, 1, 1, 'V', 'V'),
'LINEST' : ( 49, 1, 4, 'A', 'RRVV'),
'LN' : ( 22, 1, 1, 'V', 'V'),
'LOG' : (109, 1, 2, 'V', 'VV'),
'LOG10' : ( 23, 1, 1, 'V', 'V'),
'LOGEST' : ( 51, 1, 4, 'A', 'RRVV'),
'LOGINV' : (291, 3, 3, 'V', 'VVV'),
'LOGNORMDIST' : (290, 3, 3, 'V', 'VVV'),
'LOOKUP' : ( 28, 2, 3, 'V', 'VRR'),
'LOWER' : (112, 1, 1, 'V', 'V'),
'MATCH' : ( 64, 2, 3, 'V', 'VRR'),
'MAX' : ( 7, 1, 30, 'V', 'D+'),
'MAXA' : (362, 1, 30, 'V', 'D+'),
'MDETERM' : (163, 1, 1, 'V', 'A'),
'MDURATION' : ( -1, 5, 6, 'V', 'VVVVVV'),
'MEDIAN' : (227, 1, 30, 'V', 'D+'),
'MID' : ( 31, 3, 3, 'V', 'VVV'),
'MIDB' : (210, 3, 3, 'V', 'VVV'),
'MIN' : ( 6, 1, 30, 'V', 'D+'),
'MINA' : (363, 1, 30, 'V', 'D+'),
'MINUTE' : ( 72, 1, 1, 'V', 'V'),
'MINVERSE' : (164, 1, 1, 'A', 'A'),
'MIRR' : ( 61, 3, 3, 'V', 'RVV'),
'MMULT' : (165, 2, 2, 'A', 'AA'),
'MOD' : ( 39, 2, 2, 'V', 'VV'),
'MODE' : (330, 1, 30, 'V', 'A+'), ################ weird #################
'MONTH' : ( 68, 1, 1, 'V', 'V'),
'MROUND' : ( -1, 2, 2, 'V', 'VV'),
'MULTINOMIAL' : ( -1, 1, 29, 'V', 'V+'),
'N' : (131, 1, 1, 'V', 'R'),
'NA' : ( 10, 0, 0, 'V', '-'),
'NEGBINOMDIST': (292, 3, 3, 'V', 'VVV'),
'NETWORKDAYS' : ( -1, 2, 3, 'V', 'VVR'),
'NOMINAL' : ( -1, 2, 2, 'V', 'VV'),
'NORMDIST' : (293, 4, 4, 'V', 'VVVV'),
'NORMINV' : (295, 3, 3, 'V', 'VVV'),
'NORMSDIST' : (294, 1, 1, 'V', 'V'),
'NORMSINV' : (296, 1, 1, 'V', 'V'),
'NOT' : ( 38, 1, 1, 'V', 'V'),
'NOW' : ( 74, 0, 0, 'V', '-'),
'NPER' : ( 58, 3, 5, 'V', 'VVVVV'),
'NPV' : ( 11, 2, 30, 'V', 'VD+'),
'OCT2BIN' : ( -1, 1, 2, 'V', 'VV'),
'OCT2DEC' : ( -1, 1, 1, 'V', 'V'),
'OCT2HEX' : ( -1, 1, 2, 'V', 'VV'),
'ODD' : (298, 1, 1, 'V', 'V'),
'ODDFPRICE' : ( -1, 9, 9, 'V', 'VVVVVVVVV'),
'ODDFYIELD' : ( -1, 9, 9, 'V', 'VVVVVVVVV'),
'ODDLPRICE' : ( -1, 8, 8, 'V', 'VVVVVVVV'),
'ODDLYIELD' : ( -1, 8, 8, 'V', 'VVVVVVVV'),
'OFFSET' : ( 78, 3, 5, 'R', 'RVVVV'),
'OR' : ( 37, 1, 30, 'V', 'D+'),
'PEARSON' : (312, 2, 2, 'V', 'AA'),
'PERCENTILE' : (328, 2, 2, 'V', 'RV'),
'PERCENTRANK' : (329, 2, 3, 'V', 'RVV'),
'PERMUT' : (299, 2, 2, 'V', 'VV'),
'PHONETIC' : (360, 1, 1, 'V', 'R'),
'PI' : ( 19, 0, 0, 'V', '-'),
'PMT' : ( 59, 3, 5, 'V', 'VVVVV'),
'POISSON' : (300, 3, 3, 'V', 'VVV'),
'POWER' : (337, 2, 2, 'V', 'VV'),
'PPMT' : (168, 4, 6, 'V', 'VVVVVV'),
'PRICE' : ( -1, 6, 7, 'V', 'VVVVVVV'),
'PRICEDISC' : ( -1, 4, 5, 'V', 'VVVVV'),
'PRICEMAT' : ( -1, 5, 6, 'V', 'VVVVVV'),
'PROB' : (317, 3, 4, 'V', 'AAVV'),
'PRODUCT' : (183, 1, 30, 'V', 'D+'),
'PROPER' : (114, 1, 1, 'V', 'V'),
'PV' : ( 56, 3, 5, 'V', 'VVVVV'),
'QUARTILE' : (327, 2, 2, 'V', 'RV'),
'QUOTIENT' : ( -1, 2, 2, 'V', 'VV'),
'RADIANS' : (342, 1, 1, 'V', 'V'),
'RAND' : ( 63, 0, 0, 'V', '-'),
'RANDBETWEEN' : ( -1, 2, 2, 'V', 'VV'),
'RANK' : (216, 2, 3, 'V', 'VRV'),
'RATE' : ( 60, 3, 6, 'V', 'VVVVVV'),
'RECEIVED' : ( -1, 4, 5, 'V', 'VVVVV'),
'REPLACE' : (119, 4, 4, 'V', 'VVVV'),
'REPLACEB' : (207, 4, 4, 'V', 'VVVV'),
'REPT' : ( 30, 2, 2, 'V', 'VV'),
'RIGHT' : (116, 1, 2, 'V', 'VV'),
'RIGHTB' : (209, 1, 2, 'V', 'VV'),
'ROMAN' : (354, 1, 2, 'V', 'VV'),
'ROUND' : ( 27, 2, 2, 'V', 'VV'),
'ROUNDDOWN' : (213, 2, 2, 'V', 'VV'),
'ROUNDUP' : (212, 2, 2, 'V', 'VV'),
'ROW' : ( 8, 0, 1, 'V', 'R'),
'ROWS' : ( 76, 1, 1, 'V', 'R'),
'RSQ' : (313, 2, 2, 'V', 'AA'),
'RTD' : (379, 3, 30, 'A', 'VVV+'),
'SEARCH' : ( 82, 2, 3, 'V', 'VVV'),
'SEARCHB' : (206, 2, 3, 'V', 'VVV'),
'SECOND' : ( 73, 1, 1, 'V', 'V'),
'SERIESSUM' : ( -1, 4, 4, 'V', 'VVVA'),
'SIGN' : ( 26, 1, 1, 'V', 'V'),
'SIN' : ( 15, 1, 1, 'V', 'V'),
'SINH' : (229, 1, 1, 'V', 'V'),
'SKEW' : (323, 1, 30, 'V', 'D+'),
'SLN' : (142, 3, 3, 'V', 'VVV'),
'SLOPE' : (315, 2, 2, 'V', 'AA'),
'SMALL' : (326, 2, 2, 'V', 'RV'),
'SQRT' : ( 20, 1, 1, 'V', 'V'),
'SQRTPI' : ( -1, 1, 1, 'V', 'V'),
'STANDARDIZE' : (297, 3, 3, 'V', 'VVV'),
'STDEV' : ( 12, 1, 30, 'V', 'D+'),
'STDEVA' : (366, 1, 30, 'V', 'D+'),
'STDEVP' : (193, 1, 30, 'V', 'D+'),
'STDEVPA' : (364, 1, 30, 'V', 'D+'),
'STEYX' : (314, 2, 2, 'V', 'AA'),
'SUBSTITUTE' : (120, 3, 4, 'V', 'VVVV'),
'SUBTOTAL' : (344, 2, 30, 'V', 'VR+'),
'SUM' : ( 4, 1, 30, 'V', 'D+'),
'SUMIF' : (345, 2, 3, 'V', 'RVR'),
'SUMPRODUCT' : (228, 1, 30, 'V', 'A+'),
'SUMSQ' : (321, 1, 30, 'V', 'D+'),
'SUMX2MY2' : (304, 2, 2, 'V', 'AA'),
'SUMX2PY2' : (305, 2, 2, 'V', 'AA'),
'SUMXMY2' : (303, 2, 2, 'V', 'AA'),
'SYD' : (143, 4, 4, 'V', 'VVVV'),
'T' : (130, 1, 1, 'V', 'R'),
'TAN' : ( 17, 1, 1, 'V', 'V'),
'TANH' : (231, 1, 1, 'V', 'V'),
'TBILLEQ' : ( -1, 3, 3, 'V', 'VVV'),
'TBILLPRICE' : ( -1, 3, 3, 'V', 'VVV'),
'TBILLYIELD' : ( -1, 3, 3, 'V', 'VVV'),
'TDIST' : (301, 3, 3, 'V', 'VVV'),
'TEXT' : ( 48, 2, 2, 'V', 'VV'),
'TIME' : ( 66, 3, 3, 'V', 'VVV'),
'TIMEVALUE' : (141, 1, 1, 'V', 'V'),
'TINV' : (332, 2, 2, 'V', 'VV'),
'TODAY' : (221, 0, 0, 'V', '-'),
'TRANSPOSE' : ( 83, 1, 1, 'A', 'A'),
'TREND' : ( 50, 1, 4, 'A', 'RRRV'),
'TRIM' : (118, 1, 1, 'V', 'V'),
'TRIMMEAN' : (331, 2, 2, 'V', 'RV'),
'TRUE' : ( 34, 0, 0, 'V', '-'),
'TRUNC' : (197, 1, 2, 'V', 'VV'),
'TTEST' : (316, 4, 4, 'V', 'AAVV'),
'TYPE' : ( 86, 1, 1, 'V', 'V'),
'UPPER' : (113, 1, 1, 'V', 'V'),
'USDOLLAR' : (204, 1, 2, 'V', 'VV'),
'VALUE' : ( 33, 1, 1, 'V', 'V'),
'VAR' : ( 46, 1, 30, 'V', 'D+'),
'VARA' : (367, 1, 30, 'V', 'D+'),
'VARP' : (194, 1, 30, 'V', 'D+'),
'VARPA' : (365, 1, 30, 'V', 'D+'),
'VDB' : (222, 5, 7, 'V', 'VVVVVVV'),
'VLOOKUP' : (102, 3, 4, 'V', 'VRRV'),
'WEEKDAY' : ( 70, 1, 2, 'V', 'VV'),
'WEEKNUM' : ( -1, 1, 2, 'V', 'VV'),
'WEIBULL' : (302, 4, 4, 'V', 'VVVV'),
'WORKDAY' : ( -1, 2, 3, 'V', 'VVR'),
'XIRR' : ( -1, 2, 3, 'V', 'AAV'),
'XNPV' : ( -1, 3, 3, 'V', 'VAA'),
'YEAR' : ( 69, 1, 1, 'V', 'V'),
'YEARFRAC' : ( -1, 2, 3, 'V', 'VVV'),
'YIELD' : ( -1, 6, 7, 'V', 'VVVVVVV'),
'YIELDDISC' : ( -1, 4, 5, 'V', 'VVVVV'),
'YIELDMAT' : ( -1, 5, 6, 'V', 'VVVVVV'),
'ZTEST' : (324, 2, 3, 'V', 'RVV'),
}
# Formulas Parse things
ptgExp = 0x01
ptgTbl = 0x02
ptgAdd = 0x03
ptgSub = 0x04
ptgMul = 0x05
ptgDiv = 0x06
ptgPower = 0x07
ptgConcat = 0x08
ptgLT = 0x09
ptgLE = 0x0a
ptgEQ = 0x0b
ptgGE = 0x0c
ptgGT = 0x0d
ptgNE = 0x0e
ptgIsect = 0x0f
ptgUnion = 0x10
ptgRange = 0x11
ptgUplus = 0x12
ptgUminus = 0x13
ptgPercent = 0x14
ptgParen = 0x15
ptgMissArg = 0x16
ptgStr = 0x17
ptgExtend = 0x18
ptgAttr = 0x19
ptgSheet = 0x1a
ptgEndSheet = 0x1b
ptgErr = 0x1c
ptgBool = 0x1d
ptgInt = 0x1e
ptgNum = 0x1f
ptgArrayR = 0x20
ptgFuncR = 0x21
ptgFuncVarR = 0x22
ptgNameR = 0x23
ptgRefR = 0x24
ptgAreaR = 0x25
ptgMemAreaR = 0x26
ptgMemErrR = 0x27
ptgMemNoMemR = 0x28
ptgMemFuncR = 0x29
ptgRefErrR = 0x2a
ptgAreaErrR = 0x2b
ptgRefNR = 0x2c
ptgAreaNR = 0x2d
ptgMemAreaNR = 0x2e
ptgMemNoMemNR = 0x2f
ptgNameXR = 0x39
ptgRef3dR = 0x3a
ptgArea3dR = 0x3b
ptgRefErr3dR = 0x3c
ptgAreaErr3dR = 0x3d
ptgArrayV = 0x40
ptgFuncV = 0x41
ptgFuncVarV = 0x42
ptgNameV = 0x43
ptgRefV = 0x44
ptgAreaV = 0x45
ptgMemAreaV = 0x46
ptgMemErrV = 0x47
ptgMemNoMemV = 0x48
ptgMemFuncV = 0x49
ptgRefErrV = 0x4a
ptgAreaErrV = 0x4b
ptgRefNV = 0x4c
ptgAreaNV = 0x4d
ptgMemAreaNV = 0x4e
ptgMemNoMemNV = 0x4f
ptgFuncCEV = 0x58
ptgNameXV = 0x59
ptgRef3dV = 0x5a
ptgArea3dV = 0x5b
ptgRefErr3dV = 0x5c
ptgAreaErr3dV = 0x5d
ptgArrayA = 0x60
ptgFuncA = 0x61
ptgFuncVarA = 0x62
ptgNameA = 0x63
ptgRefA = 0x64
ptgAreaA = 0x65
ptgMemAreaA = 0x66
ptgMemErrA = 0x67
ptgMemNoMemA = 0x68
ptgMemFuncA = 0x69
ptgRefErrA = 0x6a
ptgAreaErrA = 0x6b
ptgRefNA = 0x6c
ptgAreaNA = 0x6d
ptgMemAreaNA = 0x6e
ptgMemNoMemNA = 0x6f
ptgFuncCEA = 0x78
ptgNameXA = 0x79
ptgRef3dA = 0x7a
ptgArea3dA = 0x7b
ptgRefErr3dA = 0x7c
ptgAreaErr3dA = 0x7d
PtgNames = {
ptgExp : "ptgExp",
ptgTbl : "ptgTbl",
ptgAdd : "ptgAdd",
ptgSub : "ptgSub",
ptgMul : "ptgMul",
ptgDiv : "ptgDiv",
ptgPower : "ptgPower",
ptgConcat : "ptgConcat",
ptgLT : "ptgLT",
ptgLE : "ptgLE",
ptgEQ : "ptgEQ",
ptgGE : "ptgGE",
ptgGT : "ptgGT",
ptgNE : "ptgNE",
ptgIsect : "ptgIsect",
ptgUnion : "ptgUnion",
ptgRange : "ptgRange",
ptgUplus : "ptgUplus",
ptgUminus : "ptgUminus",
ptgPercent : "ptgPercent",
ptgParen : "ptgParen",
ptgMissArg : "ptgMissArg",
ptgStr : "ptgStr",
ptgExtend : "ptgExtend",
ptgAttr : "ptgAttr",
ptgSheet : "ptgSheet",
ptgEndSheet : "ptgEndSheet",
ptgErr : "ptgErr",
ptgBool : "ptgBool",
ptgInt : "ptgInt",
ptgNum : "ptgNum",
ptgArrayR : "ptgArrayR",
ptgFuncR : "ptgFuncR",
ptgFuncVarR : "ptgFuncVarR",
ptgNameR : "ptgNameR",
ptgRefR : "ptgRefR",
ptgAreaR : "ptgAreaR",
ptgMemAreaR : "ptgMemAreaR",
ptgMemErrR : "ptgMemErrR",
ptgMemNoMemR : "ptgMemNoMemR",
ptgMemFuncR : "ptgMemFuncR",
ptgRefErrR : "ptgRefErrR",
ptgAreaErrR : "ptgAreaErrR",
ptgRefNR : "ptgRefNR",
ptgAreaNR : "ptgAreaNR",
ptgMemAreaNR : "ptgMemAreaNR",
ptgMemNoMemNR : "ptgMemNoMemNR",
ptgNameXR : "ptgNameXR",
ptgRef3dR : "ptgRef3dR",
ptgArea3dR : "ptgArea3dR",
ptgRefErr3dR : "ptgRefErr3dR",
ptgAreaErr3dR : "ptgAreaErr3dR",
ptgArrayV : "ptgArrayV",
ptgFuncV : "ptgFuncV",
ptgFuncVarV : "ptgFuncVarV",
ptgNameV : "ptgNameV",
ptgRefV : "ptgRefV",
ptgAreaV : "ptgAreaV",
ptgMemAreaV : "ptgMemAreaV",
ptgMemErrV : "ptgMemErrV",
ptgMemNoMemV : "ptgMemNoMemV",
ptgMemFuncV : "ptgMemFuncV",
ptgRefErrV : "ptgRefErrV",
ptgAreaErrV : "ptgAreaErrV",
ptgRefNV : "ptgRefNV",
ptgAreaNV : "ptgAreaNV",
ptgMemAreaNV : "ptgMemAreaNV",
ptgMemNoMemNV : "ptgMemNoMemNV",
ptgFuncCEV : "ptgFuncCEV",
ptgNameXV : "ptgNameXV",
ptgRef3dV : "ptgRef3dV",
ptgArea3dV : "ptgArea3dV",
ptgRefErr3dV : "ptgRefErr3dV",
ptgAreaErr3dV : "ptgAreaErr3dV",
ptgArrayA : "ptgArrayA",
ptgFuncA : "ptgFuncA",
ptgFuncVarA : "ptgFuncVarA",
ptgNameA : "ptgNameA",
ptgRefA : "ptgRefA",
ptgAreaA : "ptgAreaA",
ptgMemAreaA : "ptgMemAreaA",
ptgMemErrA : "ptgMemErrA",
ptgMemNoMemA : "ptgMemNoMemA",
ptgMemFuncA : "ptgMemFuncA",
ptgRefErrA : "ptgRefErrA",
ptgAreaErrA : "ptgAreaErrA",
ptgRefNA : "ptgRefNA",
ptgAreaNA : "ptgAreaNA",
ptgMemAreaNA : "ptgMemAreaNA",
ptgMemNoMemNA : "ptgMemNoMemNA",
ptgFuncCEA : "ptgFuncCEA",
ptgNameXA : "ptgNameXA",
ptgRef3dA : "ptgRef3dA",
ptgArea3dA : "ptgArea3dA",
ptgRefErr3dA : "ptgRefErr3dA",
ptgAreaErr3dA : "ptgAreaErr3dA"
}
error_msg_by_code = {
0x00: "#NULL!", # intersection of two cell ranges is empty
0x07: "#DIV/0!", # division by zero
0x0F: "#VALUE!", # wrong type of operand
0x17: "#REF!", # illegal or deleted cell reference
0x1D: "#NAME?", # wrong function or range name
0x24: "#NUM!", # value range overflow
0x2A: "#N/A!" # argument or function not available
}
+261
View File
@@ -0,0 +1,261 @@
#!/usr/bin/env python
'''
The XF record is able to store explicit cell formatting attributes or the
attributes of a cell style. Explicit formatting includes the reference to
a cell style XF record. This allows to extend a defined cell style with
some explicit attributes. The formatting attributes are divided into
6 groups:
Group Attributes
-------------------------------------
Number format Number format index (index to FORMAT record)
Font Font index (index to FONT record)
Alignment Horizontal and vertical alignment, text wrap, indentation,
orientation/rotation, text direction
Border Border line styles and colours
Background Background area style and colours
Protection Cell locked, formula hidden
For each group a flag in the cell XF record specifies whether to use the
attributes contained in that XF record or in the referenced style
XF record. In style XF records, these flags specify whether the attributes
will overwrite explicit cell formatting when the style is applied to
a cell. Changing a cell style (without applying this style to a cell) will
change all cells which already use that style and do not contain explicit
cell attributes for the changed style attributes. If a cell XF record does
not contain explicit attributes in a group (if the attribute group flag
is not set), it repeats the attributes of its style XF record.
'''
from . import BIFFRecords
class Font(object):
ESCAPEMENT_NONE = 0x00
ESCAPEMENT_SUPERSCRIPT = 0x01
ESCAPEMENT_SUBSCRIPT = 0x02
UNDERLINE_NONE = 0x00
UNDERLINE_SINGLE = 0x01
UNDERLINE_SINGLE_ACC = 0x21
UNDERLINE_DOUBLE = 0x02
UNDERLINE_DOUBLE_ACC = 0x22
FAMILY_NONE = 0x00
FAMILY_ROMAN = 0x01
FAMILY_SWISS = 0x02
FAMILY_MODERN = 0x03
FAMILY_SCRIPT = 0x04
FAMILY_DECORATIVE = 0x05
CHARSET_ANSI_LATIN = 0x00
CHARSET_SYS_DEFAULT = 0x01
CHARSET_SYMBOL = 0x02
CHARSET_APPLE_ROMAN = 0x4D
CHARSET_ANSI_JAP_SHIFT_JIS = 0x80
CHARSET_ANSI_KOR_HANGUL = 0x81
CHARSET_ANSI_KOR_JOHAB = 0x82
CHARSET_ANSI_CHINESE_GBK = 0x86
CHARSET_ANSI_CHINESE_BIG5 = 0x88
CHARSET_ANSI_GREEK = 0xA1
CHARSET_ANSI_TURKISH = 0xA2
CHARSET_ANSI_VIETNAMESE = 0xA3
CHARSET_ANSI_HEBREW = 0xB1
CHARSET_ANSI_ARABIC = 0xB2
CHARSET_ANSI_BALTIC = 0xBA
CHARSET_ANSI_CYRILLIC = 0xCC
CHARSET_ANSI_THAI = 0xDE
CHARSET_ANSI_LATIN_II = 0xEE
CHARSET_OEM_LATIN_I = 0xFF
def __init__(self):
# twip = 1/20 of a point = 1/1440 of a inch
# usually resolution == 96 pixels per 1 inch
# (rarely 120 pixels per 1 inch or another one)
self.height = 0x00C8 # 200: this is font with height 10 points
self.italic = False
self.struck_out = False
self.outline = False
self.shadow = False
self.colour_index = 0x7FFF
self.bold = False
self._weight = 0x0190 # 0x02BC gives bold font
self.escapement = self.ESCAPEMENT_NONE
self.underline = self.UNDERLINE_NONE
self.family = self.FAMILY_NONE
self.charset = self.CHARSET_SYS_DEFAULT
self.name = b'Arial'
def get_biff_record(self):
height = self.height
options = 0x00
if self.bold:
options |= 0x01
self._weight = 0x02BC
if self.italic:
options |= 0x02
if self.underline != self.UNDERLINE_NONE:
options |= 0x04
if self.struck_out:
options |= 0x08
if self.outline:
options |= 0x010
if self.shadow:
options |= 0x020
colour_index = self.colour_index
weight = self._weight
escapement = self.escapement
underline = self.underline
family = self.family
charset = self.charset
name = self.name
return BIFFRecords.FontRecord(height, options, colour_index, weight, escapement,
underline, family, charset,
name)
def _search_key(self):
return (
self.height,
self.italic,
self.struck_out,
self.outline,
self.shadow,
self.colour_index,
self.bold,
self._weight,
self.escapement,
self.underline,
self.family,
self.charset,
self.name,
)
class Alignment(object):
HORZ_GENERAL = 0x00
HORZ_LEFT = 0x01
HORZ_CENTER = 0x02
HORZ_RIGHT = 0x03
HORZ_FILLED = 0x04
HORZ_JUSTIFIED = 0x05 # BIFF4-BIFF8X
HORZ_CENTER_ACROSS_SEL = 0x06 # Centred across selection (BIFF4-BIFF8X)
HORZ_DISTRIBUTED = 0x07 # Distributed (BIFF8X)
VERT_TOP = 0x00
VERT_CENTER = 0x01
VERT_BOTTOM = 0x02
VERT_JUSTIFIED = 0x03 # Justified (BIFF5-BIFF8X)
VERT_DISTRIBUTED = 0x04 # Distributed (BIFF8X)
DIRECTION_GENERAL = 0x00 # BIFF8X
DIRECTION_LR = 0x01
DIRECTION_RL = 0x02
ORIENTATION_NOT_ROTATED = 0x00
ORIENTATION_STACKED = 0x01
ORIENTATION_90_CC = 0x02
ORIENTATION_90_CW = 0x03
ROTATION_0_ANGLE = 0x00
ROTATION_STACKED = 0xFF
WRAP_AT_RIGHT = 0x01
NOT_WRAP_AT_RIGHT = 0x00
SHRINK_TO_FIT = 0x01
NOT_SHRINK_TO_FIT = 0x00
def __init__(self):
self.horz = self.HORZ_GENERAL
self.vert = self.VERT_BOTTOM
self.dire = self.DIRECTION_GENERAL
self.orie = self.ORIENTATION_NOT_ROTATED
self.rota = self.ROTATION_0_ANGLE
self.wrap = self.NOT_WRAP_AT_RIGHT
self.shri = self.NOT_SHRINK_TO_FIT
self.inde = 0
self.merg = 0
def _search_key(self):
return (
self.horz, self.vert, self.dire, self.orie, self.rota,
self.wrap, self.shri, self.inde, self.merg,
)
class Borders(object):
NO_LINE = 0x00
THIN = 0x01
MEDIUM = 0x02
DASHED = 0x03
DOTTED = 0x04
THICK = 0x05
DOUBLE = 0x06
HAIR = 0x07
#The following for BIFF8
MEDIUM_DASHED = 0x08
THIN_DASH_DOTTED = 0x09
MEDIUM_DASH_DOTTED = 0x0A
THIN_DASH_DOT_DOTTED = 0x0B
MEDIUM_DASH_DOT_DOTTED = 0x0C
SLANTED_MEDIUM_DASH_DOTTED = 0x0D
NEED_DIAG1 = 0x01
NEED_DIAG2 = 0x01
NO_NEED_DIAG1 = 0x00
NO_NEED_DIAG2 = 0x00
def __init__(self):
self.left = self.NO_LINE
self.right = self.NO_LINE
self.top = self.NO_LINE
self.bottom = self.NO_LINE
self.diag = self.NO_LINE
self.left_colour = 0x40
self.right_colour = 0x40
self.top_colour = 0x40
self.bottom_colour = 0x40
self.diag_colour = 0x40
self.need_diag1 = self.NO_NEED_DIAG1
self.need_diag2 = self.NO_NEED_DIAG2
def _search_key(self):
return (
self.left, self.right, self.top, self.bottom, self.diag,
self.left_colour, self.right_colour, self.top_colour,
self.bottom_colour, self.diag_colour,
self.need_diag1, self.need_diag2,
)
class Pattern(object):
# patterns 0x00 - 0x12
NO_PATTERN = 0x00
SOLID_PATTERN = 0x01
def __init__(self):
self.pattern = self.NO_PATTERN
self.pattern_fore_colour = 0x40
self.pattern_back_colour = 0x41
def _search_key(self):
return (
self.pattern,
self.pattern_fore_colour,
self.pattern_back_colour,
)
class Protection(object):
def __init__(self):
self.cell_locked = 1
self.formula_hidden = 0
def _search_key(self):
return (
self.cell_locked,
self.formula_hidden,
)
+253
View File
@@ -0,0 +1,253 @@
# -*- coding: windows-1252 -*-
from . import BIFFRecords
from . import Style
from .Cell import StrCell, BlankCell, NumberCell, FormulaCell, MulBlankCell, BooleanCell, ErrorCell, \
_get_cells_biff_data_mul
from . import ExcelFormula
import datetime as dt
try:
from decimal import Decimal
except ImportError:
# Python 2.3: decimal not supported; create dummy Decimal class
class Decimal(object):
pass
class Row(object):
__slots__ = [# private variables
"__idx",
"__parent",
"__parent_wb",
"__cells",
"__min_col_idx",
"__max_col_idx",
"__xf_index",
"__has_default_xf_index",
"__height_in_pixels",
# public variables
"height",
"has_default_height",
"height_mismatch",
"level",
"collapse",
"hidden",
"space_above",
"space_below"]
def __init__(self, rowx, parent_sheet):
if not (isinstance(rowx, int) and 0 <= rowx <= 65535):
raise ValueError("row index (%r) not an int in range(65536)" % rowx)
self.__idx = rowx
self.__parent = parent_sheet
self.__parent_wb = parent_sheet.get_parent()
self.__cells = {}
self.__min_col_idx = 0
self.__max_col_idx = 0
self.__xf_index = 0x0F
self.__has_default_xf_index = 0
self.__height_in_pixels = 0x11
self.height = 0x00FF
self.has_default_height = 0x00
self.height_mismatch = 0
self.level = 0
self.collapse = 0
self.hidden = 0
self.space_above = 0
self.space_below = 0
def __adjust_height(self, style):
twips = style.font.height
points = float(twips)/20.0
# Cell height in pixels can be calcuted by following approx. formula:
# cell height in pixels = font height in points * 83/50 + 2/5
# It works when screen resolution is 96 dpi
pix = int(round(points*83.0/50.0 + 2.0/5.0))
if pix > self.__height_in_pixels:
self.__height_in_pixels = pix
def __adjust_bound_col_idx(self, *args):
for arg in args:
iarg = int(arg)
if not ((0 <= iarg <= 255) and arg == iarg):
raise ValueError("column index (%r) not an int in range(256)" % arg)
sheet = self.__parent
if iarg < self.__min_col_idx:
self.__min_col_idx = iarg
if iarg > self.__max_col_idx:
self.__max_col_idx = iarg
if iarg < sheet.first_used_col:
sheet.first_used_col = iarg
if iarg > sheet.last_used_col:
sheet.last_used_col = iarg
def __excel_date_dt(self, date):
if isinstance(date, dt.date) and (not isinstance(date, dt.datetime)):
epoch = dt.date(1899, 12, 31)
elif isinstance(date, dt.time):
date = dt.datetime.combine(dt.datetime(1900, 1, 1), date)
epoch = dt.datetime(1900, 1, 1, 0, 0, 0)
else:
epoch = dt.datetime(1899, 12, 31, 0, 0, 0)
delta = date - epoch
xldate = delta.days + float(delta.seconds) / (24*60*60)
# Add a day for Excel's missing leap day in 1900
if xldate > 59:
xldate += 1
return xldate
def get_height_in_pixels(self):
return self.__height_in_pixels
def set_style(self, style):
self.__adjust_height(style)
self.__xf_index = self.__parent_wb.add_style(style)
self.__has_default_xf_index = 1
def get_xf_index(self):
return self.__xf_index
def get_cells_count(self):
return len(self.__cells)
def get_min_col(self):
return self.__min_col_idx
def get_max_col(self):
return self.__max_col_idx
def get_row_biff_data(self):
height_options = (self.height & 0x07FFF)
height_options |= (self.has_default_height & 0x01) << 15
options = (self.level & 0x07) << 0
options |= (self.collapse & 0x01) << 4
options |= (self.hidden & 0x01) << 5
options |= (self.height_mismatch & 0x01) << 6
options |= (self.__has_default_xf_index & 0x01) << 7
options |= (0x01 & 0x01) << 8
options |= (self.__xf_index & 0x0FFF) << 16
options |= (self.space_above & 1) << 28
options |= (self.space_below & 1) << 29
return BIFFRecords.RowRecord(self.__idx, self.__min_col_idx,
self.__max_col_idx, height_options, options).get()
def insert_cell(self, col_index, cell_obj):
if col_index in self.__cells:
if not self.__parent._cell_overwrite_ok:
msg = "Attempt to overwrite cell: sheetname=%r rowx=%d colx=%d" \
% (self.__parent.name, self.__idx, col_index)
raise Exception(msg)
prev_cell_obj = self.__cells[col_index]
sst_idx = getattr(prev_cell_obj, 'sst_idx', None)
if sst_idx is not None:
self.__parent_wb.del_str(sst_idx)
self.__cells[col_index] = cell_obj
def insert_mulcells(self, colx1, colx2, cell_obj):
self.insert_cell(colx1, cell_obj)
for col_index in range(colx1+1, colx2+1):
self.insert_cell(col_index, None)
def get_cells_biff_data(self):
cell_items = [item for item in self.__cells.items() if item[1] is not None]
cell_items.sort() # in column order
return _get_cells_biff_data_mul(self.__idx, cell_items)
# previously:
# return ''.join([cell.get_biff_data() for colx, cell in cell_items])
def get_index(self):
return self.__idx
def set_cell_text(self, colx, value, style=Style.default_style):
self.__adjust_height(style)
self.__adjust_bound_col_idx(colx)
xf_index = self.__parent_wb.add_style(style)
self.insert_cell(colx, StrCell(self.__idx, colx, xf_index, self.__parent_wb.add_str(value)))
def set_cell_blank(self, colx, style=Style.default_style):
self.__adjust_height(style)
self.__adjust_bound_col_idx(colx)
xf_index = self.__parent_wb.add_style(style)
self.insert_cell(colx, BlankCell(self.__idx, colx, xf_index))
def set_cell_mulblanks(self, first_colx, last_colx, style=Style.default_style):
assert 0 <= first_colx <= last_colx <= 255
self.__adjust_height(style)
self.__adjust_bound_col_idx(first_colx, last_colx)
xf_index = self.__parent_wb.add_style(style)
# ncols = last_colx - first_colx + 1
self.insert_mulcells(first_colx, last_colx, MulBlankCell(self.__idx, first_colx, last_colx, xf_index))
def set_cell_number(self, colx, number, style=Style.default_style):
self.__adjust_height(style)
self.__adjust_bound_col_idx(colx)
xf_index = self.__parent_wb.add_style(style)
self.insert_cell(colx, NumberCell(self.__idx, colx, xf_index, number))
def set_cell_date(self, colx, datetime_obj, style=Style.default_style):
self.__adjust_height(style)
self.__adjust_bound_col_idx(colx)
xf_index = self.__parent_wb.add_style(style)
self.insert_cell(colx,
NumberCell(self.__idx, colx, xf_index, self.__excel_date_dt(datetime_obj)))
def set_cell_formula(self, colx, formula, style=Style.default_style, calc_flags=0):
self.__adjust_height(style)
self.__adjust_bound_col_idx(colx)
xf_index = self.__parent_wb.add_style(style)
self.__parent_wb.add_sheet_reference(formula)
self.insert_cell(colx, FormulaCell(self.__idx, colx, xf_index, formula, calc_flags=0))
def set_cell_boolean(self, colx, value, style=Style.default_style):
self.__adjust_height(style)
self.__adjust_bound_col_idx(colx)
xf_index = self.__parent_wb.add_style(style)
self.insert_cell(colx, BooleanCell(self.__idx, colx, xf_index, bool(value)))
def set_cell_error(self, colx, error_string_or_code, style=Style.default_style):
self.__adjust_height(style)
self.__adjust_bound_col_idx(colx)
xf_index = self.__parent_wb.add_style(style)
self.insert_cell(colx, ErrorCell(self.__idx, colx, xf_index, error_string_or_code))
def write(self, col, label, style=Style.default_style):
self.__adjust_height(style)
self.__adjust_bound_col_idx(col)
style_index = self.__parent_wb.add_style(style)
if isinstance(label, str):
if len(label) > 0:
self.insert_cell(col,
StrCell(self.__idx, col, style_index, self.__parent_wb.add_str(label))
)
else:
self.insert_cell(col, BlankCell(self.__idx, col, style_index))
elif isinstance(label, bool): # bool is subclass of int; test bool first
self.insert_cell(col, BooleanCell(self.__idx, col, style_index, label))
elif isinstance(label, (float, int, Decimal)):
self.insert_cell(col, NumberCell(self.__idx, col, style_index, label))
elif isinstance(label, (dt.datetime, dt.date, dt.time)):
date_number = self.__excel_date_dt(label)
self.insert_cell(col, NumberCell(self.__idx, col, style_index, date_number))
elif label is None:
self.insert_cell(col, BlankCell(self.__idx, col, style_index))
elif isinstance(label, ExcelFormula.Formula):
self.__parent_wb.add_sheet_reference(label)
self.insert_cell(col, FormulaCell(self.__idx, col, style_index, label))
else:
raise Exception("Unexpected data type %r" % type(label))
write_blanks = set_cell_mulblanks
+593
View File
@@ -0,0 +1,593 @@
# -*- coding: windows-1252 -*-
from . import Formatting
from .BIFFRecords import *
import collections
FIRST_USER_DEFINED_NUM_FORMAT_IDX = 164
class XFStyle(object):
def __init__(self):
self.num_format_str = 'General'
self.font = Formatting.Font()
self.alignment = Formatting.Alignment()
self.borders = Formatting.Borders()
self.pattern = Formatting.Pattern()
self.protection = Formatting.Protection()
default_style = XFStyle()
class StyleCollection(object):
_std_num_fmt_list = [
'general',
'0',
'0.00',
'#,##0',
'#,##0.00',
'"$"#,##0_);("$"#,##',
'"$"#,##0_);[Red]("$"#,##',
'"$"#,##0.00_);("$"#,##',
'"$"#,##0.00_);[Red]("$"#,##',
'0%',
'0.00%',
'0.00E+00',
'# ?/?',
'# ??/??',
'M/D/YY',
'D-MMM-YY',
'D-MMM',
'MMM-YY',
'h:mm AM/PM',
'h:mm:ss AM/PM',
'h:mm',
'h:mm:ss',
'M/D/YY h:mm',
'_(#,##0_);(#,##0)',
'_(#,##0_);[Red](#,##0)',
'_(#,##0.00_);(#,##0.00)',
'_(#,##0.00_);[Red](#,##0.00)',
'_("$"* #,##0_);_("$"* (#,##0);_("$"* "-"_);_(@_)',
'_(* #,##0_);_(* (#,##0);_(* "-"_);_(@_)',
'_("$"* #,##0.00_);_("$"* (#,##0.00);_("$"* "-"??_);_(@_)',
'_(* #,##0.00_);_(* (#,##0.00);_(* "-"??_);_(@_)',
'mm:ss',
'[h]:mm:ss',
'mm:ss.0',
'##0.0E+0',
'@'
]
def __init__(self, style_compression=0):
self.style_compression = style_compression
self.stats = [0, 0, 0, 0, 0, 0]
self._font_id2x = {}
self._font_x2id = {}
self._font_val2x = {}
for x in (0, 1, 2, 3, 5): # The font with index 4 is omitted in all BIFF versions
font = Formatting.Font()
search_key = font._search_key()
self._font_id2x[font] = x
self._font_x2id[x] = font
self._font_val2x[search_key] = x
self._xf_id2x = {}
self._xf_x2id = {}
self._xf_val2x = {}
self._num_formats = {}
for fmtidx, fmtstr in zip(list(range(0, 23)), StyleCollection._std_num_fmt_list[0:23]):
self._num_formats[fmtstr] = fmtidx
for fmtidx, fmtstr in zip(list(range(37, 50)), StyleCollection._std_num_fmt_list[23:]):
self._num_formats[fmtstr] = fmtidx
self.default_style = XFStyle()
self._default_xf = self._add_style(self.default_style)[0]
def add(self, style):
if style == None:
return 0x10
return self._add_style(style)[1]
def _add_style(self, style):
num_format_str = style.num_format_str
if num_format_str in self._num_formats:
num_format_idx = self._num_formats[num_format_str]
else:
num_format_idx = (
FIRST_USER_DEFINED_NUM_FORMAT_IDX
+ len(self._num_formats)
- len(StyleCollection._std_num_fmt_list)
)
self._num_formats[num_format_str] = num_format_idx
font = style.font
if font in self._font_id2x:
font_idx = self._font_id2x[font]
self.stats[0] += 1
elif self.style_compression:
search_key = font._search_key()
font_idx = self._font_val2x.get(search_key)
if font_idx is not None:
self._font_id2x[font] = font_idx
self.stats[1] += 1
else:
font_idx = len(self._font_x2id) + 1 # Why plus 1? Font 4 is missing
self._font_id2x[font] = font_idx
self._font_val2x[search_key] = font_idx
self._font_x2id[font_idx] = font
self.stats[2] += 1
else:
font_idx = len(self._font_id2x) + 1
self._font_id2x[font] = font_idx
self.stats[2] += 1
gof = (style.alignment, style.borders, style.pattern, style.protection)
xf = (font_idx, num_format_idx) + gof
if xf in self._xf_id2x:
xf_index = self._xf_id2x[xf]
self.stats[3] += 1
elif self.style_compression == 2:
xf_key = (font_idx, num_format_idx) + tuple([obj._search_key() for obj in gof])
xf_index = self._xf_val2x.get(xf_key)
if xf_index is not None:
self._xf_id2x[xf] = xf_index
self.stats[4] += 1
else:
xf_index = 0x10 + len(self._xf_x2id)
self._xf_id2x[xf] = xf_index
self._xf_val2x[xf_key] = xf_index
self._xf_x2id[xf_index] = xf
self.stats[5] += 1
else:
xf_index = 0x10 + len(self._xf_id2x)
self._xf_id2x[xf] = xf_index
self.stats[5] += 1
if xf_index >= 0xFFF:
# 12 bits allowed, 0xFFF is a sentinel value
raise ValueError("More than 4094 XFs (styles)")
return xf, xf_index
def get_biff_data(self):
result = b''
result += self._all_fonts()
result += self._all_num_formats()
result += self._all_cell_styles()
result += self._all_styles()
return result
def _all_fonts(self):
result = b''
if self.style_compression:
alist = list(self._font_x2id.items())
else:
alist = [(x, o) for o, x in list(self._font_id2x.items())]
alist.sort()
for font_idx, font in alist:
result += font.get_biff_record().get()
return result
def _all_num_formats(self):
result = b''
alist = [
(v, k)
for k, v in list(self._num_formats.items())
if v >= FIRST_USER_DEFINED_NUM_FORMAT_IDX
]
alist.sort()
for fmtidx, fmtstr in alist:
result += NumberFormatRecord(fmtidx, fmtstr).get()
return result
def _all_cell_styles(self):
result = b''
for i in range(0, 16):
result += XFRecord(self._default_xf, 'style').get()
if self.style_compression == 2:
alist = list(self._xf_x2id.items())
else:
alist = [(x, o) for o, x in list(self._xf_id2x.items())]
alist.sort()
for xf_idx, xf in alist:
result += XFRecord(xf).get()
return result
def _all_styles(self):
return StyleRecord().get()
# easyxf and its supporting objects ###################################
class EasyXFException(Exception):
pass
class EasyXFCallerError(EasyXFException):
pass
class EasyXFAuthorError(EasyXFException):
pass
class IntULim(object):
# If astring represents a valid unsigned integer ('123', '0xabcd', etc)
# and it is <= limit, return the int value; otherwise return None.
def __init__(self, limit):
self.limit = limit
def __call__(self, astring):
try:
value = int(astring, 0)
except ValueError:
return None
if not 0 <= value <= self.limit:
return None
return value
bool_map = {
# Text values for all Boolean attributes
'1': 1, 'yes': 1, 'true': 1, 'on': 1,
'0': 0, 'no': 0, 'false': 0, 'off': 0,
}
border_line_map = {
# Text values for these borders attributes:
# left, right, top, bottom and diag
'no_line': 0x00,
'thin': 0x01,
'medium': 0x02,
'dashed': 0x03,
'dotted': 0x04,
'thick': 0x05,
'double': 0x06,
'hair': 0x07,
'medium_dashed': 0x08,
'thin_dash_dotted': 0x09,
'medium_dash_dotted': 0x0a,
'thin_dash_dot_dotted': 0x0b,
'medium_dash_dot_dotted': 0x0c,
'slanted_medium_dash_dotted': 0x0d,
}
charset_map = {
# Text values for font.charset
'ansi_latin': 0x00,
'sys_default': 0x01,
'symbol': 0x02,
'apple_roman': 0x4d,
'ansi_jap_shift_jis': 0x80,
'ansi_kor_hangul': 0x81,
'ansi_kor_johab': 0x82,
'ansi_chinese_gbk': 0x86,
'ansi_chinese_big5': 0x88,
'ansi_greek': 0xa1,
'ansi_turkish': 0xa2,
'ansi_vietnamese': 0xa3,
'ansi_hebrew': 0xb1,
'ansi_arabic': 0xb2,
'ansi_baltic': 0xba,
'ansi_cyrillic': 0xcc,
'ansi_thai': 0xde,
'ansi_latin_ii': 0xee,
'oem_latin_i': 0xff,
}
# Text values for colour indices. "grey" is a synonym of "gray".
# The names are those given by Microsoft Excel 2003 to the colours
# in the default palette. There is no great correspondence with
# any W3C name-to-RGB mapping.
_colour_map_text = """\
aqua 0x31
black 0x08
blue 0x0C
blue_gray 0x36
bright_green 0x0B
brown 0x3C
coral 0x1D
cyan_ega 0x0F
dark_blue 0x12
dark_blue_ega 0x12
dark_green 0x3A
dark_green_ega 0x11
dark_purple 0x1C
dark_red 0x10
dark_red_ega 0x10
dark_teal 0x38
dark_yellow 0x13
gold 0x33
gray_ega 0x17
gray25 0x16
gray40 0x37
gray50 0x17
gray80 0x3F
green 0x11
ice_blue 0x1F
indigo 0x3E
ivory 0x1A
lavender 0x2E
light_blue 0x30
light_green 0x2A
light_orange 0x34
light_turquoise 0x29
light_yellow 0x2B
lime 0x32
magenta_ega 0x0E
ocean_blue 0x1E
olive_ega 0x13
olive_green 0x3B
orange 0x35
pale_blue 0x2C
periwinkle 0x18
pink 0x0E
plum 0x3D
purple_ega 0x14
red 0x0A
rose 0x2D
sea_green 0x39
silver_ega 0x16
sky_blue 0x28
tan 0x2F
teal 0x15
teal_ega 0x15
turquoise 0x0F
violet 0x14
white 0x09
yellow 0x0D"""
colour_map = {}
for _line in _colour_map_text.splitlines():
_name, _num = _line.split()
_num = int(_num, 0)
colour_map[_name] = _num
if 'gray' in _name:
colour_map[_name.replace('gray', 'grey')] = _num
del _colour_map_text, _line, _name, _num
pattern_map = {
# Text values for pattern.pattern
# xlwt/doc/pattern_examples.xls showcases all of these patterns.
'no_fill': 0,
'none': 0,
'solid': 1,
'solid_fill': 1,
'solid_pattern': 1,
'fine_dots': 2,
'alt_bars': 3,
'sparse_dots': 4,
'thick_horz_bands': 5,
'thick_vert_bands': 6,
'thick_backward_diag': 7,
'thick_forward_diag': 8,
'big_spots': 9,
'bricks': 10,
'thin_horz_bands': 11,
'thin_vert_bands': 12,
'thin_backward_diag': 13,
'thin_forward_diag': 14,
'squares': 15,
'diamonds': 16,
}
def any_str_func(s):
return s.strip()
def colour_index_func(s, maxval=0x7F):
try:
value = int(s, 0)
except ValueError:
return None
if not (0 <= value <= maxval):
return None
return value
colour_index_func_7 = colour_index_func
def colour_index_func_15(s):
return colour_index_func(s, maxval=0x7FFF)
def rotation_func(s):
try:
value = int(s, 0)
except ValueError:
return None
if not (-90 <= value <= 90):
raise EasyXFCallerError("rotation %d: should be -90 to +90 degrees" % value)
if value < 0:
value = 90 - value # encode as 91 to 180 (clockwise)
return value
xf_dict = {
'align': 'alignment', # synonym
'alignment': {
'dire': {
'general': 0,
'lr': 1,
'rl': 2,
},
'direction': 'dire',
'horiz': 'horz',
'horizontal': 'horz',
'horz': {
'general': 0,
'left': 1,
'center': 2,
'centre': 2, # "align: horiz centre" means xf.alignment.horz is set to 2
'right': 3,
'filled': 4,
'justified': 5,
'center_across_selection': 6,
'centre_across_selection': 6,
'distributed': 7,
},
'inde': IntULim(15), # restriction: 0 <= value <= 15
'indent': 'inde',
'rota': [{'stacked': 255, 'none': 0, }, rotation_func],
'rotation': 'rota',
'shri': bool_map,
'shrink': 'shri',
'shrink_to_fit': 'shri',
'vert': {
'top': 0,
'center': 1,
'centre': 1,
'bottom': 2,
'justified': 3,
'distributed': 4,
},
'vertical': 'vert',
'wrap': bool_map,
},
'border': 'borders',
'borders': {
'left': [border_line_map, IntULim(0x0d)],
'right': [border_line_map, IntULim(0x0d)],
'top': [border_line_map, IntULim(0x0d)],
'bottom': [border_line_map, IntULim(0x0d)],
'diag': [border_line_map, IntULim(0x0d)],
'top_colour': [colour_map, colour_index_func_7],
'bottom_colour': [colour_map, colour_index_func_7],
'left_colour': [colour_map, colour_index_func_7],
'right_colour': [colour_map, colour_index_func_7],
'diag_colour': [colour_map, colour_index_func_7],
'top_color': 'top_colour',
'bottom_color': 'bottom_colour',
'left_color': 'left_colour',
'right_color': 'right_colour',
'diag_color': 'diag-colour',
'need_diag_1': bool_map,
'need_diag_2': bool_map,
},
'font': {
'bold': bool_map,
'charset': charset_map,
'color': 'colour_index',
'color_index': 'colour_index',
'colour': 'colour_index',
'colour_index': [colour_map, colour_index_func_15],
'escapement': {'none': 0, 'superscript': 1, 'subscript': 2},
'family': {'none': 0, 'roman': 1, 'swiss': 2, 'modern': 3, 'script': 4, 'decorative': 5, },
'height': IntULim(0xFFFF), # practical limits are much narrower e.g. 160 to 1440 (8pt to 72pt)
'italic': bool_map,
'name': any_str_func,
'outline': bool_map,
'shadow': bool_map,
'struck_out': bool_map,
'underline': [bool_map, {'none': 0, 'single': 1, 'single_acc': 0x21, 'double': 2, 'double_acc': 0x22, }],
},
'pattern': {
'back_color': 'pattern_back_colour',
'back_colour': 'pattern_back_colour',
'fore_color': 'pattern_fore_colour',
'fore_colour': 'pattern_fore_colour',
'pattern': [pattern_map, IntULim(16)],
'pattern_back_color': 'pattern_back_colour',
'pattern_back_colour': [colour_map, colour_index_func_7],
'pattern_fore_color': 'pattern_fore_colour',
'pattern_fore_colour': [colour_map, colour_index_func_7],
},
'protection': {
'cell_locked' : bool_map,
'formula_hidden': bool_map,
},
}
def _esplit(s, split_char, esc_char="\\"):
escaped = False
olist = ['']
for c in s:
if escaped:
olist[-1] += c
escaped = False
elif c == esc_char:
escaped = True
elif c == split_char:
olist.append('')
else:
olist[-1] += c
return olist
def _parse_strg_to_obj(strg, obj, parse_dict,
field_sep=",", line_sep=";", intro_sep=":", esc_char="\\", debug=False):
for line in _esplit(strg, line_sep, esc_char):
line = line.strip()
if not line:
break
split_line = _esplit(line, intro_sep, esc_char)
if len(split_line) != 2:
raise EasyXFCallerError('line %r should have exactly 1 "%c"' % (line, intro_sep))
section, item_str = split_line
section = section.strip().lower()
for counter in range(2):
result = parse_dict.get(section)
if result is None:
raise EasyXFCallerError('section %r is unknown' % section)
if isinstance(result, dict):
break
if not isinstance(result, str):
raise EasyXFAuthorError(
'section %r should map to dict or str object; found %r' % (section, type(result)))
# synonym
old_section = section
section = result
else:
raise EasyXFAuthorError('Attempt to define synonym of synonym (%r: %r)' % (old_section, result))
section_dict = result
section_obj = getattr(obj, section, None)
if section_obj is None:
raise EasyXFAuthorError('instance of %s class has no attribute named %s' % (obj.__class__.__name__, section))
for kv_str in _esplit(item_str, field_sep, esc_char):
guff = kv_str.split()
if not guff:
continue
k = guff[0].lower().replace('-', '_')
v = ' '.join(guff[1:])
if not v:
raise EasyXFCallerError("no value supplied for %s.%s" % (section, k))
for counter in range(2):
result = section_dict.get(k)
if result is None:
raise EasyXFCallerError('%s.%s is not a known attribute' % (section, k))
if not isinstance(result, str):
break
# synonym
old_k = k
k = result
else:
raise EasyXFAuthorError('Attempt to define synonym of synonym (%r: %r)' % (old_k, result))
value_info = result
if not isinstance(value_info, list):
value_info = [value_info]
for value_rule in value_info:
if isinstance(value_rule, dict):
# dict maps strings to integer field values
vl = v.lower().replace('-', '_')
if vl in value_rule:
value = value_rule[vl]
break
elif isinstance(value_rule, collections.Callable):
value = value_rule(v)
if value is not None:
break
else:
raise EasyXFAuthorError("unknown value rule for attribute %r: %r" % (k, value_rule))
else:
raise EasyXFCallerError("unexpected value %r for %s.%s" % (v, section, k))
try:
orig = getattr(section_obj, k)
except AttributeError:
raise EasyXFAuthorError('%s.%s in dictionary but not in supplied object' % (section, k))
if debug: print("+++ %s.%s = %r # %s; was %r" % (section, k, value, v, orig))
setattr(section_obj, k, value)
def easyxf(strg_to_parse="", num_format_str=None,
field_sep=",", line_sep=";", intro_sep=":", esc_char="\\", debug=False):
xfobj = XFStyle()
if num_format_str is not None:
xfobj.num_format_str = num_format_str
if strg_to_parse:
_parse_strg_to_obj(strg_to_parse, xfobj, xf_dict,
field_sep=field_sep, line_sep=line_sep, intro_sep=intro_sep, esc_char=esc_char, debug=debug)
return xfobj
+79
View File
@@ -0,0 +1,79 @@
'''
From BIFF8 on, strings are always stored using UTF-16LE text encoding. The
character array is a sequence of 16-bit values4. Additionally it is
possible to use a compressed format, which omits the high bytes of all
characters, if they are all zero.
The following tables describe the standard format of the entire string, but
in many records the strings differ from this format. This will be mentioned
separately. It is possible (but not required) to store Rich-Text formatting
information and Asian phonetic information inside a Unicode string. This
results in four different ways to store a string. The character array
is not zero-terminated.
The string consists of the character count (as usual an 8-bit value or
a 16-bit value), option flags, the character array and optional formatting
information. If the string is empty, sometimes the option flags field will
not occur. This is mentioned at the respective place.
Offset Size Contents
0 1 or 2 Length of the string (character count, ln)
1 or 2 1 Option flags:
Bit Mask Contents
0 01H Character compression (ccompr):
0 = Compressed (8-bit characters)
1 = Uncompressed (16-bit characters)
2 04H Asian phonetic settings (phonetic):
0 = Does not contain Asian phonetic settings
1 = Contains Asian phonetic settings
3 08H Rich-Text settings (richtext):
0 = Does not contain Rich-Text settings
1 = Contains Rich-Text settings
[2 or 3] 2 (optional, only if richtext=1) Number of Rich-Text formatting runs (rt)
[var.] 4 (optional, only if phonetic=1) Size of Asian phonetic settings block (in bytes, sz)
var. ln or
2·ln Character array (8-bit characters or 16-bit characters, dependent on ccompr)
[var.] 4·rt (optional, only if richtext=1) List of rt formatting runs
[var.] sz (optional, only if phonetic=1) Asian Phonetic Settings Block
'''
from struct import pack
def upack2(s, encoding='ascii'):
# If not unicode, make it so.
if isinstance(s, str):
us = s
else:
us = str(s, encoding)
# Limit is based on number of content characters
# (not on number of bytes in packed result)
len_us = len(us)
if len_us > 65535:
raise Exception('String longer than 65535 characters')
try:
encs = us.encode('latin1')
# Success here means all chars are in U+0000 to U+00FF
# inclusive, meaning that we can use "compressed format".
flag = 0
except UnicodeEncodeError:
encs = us.encode('utf_16_le')
flag = 1
return pack('<HB', len_us, flag) + encs
def upack1(s, encoding='ascii'):
# Same as upack2(), but with a one-byte length field.
if isinstance(s, str):
us = s
else:
us = str(s, encoding)
len_us = len(us)
if len_us > 255:
raise Exception('String longer than 255 characters')
try:
encs = us.encode('latin1')
flag = 0
except UnicodeEncodeError:
encs = us.encode('utf_16_le')
flag = 1
return pack('<BB', len_us, flag) + encs
+196
View File
@@ -0,0 +1,196 @@
# pyXLWriter: A library for generating Excel Spreadsheets
# Copyright (c) 2004 Evgeny Filatov <fufff@users.sourceforge.net>
# Copyright (c) 2002-2004 John McNamara (Perl Spreadsheet::WriteExcel)
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# This library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
# General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#----------------------------------------------------------------------------
# This module was written/ported from PERL Spreadsheet::WriteExcel module
# The author of the PERL Spreadsheet::WriteExcel module is John McNamara
# <jmcnamara@cpan.org>
#----------------------------------------------------------------------------
# See the README.txt distributed with pyXLWriter for more details.
# Portions are (C) Roman V. Kiseliov, 2005
# Utilities for work with reference to cells and with sheetnames
__rev_id__ = """$Id: Utils.py 3844 2009-05-20 01:02:54Z sjmachin $"""
import re
from struct import pack
from .ExcelMagic import MAX_ROW, MAX_COL
_re_cell_ex = re.compile(r"(\$?)([A-I]?[A-Z])(\$?)(\d+)", re.IGNORECASE)
_re_row_range = re.compile(r"\$?(\d+):\$?(\d+)")
_re_col_range = re.compile(r"\$?([A-I]?[A-Z]):\$?([A-I]?[A-Z])", re.IGNORECASE)
_re_cell_range = re.compile(r"\$?([A-I]?[A-Z]\$?\d+):\$?([A-I]?[A-Z]\$?\d+)", re.IGNORECASE)
_re_cell_ref = re.compile(r"\$?([A-I]?[A-Z]\$?\d+)", re.IGNORECASE)
def col_by_name(colname):
"""
"""
col = 0
pow = 1
for i in range(len(colname)-1, -1, -1):
ch = colname[i]
col += (ord(ch) - ord('A') + 1) * pow
pow *= 26
return col - 1
def cell_to_rowcol(cell):
"""Convert an Excel cell reference string in A1 notation
to numeric row/col notation.
Returns: row, col, row_abs, col_abs
"""
m = _re_cell_ex.match(cell)
if not m:
raise Exception("Ill-formed single_cell reference: %s" % cell)
col_abs, col, row_abs, row = m.groups()
row_abs = bool(row_abs)
col_abs = bool(col_abs)
row = int(row) - 1
col = col_by_name(col.upper())
return row, col, row_abs, col_abs
def cell_to_rowcol2(cell):
"""Convert an Excel cell reference string in A1 notation
to numeric row/col notation.
Returns: row, col
"""
m = _re_cell_ex.match(cell)
if not m:
raise Exception("Error in cell format")
col_abs, col, row_abs, row = m.groups()
# Convert base26 column string to number
# All your Base are belong to us.
row = int(row) - 1
col = col_by_name(col.upper())
return row, col
def rowcol_to_cell(row, col, row_abs=False, col_abs=False):
"""Convert numeric row/col notation to an Excel cell reference string in
A1 notation.
"""
assert 0 <= row < MAX_ROW # MAX_ROW counts from 1
assert 0 <= col < MAX_COL # MAX_COL counts from 1
d = col // 26
m = col % 26
chr1 = "" # Most significant character in AA1
if row_abs:
row_abs = '$'
else:
row_abs = ''
if col_abs:
col_abs = '$'
else:
col_abs = ''
if d > 0:
chr1 = chr(ord('A') + d - 1)
chr2 = chr(ord('A') + m)
# Zero index to 1-index
return col_abs + chr1 + chr2 + row_abs + str(row + 1)
def rowcol_pair_to_cellrange(row1, col1, row2, col2,
row1_abs=False, col1_abs=False, row2_abs=False, col2_abs=False):
"""Convert two (row,column) pairs
into a cell range string in A1:B2 notation.
Returns: cell range string
"""
assert row1 <= row2
assert col1 <= col2
return (
rowcol_to_cell(row1, col1, row1_abs, col1_abs)
+ ":"
+ rowcol_to_cell(row2, col2, row2_abs, col2_abs)
)
def cellrange_to_rowcol_pair(cellrange):
"""Convert cell range string in A1 notation to numeric row/col
pair.
Returns: row1, col1, row2, col2
"""
cellrange = cellrange.upper()
# Convert a row range: '1:3'
res = _re_row_range.match(cellrange)
if res:
row1 = int(res.group(1)) - 1
col1 = 0
row2 = int(res.group(2)) - 1
col2 = -1
return row1, col1, row2, col2
# Convert a column range: 'A:A' or 'B:G'.
# A range such as A:A is equivalent to A1:A16384, so add rows as required
res = _re_col_range.match(cellrange)
if res:
col1 = col_by_name(res.group(1).upper())
row1 = 0
col2 = col_by_name(res.group(2).upper())
row2 = -1
return row1, col1, row2, col2
# Convert a cell range: 'A1:B7'
res = _re_cell_range.match(cellrange)
if res:
row1, col1 = cell_to_rowcol2(res.group(1))
row2, col2 = cell_to_rowcol2(res.group(2))
return row1, col1, row2, col2
# Convert a cell reference: 'A1' or 'AD2000'
res = _re_cell_ref.match(cellrange)
if res:
row1, col1 = cell_to_rowcol2(res.group(1))
return row1, col1, row1, col1
raise Exception("Unknown cell reference %s" % (cell))
def cell_to_packed_rowcol(cell):
""" pack row and column into the required 4 byte format """
row, col, row_abs, col_abs = cell_to_rowcol(cell)
if col >= MAX_COL:
raise Exception("Column %s greater than IV in formula" % cell)
if row >= MAX_ROW: # this for BIFF8. for BIFF7 available 2^14
raise Exception("Row %s greater than %d in formula" % (cell, MAX_ROW))
col |= int(not row_abs) << 15
col |= int(not col_abs) << 14
return row, col
# === sheetname functions ===
def valid_sheet_name(sheet_name):
if sheet_name == "" or sheet_name[0] == "'" or len(sheet_name) > 31:
return False
for c in sheet_name:
if c in "[]:\\?/*\x00":
return False
return True
def quote_sheet_name(unquoted_sheet_name):
if not valid_sheet_name(unquoted_sheet_name):
raise Exception(
'attempt to quote an invalid worksheet name %r' % unquoted_sheet_name)
return "'" + unquoted_sheet_name.replace("'", "''") + "'"
+635
View File
@@ -0,0 +1,635 @@
'''
Record Order in BIFF8
Workbook Globals Substream
BOF Type = workbook globals
Interface Header
MMS
Interface End
WRITEACCESS
CODEPAGE
DSF
TABID
FNGROUPCOUNT
Workbook Protection Block
WINDOWPROTECT
PROTECT
PASSWORD
PROT4REV
PROT4REVPASS
BACKUP
HIDEOBJ
WINDOW1
DATEMODE
PRECISION
REFRESHALL
BOOKBOOL
FONT +
FORMAT *
XF +
STYLE +
? PALETTE
USESELFS
BOUNDSHEET +
COUNTRY
? Link Table
SST
ExtSST
EOF
'''
from . import BIFFRecords
from . import Style
class Workbook(object):
#################################################################
## Constructor
#################################################################
def __init__(self, encoding='ascii', style_compression=0):
self.encoding = encoding
self.__owner = 'None'
self.__country_code = None # 0x07 is Russia :-)
self.__wnd_protect = 0
self.__obj_protect = 0
self.__protect = 0
self.__backup_on_save = 0
# for WINDOW1 record
self.__hpos_twips = 0x01E0
self.__vpos_twips = 0x005A
self.__width_twips = 0x3FCF
self.__height_twips = 0x2A4E
self.__active_sheet = 0
self.__first_tab_index = 0
self.__selected_tabs = 0x01
self.__tab_width_twips = 0x0258
self.__wnd_hidden = 0
self.__wnd_mini = 0
self.__hscroll_visible = 1
self.__vscroll_visible = 1
self.__tabs_visible = 1
self.__styles = Style.StyleCollection(style_compression)
self.__dates_1904 = 0
self.__use_cell_values = 1
self.__sst = BIFFRecords.SharedStringTable(self.encoding)
self.__worksheets = []
self.__worksheet_idx_from_name = {}
self.__sheet_refs = {}
self._supbook_xref = {}
self._xcall_xref = {}
self._ownbook_supbookx = None
self._ownbook_supbook_ref = None
self._xcall_supbookx = None
self._xcall_supbook_ref = None
#################################################################
## Properties, "getters", "setters"
#################################################################
def get_style_stats(self):
return self.__styles.stats[:]
def set_owner(self, value):
self.__owner = value
def get_owner(self):
return self.__owner
owner = property(get_owner, set_owner)
#################################################################
def set_country_code(self, value):
self.__country_code = value
def get_country_code(self):
return self.__country_code
country_code = property(get_country_code, set_country_code)
#################################################################
def set_wnd_protect(self, value):
self.__wnd_protect = int(value)
def get_wnd_protect(self):
return bool(self.__wnd_protect)
wnd_protect = property(get_wnd_protect, set_wnd_protect)
#################################################################
def set_obj_protect(self, value):
self.__obj_protect = int(value)
def get_obj_protect(self):
return bool(self.__obj_protect)
obj_protect = property(get_obj_protect, set_obj_protect)
#################################################################
def set_protect(self, value):
self.__protect = int(value)
def get_protect(self):
return bool(self.__protect)
protect = property(get_protect, set_protect)
#################################################################
def set_backup_on_save(self, value):
self.__backup_on_save = int(value)
def get_backup_on_save(self):
return bool(self.__backup_on_save)
backup_on_save = property(get_backup_on_save, set_backup_on_save)
#################################################################
def set_hpos(self, value):
self.__hpos_twips = value & 0xFFFF
def get_hpos(self):
return self.__hpos_twips
hpos = property(get_hpos, set_hpos)
#################################################################
def set_vpos(self, value):
self.__vpos_twips = value & 0xFFFF
def get_vpos(self):
return self.__vpos_twips
vpos = property(get_vpos, set_vpos)
#################################################################
def set_width(self, value):
self.__width_twips = value & 0xFFFF
def get_width(self):
return self.__width_twips
width = property(get_width, set_width)
#################################################################
def set_height(self, value):
self.__height_twips = value & 0xFFFF
def get_height(self):
return self.__height_twips
height = property(get_height, set_height)
#################################################################
def set_active_sheet(self, value):
self.__active_sheet = value & 0xFFFF
self.__first_tab_index = self.__active_sheet
def get_active_sheet(self):
return self.__active_sheet
active_sheet = property(get_active_sheet, set_active_sheet)
#################################################################
def set_tab_width(self, value):
self.__tab_width_twips = value & 0xFFFF
def get_tab_width(self):
return self.__tab_width_twips
tab_width = property(get_tab_width, set_tab_width)
#################################################################
def set_wnd_visible(self, value):
self.__wnd_hidden = int(not value)
def get_wnd_visible(self):
return not bool(self.__wnd_hidden)
wnd_visible = property(get_wnd_visible, set_wnd_visible)
#################################################################
def set_wnd_mini(self, value):
self.__wnd_mini = int(value)
def get_wnd_mini(self):
return bool(self.__wnd_mini)
wnd_mini = property(get_wnd_mini, set_wnd_mini)
#################################################################
def set_hscroll_visible(self, value):
self.__hscroll_visible = int(value)
def get_hscroll_visible(self):
return bool(self.__hscroll_visible)
hscroll_visible = property(get_hscroll_visible, set_hscroll_visible)
#################################################################
def set_vscroll_visible(self, value):
self.__vscroll_visible = int(value)
def get_vscroll_visible(self):
return bool(self.__vscroll_visible)
vscroll_visible = property(get_vscroll_visible, set_vscroll_visible)
#################################################################
def set_tabs_visible(self, value):
self.__tabs_visible = int(value)
def get_tabs_visible(self):
return bool(self.__tabs_visible)
tabs_visible = property(get_tabs_visible, set_tabs_visible)
#################################################################
def set_dates_1904(self, value):
self.__dates_1904 = int(value)
def get_dates_1904(self):
return bool(self.__dates_1904)
dates_1904 = property(get_dates_1904, set_dates_1904)
#################################################################
def set_use_cell_values(self, value):
self.__use_cell_values = int(value)
def get_use_cell_values(self):
return bool(self.__use_cell_values)
use_cell_values = property(get_use_cell_values, set_use_cell_values)
#################################################################
def get_default_style(self):
return self.__styles.default_style
default_style = property(get_default_style)
##################################################################
## Methods
##################################################################
def add_style(self, style):
return self.__styles.add(style)
def add_str(self, s):
return self.__sst.add_str(s)
def del_str(self, sst_idx):
self.__sst.del_str(sst_idx)
def str_index(self, s):
return self.__sst.str_index(s)
def add_sheet(self, sheetname, cell_overwrite_ok=False):
from . import Worksheet, Utils
if not isinstance(sheetname, str):
sheetname = sheetname.decode(self.encoding)
if not Utils.valid_sheet_name(sheetname):
raise Exception("invalid worksheet name %r" % sheetname)
lower_name = sheetname.lower()
if lower_name in self.__worksheet_idx_from_name:
raise Exception("duplicate worksheet name %r" % sheetname)
self.__worksheet_idx_from_name[lower_name] = len(self.__worksheets)
self.__worksheets.append(Worksheet(sheetname, self, cell_overwrite_ok))
return self.__worksheets[-1]
def get_sheet(self, sheetnum):
return self.__worksheets[sheetnum]
def raise_bad_sheetname(self, sheetname):
raise Exception("Formula: unknown sheet name %s" % sheetname)
def convert_sheetindex(self, strg_ref, n_sheets):
idx = int(strg_ref)
if 0 <= idx < n_sheets:
return idx
msg = "Formula: sheet index (%s) >= number of sheets (%d)" % (strg_ref, n_sheets)
raise Exception(msg)
def _get_supbook_index(self, tag):
if tag in self._supbook_xref:
return self._supbook_xref[tag]
self._supbook_xref[tag] = idx = len(self._supbook_xref)
return idx
def setup_ownbook(self):
self._ownbook_supbookx = self._get_supbook_index(('ownbook', 0))
self._ownbook_supbook_ref = None
reference = (self._ownbook_supbookx, 0xFFFE, 0xFFFE)
if reference in self.__sheet_refs:
raise Exception("can't happen")
self.__sheet_refs[reference] = self._ownbook_supbook_ref = len(self.__sheet_refs)
def setup_xcall(self):
self._xcall_supbookx = self._get_supbook_index(('xcall', 0))
self._xcall_supbook_ref = None
reference = (self._xcall_supbookx, 0xFFFE, 0xFFFE)
if reference in self.__sheet_refs:
raise Exception("can't happen")
self.__sheet_refs[reference] = self._xcall_supbook_ref = len(self.__sheet_refs)
def add_sheet_reference(self, formula):
patches = []
n_sheets = len(self.__worksheets)
sheet_refs, xcall_refs = formula.get_references()
for ref0, ref1, offset in sheet_refs:
if not ref0.isdigit():
try:
ref0n = self.__worksheet_idx_from_name[ref0.lower()]
except KeyError:
self.raise_bad_sheetname(ref0)
else:
ref0n = self.convert_sheetindex(ref0, n_sheets)
if ref1 == ref0:
ref1n = ref0n
elif not ref1.isdigit():
try:
ref1n = self.__worksheet_idx_from_name[ref1.lower()]
except KeyError:
self.raise_bad_sheetname(ref1)
else:
ref1n = self.convert_sheetindex(ref1, n_sheets)
if ref1n < ref0n:
msg = "Formula: sheets out of order; %r:%r -> (%d, %d)" \
% (ref0, ref1, ref0n, ref1n)
raise Exception(msg)
if self._ownbook_supbookx is None:
self.setup_ownbook()
reference = (self._ownbook_supbookx, ref0n, ref1n)
if reference in self.__sheet_refs:
patches.append((offset, self.__sheet_refs[reference]))
else:
nrefs = len(self.__sheet_refs)
if nrefs > 65535:
raise Exception('More than 65536 inter-sheet references')
self.__sheet_refs[reference] = nrefs
patches.append((offset, nrefs))
for funcname, offset in xcall_refs:
if self._ownbook_supbookx is None:
self.setup_ownbook()
if self._xcall_supbookx is None:
self.setup_xcall()
# print funcname, self._supbook_xref
patches.append((offset, self._xcall_supbook_ref))
if not isinstance(funcname, str):
funcname = funcname.decode(self.encoding)
if funcname in self._xcall_xref:
idx = self._xcall_xref[funcname]
else:
self._xcall_xref[funcname] = idx = len(self._xcall_xref)
patches.append((offset + 2, idx + 1))
formula.patch_references(patches)
##################################################################
## BIFF records generation
##################################################################
def __bof_rec(self):
return BIFFRecords.Biff8BOFRecord(BIFFRecords.Biff8BOFRecord.BOOK_GLOBAL).get()
def __eof_rec(self):
return BIFFRecords.EOFRecord().get()
def __intf_hdr_rec(self):
return BIFFRecords.InteraceHdrRecord().get()
def __intf_end_rec(self):
return BIFFRecords.InteraceEndRecord().get()
def __intf_mms_rec(self):
return BIFFRecords.MMSRecord().get()
def __write_access_rec(self):
return BIFFRecords.WriteAccessRecord(self.__owner).get()
def __wnd_protect_rec(self):
return BIFFRecords.WindowProtectRecord(self.__wnd_protect).get()
def __obj_protect_rec(self):
return BIFFRecords.ObjectProtectRecord(self.__obj_protect).get()
def __protect_rec(self):
return BIFFRecords.ProtectRecord(self.__protect).get()
def __password_rec(self):
return BIFFRecords.PasswordRecord().get()
def __prot4rev_rec(self):
return BIFFRecords.Prot4RevRecord().get()
def __prot4rev_pass_rec(self):
return BIFFRecords.Prot4RevPassRecord().get()
def __backup_rec(self):
return BIFFRecords.BackupRecord(self.__backup_on_save).get()
def __hide_obj_rec(self):
return BIFFRecords.HideObjRecord().get()
def __window1_rec(self):
flags = 0
flags |= (self.__wnd_hidden) << 0
flags |= (self.__wnd_mini) << 1
flags |= (self.__hscroll_visible) << 3
flags |= (self.__vscroll_visible) << 4
flags |= (self.__tabs_visible) << 5
return BIFFRecords.Window1Record(self.__hpos_twips, self.__vpos_twips,
self.__width_twips, self.__height_twips,
flags,
self.__active_sheet, self.__first_tab_index,
self.__selected_tabs, self.__tab_width_twips).get()
def __codepage_rec(self):
return BIFFRecords.CodepageBiff8Record().get()
def __country_rec(self):
if not self.__country_code:
return b''
return BIFFRecords.CountryRecord(self.__country_code, self.__country_code).get()
def __dsf_rec(self):
return BIFFRecords.DSFRecord().get()
def __tabid_rec(self):
return BIFFRecords.TabIDRecord(len(self.__worksheets)).get()
def __fngroupcount_rec(self):
return BIFFRecords.FnGroupCountRecord().get()
def __datemode_rec(self):
return BIFFRecords.DateModeRecord(self.__dates_1904).get()
def __precision_rec(self):
return BIFFRecords.PrecisionRecord(self.__use_cell_values).get()
def __refresh_all_rec(self):
return BIFFRecords.RefreshAllRecord().get()
def __bookbool_rec(self):
return BIFFRecords.BookBoolRecord().get()
def __all_fonts_num_formats_xf_styles_rec(self):
return self.__styles.get_biff_data()
def __palette_rec(self):
result = b''
return result
def __useselfs_rec(self):
return BIFFRecords.UseSelfsRecord().get()
def __boundsheets_rec(self, data_len_before, data_len_after, sheet_biff_lens):
# .................................
# BOUNDSEHEET0
# BOUNDSEHEET1
# BOUNDSEHEET2
# ..................................
# WORKSHEET0
# WORKSHEET1
# WORKSHEET2
boundsheets_len = 0
for sheet in self.__worksheets:
boundsheets_len += len(BIFFRecords.BoundSheetRecord(
0x00, sheet.visibility, sheet.name, self.encoding
).get())
start = data_len_before + boundsheets_len + data_len_after
result = b''
for sheet_biff_len, sheet in zip(sheet_biff_lens, self.__worksheets):
result += BIFFRecords.BoundSheetRecord(
start, sheet.visibility, sheet.name, self.encoding
).get()
start += sheet_biff_len
return result
def __all_links_rec(self):
pieces = []
temp = [(idx, tag) for tag, idx in list(self._supbook_xref.items())]
temp.sort()
for idx, tag in temp:
stype, snum = tag
if stype == 'ownbook':
rec = BIFFRecords.InternalReferenceSupBookRecord(len(self.__worksheets)).get()
pieces.append(rec)
elif stype == 'xcall':
rec = BIFFRecords.XcallSupBookRecord().get()
pieces.append(rec)
temp = [(idx, name) for name, idx in list(self._xcall_xref.items())]
temp.sort()
for idx, name in temp:
rec = BIFFRecords.ExternnameRecord(
options=0, index=0, name=name, fmla='\x02\x00\x1c\x17').get()
pieces.append(rec)
else:
raise Exception('unknown supbook stype %r' % stype)
if len(self.__sheet_refs) > 0:
# get references in index order
temp = [(idx, ref) for ref, idx in list(self.__sheet_refs.items())]
temp.sort()
temp = [ref for idx, ref in temp]
externsheet_record = BIFFRecords.ExternSheetRecord(temp).get()
pieces.append(externsheet_record)
return b''.join(pieces)
def __sst_rec(self):
return self.__sst.get_biff_record()
def __ext_sst_rec(self, abs_stream_pos):
return b''
#return BIFFRecords.ExtSSTRecord(abs_stream_pos, self.sst_record.str_placement,
#self.sst_record.portions_len).get()
def get_biff_data(self):
before = b''
before += self.__bof_rec()
before += self.__intf_hdr_rec()
before += self.__intf_mms_rec()
before += self.__intf_end_rec()
before += self.__write_access_rec()
before += self.__codepage_rec()
before += self.__dsf_rec()
before += self.__tabid_rec()
before += self.__fngroupcount_rec()
before += self.__wnd_protect_rec()
before += self.__protect_rec()
before += self.__obj_protect_rec()
before += self.__password_rec()
before += self.__prot4rev_rec()
before += self.__prot4rev_pass_rec()
before += self.__backup_rec()
before += self.__hide_obj_rec()
before += self.__window1_rec()
before += self.__datemode_rec()
before += self.__precision_rec()
before += self.__refresh_all_rec()
before += self.__bookbool_rec()
before += self.__all_fonts_num_formats_xf_styles_rec()
before += self.__palette_rec()
before += self.__useselfs_rec()
country = self.__country_rec()
all_links = self.__all_links_rec()
shared_str_table = self.__sst_rec()
after = country + all_links + shared_str_table
ext_sst = self.__ext_sst_rec(0) # need fake cause we need calc stream pos
eof = self.__eof_rec()
self.__worksheets[self.__active_sheet].selected = True
sheets = b''
sheet_biff_lens = []
for sheet in self.__worksheets:
data = sheet.get_biff_data()
sheets += data
sheet_biff_lens.append(len(data))
bundlesheets = self.__boundsheets_rec(len(before), len(after)+len(ext_sst)+len(eof), sheet_biff_lens)
sst_stream_pos = len(before) + len(bundlesheets) + len(country) + len(all_links)
ext_sst = self.__ext_sst_rec(sst_stream_pos)
return before + bundlesheets + after + ext_sst + eof + sheets
def save(self, filename):
from . import CompoundDoc
doc = CompoundDoc.XlsDoc()
doc.save(filename, self.get_biff_data())
File diff suppressed because it is too large Load Diff
+9
View File
@@ -0,0 +1,9 @@
import sys
from .Workbook import Workbook
from .Worksheet import Worksheet
from .Row import Row
from .Column import Column
from .Formatting import Font, Alignment, Borders, Pattern, Protection
from .Style import XFStyle, easyxf
from .ExcelFormula import *
File diff suppressed because it is too large Load Diff
+288
View File
@@ -0,0 +1,288 @@
from .error import *
from .tokens import *
from .events import *
from .nodes import *
from .loader import *
from .dumper import *
__version__ = '3.09'
try:
from cyaml import *
__with_libyaml__ = True
except ImportError:
__with_libyaml__ = False
def scan(stream, Loader=Loader):
"""
Scan a YAML stream and produce scanning tokens.
"""
loader = Loader(stream)
while loader.check_token():
yield loader.get_token()
def parse(stream, Loader=Loader):
"""
Parse a YAML stream and produce parsing events.
"""
loader = Loader(stream)
while loader.check_event():
yield loader.get_event()
def compose(stream, Loader=Loader):
"""
Parse the first YAML document in a stream
and produce the corresponding representation tree.
"""
loader = Loader(stream)
return loader.get_single_node()
def compose_all(stream, Loader=Loader):
"""
Parse all YAML documents in a stream
and produce corresponding representation trees.
"""
loader = Loader(stream)
while loader.check_node():
yield loader.get_node()
def load(stream, Loader=Loader):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
"""
loader = Loader(stream)
return loader.get_single_data()
def load_all(stream, Loader=Loader):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
"""
loader = Loader(stream)
while loader.check_data():
yield loader.get_data()
def safe_load(stream):
"""
Parse the first YAML document in a stream
and produce the corresponding Python object.
Resolve only basic YAML tags.
"""
return load(stream, SafeLoader)
def safe_load_all(stream):
"""
Parse all YAML documents in a stream
and produce corresponding Python objects.
Resolve only basic YAML tags.
"""
return load_all(stream, SafeLoader)
def emit(events, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None):
"""
Emit YAML parsing events into a stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
from StringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
for event in events:
dumper.emit(event)
if getvalue:
return getvalue()
def serialize_all(nodes, stream=None, Dumper=Dumper,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding='utf-8', explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of representation trees into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
from StringIO import StringIO
else:
from cStringIO import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end)
dumper.open()
for node in nodes:
dumper.serialize(node)
dumper.close()
if getvalue:
return getvalue()
def serialize(node, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a representation tree into a YAML stream.
If stream is None, return the produced string instead.
"""
return serialize_all([node], stream, Dumper=Dumper, **kwds)
def dump_all(documents, stream=None, Dumper=Dumper,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding='utf-8', explicit_start=None, explicit_end=None,
version=None, tags=None):
"""
Serialize a sequence of Python objects into a YAML stream.
If stream is None, return the produced string instead.
"""
getvalue = None
if stream is None:
if encoding is None:
from io import StringIO
else:
from io import StringIO
stream = StringIO()
getvalue = stream.getvalue
dumper = Dumper(stream, default_style=default_style,
default_flow_style=default_flow_style,
canonical=canonical, indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break,
encoding=encoding, version=version, tags=tags,
explicit_start=explicit_start, explicit_end=explicit_end)
dumper.open()
for data in documents:
dumper.represent(data)
dumper.close()
if getvalue:
return getvalue()
def dump(data, stream=None, Dumper=Dumper, **kwds):
"""
Serialize a Python object into a YAML stream.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=Dumper, **kwds)
def safe_dump_all(documents, stream=None, **kwds):
"""
Serialize a sequence of Python objects into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all(documents, stream, Dumper=SafeDumper, **kwds)
def safe_dump(data, stream=None, **kwds):
"""
Serialize a Python object into a YAML stream.
Produce only basic YAML tags.
If stream is None, return the produced string instead.
"""
return dump_all([data], stream, Dumper=SafeDumper, **kwds)
def add_implicit_resolver(tag, regexp, first=None,
Loader=Loader, Dumper=Dumper):
"""
Add an implicit scalar detector.
If an implicit scalar value matches the given regexp,
the corresponding tag is assigned to the scalar.
first is a sequence of possible initial characters or None.
"""
Loader.add_implicit_resolver(tag, regexp, first)
Dumper.add_implicit_resolver(tag, regexp, first)
def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper):
"""
Add a path based resolver for the given tag.
A path is a list of keys that forms a path
to a node in the representation tree.
Keys can be string values, integers, or None.
"""
Loader.add_path_resolver(tag, path, kind)
Dumper.add_path_resolver(tag, path, kind)
def add_constructor(tag, constructor, Loader=Loader):
"""
Add a constructor for the given tag.
Constructor is a function that accepts a Loader instance
and a node object and produces the corresponding Python object.
"""
Loader.add_constructor(tag, constructor)
def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader):
"""
Add a multi-constructor for the given tag prefix.
Multi-constructor is called for a node if its tag starts with tag_prefix.
Multi-constructor accepts a Loader instance, a tag suffix,
and a node object and produces the corresponding Python object.
"""
Loader.add_multi_constructor(tag_prefix, multi_constructor)
def add_representer(data_type, representer, Dumper=Dumper):
"""
Add a representer for the given type.
Representer is a function accepting a Dumper instance
and an instance of the given data type
and producing the corresponding representation node.
"""
Dumper.add_representer(data_type, representer)
def add_multi_representer(data_type, multi_representer, Dumper=Dumper):
"""
Add a representer for the given type.
Multi-representer is a function accepting a Dumper instance
and an instance of the given data type or subtype
and producing the corresponding representation node.
"""
Dumper.add_multi_representer(data_type, multi_representer)
class YAMLObjectMetaclass(type):
"""
The metaclass for YAMLObject.
"""
def __init__(cls, name, bases, kwds):
super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml)
cls.yaml_dumper.add_representer(cls, cls.to_yaml)
class YAMLObject(object):
"""
An object that can dump itself to a YAML stream
and load itself from a YAML stream.
"""
__metaclass__ = YAMLObjectMetaclass
__slots__ = () # no direct instantiation, so allow immutable subclasses
yaml_loader = Loader
yaml_dumper = Dumper
yaml_tag = None
yaml_flow_style = None
def from_yaml(cls, loader, node):
"""
Convert a representation node to a Python object.
"""
return loader.construct_yaml_object(node, cls)
from_yaml = classmethod(from_yaml)
def to_yaml(cls, dumper, data):
"""
Convert a Python object to a representation node.
"""
return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
flow_style=cls.yaml_flow_style)
to_yaml = classmethod(to_yaml)
+139
View File
@@ -0,0 +1,139 @@
__all__ = ['Composer', 'ComposerError']
from .error import MarkedYAMLError
from .events import *
from .nodes import *
class ComposerError(MarkedYAMLError):
pass
class Composer:
def __init__(self):
self.anchors = {}
def check_node(self):
# Drop the STREAM-START event.
if self.check_event(StreamStartEvent):
self.get_event()
# If there are more documents available?
return not self.check_event(StreamEndEvent)
def get_node(self):
# Get the root node of the next document.
if not self.check_event(StreamEndEvent):
return self.compose_document()
def get_single_node(self):
# Drop the STREAM-START event.
self.get_event()
# Compose a document if the stream is not empty.
document = None
if not self.check_event(StreamEndEvent):
document = self.compose_document()
# Ensure that the stream contains no more documents.
if not self.check_event(StreamEndEvent):
event = self.get_event()
raise ComposerError("expected a single document in the stream",
document.start_mark, "but found another document",
event.start_mark)
# Drop the STREAM-END event.
self.get_event()
return document
def compose_document(self):
# Drop the DOCUMENT-START event.
self.get_event()
# Compose the root node.
node = self.compose_node(None, None)
# Drop the DOCUMENT-END event.
self.get_event()
self.anchors = {}
return node
def compose_node(self, parent, index):
if self.check_event(AliasEvent):
event = self.get_event()
anchor = event.anchor
if anchor not in self.anchors:
raise ComposerError(None, None, "found undefined alias %r"
% anchor, event.start_mark)
return self.anchors[anchor]
event = self.peek_event()
anchor = event.anchor
if anchor is not None:
if anchor in self.anchors:
raise ComposerError("found duplicate anchor %r; first occurence"
% anchor, self.anchors[anchor].start_mark,
"second occurence", event.start_mark)
self.descend_resolver(parent, index)
if self.check_event(ScalarEvent):
node = self.compose_scalar_node(anchor)
elif self.check_event(SequenceStartEvent):
node = self.compose_sequence_node(anchor)
elif self.check_event(MappingStartEvent):
node = self.compose_mapping_node(anchor)
self.ascend_resolver()
return node
def compose_scalar_node(self, anchor):
event = self.get_event()
tag = event.tag
if tag is None or tag == '!':
tag = self.resolve(ScalarNode, event.value, event.implicit)
node = ScalarNode(tag, event.value,
event.start_mark, event.end_mark, style=event.style)
if anchor is not None:
self.anchors[anchor] = node
return node
def compose_sequence_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == '!':
tag = self.resolve(SequenceNode, None, start_event.implicit)
node = SequenceNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
index = 0
while not self.check_event(SequenceEndEvent):
node.value.append(self.compose_node(node, index))
index += 1
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node
def compose_mapping_node(self, anchor):
start_event = self.get_event()
tag = start_event.tag
if tag is None or tag == '!':
tag = self.resolve(MappingNode, None, start_event.implicit)
node = MappingNode(tag, [],
start_event.start_mark, None,
flow_style=start_event.flow_style)
if anchor is not None:
self.anchors[anchor] = node
while not self.check_event(MappingEndEvent):
#key_event = self.peek_event()
item_key = self.compose_node(node, None)
#if item_key in node.value:
# raise ComposerError("while composing a mapping", start_event.start_mark,
# "found duplicate key", key_event.start_mark)
item_value = self.compose_node(node, item_key)
#node.value[item_key] = item_value
node.value.append((item_key, item_value))
end_event = self.get_event()
node.end_mark = end_event.end_mark
return node
+686
View File
@@ -0,0 +1,686 @@
__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor',
'ConstructorError']
from .error import *
from .nodes import *
import collections, datetime, base64, binascii, re, sys, types
class ConstructorError(MarkedYAMLError):
pass
class BaseConstructor:
yaml_constructors = {}
yaml_multi_constructors = {}
def __init__(self):
self.constructed_objects = {}
self.recursive_objects = {}
self.state_generators = []
self.deep_construct = False
def check_data(self):
# If there are more documents available?
return self.check_node()
def get_data(self):
# Construct and return the next document.
if self.check_node():
return self.construct_document(self.get_node())
def get_single_data(self):
# Ensure that the stream contains a single document and construct it.
node = self.get_single_node()
if node is not None:
return self.construct_document(node)
return None
def construct_document(self, node):
data = self.construct_object(node)
while self.state_generators:
state_generators = self.state_generators
self.state_generators = []
for generator in state_generators:
for dummy in generator:
pass
self.constructed_objects = {}
self.recursive_objects = {}
self.deep_construct = False
return data
def construct_object(self, node, deep=False):
if deep:
old_deep = self.deep_construct
self.deep_construct = True
if node in self.constructed_objects:
return self.constructed_objects[node]
if node in self.recursive_objects:
raise ConstructorError(None, None,
"found unconstructable recursive node", node.start_mark)
self.recursive_objects[node] = None
constructor = None
tag_suffix = None
if node.tag in self.yaml_constructors:
constructor = self.yaml_constructors[node.tag]
else:
for tag_prefix in self.yaml_multi_constructors:
if node.tag.startswith(tag_prefix):
tag_suffix = node.tag[len(tag_prefix):]
constructor = self.yaml_multi_constructors[tag_prefix]
break
else:
if None in self.yaml_multi_constructors:
tag_suffix = node.tag
constructor = self.yaml_multi_constructors[None]
elif None in self.yaml_constructors:
constructor = self.yaml_constructors[None]
elif isinstance(node, ScalarNode):
constructor = self.__class__.construct_scalar
elif isinstance(node, SequenceNode):
constructor = self.__class__.construct_sequence
elif isinstance(node, MappingNode):
constructor = self.__class__.construct_mapping
if tag_suffix is None:
data = constructor(self, node)
else:
data = constructor(self, tag_suffix, node)
if isinstance(data, types.GeneratorType):
generator = data
data = next(generator)
if self.deep_construct:
for dummy in generator:
pass
else:
self.state_generators.append(generator)
self.constructed_objects[node] = data
del self.recursive_objects[node]
if deep:
self.deep_construct = old_deep
return data
def construct_scalar(self, node):
if not isinstance(node, ScalarNode):
raise ConstructorError(None, None,
"expected a scalar node, but found %s" % node.id,
node.start_mark)
return node.value
def construct_sequence(self, node, deep=False):
if not isinstance(node, SequenceNode):
raise ConstructorError(None, None,
"expected a sequence node, but found %s" % node.id,
node.start_mark)
return [self.construct_object(child, deep=deep)
for child in node.value]
def construct_mapping(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
mapping = {}
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
if not isinstance(key, collections.Hashable):
raise ConstructorError("while constructing a mapping", node.start_mark,
"found unhashable key", key_node.start_mark)
value = self.construct_object(value_node, deep=deep)
mapping[key] = value
return mapping
def construct_pairs(self, node, deep=False):
if not isinstance(node, MappingNode):
raise ConstructorError(None, None,
"expected a mapping node, but found %s" % node.id,
node.start_mark)
pairs = []
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
value = self.construct_object(value_node, deep=deep)
pairs.append((key, value))
return pairs
@classmethod
def add_constructor(cls, tag, constructor):
if not 'yaml_constructors' in cls.__dict__:
cls.yaml_constructors = cls.yaml_constructors.copy()
cls.yaml_constructors[tag] = constructor
@classmethod
def add_multi_constructor(cls, tag_prefix, multi_constructor):
if not 'yaml_multi_constructors' in cls.__dict__:
cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy()
cls.yaml_multi_constructors[tag_prefix] = multi_constructor
class SafeConstructor(BaseConstructor):
def construct_scalar(self, node):
if isinstance(node, MappingNode):
for key_node, value_node in node.value:
if key_node.tag == 'tag:yaml.org,2002:value':
return self.construct_scalar(value_node)
return super().construct_scalar(node)
def flatten_mapping(self, node):
merge = []
index = 0
while index < len(node.value):
key_node, value_node = node.value[index]
if key_node.tag == 'tag:yaml.org,2002:merge':
del node.value[index]
if isinstance(value_node, MappingNode):
self.flatten_mapping(value_node)
merge.extend(value_node.value)
elif isinstance(value_node, SequenceNode):
submerge = []
for subnode in value_node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing a mapping",
node.start_mark,
"expected a mapping for merging, but found %s"
% subnode.id, subnode.start_mark)
self.flatten_mapping(subnode)
submerge.append(subnode.value)
submerge.reverse()
for value in submerge:
merge.extend(value)
else:
raise ConstructorError("while constructing a mapping", node.start_mark,
"expected a mapping or list of mappings for merging, but found %s"
% value_node.id, value_node.start_mark)
elif key_node.tag == 'tag:yaml.org,2002:value':
key_node.tag = 'tag:yaml.org,2002:str'
index += 1
else:
index += 1
if merge:
node.value = merge + node.value
def construct_mapping(self, node, deep=False):
if isinstance(node, MappingNode):
self.flatten_mapping(node)
return super().construct_mapping(node, deep=deep)
def construct_yaml_null(self, node):
self.construct_scalar(node)
return None
bool_values = {
'yes': True,
'no': False,
'true': True,
'false': False,
'on': True,
'off': False,
}
def construct_yaml_bool(self, node):
value = self.construct_scalar(node)
return self.bool_values[value.lower()]
def construct_yaml_int(self, node):
value = self.construct_scalar(node)
value = value.replace('_', '')
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '0':
return 0
elif value.startswith('0b'):
return sign*int(value[2:], 2)
elif value.startswith('0x'):
return sign*int(value[2:], 16)
elif value[0] == '0':
return sign*int(value, 8)
elif ':' in value:
digits = [int(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*int(value)
inf_value = 1e300
while inf_value != inf_value*inf_value:
inf_value *= inf_value
nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99).
def construct_yaml_float(self, node):
value = self.construct_scalar(node)
value = value.replace('_', '').lower()
sign = +1
if value[0] == '-':
sign = -1
if value[0] in '+-':
value = value[1:]
if value == '.inf':
return sign*self.inf_value
elif value == '.nan':
return self.nan_value
elif ':' in value:
digits = [float(part) for part in value.split(':')]
digits.reverse()
base = 1
value = 0.0
for digit in digits:
value += digit*base
base *= 60
return sign*value
else:
return sign*float(value)
def construct_yaml_binary(self, node):
try:
value = self.construct_scalar(node).encode('ascii')
except UnicodeEncodeError as exc:
raise ConstructorError(None, None,
"failed to convert base64 data into ascii: %s" % exc,
node.start_mark)
try:
if hasattr(base64, 'decodebytes'):
return base64.decodebytes(value)
else:
return base64.decodestring(value)
except binascii.Error as exc:
raise ConstructorError(None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
timestamp_regexp = re.compile(
r'''^(?P<year>[0-9][0-9][0-9][0-9])
-(?P<month>[0-9][0-9]?)
-(?P<day>[0-9][0-9]?)
(?:(?:[Tt]|[ \t]+)
(?P<hour>[0-9][0-9]?)
:(?P<minute>[0-9][0-9])
:(?P<second>[0-9][0-9])
(?:\.(?P<fraction>[0-9]*))?
(?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
(?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
def construct_yaml_timestamp(self, node):
value = self.construct_scalar(node)
match = self.timestamp_regexp.match(node.value)
values = match.groupdict()
year = int(values['year'])
month = int(values['month'])
day = int(values['day'])
if not values['hour']:
return datetime.date(year, month, day)
hour = int(values['hour'])
minute = int(values['minute'])
second = int(values['second'])
fraction = 0
if values['fraction']:
fraction = values['fraction'][:6]
while len(fraction) < 6:
fraction += '0'
fraction = int(fraction)
delta = None
if values['tz_sign']:
tz_hour = int(values['tz_hour'])
tz_minute = int(values['tz_minute'] or 0)
delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
if values['tz_sign'] == '-':
delta = -delta
data = datetime.datetime(year, month, day, hour, minute, second, fraction)
if delta:
data -= delta
return data
def construct_yaml_omap(self, node):
# Note: we do not check for duplicate keys, because it's too
# CPU-expensive.
omap = []
yield omap
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing an ordered map", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
omap.append((key, value))
def construct_yaml_pairs(self, node):
# Note: the same code as `construct_yaml_omap`.
pairs = []
yield pairs
if not isinstance(node, SequenceNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a sequence, but found %s" % node.id, node.start_mark)
for subnode in node.value:
if not isinstance(subnode, MappingNode):
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a mapping of length 1, but found %s" % subnode.id,
subnode.start_mark)
if len(subnode.value) != 1:
raise ConstructorError("while constructing pairs", node.start_mark,
"expected a single mapping item, but found %d items" % len(subnode.value),
subnode.start_mark)
key_node, value_node = subnode.value[0]
key = self.construct_object(key_node)
value = self.construct_object(value_node)
pairs.append((key, value))
def construct_yaml_set(self, node):
data = set()
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_str(self, node):
return self.construct_scalar(node)
def construct_yaml_seq(self, node):
data = []
yield data
data.extend(self.construct_sequence(node))
def construct_yaml_map(self, node):
data = {}
yield data
value = self.construct_mapping(node)
data.update(value)
def construct_yaml_object(self, node, cls):
data = cls.__new__(cls)
yield data
if hasattr(data, '__setstate__'):
state = self.construct_mapping(node, deep=True)
data.__setstate__(state)
else:
state = self.construct_mapping(node)
data.__dict__.update(state)
def construct_undefined(self, node):
raise ConstructorError(None, None,
"could not determine a constructor for the tag %r" % node.tag,
node.start_mark)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:null',
SafeConstructor.construct_yaml_null)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:bool',
SafeConstructor.construct_yaml_bool)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:int',
SafeConstructor.construct_yaml_int)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:float',
SafeConstructor.construct_yaml_float)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:binary',
SafeConstructor.construct_yaml_binary)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:timestamp',
SafeConstructor.construct_yaml_timestamp)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:omap',
SafeConstructor.construct_yaml_omap)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:pairs',
SafeConstructor.construct_yaml_pairs)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:set',
SafeConstructor.construct_yaml_set)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:str',
SafeConstructor.construct_yaml_str)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:seq',
SafeConstructor.construct_yaml_seq)
SafeConstructor.add_constructor(
'tag:yaml.org,2002:map',
SafeConstructor.construct_yaml_map)
SafeConstructor.add_constructor(None,
SafeConstructor.construct_undefined)
class Constructor(SafeConstructor):
def construct_python_str(self, node):
return self.construct_scalar(node)
def construct_python_unicode(self, node):
return self.construct_scalar(node)
def construct_python_bytes(self, node):
try:
value = self.construct_scalar(node).encode('ascii')
except UnicodeEncodeError as exc:
raise ConstructorError(None, None,
"failed to convert base64 data into ascii: %s" % exc,
node.start_mark)
try:
if hasattr(base64, 'decodebytes'):
return base64.decodebytes(value)
else:
return base64.decodestring(value)
except binascii.Error as exc:
raise ConstructorError(None, None,
"failed to decode base64 data: %s" % exc, node.start_mark)
def construct_python_long(self, node):
return self.construct_yaml_int(node)
def construct_python_complex(self, node):
return complex(self.construct_scalar(node))
def construct_python_tuple(self, node):
return tuple(self.construct_sequence(node))
def find_python_module(self, name, mark):
if not name:
raise ConstructorError("while constructing a Python module", mark,
"expected non-empty name appended to the tag", mark)
try:
__import__(name)
except ImportError as exc:
raise ConstructorError("while constructing a Python module", mark,
"cannot find module %r (%s)" % (name, exc), mark)
return sys.modules[name]
def find_python_name(self, name, mark):
if not name:
raise ConstructorError("while constructing a Python object", mark,
"expected non-empty name appended to the tag", mark)
if '.' in name:
module_name, object_name = name.rsplit('.', 1)
else:
module_name = 'builtins'
object_name = name
try:
__import__(module_name)
except ImportError as exc:
raise ConstructorError("while constructing a Python object", mark,
"cannot find module %r (%s)" % (module_name, exc), mark)
module = sys.modules[module_name]
if not hasattr(module, object_name):
raise ConstructorError("while constructing a Python object", mark,
"cannot find %r in the module %r"
% (object_name, module.__name__), mark)
return getattr(module, object_name)
def construct_python_name(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python name", node.start_mark,
"expected the empty value, but found %r" % value, node.start_mark)
return self.find_python_name(suffix, node.start_mark)
def construct_python_module(self, suffix, node):
value = self.construct_scalar(node)
if value:
raise ConstructorError("while constructing a Python module", node.start_mark,
"expected the empty value, but found %r" % value, node.start_mark)
return self.find_python_module(suffix, node.start_mark)
def make_python_instance(self, suffix, node,
args=None, kwds=None, newobj=False):
if not args:
args = []
if not kwds:
kwds = {}
cls = self.find_python_name(suffix, node.start_mark)
if newobj and isinstance(cls, type):
return cls.__new__(cls, *args, **kwds)
else:
return cls(*args, **kwds)
def set_python_instance_state(self, instance, state):
if hasattr(instance, '__setstate__'):
instance.__setstate__(state)
else:
slotstate = {}
if isinstance(state, tuple) and len(state) == 2:
state, slotstate = state
if hasattr(instance, '__dict__'):
instance.__dict__.update(state)
elif state:
slotstate.update(state)
for key, value in slotstate.items():
setattr(object, key, value)
def construct_python_object(self, suffix, node):
# Format:
# !!python/object:module.name { ... state ... }
instance = self.make_python_instance(suffix, node, newobj=True)
yield instance
deep = hasattr(instance, '__setstate__')
state = self.construct_mapping(node, deep=deep)
self.set_python_instance_state(instance, state)
def construct_python_object_apply(self, suffix, node, newobj=False):
# Format:
# !!python/object/apply # (or !!python/object/new)
# args: [ ... arguments ... ]
# kwds: { ... keywords ... }
# state: ... state ...
# listitems: [ ... listitems ... ]
# dictitems: { ... dictitems ... }
# or short format:
# !!python/object/apply [ ... arguments ... ]
# The difference between !!python/object/apply and !!python/object/new
# is how an object is created, check make_python_instance for details.
if isinstance(node, SequenceNode):
args = self.construct_sequence(node, deep=True)
kwds = {}
state = {}
listitems = []
dictitems = {}
else:
value = self.construct_mapping(node, deep=True)
args = value.get('args', [])
kwds = value.get('kwds', {})
state = value.get('state', {})
listitems = value.get('listitems', [])
dictitems = value.get('dictitems', {})
instance = self.make_python_instance(suffix, node, args, kwds, newobj)
if state:
self.set_python_instance_state(instance, state)
if listitems:
instance.extend(listitems)
if dictitems:
for key in dictitems:
instance[key] = dictitems[key]
return instance
def construct_python_object_new(self, suffix, node):
return self.construct_python_object_apply(suffix, node, newobj=True)
Constructor.add_constructor(
'tag:yaml.org,2002:python/none',
Constructor.construct_yaml_null)
Constructor.add_constructor(
'tag:yaml.org,2002:python/bool',
Constructor.construct_yaml_bool)
Constructor.add_constructor(
'tag:yaml.org,2002:python/str',
Constructor.construct_python_str)
Constructor.add_constructor(
'tag:yaml.org,2002:python/unicode',
Constructor.construct_python_unicode)
Constructor.add_constructor(
'tag:yaml.org,2002:python/bytes',
Constructor.construct_python_bytes)
Constructor.add_constructor(
'tag:yaml.org,2002:python/int',
Constructor.construct_yaml_int)
Constructor.add_constructor(
'tag:yaml.org,2002:python/long',
Constructor.construct_python_long)
Constructor.add_constructor(
'tag:yaml.org,2002:python/float',
Constructor.construct_yaml_float)
Constructor.add_constructor(
'tag:yaml.org,2002:python/complex',
Constructor.construct_python_complex)
Constructor.add_constructor(
'tag:yaml.org,2002:python/list',
Constructor.construct_yaml_seq)
Constructor.add_constructor(
'tag:yaml.org,2002:python/tuple',
Constructor.construct_python_tuple)
Constructor.add_constructor(
'tag:yaml.org,2002:python/dict',
Constructor.construct_yaml_map)
Constructor.add_multi_constructor(
'tag:yaml.org,2002:python/name:',
Constructor.construct_python_name)
Constructor.add_multi_constructor(
'tag:yaml.org,2002:python/module:',
Constructor.construct_python_module)
Constructor.add_multi_constructor(
'tag:yaml.org,2002:python/object:',
Constructor.construct_python_object)
Constructor.add_multi_constructor(
'tag:yaml.org,2002:python/object/apply:',
Constructor.construct_python_object_apply)
Constructor.add_multi_constructor(
'tag:yaml.org,2002:python/object/new:',
Constructor.construct_python_object_new)
+85
View File
@@ -0,0 +1,85 @@
__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader',
'CBaseDumper', 'CSafeDumper', 'CDumper']
from _yaml import CParser, CEmitter
from .constructor import *
from .serializer import *
from .representer import *
from .resolver import *
class CBaseLoader(CParser, BaseConstructor, BaseResolver):
def __init__(self, stream):
CParser.__init__(self, stream)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class CSafeLoader(CParser, SafeConstructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class CLoader(CParser, Constructor, Resolver):
def __init__(self, stream):
CParser.__init__(self, stream)
Constructor.__init__(self)
Resolver.__init__(self)
class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
class CDumper(CEmitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
CEmitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width, encoding=encoding,
allow_unicode=allow_unicode, line_break=line_break,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
+62
View File
@@ -0,0 +1,62 @@
__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
from .emitter import *
from .serializer import *
from .representer import *
from .resolver import *
class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
SafeRepresenter.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
class Dumper(Emitter, Serializer, Representer, Resolver):
def __init__(self, stream,
default_style=None, default_flow_style=None,
canonical=None, indent=None, width=None,
allow_unicode=None, line_break=None,
encoding=None, explicit_start=None, explicit_end=None,
version=None, tags=None):
Emitter.__init__(self, stream, canonical=canonical,
indent=indent, width=width,
allow_unicode=allow_unicode, line_break=line_break)
Serializer.__init__(self, encoding=encoding,
explicit_start=explicit_start, explicit_end=explicit_end,
version=version, tags=tags)
Representer.__init__(self, default_style=default_style,
default_flow_style=default_flow_style)
Resolver.__init__(self)
File diff suppressed because it is too large Load Diff
+75
View File
@@ -0,0 +1,75 @@
__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
class Mark:
def __init__(self, name, index, line, column, buffer, pointer):
self.name = name
self.index = index
self.line = line
self.column = column
self.buffer = buffer
self.pointer = pointer
def get_snippet(self, indent=4, max_length=75):
if self.buffer is None:
return None
head = ''
start = self.pointer
while start > 0 and self.buffer[start-1] not in '\0\r\n\x85\u2028\u2029':
start -= 1
if self.pointer-start > max_length/2-1:
head = ' ... '
start += 5
break
tail = ''
end = self.pointer
while end < len(self.buffer) and self.buffer[end] not in '\0\r\n\x85\u2028\u2029':
end += 1
if end-self.pointer > max_length/2-1:
tail = ' ... '
end -= 5
break
snippet = self.buffer[start:end]
return ' '*indent + head + snippet + tail + '\n' \
+ ' '*(indent+self.pointer-start+len(head)) + '^'
def __str__(self):
snippet = self.get_snippet()
where = " in \"%s\", line %d, column %d" \
% (self.name, self.line+1, self.column+1)
if snippet is not None:
where += ":\n"+snippet
return where
class YAMLError(Exception):
pass
class MarkedYAMLError(YAMLError):
def __init__(self, context=None, context_mark=None,
problem=None, problem_mark=None, note=None):
self.context = context
self.context_mark = context_mark
self.problem = problem
self.problem_mark = problem_mark
self.note = note
def __str__(self):
lines = []
if self.context is not None:
lines.append(self.context)
if self.context_mark is not None \
and (self.problem is None or self.problem_mark is None
or self.context_mark.name != self.problem_mark.name
or self.context_mark.line != self.problem_mark.line
or self.context_mark.column != self.problem_mark.column):
lines.append(str(self.context_mark))
if self.problem is not None:
lines.append(self.problem)
if self.problem_mark is not None:
lines.append(str(self.problem_mark))
if self.note is not None:
lines.append(self.note)
return '\n'.join(lines)
+86
View File
@@ -0,0 +1,86 @@
# Abstract classes.
class Event(object):
def __init__(self, start_mark=None, end_mark=None):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in ['anchor', 'tag', 'implicit', 'value']
if hasattr(self, key)]
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
class NodeEvent(Event):
def __init__(self, anchor, start_mark=None, end_mark=None):
self.anchor = anchor
self.start_mark = start_mark
self.end_mark = end_mark
class CollectionStartEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,
flow_style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class CollectionEndEvent(Event):
pass
# Implementations.
class StreamStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None, encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndEvent(Event):
pass
class DocumentStartEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None, version=None, tags=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
self.version = version
self.tags = tags
class DocumentEndEvent(Event):
def __init__(self, start_mark=None, end_mark=None,
explicit=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.explicit = explicit
class AliasEvent(NodeEvent):
pass
class ScalarEvent(NodeEvent):
def __init__(self, anchor, tag, implicit, value,
start_mark=None, end_mark=None, style=None):
self.anchor = anchor
self.tag = tag
self.implicit = implicit
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class SequenceStartEvent(CollectionStartEvent):
pass
class SequenceEndEvent(CollectionEndEvent):
pass
class MappingStartEvent(CollectionStartEvent):
pass
class MappingEndEvent(CollectionEndEvent):
pass
+40
View File
@@ -0,0 +1,40 @@
__all__ = ['BaseLoader', 'SafeLoader', 'Loader']
from .reader import *
from .scanner import *
from .parser import *
from .composer import *
from .constructor import *
from .resolver import *
class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
BaseConstructor.__init__(self)
BaseResolver.__init__(self)
class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
SafeConstructor.__init__(self)
Resolver.__init__(self)
class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):
def __init__(self, stream):
Reader.__init__(self, stream)
Scanner.__init__(self)
Parser.__init__(self)
Composer.__init__(self)
Constructor.__init__(self)
Resolver.__init__(self)
+49
View File
@@ -0,0 +1,49 @@
class Node(object):
def __init__(self, tag, value, start_mark, end_mark):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
value = self.value
#if isinstance(value, list):
# if len(value) == 0:
# value = '<empty>'
# elif len(value) == 1:
# value = '<1 item>'
# else:
# value = '<%d items>' % len(value)
#else:
# if len(value) > 75:
# value = repr(value[:70]+u' ... ')
# else:
# value = repr(value)
value = repr(value)
return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)
class ScalarNode(Node):
id = 'scalar'
def __init__(self, tag, value,
start_mark=None, end_mark=None, style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
class CollectionNode(Node):
def __init__(self, tag, value,
start_mark=None, end_mark=None, flow_style=None):
self.tag = tag
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
self.flow_style = flow_style
class SequenceNode(CollectionNode):
id = 'sequence'
class MappingNode(CollectionNode):
id = 'mapping'
+584
View File
@@ -0,0 +1,584 @@
# The following YAML grammar is LL(1) and is parsed by a recursive descent
# parser.
#
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
# block_node_or_indentless_sequence ::=
# ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# FIRST sets:
#
# stream: { STREAM-START }
# explicit_document: { DIRECTIVE DOCUMENT-START }
# implicit_document: FIRST(block_node)
# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# block_sequence: { BLOCK-SEQUENCE-START }
# block_mapping: { BLOCK-MAPPING-START }
# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { ENTRY }
# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
# flow_sequence: { FLOW-SEQUENCE-START }
# flow_mapping: { FLOW-MAPPING-START }
# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
__all__ = ['Parser', 'ParserError']
from .error import MarkedYAMLError
from .tokens import *
from .events import *
from .scanner import *
class ParserError(MarkedYAMLError):
pass
class Parser:
# Since writing a recursive-descendant parser is a straightforward task, we
# do not give many comments here.
DEFAULT_TAGS = {
'!': '!',
'!!': 'tag:yaml.org,2002:',
}
def __init__(self):
self.current_event = None
self.yaml_version = None
self.tag_handles = {}
self.states = []
self.marks = []
self.state = self.parse_stream_start
def check_event(self, *choices):
# Check the type of the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
if self.current_event is not None:
if not choices:
return True
for choice in choices:
if isinstance(self.current_event, choice):
return True
return False
def peek_event(self):
# Get the next event.
if self.current_event is None:
if self.state:
self.current_event = self.state()
return self.current_event
def get_event(self):
# Get the next event and proceed further.
if self.current_event is None:
if self.state:
self.current_event = self.state()
value = self.current_event
self.current_event = None
return value
# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
# implicit_document ::= block_node DOCUMENT-END*
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
def parse_stream_start(self):
# Parse the stream start.
token = self.get_token()
event = StreamStartEvent(token.start_mark, token.end_mark,
encoding=token.encoding)
# Prepare the next state.
self.state = self.parse_implicit_document_start
return event
def parse_implicit_document_start(self):
# Parse an implicit document.
if not self.check_token(DirectiveToken, DocumentStartToken,
StreamEndToken):
self.tag_handles = self.DEFAULT_TAGS
token = self.peek_token()
start_mark = end_mark = token.start_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=False)
# Prepare the next state.
self.states.append(self.parse_document_end)
self.state = self.parse_block_node
return event
else:
return self.parse_document_start()
def parse_document_start(self):
# Parse any extra document end indicators.
while self.check_token(DocumentEndToken):
self.get_token()
# Parse an explicit document.
if not self.check_token(StreamEndToken):
token = self.peek_token()
start_mark = token.start_mark
version, tags = self.process_directives()
if not self.check_token(DocumentStartToken):
raise ParserError(None, None,
"expected '<document start>', but found %r"
% self.peek_token().id,
self.peek_token().start_mark)
token = self.get_token()
end_mark = token.end_mark
event = DocumentStartEvent(start_mark, end_mark,
explicit=True, version=version, tags=tags)
self.states.append(self.parse_document_end)
self.state = self.parse_document_content
else:
# Parse the end of the stream.
token = self.get_token()
event = StreamEndEvent(token.start_mark, token.end_mark)
assert not self.states
assert not self.marks
self.state = None
return event
def parse_document_end(self):
# Parse the document end.
token = self.peek_token()
start_mark = end_mark = token.start_mark
explicit = False
if self.check_token(DocumentEndToken):
token = self.get_token()
end_mark = token.end_mark
explicit = True
event = DocumentEndEvent(start_mark, end_mark,
explicit=explicit)
# Prepare the next state.
self.state = self.parse_document_start
return event
def parse_document_content(self):
if self.check_token(DirectiveToken,
DocumentStartToken, DocumentEndToken, StreamEndToken):
event = self.process_empty_scalar(self.peek_token().start_mark)
self.state = self.states.pop()
return event
else:
return self.parse_block_node()
def process_directives(self):
self.yaml_version = None
self.tag_handles = {}
while self.check_token(DirectiveToken):
token = self.get_token()
if token.name == 'YAML':
if self.yaml_version is not None:
raise ParserError(None, None,
"found duplicate YAML directive", token.start_mark)
major, minor = token.value
if major != 1:
raise ParserError(None, None,
"found incompatible YAML document (version 1.* is required)",
token.start_mark)
self.yaml_version = token.value
elif token.name == 'TAG':
handle, prefix = token.value
if handle in self.tag_handles:
raise ParserError(None, None,
"duplicate tag handle %r" % handle,
token.start_mark)
self.tag_handles[handle] = prefix
if self.tag_handles:
value = self.yaml_version, self.tag_handles.copy()
else:
value = self.yaml_version, None
for key in self.DEFAULT_TAGS:
if key not in self.tag_handles:
self.tag_handles[key] = self.DEFAULT_TAGS[key]
return value
# block_node_or_indentless_sequence ::= ALIAS
# | properties (block_content | indentless_block_sequence)?
# | block_content
# | indentless_block_sequence
# block_node ::= ALIAS
# | properties block_content?
# | block_content
# flow_node ::= ALIAS
# | properties flow_content?
# | flow_content
# properties ::= TAG ANCHOR? | ANCHOR TAG?
# block_content ::= block_collection | flow_collection | SCALAR
# flow_content ::= flow_collection | SCALAR
# block_collection ::= block_sequence | block_mapping
# flow_collection ::= flow_sequence | flow_mapping
def parse_block_node(self):
return self.parse_node(block=True)
def parse_flow_node(self):
return self.parse_node()
def parse_block_node_or_indentless_sequence(self):
return self.parse_node(block=True, indentless_sequence=True)
def parse_node(self, block=False, indentless_sequence=False):
if self.check_token(AliasToken):
token = self.get_token()
event = AliasEvent(token.value, token.start_mark, token.end_mark)
self.state = self.states.pop()
else:
anchor = None
tag = None
start_mark = end_mark = tag_mark = None
if self.check_token(AnchorToken):
token = self.get_token()
start_mark = token.start_mark
end_mark = token.end_mark
anchor = token.value
if self.check_token(TagToken):
token = self.get_token()
tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
elif self.check_token(TagToken):
token = self.get_token()
start_mark = tag_mark = token.start_mark
end_mark = token.end_mark
tag = token.value
if self.check_token(AnchorToken):
token = self.get_token()
end_mark = token.end_mark
anchor = token.value
if tag is not None:
handle, suffix = tag
if handle is not None:
if handle not in self.tag_handles:
raise ParserError("while parsing a node", start_mark,
"found undefined tag handle %r" % handle,
tag_mark)
tag = self.tag_handles[handle]+suffix
else:
tag = suffix
#if tag == '!':
# raise ParserError("while parsing a node", start_mark,
# "found non-specific tag '!'", tag_mark,
# "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
if start_mark is None:
start_mark = end_mark = self.peek_token().start_mark
event = None
implicit = (tag is None or tag == '!')
if indentless_sequence and self.check_token(BlockEntryToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark)
self.state = self.parse_indentless_sequence_entry
else:
if self.check_token(ScalarToken):
token = self.get_token()
end_mark = token.end_mark
if (token.plain and tag is None) or tag == '!':
implicit = (True, False)
elif tag is None:
implicit = (False, True)
else:
implicit = (False, False)
event = ScalarEvent(anchor, tag, implicit, token.value,
start_mark, end_mark, style=token.style)
self.state = self.states.pop()
elif self.check_token(FlowSequenceStartToken):
end_mark = self.peek_token().end_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_sequence_first_entry
elif self.check_token(FlowMappingStartToken):
end_mark = self.peek_token().end_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=True)
self.state = self.parse_flow_mapping_first_key
elif block and self.check_token(BlockSequenceStartToken):
end_mark = self.peek_token().start_mark
event = SequenceStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_sequence_first_entry
elif block and self.check_token(BlockMappingStartToken):
end_mark = self.peek_token().start_mark
event = MappingStartEvent(anchor, tag, implicit,
start_mark, end_mark, flow_style=False)
self.state = self.parse_block_mapping_first_key
elif anchor is not None or tag is not None:
# Empty scalars are allowed even if a tag or an anchor is
# specified.
event = ScalarEvent(anchor, tag, (implicit, False), '',
start_mark, end_mark)
self.state = self.states.pop()
else:
if block:
node = 'block'
else:
node = 'flow'
token = self.peek_token()
raise ParserError("while parsing a %s node" % node, start_mark,
"expected the node content, but found %r" % token.id,
token.start_mark)
return event
# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
def parse_block_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_sequence_entry()
def parse_block_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken, BlockEndToken):
self.states.append(self.parse_block_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_block_sequence_entry
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block collection", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
def parse_indentless_sequence_entry(self):
if self.check_token(BlockEntryToken):
token = self.get_token()
if not self.check_token(BlockEntryToken,
KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_indentless_sequence_entry)
return self.parse_block_node()
else:
self.state = self.parse_indentless_sequence_entry
return self.process_empty_scalar(token.end_mark)
token = self.peek_token()
event = SequenceEndEvent(token.start_mark, token.start_mark)
self.state = self.states.pop()
return event
# block_mapping ::= BLOCK-MAPPING_START
# ((KEY block_node_or_indentless_sequence?)?
# (VALUE block_node_or_indentless_sequence?)?)*
# BLOCK-END
def parse_block_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_block_mapping_key()
def parse_block_mapping_key(self):
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_value)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_value
return self.process_empty_scalar(token.end_mark)
if not self.check_token(BlockEndToken):
token = self.peek_token()
raise ParserError("while parsing a block mapping", self.marks[-1],
"expected <block end>, but found %r" % token.id, token.start_mark)
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_block_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(KeyToken, ValueToken, BlockEndToken):
self.states.append(self.parse_block_mapping_key)
return self.parse_block_node_or_indentless_sequence()
else:
self.state = self.parse_block_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_block_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
# flow_sequence ::= FLOW-SEQUENCE-START
# (flow_sequence_entry FLOW-ENTRY)*
# flow_sequence_entry?
# FLOW-SEQUENCE-END
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
#
# Note that while production rules for both flow_sequence_entry and
# flow_mapping_entry are equal, their interpretations are different.
# For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
# generate an inline mapping (set syntax).
def parse_flow_sequence_first_entry(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_sequence_entry(first=True)
def parse_flow_sequence_entry(self, first=False):
if not self.check_token(FlowSequenceEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow sequence", self.marks[-1],
"expected ',' or ']', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.peek_token()
event = MappingStartEvent(None, None, True,
token.start_mark, token.end_mark,
flow_style=True)
self.state = self.parse_flow_sequence_entry_mapping_key
return event
elif not self.check_token(FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry)
return self.parse_flow_node()
token = self.get_token()
event = SequenceEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_sequence_entry_mapping_key(self):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_value
return self.process_empty_scalar(token.end_mark)
def parse_flow_sequence_entry_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
self.states.append(self.parse_flow_sequence_entry_mapping_end)
return self.parse_flow_node()
else:
self.state = self.parse_flow_sequence_entry_mapping_end
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_sequence_entry_mapping_end
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_sequence_entry_mapping_end(self):
self.state = self.parse_flow_sequence_entry
token = self.peek_token()
return MappingEndEvent(token.start_mark, token.start_mark)
# flow_mapping ::= FLOW-MAPPING-START
# (flow_mapping_entry FLOW-ENTRY)*
# flow_mapping_entry?
# FLOW-MAPPING-END
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
def parse_flow_mapping_first_key(self):
token = self.get_token()
self.marks.append(token.start_mark)
return self.parse_flow_mapping_key(first=True)
def parse_flow_mapping_key(self, first=False):
if not self.check_token(FlowMappingEndToken):
if not first:
if self.check_token(FlowEntryToken):
self.get_token()
else:
token = self.peek_token()
raise ParserError("while parsing a flow mapping", self.marks[-1],
"expected ',' or '}', but got %r" % token.id, token.start_mark)
if self.check_token(KeyToken):
token = self.get_token()
if not self.check_token(ValueToken,
FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_value)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_value
return self.process_empty_scalar(token.end_mark)
elif not self.check_token(FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_empty_value)
return self.parse_flow_node()
token = self.get_token()
event = MappingEndEvent(token.start_mark, token.end_mark)
self.state = self.states.pop()
self.marks.pop()
return event
def parse_flow_mapping_value(self):
if self.check_token(ValueToken):
token = self.get_token()
if not self.check_token(FlowEntryToken, FlowMappingEndToken):
self.states.append(self.parse_flow_mapping_key)
return self.parse_flow_node()
else:
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(token.end_mark)
else:
self.state = self.parse_flow_mapping_key
token = self.peek_token()
return self.process_empty_scalar(token.start_mark)
def parse_flow_mapping_empty_value(self):
self.state = self.parse_flow_mapping_key
return self.process_empty_scalar(self.peek_token().start_mark)
def process_empty_scalar(self, mark):
return ScalarEvent(None, None, (True, False), '', mark, mark)
+192
View File
@@ -0,0 +1,192 @@
# This module contains abstractions for the input stream. You don't have to
# looks further, there are no pretty code.
#
# We define two classes here.
#
# Mark(source, line, column)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purposes.
#
# Reader(source, data)
# Reader determines the encoding of `data` and converts it to unicode.
# Reader provides the following methods and attributes:
# reader.peek(length=1) - return the next `length` characters
# reader.forward(length=1) - move the current position to `length` characters.
# reader.index - the number of the current character.
# reader.line, stream.column - the line and the column of the current character.
__all__ = ['Reader', 'ReaderError']
from .error import YAMLError, Mark
import codecs, re
class ReaderError(YAMLError):
def __init__(self, name, position, character, encoding, reason):
self.name = name
self.character = character
self.position = position
self.encoding = encoding
self.reason = reason
def __str__(self):
if isinstance(self.character, bytes):
return "'%s' codec can't decode byte #x%02x: %s\n" \
" in \"%s\", position %d" \
% (self.encoding, ord(self.character), self.reason,
self.name, self.position)
else:
return "unacceptable character #x%04x: %s\n" \
" in \"%s\", position %d" \
% (self.character, self.reason,
self.name, self.position)
class Reader(object):
# Reader:
# - determines the data encoding and converts it to a unicode string,
# - checks if characters are in allowed range,
# - adds '\0' to the end.
# Reader accepts
# - a `bytes` object,
# - a `str` object,
# - a file-like object with its `read` method returning `str`,
# - a file-like object with its `read` method returning `unicode`.
# Yeah, it's ugly and slow.
def __init__(self, stream):
self.name = None
self.stream = None
self.stream_pointer = 0
self.eof = True
self.buffer = ''
self.pointer = 0
self.raw_buffer = None
self.raw_decode = None
self.encoding = None
self.index = 0
self.line = 0
self.column = 0
if isinstance(stream, str):
self.name = "<unicode string>"
self.check_printable(stream)
self.buffer = stream+'\0'
elif isinstance(stream, bytes):
self.name = "<byte string>"
self.raw_buffer = stream
self.determine_encoding()
else:
self.stream = stream
self.name = getattr(stream, 'name', "<file>")
self.eof = False
self.raw_buffer = None
self.determine_encoding()
def peek(self, index=0):
try:
return self.buffer[self.pointer+index]
except IndexError:
self.update(index+1)
return self.buffer[self.pointer+index]
def prefix(self, length=1):
if self.pointer+length >= len(self.buffer):
self.update(length)
return self.buffer[self.pointer:self.pointer+length]
def forward(self, length=1):
if self.pointer+length+1 >= len(self.buffer):
self.update(length+1)
while length:
ch = self.buffer[self.pointer]
self.pointer += 1
self.index += 1
if ch in '\n\x85\u2028\u2029' \
or (ch == '\r' and self.buffer[self.pointer] != '\n'):
self.line += 1
self.column = 0
elif ch != '\uFEFF':
self.column += 1
length -= 1
def get_mark(self):
if self.stream is None:
return Mark(self.name, self.index, self.line, self.column,
self.buffer, self.pointer)
else:
return Mark(self.name, self.index, self.line, self.column,
None, None)
def determine_encoding(self):
while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
self.update_raw()
if isinstance(self.raw_buffer, bytes):
if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
self.raw_decode = codecs.utf_16_le_decode
self.encoding = 'utf-16-le'
elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
self.raw_decode = codecs.utf_16_be_decode
self.encoding = 'utf-16-be'
else:
self.raw_decode = codecs.utf_8_decode
self.encoding = 'utf-8'
self.update(1)
NON_PRINTABLE = re.compile('[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
def check_printable(self, data):
match = self.NON_PRINTABLE.search(data)
if match:
character = match.group()
position = self.index+(len(self.buffer)-self.pointer)+match.start()
raise ReaderError(self.name, position, ord(character),
'unicode', "special characters are not allowed")
def update(self, length):
if self.raw_buffer is None:
return
self.buffer = self.buffer[self.pointer:]
self.pointer = 0
while len(self.buffer) < length:
if not self.eof:
self.update_raw()
if self.raw_decode is not None:
try:
data, converted = self.raw_decode(self.raw_buffer,
'strict', self.eof)
except UnicodeDecodeError as exc:
character = self.raw_buffer[exc.start]
if self.stream is not None:
position = self.stream_pointer-len(self.raw_buffer)+exc.start
else:
position = exc.start
raise ReaderError(self.name, position, character,
exc.encoding, exc.reason)
else:
data = self.raw_buffer
converted = len(data)
self.check_printable(data)
self.buffer += data
self.raw_buffer = self.raw_buffer[converted:]
if self.eof:
self.buffer += '\0'
self.raw_buffer = None
break
def update_raw(self, size=4096):
data = self.stream.read(size)
if self.raw_buffer is None:
self.raw_buffer = data
else:
self.raw_buffer += data
self.stream_pointer += len(data)
if not data:
self.eof = True
#try:
# import psyco
# psyco.bind(Reader)
#except ImportError:
# pass
+374
View File
@@ -0,0 +1,374 @@
__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
'RepresenterError']
from .error import *
from .nodes import *
import datetime, sys, copyreg, types, base64
class RepresenterError(YAMLError):
pass
class BaseRepresenter:
yaml_representers = {}
yaml_multi_representers = {}
def __init__(self, default_style=None, default_flow_style=None):
self.default_style = default_style
self.default_flow_style = default_flow_style
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def represent(self, data):
node = self.represent_data(data)
self.serialize(node)
self.represented_objects = {}
self.object_keeper = []
self.alias_key = None
def represent_data(self, data):
if self.ignore_aliases(data):
self.alias_key = None
else:
self.alias_key = id(data)
if self.alias_key is not None:
if self.alias_key in self.represented_objects:
node = self.represented_objects[self.alias_key]
#if node is None:
# raise RepresenterError("recursive objects are not allowed: %r" % data)
return node
#self.represented_objects[alias_key] = None
self.object_keeper.append(data)
data_types = type(data).__mro__
if data_types[0] in self.yaml_representers:
node = self.yaml_representers[data_types[0]](self, data)
else:
for data_type in data_types:
if data_type in self.yaml_multi_representers:
node = self.yaml_multi_representers[data_type](self, data)
break
else:
if None in self.yaml_multi_representers:
node = self.yaml_multi_representers[None](self, data)
elif None in self.yaml_representers:
node = self.yaml_representers[None](self, data)
else:
node = ScalarNode(None, str(data))
#if alias_key is not None:
# self.represented_objects[alias_key] = node
return node
@classmethod
def add_representer(cls, data_type, representer):
if not 'yaml_representers' in cls.__dict__:
cls.yaml_representers = cls.yaml_representers.copy()
cls.yaml_representers[data_type] = representer
@classmethod
def add_multi_representer(cls, data_type, representer):
if not 'yaml_multi_representers' in cls.__dict__:
cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
cls.yaml_multi_representers[data_type] = representer
def represent_scalar(self, tag, value, style=None):
if style is None:
style = self.default_style
node = ScalarNode(tag, value, style=style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
return node
def represent_sequence(self, tag, sequence, flow_style=None):
value = []
node = SequenceNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
for item in sequence:
node_item = self.represent_data(item)
if not (isinstance(node_item, ScalarNode) and not node_item.style):
best_style = False
value.append(node_item)
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def represent_mapping(self, tag, mapping, flow_style=None):
value = []
node = MappingNode(tag, value, flow_style=flow_style)
if self.alias_key is not None:
self.represented_objects[self.alias_key] = node
best_style = True
if hasattr(mapping, 'items'):
mapping = list(mapping.items())
try:
mapping = sorted(mapping)
except TypeError:
pass
for item_key, item_value in mapping:
node_key = self.represent_data(item_key)
node_value = self.represent_data(item_value)
if not (isinstance(node_key, ScalarNode) and not node_key.style):
best_style = False
if not (isinstance(node_value, ScalarNode) and not node_value.style):
best_style = False
value.append((node_key, node_value))
if flow_style is None:
if self.default_flow_style is not None:
node.flow_style = self.default_flow_style
else:
node.flow_style = best_style
return node
def ignore_aliases(self, data):
return False
class SafeRepresenter(BaseRepresenter):
def ignore_aliases(self, data):
if data in [None, ()]:
return True
if isinstance(data, (str, bytes, bool, int, float)):
return True
def represent_none(self, data):
return self.represent_scalar('tag:yaml.org,2002:null', 'null')
def represent_str(self, data):
return self.represent_scalar('tag:yaml.org,2002:str', data)
def represent_binary(self, data):
if hasattr(base64, 'encodebytes'):
data = base64.encodebytes(data).decode('ascii')
else:
data = base64.encodestring(data).decode('ascii')
return self.represent_scalar('tag:yaml.org,2002:binary', data, style='|')
def represent_bool(self, data):
if data:
value = 'true'
else:
value = 'false'
return self.represent_scalar('tag:yaml.org,2002:bool', value)
def represent_int(self, data):
return self.represent_scalar('tag:yaml.org,2002:int', str(data))
inf_value = 1e300
while repr(inf_value) != repr(inf_value*inf_value):
inf_value *= inf_value
def represent_float(self, data):
if data != data or (data == 0.0 and data == 1.0):
value = '.nan'
elif data == self.inf_value:
value = '.inf'
elif data == -self.inf_value:
value = '-.inf'
else:
value = repr(data).lower()
# Note that in some cases `repr(data)` represents a float number
# without the decimal parts. For instance:
# >>> repr(1e17)
# '1e17'
# Unfortunately, this is not a valid float representation according
# to the definition of the `!!float` tag. We fix this by adding
# '.0' before the 'e' symbol.
if '.' not in value and 'e' in value:
value = value.replace('e', '.0e', 1)
return self.represent_scalar('tag:yaml.org,2002:float', value)
def represent_list(self, data):
#pairs = (len(data) > 0 and isinstance(data, list))
#if pairs:
# for item in data:
# if not isinstance(item, tuple) or len(item) != 2:
# pairs = False
# break
#if not pairs:
return self.represent_sequence('tag:yaml.org,2002:seq', data)
#value = []
#for item_key, item_value in data:
# value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
# [(item_key, item_value)]))
#return SequenceNode(u'tag:yaml.org,2002:pairs', value)
def represent_dict(self, data):
return self.represent_mapping('tag:yaml.org,2002:map', data)
def represent_set(self, data):
value = {}
for key in data:
value[key] = None
return self.represent_mapping('tag:yaml.org,2002:set', value)
def represent_date(self, data):
value = data.isoformat()
return self.represent_scalar('tag:yaml.org,2002:timestamp', value)
def represent_datetime(self, data):
value = data.isoformat(' ')
return self.represent_scalar('tag:yaml.org,2002:timestamp', value)
def represent_yaml_object(self, tag, data, cls, flow_style=None):
if hasattr(data, '__getstate__'):
state = data.__getstate__()
else:
state = data.__dict__.copy()
return self.represent_mapping(tag, state, flow_style=flow_style)
def represent_undefined(self, data):
raise RepresenterError("cannot represent an object: %s" % data)
SafeRepresenter.add_representer(type(None),
SafeRepresenter.represent_none)
SafeRepresenter.add_representer(str,
SafeRepresenter.represent_str)
SafeRepresenter.add_representer(bytes,
SafeRepresenter.represent_binary)
SafeRepresenter.add_representer(bool,
SafeRepresenter.represent_bool)
SafeRepresenter.add_representer(int,
SafeRepresenter.represent_int)
SafeRepresenter.add_representer(float,
SafeRepresenter.represent_float)
SafeRepresenter.add_representer(list,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(tuple,
SafeRepresenter.represent_list)
SafeRepresenter.add_representer(dict,
SafeRepresenter.represent_dict)
SafeRepresenter.add_representer(set,
SafeRepresenter.represent_set)
SafeRepresenter.add_representer(datetime.date,
SafeRepresenter.represent_date)
SafeRepresenter.add_representer(datetime.datetime,
SafeRepresenter.represent_datetime)
SafeRepresenter.add_representer(None,
SafeRepresenter.represent_undefined)
class Representer(SafeRepresenter):
def represent_complex(self, data):
if data.imag == 0.0:
data = '%r' % data.real
elif data.real == 0.0:
data = '%rj' % data.imag
elif data.imag > 0:
data = '%r+%rj' % (data.real, data.imag)
else:
data = '%r%rj' % (data.real, data.imag)
return self.represent_scalar('tag:yaml.org,2002:python/complex', data)
def represent_tuple(self, data):
return self.represent_sequence('tag:yaml.org,2002:python/tuple', data)
def represent_name(self, data):
name = '%s.%s' % (data.__module__, data.__name__)
return self.represent_scalar('tag:yaml.org,2002:python/name:'+name, '')
def represent_module(self, data):
return self.represent_scalar(
'tag:yaml.org,2002:python/module:'+data.__name__, '')
def represent_object(self, data):
# We use __reduce__ API to save the data. data.__reduce__ returns
# a tuple of length 2-5:
# (function, args, state, listitems, dictitems)
# For reconstructing, we calls function(*args), then set its state,
# listitems, and dictitems if they are not None.
# A special case is when function.__name__ == '__newobj__'. In this
# case we create the object with args[0].__new__(*args).
# Another special case is when __reduce__ returns a string - we don't
# support it.
# We produce a !!python/object, !!python/object/new or
# !!python/object/apply node.
cls = type(data)
if cls in copyreg.dispatch_table:
reduce = copyreg.dispatch_table[cls](data)
elif hasattr(data, '__reduce_ex__'):
reduce = data.__reduce_ex__(2)
elif hasattr(data, '__reduce__'):
reduce = data.__reduce__()
else:
raise RepresenterError("cannot represent object: %r" % data)
reduce = (list(reduce)+[None]*5)[:5]
function, args, state, listitems, dictitems = reduce
args = list(args)
if state is None:
state = {}
if listitems is not None:
listitems = list(listitems)
if dictitems is not None:
dictitems = dict(dictitems)
if function.__name__ == '__newobj__':
function = args[0]
args = args[1:]
tag = 'tag:yaml.org,2002:python/object/new:'
newobj = True
else:
tag = 'tag:yaml.org,2002:python/object/apply:'
newobj = False
function_name = '%s.%s' % (function.__module__, function.__name__)
if not args and not listitems and not dictitems \
and isinstance(state, dict) and newobj:
return self.represent_mapping(
'tag:yaml.org,2002:python/object:'+function_name, state)
if not listitems and not dictitems \
and isinstance(state, dict) and not state:
return self.represent_sequence(tag+function_name, args)
value = {}
if args:
value['args'] = args
if state or not isinstance(state, dict):
value['state'] = state
if listitems:
value['listitems'] = listitems
if dictitems:
value['dictitems'] = dictitems
return self.represent_mapping(tag+function_name, value)
Representer.add_representer(complex,
Representer.represent_complex)
Representer.add_representer(tuple,
Representer.represent_tuple)
Representer.add_representer(type,
Representer.represent_name)
Representer.add_representer(types.FunctionType,
Representer.represent_name)
Representer.add_representer(types.BuiltinFunctionType,
Representer.represent_name)
Representer.add_representer(types.ModuleType,
Representer.represent_module)
Representer.add_multi_representer(object,
Representer.represent_object)
+224
View File
@@ -0,0 +1,224 @@
__all__ = ['BaseResolver', 'Resolver']
from .error import *
from .nodes import *
import re
class ResolverError(YAMLError):
pass
class BaseResolver:
DEFAULT_SCALAR_TAG = 'tag:yaml.org,2002:str'
DEFAULT_SEQUENCE_TAG = 'tag:yaml.org,2002:seq'
DEFAULT_MAPPING_TAG = 'tag:yaml.org,2002:map'
yaml_implicit_resolvers = {}
yaml_path_resolvers = {}
def __init__(self):
self.resolver_exact_paths = []
self.resolver_prefix_paths = []
@classmethod
def add_implicit_resolver(cls, tag, regexp, first):
if not 'yaml_implicit_resolvers' in cls.__dict__:
cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
if first is None:
first = [None]
for ch in first:
cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
@classmethod
def add_path_resolver(cls, tag, path, kind=None):
# Note: `add_path_resolver` is experimental. The API could be changed.
# `new_path` is a pattern that is matched against the path from the
# root to the node that is being considered. `node_path` elements are
# tuples `(node_check, index_check)`. `node_check` is a node class:
# `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None`
# matches any kind of a node. `index_check` could be `None`, a boolean
# value, a string value, or a number. `None` and `False` match against
# any _value_ of sequence and mapping nodes. `True` matches against
# any _key_ of a mapping node. A string `index_check` matches against
# a mapping value that corresponds to a scalar key which content is
# equal to the `index_check` value. An integer `index_check` matches
# against a sequence value with the index equal to `index_check`.
if not 'yaml_path_resolvers' in cls.__dict__:
cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
new_path = []
for element in path:
if isinstance(element, (list, tuple)):
if len(element) == 2:
node_check, index_check = element
elif len(element) == 1:
node_check = element[0]
index_check = True
else:
raise ResolverError("Invalid path element: %s" % element)
else:
node_check = None
index_check = element
if node_check is str:
node_check = ScalarNode
elif node_check is list:
node_check = SequenceNode
elif node_check is dict:
node_check = MappingNode
elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
and not isinstance(node_check, str) \
and node_check is not None:
raise ResolverError("Invalid node checker: %s" % node_check)
if not isinstance(index_check, (str, int)) \
and index_check is not None:
raise ResolverError("Invalid index checker: %s" % index_check)
new_path.append((node_check, index_check))
if kind is str:
kind = ScalarNode
elif kind is list:
kind = SequenceNode
elif kind is dict:
kind = MappingNode
elif kind not in [ScalarNode, SequenceNode, MappingNode] \
and kind is not None:
raise ResolverError("Invalid node kind: %s" % kind)
cls.yaml_path_resolvers[tuple(new_path), kind] = tag
def descend_resolver(self, current_node, current_index):
if not self.yaml_path_resolvers:
return
exact_paths = {}
prefix_paths = []
if current_node:
depth = len(self.resolver_prefix_paths)
for path, kind in self.resolver_prefix_paths[-1]:
if self.check_resolver_prefix(depth, path, kind,
current_node, current_index):
if len(path) > depth:
prefix_paths.append((path, kind))
else:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
for path, kind in self.yaml_path_resolvers:
if not path:
exact_paths[kind] = self.yaml_path_resolvers[path, kind]
else:
prefix_paths.append((path, kind))
self.resolver_exact_paths.append(exact_paths)
self.resolver_prefix_paths.append(prefix_paths)
def ascend_resolver(self):
if not self.yaml_path_resolvers:
return
self.resolver_exact_paths.pop()
self.resolver_prefix_paths.pop()
def check_resolver_prefix(self, depth, path, kind,
current_node, current_index):
node_check, index_check = path[depth-1]
if isinstance(node_check, str):
if current_node.tag != node_check:
return
elif node_check is not None:
if not isinstance(current_node, node_check):
return
if index_check is True and current_index is not None:
return
if (index_check is False or index_check is None) \
and current_index is None:
return
if isinstance(index_check, str):
if not (isinstance(current_index, ScalarNode)
and index_check == current_index.value):
return
elif isinstance(index_check, int) and not isinstance(index_check, bool):
if index_check != current_index:
return
return True
def resolve(self, kind, value, implicit):
if kind is ScalarNode and implicit[0]:
if value == '':
resolvers = self.yaml_implicit_resolvers.get('', [])
else:
resolvers = self.yaml_implicit_resolvers.get(value[0], [])
resolvers += self.yaml_implicit_resolvers.get(None, [])
for tag, regexp in resolvers:
if regexp.match(value):
return tag
implicit = implicit[1]
if self.yaml_path_resolvers:
exact_paths = self.resolver_exact_paths[-1]
if kind in exact_paths:
return exact_paths[kind]
if None in exact_paths:
return exact_paths[None]
if kind is ScalarNode:
return self.DEFAULT_SCALAR_TAG
elif kind is SequenceNode:
return self.DEFAULT_SEQUENCE_TAG
elif kind is MappingNode:
return self.DEFAULT_MAPPING_TAG
class Resolver(BaseResolver):
pass
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:bool',
re.compile(r'''^(?:yes|Yes|YES|no|No|NO
|true|True|TRUE|false|False|FALSE
|on|On|ON|off|Off|OFF)$''', re.X),
list('yYnNtTfFoO'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:float',
re.compile(r'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
|\.[0-9_]+(?:[eE][-+][0-9]+)?
|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
|[-+]?\.(?:inf|Inf|INF)
|\.(?:nan|NaN|NAN))$''', re.X),
list('-+0123456789.'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:int',
re.compile(r'''^(?:[-+]?0b[0-1_]+
|[-+]?0[0-7_]+
|[-+]?(?:0|[1-9][0-9_]*)
|[-+]?0x[0-9a-fA-F_]+
|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
list('-+0123456789'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:merge',
re.compile(r'^(?:<<)$'),
['<'])
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:null',
re.compile(r'''^(?: ~
|null|Null|NULL
| )$''', re.X),
['~', 'n', 'N', ''])
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:timestamp',
re.compile(r'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
|[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
(?:[Tt]|[ \t]+)[0-9][0-9]?
:[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
list('0123456789'))
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:value',
re.compile(r'^(?:=)$'),
['='])
# The following resolver is only for documentation purposes. It cannot work
# because plain scalars cannot start with '!', '&', or '*'.
Resolver.add_implicit_resolver(
'tag:yaml.org,2002:yaml',
re.compile(r'^(?:!|&|\*)$'),
list('!&*'))
File diff suppressed because it is too large Load Diff
+111
View File
@@ -0,0 +1,111 @@
__all__ = ['Serializer', 'SerializerError']
from .error import YAMLError
from .events import *
from .nodes import *
class SerializerError(YAMLError):
pass
class Serializer:
ANCHOR_TEMPLATE = 'id%03d'
def __init__(self, encoding=None,
explicit_start=None, explicit_end=None, version=None, tags=None):
self.use_encoding = encoding
self.use_explicit_start = explicit_start
self.use_explicit_end = explicit_end
self.use_version = version
self.use_tags = tags
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
self.closed = None
def open(self):
if self.closed is None:
self.emit(StreamStartEvent(encoding=self.use_encoding))
self.closed = False
elif self.closed:
raise SerializerError("serializer is closed")
else:
raise SerializerError("serializer is already opened")
def close(self):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif not self.closed:
self.emit(StreamEndEvent())
self.closed = True
#def __del__(self):
# self.close()
def serialize(self, node):
if self.closed is None:
raise SerializerError("serializer is not opened")
elif self.closed:
raise SerializerError("serializer is closed")
self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
version=self.use_version, tags=self.use_tags))
self.anchor_node(node)
self.serialize_node(node, None, None)
self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
self.serialized_nodes = {}
self.anchors = {}
self.last_anchor_id = 0
def anchor_node(self, node):
if node in self.anchors:
if self.anchors[node] is None:
self.anchors[node] = self.generate_anchor(node)
else:
self.anchors[node] = None
if isinstance(node, SequenceNode):
for item in node.value:
self.anchor_node(item)
elif isinstance(node, MappingNode):
for key, value in node.value:
self.anchor_node(key)
self.anchor_node(value)
def generate_anchor(self, node):
self.last_anchor_id += 1
return self.ANCHOR_TEMPLATE % self.last_anchor_id
def serialize_node(self, node, parent, index):
alias = self.anchors[node]
if node in self.serialized_nodes:
self.emit(AliasEvent(alias))
else:
self.serialized_nodes[node] = True
self.descend_resolver(parent, index)
if isinstance(node, ScalarNode):
detected_tag = self.resolve(ScalarNode, node.value, (True, False))
default_tag = self.resolve(ScalarNode, node.value, (False, True))
implicit = (node.tag == detected_tag), (node.tag == default_tag)
self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
style=node.style))
elif isinstance(node, SequenceNode):
implicit = (node.tag
== self.resolve(SequenceNode, node.value, True))
self.emit(SequenceStartEvent(alias, node.tag, implicit,
flow_style=node.flow_style))
index = 0
for item in node.value:
self.serialize_node(item, node, index)
index += 1
self.emit(SequenceEndEvent())
elif isinstance(node, MappingNode):
implicit = (node.tag
== self.resolve(MappingNode, node.value, True))
self.emit(MappingStartEvent(alias, node.tag, implicit,
flow_style=node.flow_style))
for key, value in node.value:
self.serialize_node(key, node, None)
self.serialize_node(value, node, key)
self.emit(MappingEndEvent())
self.ascend_resolver()
+104
View File
@@ -0,0 +1,104 @@
class Token(object):
def __init__(self, start_mark, end_mark):
self.start_mark = start_mark
self.end_mark = end_mark
def __repr__(self):
attributes = [key for key in self.__dict__
if not key.endswith('_mark')]
attributes.sort()
arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
for key in attributes])
return '%s(%s)' % (self.__class__.__name__, arguments)
#class BOMToken(Token):
# id = '<byte order mark>'
class DirectiveToken(Token):
id = '<directive>'
def __init__(self, name, value, start_mark, end_mark):
self.name = name
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class DocumentStartToken(Token):
id = '<document start>'
class DocumentEndToken(Token):
id = '<document end>'
class StreamStartToken(Token):
id = '<stream start>'
def __init__(self, start_mark=None, end_mark=None,
encoding=None):
self.start_mark = start_mark
self.end_mark = end_mark
self.encoding = encoding
class StreamEndToken(Token):
id = '<stream end>'
class BlockSequenceStartToken(Token):
id = '<block sequence start>'
class BlockMappingStartToken(Token):
id = '<block mapping start>'
class BlockEndToken(Token):
id = '<block end>'
class FlowSequenceStartToken(Token):
id = '['
class FlowMappingStartToken(Token):
id = '{'
class FlowSequenceEndToken(Token):
id = ']'
class FlowMappingEndToken(Token):
id = '}'
class KeyToken(Token):
id = '?'
class ValueToken(Token):
id = ':'
class BlockEntryToken(Token):
id = '-'
class FlowEntryToken(Token):
id = ','
class AliasToken(Token):
id = '<alias>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class AnchorToken(Token):
id = '<anchor>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class TagToken(Token):
id = '<tag>'
def __init__(self, value, start_mark, end_mark):
self.value = value
self.start_mark = start_mark
self.end_mark = end_mark
class ScalarToken(Token):
id = '<scalar>'
def __init__(self, value, plain, start_mark, end_mark, style=None):
self.value = value
self.plain = plain
self.start_mark = start_mark
self.end_mark = end_mark
self.style = style
-5
View File
@@ -1,5 +0,0 @@
rm -fr nosetests.xml
tox
# coverage xml
rm -fr pylint.txt
# pylint -d W0312 -d W0212 -d E1101 -d E0202 -d W0102 -d E0102 -f parseable ./tablib > pylint.txt || true
+42 -1
View File
@@ -4,8 +4,14 @@
"""Tests for Tablib."""
import unittest
import sys
if sys.version_info[0] > 2:
from tablib.packages import markup3 as markup
else:
from tablib.packages import markup
from tablib.packages import markup
import tablib
@@ -295,6 +301,21 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(_csv, data.csv)
def test_csv_import_set_with_spaces(self):
"""Generate and import CSV set serialization when row values have
spaces."""
data.append(('Bill Gates', 'Microsoft'))
data.append(('Steve Jobs', 'Apple'))
data.headers = ('Name', 'Company')
_csv = data.csv
data.csv = _csv
self.assertEqual(_csv, data.csv)
def test_tsv_import_set(self):
"""Generate and import TSV set serialization."""
data.append(self.john)
@@ -307,6 +328,7 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(_tsv, data.tsv)
def test_csv_format_detect(self):
"""Test CSV format detection."""
@@ -322,6 +344,7 @@ class TablibTestCase(unittest.TestCase):
self.assertTrue(tablib.formats.csv.detect(_csv))
self.assertFalse(tablib.formats.csv.detect(_bunk))
def test_tsv_format_detect(self):
"""Test TSV format detection."""
@@ -337,6 +360,7 @@ class TablibTestCase(unittest.TestCase):
self.assertTrue(tablib.formats.tsv.detect(_tsv))
self.assertFalse(tablib.formats.tsv.detect(_bunk))
def test_json_format_detect(self):
"""Test JSON format detection."""
@@ -374,6 +398,7 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(tablib.detect(_json)[0], tablib.formats.json)
self.assertEqual(tablib.detect(_bunk)[0], None)
def test_transpose(self):
"""Transpose a dataset."""
@@ -388,6 +413,7 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(second_row,
("gpa",90, 67, 50))
def test_row_stacking(self):
"""Row stacking."""
@@ -405,6 +431,7 @@ class TablibTestCase(unittest.TestCase):
expected_data = original_data + original_data
self.assertEqual(row_stacked[column], expected_data)
def test_column_stacking(self):
"""Column stacking"""
@@ -425,6 +452,7 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(column_stacked[0],
("John", "Adams", 90, "John", "Adams", 90))
def test_sorting(self):
"""Sort columns."""
@@ -442,6 +470,7 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(second_row, expected_second)
self.assertEqual(third_row, expected_third)
def test_wipe(self):
"""Purge a dataset."""
@@ -459,5 +488,17 @@ class TablibTestCase(unittest.TestCase):
self.assertTrue(data[0] == new_row)
def test_formatters(self):
"""Confirm formatters are being triggered."""
def _formatter(cell_value):
return str(cell_value).upper()
self.founders.add_formatter('last_name', _formatter)
for name in [r['last_name'] for r in self.founders.dict]:
self.assertTrue(name.isupper())
if __name__ == '__main__':
unittest.main()
+13 -5
View File
@@ -1,9 +1,17 @@
[tox]
envlist = py24,py25,py26,py27
envlist = py25,py26,py27,py3
[testenv]
commands=py.test --junitxml=junit-{envname}.xml
deps =
nose
simplejson
pytest
deps = pytest
[testenv:py25]
simplejson = pytest simplejson
[testenv:pypy]
basepython=/usr/bin/pypy-c
simplejson = pytest simplejson
[testenv:py3]
basepython=/usr/bin/python3
simplejson = pytest