+ {% endif %}
+{% endblock %}
+{# do not display relbars #}
+{% block relbar1 %}{% endblock %}
+{% block relbar2 %}
+ {% if theme_github_fork %}
+
+ {% endif %}
+{% endblock %}
+{% block sidebar1 %}{% endblock %}
+{% block sidebar2 %}{% endblock %}
diff --git a/docs/_themes/kr_small/static/flasky.css_t b/docs/_themes/kr_small/static/flasky.css_t
new file mode 100644
index 0000000..fe2141c
--- /dev/null
+++ b/docs/_themes/kr_small/static/flasky.css_t
@@ -0,0 +1,287 @@
+/*
+ * flasky.css_t
+ * ~~~~~~~~~~~~
+ *
+ * Sphinx stylesheet -- flasky theme based on nature theme.
+ *
+ * :copyright: Copyright 2007-2010 by the Sphinx team, see AUTHORS.
+ * :license: BSD, see LICENSE for details.
+ *
+ */
+
+@import url("basic.css");
+
+/* -- page layout ----------------------------------------------------------- */
+
+body {
+ font-family: 'Georgia', serif;
+ font-size: 17px;
+ color: #000;
+ background: white;
+ margin: 0;
+ padding: 0;
+}
+
+div.documentwrapper {
+ float: left;
+ width: 100%;
+}
+
+div.bodywrapper {
+ margin: 40px auto 0 auto;
+ width: 700px;
+}
+
+hr {
+ border: 1px solid #B1B4B6;
+}
+
+div.body {
+ background-color: #ffffff;
+ color: #3E4349;
+ padding: 0 30px 30px 30px;
+}
+
+img.floatingflask {
+ padding: 0 0 10px 10px;
+ float: right;
+}
+
+div.footer {
+ text-align: right;
+ color: #888;
+ padding: 10px;
+ font-size: 14px;
+ width: 650px;
+ margin: 0 auto 40px auto;
+}
+
+div.footer a {
+ color: #888;
+ text-decoration: underline;
+}
+
+div.related {
+ line-height: 32px;
+ color: #888;
+}
+
+div.related ul {
+ padding: 0 0 0 10px;
+}
+
+div.related a {
+ color: #444;
+}
+
+/* -- body styles ----------------------------------------------------------- */
+
+a {
+ color: #004B6B;
+ text-decoration: underline;
+}
+
+a:hover {
+ color: #6D4100;
+ text-decoration: underline;
+}
+
+div.body {
+ padding-bottom: 40px; /* saved for footer */
+}
+
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+ font-family: 'Garamond', 'Georgia', serif;
+ font-weight: normal;
+ margin: 30px 0px 10px 0px;
+ padding: 0;
+}
+
+{% if theme_index_logo %}
+div.indexwrapper h1 {
+ text-indent: -999999px;
+ background: url({{ theme_index_logo }}) no-repeat center center;
+ height: {{ theme_index_logo_height }};
+}
+{% endif %}
+
+div.body h2 { font-size: 180%; }
+div.body h3 { font-size: 150%; }
+div.body h4 { font-size: 130%; }
+div.body h5 { font-size: 100%; }
+div.body h6 { font-size: 100%; }
+
+a.headerlink {
+ color: white;
+ padding: 0 4px;
+ text-decoration: none;
+}
+
+a.headerlink:hover {
+ color: #444;
+ background: #eaeaea;
+}
+
+div.body p, div.body dd, div.body li {
+ line-height: 1.4em;
+}
+
+div.admonition {
+ background: #fafafa;
+ margin: 20px -30px;
+ padding: 10px 30px;
+ border-top: 1px solid #ccc;
+ border-bottom: 1px solid #ccc;
+}
+
+div.admonition p.admonition-title {
+ font-family: 'Garamond', 'Georgia', serif;
+ font-weight: normal;
+ font-size: 24px;
+ margin: 0 0 10px 0;
+ padding: 0;
+ line-height: 1;
+}
+
+div.admonition p.last {
+ margin-bottom: 0;
+}
+
+div.highlight{
+ background-color: white;
+}
+
+dt:target, .highlight {
+ background: #FAF3E8;
+}
+
+div.note {
+ background-color: #eee;
+ border: 1px solid #ccc;
+}
+
+div.seealso {
+ background-color: #ffc;
+ border: 1px solid #ff6;
+}
+
+div.topic {
+ background-color: #eee;
+}
+
+div.warning {
+ background-color: #ffe4e4;
+ border: 1px solid #f66;
+}
+
+p.admonition-title {
+ display: inline;
+}
+
+p.admonition-title:after {
+ content: ":";
+}
+
+pre, tt {
+ font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
+ font-size: 0.85em;
+}
+
+img.screenshot {
+}
+
+tt.descname, tt.descclassname {
+ font-size: 0.95em;
+}
+
+tt.descname {
+ padding-right: 0.08em;
+}
+
+img.screenshot {
+ -moz-box-shadow: 2px 2px 4px #eee;
+ -webkit-box-shadow: 2px 2px 4px #eee;
+ box-shadow: 2px 2px 4px #eee;
+}
+
+table.docutils {
+ border: 1px solid #888;
+ -moz-box-shadow: 2px 2px 4px #eee;
+ -webkit-box-shadow: 2px 2px 4px #eee;
+ box-shadow: 2px 2px 4px #eee;
+}
+
+table.docutils td, table.docutils th {
+ border: 1px solid #888;
+ padding: 0.25em 0.7em;
+}
+
+table.field-list, table.footnote {
+ border: none;
+ -moz-box-shadow: none;
+ -webkit-box-shadow: none;
+ box-shadow: none;
+}
+
+table.footnote {
+ margin: 15px 0;
+ width: 100%;
+ border: 1px solid #eee;
+}
+
+table.field-list th {
+ padding: 0 0.8em 0 0;
+}
+
+table.field-list td {
+ padding: 0;
+}
+
+table.footnote td {
+ padding: 0.5em;
+}
+
+dl {
+ margin: 0;
+ padding: 0;
+}
+
+dl dd {
+ margin-left: 30px;
+}
+
+pre {
+ padding: 0;
+ margin: 15px -30px;
+ padding: 8px;
+ line-height: 1.3em;
+ padding: 7px 30px;
+ background: #eee;
+ border-radius: 2px;
+ -moz-border-radius: 2px;
+ -webkit-border-radius: 2px;
+}
+
+dl pre {
+ margin-left: -60px;
+ padding-left: 60px;
+}
+
+tt {
+ background-color: #ecf0f3;
+ color: #222;
+ /* padding: 1px 2px; */
+}
+
+tt.xref, a tt {
+ background-color: #FBFBFB;
+}
+
+a:hover tt {
+ background: #EEE;
+}
diff --git a/docs/_themes/kr_small/theme.conf b/docs/_themes/kr_small/theme.conf
new file mode 100644
index 0000000..542b462
--- /dev/null
+++ b/docs/_themes/kr_small/theme.conf
@@ -0,0 +1,10 @@
+[theme]
+inherit = basic
+stylesheet = flasky.css
+nosidebar = true
+pygments_style = flask_theme_support.FlaskyStyle
+
+[options]
+index_logo = ''
+index_logo_height = 120px
+github_fork = ''
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..4d80fe7
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,220 @@
+# -*- coding: utf-8 -*-
+#
+# Tablib documentation build configuration file, created by
+# sphinx-quickstart on Tue Oct 5 15:25:21 2010.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'Tablib'
+copyright = u'2010, Kenneth Reitz'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '0.8.3'
+# The full version, including alpha/beta/rc tags.
+release = '0.8.3'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Tablibdoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+ ('index', 'Tablib.tex', u'Tablib Documentation',
+ u'Kenneth Reitz', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output --------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ ('index', 'tablib', u'Tablib Documentation',
+ [u'Kenneth Reitz'], 1)
+]
+
+sys.path.append(os.path.abspath('_themes'))
+html_theme_path = ['_themes']
+html_theme = 'kr'
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..bb0589c
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,34 @@
+.. Tablib documentation master file, created by
+ sphinx-quickstart on Tue Oct 5 15:25:21 2010.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Welcome to Tablib's documentation!
+==================================
+
+Contents:
+
+.. toctree::
+ :maxdepth: 2
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
+Dataset Object
+--------------
+.. module:: tablib
+
+.. autoclass:: Dataset
+ :members:
+ :inherited-members:
+
+Databook Object
+---------------
+
+.. autoclass:: Databook
+ :members:
+ :inherited-members:
diff --git a/setup.py b/setup.py
index 060ce00..e93f61b 100644
--- a/setup.py
+++ b/setup.py
@@ -15,17 +15,27 @@ if sys.argv[-1] == "publish":
publish()
sys.exit()
+required = []
+
+# if sys.version_info < (2, 6):
+# required.append('simplejson')
+
setup(
name='tablib',
- version='0.8.4',
+ version='0.8.5',
description='Format agnostic tabular data library (XLS, JSON, YAML, CSV)',
long_description=open('README.rst').read() + '\n\n' +
open('HISTORY.rst').read(),
author='Kenneth Reitz',
author_email='me@kennethreitz.com',
url='http://github.com/kennethreitz/tablib',
- packages=['tablib', 'tablib.formats'],
- install_requires=['xlwt', 'simplejson', 'PyYAML'],
+ packages= [
+ 'tablib', 'tablib.formats',
+ 'tablib.packages.simplejson'
+ 'tablib.packages.xlwt',
+ 'tablib.packages.yaml',
+ ],
+ install_requires=required,
license='MIT',
classifiers=(
'Development Status :: 5 - Production/Stable',
diff --git a/tablib/core.py b/tablib/core.py
index aa840ad..544c208 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -1,21 +1,108 @@
# -*- coding: utf-8 -*-
+"""
+ tablib.core
+ ~~~~~~~~~~~
-""" Tablib - Core Library.
+ This module implements the central tablib objects.
+
+ :copyright: (c) 2010 by Kenneth Reitz.
+ :license: MIT, see LICENSE for more details.
"""
from tablib.formats import FORMATS as formats
__title__ = 'tablib'
-__version__ = '0.8.4'
-__build__ = 0x000804
+__version__ = '0.8.5'
+__build__ = 0x000805
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
__copyright__ = 'Copyright 2010 Kenneth Reitz'
class Dataset(object):
- """Epic Tabular-Dataset object. """
+ """The tablib Dataset object is the heart of tablib. It provides all core
+ functionality.
+
+ Usually you create a :class:`Dataset` instance in your main module, and append
+ rows and columns as you collect data. ::
+
+ data = tablib.Dataset()
+ data.headers = ('name', 'age')
+
+ for (name, age) in some_collector():
+ data.append((name, age))
+
+ You can also set rows and headers upon instantiation. This is useful if dealing
+ with dozens or hundreds of :class:`Dataset` objects. ::
+
+ headers = ('first_name', 'last_name')
+ data = [('John', 'Adams'), ('George', 'Washington')]
+
+ data = tablib.Dataset(*data, headers=headers)
+
+
+ :param \*args: (optional) list of rows to populate Dataset
+ :param headers: (optional) list strings for Dataset header row
+
+
+ .. admonition:: About the Format Attributes
+
+ If you look at the code, the various output/import formats are not
+ defined within the :class:`Dataset` object itself. To add support for a new format, see
+ :ref:`Adding New Formats`.
+
+ .. attribute:: csv
+
+ A CSV representation of the Dataset object. The top row will contain
+ headers, if they have been set. Otherwise, the top row will contain
+ the first row of the dataset.
+
+ A dataset object can also be imported by setting the `Dataset.csv` attribute: ::
+
+ data = tablib.Dataset()
+ data.csv = 'age, first_name, last_name\\n90, John, Adams'
+
+ Import assumes (for now) that headers exist.
+
+
+ .. attribute:: dict
+
+ A native Python representation of the Dataset object. If headers have been
+ set, a list of Python dictionaries will be returned. If no headers have been
+ set, a list of tuples (rows) will be returned instead.
+
+ A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
+
+ data = tablib.Dataset()
+ data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
+
+
+ .. attribute:: xls
+
+ An Excel Spreadsheet representation of the Dataset object, including
+ :ref:`seperators`.
+
+ *Note:* `Dataset.xls` contains binary data, so make sure to write in binary
+ mode::
+
+ with open('output.xls', 'wb') as f:
+ f.write(data.xls)
+
+
+ .. attribute:: yaml
+
+ A YAML representation of the Dataset object. If headers have been
+ set, a YAML list of objects will be returned. If no headers have
+ been set, a YAML list of lists (rows) will be returned instead.
+
+ A dataset object can also be imported by setting the `Dataset.yaml` attribute: ::
+
+ data = tablib.Dataset()
+ data.yaml = '- {age: 90, first_name: John, last_name: Adams}'
+
+ Import assumes (for now) that headers exist.
+ """
def __init__(self, *args, **kwargs):
self._data = list(args)
@@ -155,19 +242,34 @@ class Dataset(object):
@property
def dict(self):
- """Returns python dict of Dataset."""
+ """A native Python representation of the Dataset object. If headers have
+ been set, a list of Python dictionaries will be returned. If no headers
+ have been set, a list of tuples (rows) will be returned instead.
+
+ A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
+
+ data = tablib.Dataset()
+ data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
+
+ """
return self._package()
@dict.setter
def dict(self, pickle):
- """Returns python dict of Dataset."""
+
if not len(pickle):
return
+
+ # if list of rows
if isinstance(pickle[0], list):
+ self.wipe()
for row in pickle:
self.append(row)
+
+ # if list of objects
elif isinstance(pickle[0], dict):
+ self.wipe()
self.headers = pickle[0].keys()
for row in pickle:
self.append(row.values())
diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py
index 27d2e0d..2391417 100644
--- a/tablib/formats/_csv.py
+++ b/tablib/formats/_csv.py
@@ -7,8 +7,6 @@ import cStringIO
import csv
import os
-import simplejson as json
-
import tablib
diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py
index f7c88ee..da31b23 100644
--- a/tablib/formats/_json.py
+++ b/tablib/formats/_json.py
@@ -3,9 +3,17 @@
""" Tablib - JSON Support
"""
-import simplejson as json
+try:
+ import json # load system JSON (Python >= 2.6)
+except ImportError:
+ try:
+ import simplejson as json
+ except ImportError:
+ import tablib.packages.simplejson as json # use the vendorized copy
+
import tablib.core
+
title = 'json'
extentions = ('json', 'jsn')
@@ -43,5 +51,5 @@ def detect(stream):
try:
json.loads(stream)
return True
- except json.decoder.JSONDecodeError:
+ except ValueError:
return False
\ No newline at end of file
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
index f7be4a4..97a9580 100644
--- a/tablib/formats/_xls.py
+++ b/tablib/formats/_xls.py
@@ -3,9 +3,13 @@
""" Tablib - XLS Support.
"""
-import xlwt
import cStringIO
+try:
+ import xlwt
+except ImportError:
+ import tablib.packages.xlwt as xlwt
+
title = 'xls'
extentions = ('xls',)
diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py
index 57d63d7..59d49a0 100644
--- a/tablib/formats/_yaml.py
+++ b/tablib/formats/_yaml.py
@@ -3,7 +3,11 @@
""" Tablib - YAML Support.
"""
-import yaml
+try:
+ import yaml
+except ImportError:
+ import tablib.packages.yaml as yaml
+
import tablib
diff --git a/tablib/packages/__init__.py b/tablib/packages/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tablib/packages/simplejson/__init__.py b/tablib/packages/simplejson/__init__.py
new file mode 100644
index 0000000..dcfd541
--- /dev/null
+++ b/tablib/packages/simplejson/__init__.py
@@ -0,0 +1,437 @@
+r"""JSON (JavaScript Object Notation) is a subset of
+JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
+interchange format.
+
+:mod:`simplejson` exposes an API familiar to users of the standard library
+:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
+version of the :mod:`json` library contained in Python 2.6, but maintains
+compatibility with Python 2.4 and Python 2.5 and (currently) has
+significant performance advantages, even without using the optional C
+extension for speedups.
+
+Encoding basic Python object hierarchies::
+
+ >>> import simplejson as json
+ >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+ '["foo", {"bar": ["baz", null, 1.0, 2]}]'
+ >>> print json.dumps("\"foo\bar")
+ "\"foo\bar"
+ >>> print json.dumps(u'\u1234')
+ "\u1234"
+ >>> print json.dumps('\\')
+ "\\"
+ >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
+ {"a": 0, "b": 0, "c": 0}
+ >>> from StringIO import StringIO
+ >>> io = StringIO()
+ >>> json.dump(['streaming API'], io)
+ >>> io.getvalue()
+ '["streaming API"]'
+
+Compact encoding::
+
+ >>> import simplejson as json
+ >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
+ '[1,2,3,{"4":5,"6":7}]'
+
+Pretty printing::
+
+ >>> import simplejson as json
+ >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')
+ >>> print '\n'.join([l.rstrip() for l in s.splitlines()])
+ {
+ "4": 5,
+ "6": 7
+ }
+
+Decoding JSON::
+
+ >>> import simplejson as json
+ >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+ >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
+ True
+ >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
+ True
+ >>> from StringIO import StringIO
+ >>> io = StringIO('["streaming API"]')
+ >>> json.load(io)[0] == 'streaming API'
+ True
+
+Specializing JSON object decoding::
+
+ >>> import simplejson as json
+ >>> def as_complex(dct):
+ ... if '__complex__' in dct:
+ ... return complex(dct['real'], dct['imag'])
+ ... return dct
+ ...
+ >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
+ ... object_hook=as_complex)
+ (1+2j)
+ >>> from decimal import Decimal
+ >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
+ True
+
+Specializing JSON object encoding::
+
+ >>> import simplejson as json
+ >>> def encode_complex(obj):
+ ... if isinstance(obj, complex):
+ ... return [obj.real, obj.imag]
+ ... raise TypeError(repr(obj) + " is not JSON serializable")
+ ...
+ >>> json.dumps(2 + 1j, default=encode_complex)
+ '[2.0, 1.0]'
+ >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
+ '[2.0, 1.0]'
+ >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
+ '[2.0, 1.0]'
+
+
+Using simplejson.tool from the shell to validate and pretty-print::
+
+ $ echo '{"json":"obj"}' | python -m simplejson.tool
+ {
+ "json": "obj"
+ }
+ $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+ Expecting property name: line 1 column 2 (char 2)
+"""
+__version__ = '2.1.1'
+__all__ = [
+ 'dump', 'dumps', 'load', 'loads',
+ 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
+ 'OrderedDict',
+]
+
+__author__ = 'Bob Ippolito '
+
+from decimal import Decimal
+
+from decoder import JSONDecoder, JSONDecodeError
+from encoder import JSONEncoder
+def _import_OrderedDict():
+ import collections
+ try:
+ return collections.OrderedDict
+ except AttributeError:
+ import ordered_dict
+ return ordered_dict.OrderedDict
+OrderedDict = _import_OrderedDict()
+
+def _import_c_make_encoder():
+ try:
+ from simplejson._speedups import make_encoder
+ return make_encoder
+ except ImportError:
+ return None
+
+_default_encoder = JSONEncoder(
+ skipkeys=False,
+ ensure_ascii=True,
+ check_circular=True,
+ allow_nan=True,
+ indent=None,
+ separators=None,
+ encoding='utf-8',
+ default=None,
+ use_decimal=False,
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+ allow_nan=True, cls=None, indent=None, separators=None,
+ encoding='utf-8', default=None, use_decimal=False, **kw):
+ """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+ ``.write()``-supporting file-like object).
+
+ If ``skipkeys`` is true then ``dict`` keys that are not basic types
+ (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+ will be skipped instead of raising a ``TypeError``.
+
+ If ``ensure_ascii`` is false, then the some chunks written to ``fp``
+ may be ``unicode`` instances, subject to normal Python ``str`` to
+ ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+ understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+ to cause an error.
+
+ If ``check_circular`` is false, then the circular reference check
+ for container types will be skipped and a circular reference will
+ result in an ``OverflowError`` (or worse).
+
+ If ``allow_nan`` is false, then it will be a ``ValueError`` to
+ serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+ in strict compliance of the JSON specification, instead of using the
+ JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+ If *indent* is a string, then JSON array elements and object members
+ will be pretty-printed with a newline followed by that string repeated
+ for each level of nesting. ``None`` (the default) selects the most compact
+ representation without any newlines. For backwards compatibility with
+ versions of simplejson earlier than 2.1.0, an integer is also accepted
+ and is converted to a string with that many spaces.
+
+ If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+ then it will be used instead of the default ``(', ', ': ')`` separators.
+ ``(',', ':')`` is the most compact JSON representation.
+
+ ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+ ``default(obj)`` is a function that should return a serializable version
+ of obj or raise TypeError. The default simply raises TypeError.
+
+ If *use_decimal* is true (default: ``False``) then decimal.Decimal
+ will be natively serialized to JSON with full precision.
+
+ To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+ ``.default()`` method to serialize additional types), specify it with
+ the ``cls`` kwarg.
+
+ """
+ # cached encoder
+ if (not skipkeys and ensure_ascii and
+ check_circular and allow_nan and
+ cls is None and indent is None and separators is None and
+ encoding == 'utf-8' and default is None and not kw):
+ iterable = _default_encoder.iterencode(obj)
+ else:
+ if cls is None:
+ cls = JSONEncoder
+ iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+ check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+ separators=separators, encoding=encoding,
+ default=default, use_decimal=use_decimal, **kw).iterencode(obj)
+ # could accelerate with writelines in some versions of Python, at
+ # a debuggability cost
+ for chunk in iterable:
+ fp.write(chunk)
+
+
+def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
+ allow_nan=True, cls=None, indent=None, separators=None,
+ encoding='utf-8', default=None, use_decimal=False, **kw):
+ """Serialize ``obj`` to a JSON formatted ``str``.
+
+ If ``skipkeys`` is true then ``dict`` keys that are not basic types
+ (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+ will be skipped instead of raising a ``TypeError``.
+
+ If ``ensure_ascii`` is false, then the return value will be a
+ ``unicode`` instance subject to normal Python ``str`` to ``unicode``
+ coercion rules instead of being escaped to an ASCII ``str``.
+
+ If ``check_circular`` is false, then the circular reference check
+ for container types will be skipped and a circular reference will
+ result in an ``OverflowError`` (or worse).
+
+ If ``allow_nan`` is false, then it will be a ``ValueError`` to
+ serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
+ strict compliance of the JSON specification, instead of using the
+ JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+ If ``indent`` is a string, then JSON array elements and object members
+ will be pretty-printed with a newline followed by that string repeated
+ for each level of nesting. ``None`` (the default) selects the most compact
+ representation without any newlines. For backwards compatibility with
+ versions of simplejson earlier than 2.1.0, an integer is also accepted
+ and is converted to a string with that many spaces.
+
+ If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+ then it will be used instead of the default ``(', ', ': ')`` separators.
+ ``(',', ':')`` is the most compact JSON representation.
+
+ ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+ ``default(obj)`` is a function that should return a serializable version
+ of obj or raise TypeError. The default simply raises TypeError.
+
+ If *use_decimal* is true (default: ``False``) then decimal.Decimal
+ will be natively serialized to JSON with full precision.
+
+ To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+ ``.default()`` method to serialize additional types), specify it with
+ the ``cls`` kwarg.
+
+ """
+ # cached encoder
+ if (not skipkeys and ensure_ascii and
+ check_circular and allow_nan and
+ cls is None and indent is None and separators is None and
+ encoding == 'utf-8' and default is None and not use_decimal
+ and not kw):
+ return _default_encoder.encode(obj)
+ if cls is None:
+ cls = JSONEncoder
+ return cls(
+ skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+ check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+ separators=separators, encoding=encoding, default=default,
+ use_decimal=use_decimal, **kw).encode(obj)
+
+
+_default_decoder = JSONDecoder(encoding=None, object_hook=None,
+ object_pairs_hook=None)
+
+
+def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
+ parse_int=None, parse_constant=None, object_pairs_hook=None,
+ use_decimal=False, **kw):
+ """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
+ a JSON document) to a Python object.
+
+ *encoding* determines the encoding used to interpret any
+ :class:`str` objects decoded by this instance (``'utf-8'`` by
+ default). It has no effect when decoding :class:`unicode` objects.
+
+ Note that currently only encodings that are a superset of ASCII work,
+ strings of other encodings should be passed in as :class:`unicode`.
+
+ *object_hook*, if specified, will be called with the result of every
+ JSON object decoded and its return value will be used in place of the
+ given :class:`dict`. This can be used to provide custom
+ deserializations (e.g. to support JSON-RPC class hinting).
+
+ *object_pairs_hook* is an optional function that will be called with
+ the result of any object literal decode with an ordered list of pairs.
+ The return value of *object_pairs_hook* will be used instead of the
+ :class:`dict`. This feature can be used to implement custom decoders
+ that rely on the order that the key and value pairs are decoded (for
+ example, :func:`collections.OrderedDict` will remember the order of
+ insertion). If *object_hook* is also defined, the *object_pairs_hook*
+ takes priority.
+
+ *parse_float*, if specified, will be called with the string of every
+ JSON float to be decoded. By default, this is equivalent to
+ ``float(num_str)``. This can be used to use another datatype or parser
+ for JSON floats (e.g. :class:`decimal.Decimal`).
+
+ *parse_int*, if specified, will be called with the string of every
+ JSON int to be decoded. By default, this is equivalent to
+ ``int(num_str)``. This can be used to use another datatype or parser
+ for JSON integers (e.g. :class:`float`).
+
+ *parse_constant*, if specified, will be called with one of the
+ following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
+ can be used to raise an exception if invalid JSON numbers are
+ encountered.
+
+ If *use_decimal* is true (default: ``False``) then it implies
+ parse_float=decimal.Decimal for parity with ``dump``.
+
+ To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+ kwarg.
+
+ """
+ return loads(fp.read(),
+ encoding=encoding, cls=cls, object_hook=object_hook,
+ parse_float=parse_float, parse_int=parse_int,
+ parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
+ use_decimal=use_decimal, **kw)
+
+
+def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
+ parse_int=None, parse_constant=None, object_pairs_hook=None,
+ use_decimal=False, **kw):
+ """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+ document) to a Python object.
+
+ *encoding* determines the encoding used to interpret any
+ :class:`str` objects decoded by this instance (``'utf-8'`` by
+ default). It has no effect when decoding :class:`unicode` objects.
+
+ Note that currently only encodings that are a superset of ASCII work,
+ strings of other encodings should be passed in as :class:`unicode`.
+
+ *object_hook*, if specified, will be called with the result of every
+ JSON object decoded and its return value will be used in place of the
+ given :class:`dict`. This can be used to provide custom
+ deserializations (e.g. to support JSON-RPC class hinting).
+
+ *object_pairs_hook* is an optional function that will be called with
+ the result of any object literal decode with an ordered list of pairs.
+ The return value of *object_pairs_hook* will be used instead of the
+ :class:`dict`. This feature can be used to implement custom decoders
+ that rely on the order that the key and value pairs are decoded (for
+ example, :func:`collections.OrderedDict` will remember the order of
+ insertion). If *object_hook* is also defined, the *object_pairs_hook*
+ takes priority.
+
+ *parse_float*, if specified, will be called with the string of every
+ JSON float to be decoded. By default, this is equivalent to
+ ``float(num_str)``. This can be used to use another datatype or parser
+ for JSON floats (e.g. :class:`decimal.Decimal`).
+
+ *parse_int*, if specified, will be called with the string of every
+ JSON int to be decoded. By default, this is equivalent to
+ ``int(num_str)``. This can be used to use another datatype or parser
+ for JSON integers (e.g. :class:`float`).
+
+ *parse_constant*, if specified, will be called with one of the
+ following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
+ can be used to raise an exception if invalid JSON numbers are
+ encountered.
+
+ If *use_decimal* is true (default: ``False``) then it implies
+ parse_float=decimal.Decimal for parity with ``dump``.
+
+ To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+ kwarg.
+
+ """
+ if (cls is None and encoding is None and object_hook is None and
+ parse_int is None and parse_float is None and
+ parse_constant is None and object_pairs_hook is None
+ and not use_decimal and not kw):
+ return _default_decoder.decode(s)
+ if cls is None:
+ cls = JSONDecoder
+ if object_hook is not None:
+ kw['object_hook'] = object_hook
+ if object_pairs_hook is not None:
+ kw['object_pairs_hook'] = object_pairs_hook
+ if parse_float is not None:
+ kw['parse_float'] = parse_float
+ if parse_int is not None:
+ kw['parse_int'] = parse_int
+ if parse_constant is not None:
+ kw['parse_constant'] = parse_constant
+ if use_decimal:
+ if parse_float is not None:
+ raise TypeError("use_decimal=True implies parse_float=Decimal")
+ kw['parse_float'] = Decimal
+ return cls(encoding=encoding, **kw).decode(s)
+
+
+def _toggle_speedups(enabled):
+ import simplejson.decoder as dec
+ import simplejson.encoder as enc
+ import simplejson.scanner as scan
+ c_make_encoder = _import_c_make_encoder()
+ if enabled:
+ dec.scanstring = dec.c_scanstring or dec.py_scanstring
+ enc.c_make_encoder = c_make_encoder
+ enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or
+ enc.py_encode_basestring_ascii)
+ scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner
+ else:
+ dec.scanstring = dec.py_scanstring
+ enc.c_make_encoder = None
+ enc.encode_basestring_ascii = enc.py_encode_basestring_ascii
+ scan.make_scanner = scan.py_make_scanner
+ dec.make_scanner = scan.make_scanner
+ global _default_decoder
+ _default_decoder = JSONDecoder(
+ encoding=None,
+ object_hook=None,
+ object_pairs_hook=None,
+ )
+ global _default_encoder
+ _default_encoder = JSONEncoder(
+ skipkeys=False,
+ ensure_ascii=True,
+ check_circular=True,
+ allow_nan=True,
+ indent=None,
+ separators=None,
+ encoding='utf-8',
+ default=None,
+ )
diff --git a/tablib/packages/simplejson/_speedups.c b/tablib/packages/simplejson/_speedups.c
new file mode 100644
index 0000000..b06ba50
--- /dev/null
+++ b/tablib/packages/simplejson/_speedups.c
@@ -0,0 +1,2561 @@
+#include "Python.h"
+#include "structmember.h"
+/* Compatibility shim: when building against Python older than 2.7 and no
+ PyOS_string_to_double macro is defined, map that name onto a local wrapper
+ around PyOS_ascii_atof. */
+#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double)
+#define PyOS_string_to_double json_PyOS_string_to_double
+static double
+json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
+static double
+json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) {
+ double x;
+ /* This shim only supports being called with endptr and
+ overflow_exception both NULL */
+ assert(endptr == NULL);
+ assert(overflow_exception == NULL);
+ PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
+ x = PyOS_ascii_atof(s);
+ PyFPE_END_PROTECT(x)
+ return x;
+}
+#endif
+/* Py_TYPE fallback for Python older than 2.6 */
+#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
+#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
+#endif
+/* Py_ssize_t fallback for Python older than 2.5: a plain int, with the
+ Ssize_t int conversions mapped onto their long counterparts */
+#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
+typedef int Py_ssize_t;
+#define PY_SSIZE_T_MAX INT_MAX
+#define PY_SSIZE_T_MIN INT_MIN
+#define PyInt_FromSsize_t PyInt_FromLong
+#define PyInt_AsSsize_t PyInt_AsLong
+#endif
+/* Py_IS_FINITE fallback built from Py_IS_INFINITY and Py_IS_NAN */
+#ifndef Py_IS_FINITE
+#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
+#endif
+
+/* UNUSED marks a parameter as deliberately unused; it expands to the
+ __unused__ attribute when __GNUC__ is defined and to nothing otherwise */
+#ifdef __GNUC__
+#define UNUSED __attribute__((__unused__))
+#else
+#define UNUSED
+#endif
+
+/* Encoding used by py_scanstring when the caller does not supply one */
+#define DEFAULT_ENCODING "utf-8"
+
+/* Type-check helpers for the scanner type, the encoder type and Decimal */
+#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
+#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
+#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
+#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
+#define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr))
+
+/* Forward declarations of the type objects the macros above refer to */
+static PyTypeObject PyScannerType;
+static PyTypeObject PyEncoderType;
+static PyTypeObject *DecimalTypePtr;
+
+/* Instance layout of the C scanner object. Every field is an object
+ reference that scanner_traverse visits and scanner_clear releases. */
+typedef struct _PyScannerObject {
+ PyObject_HEAD
+ PyObject *encoding; /* read via PyString_AS_STRING when parsing str input */
+ PyObject *strict; /* tested for truth and passed on to scanstring_str */
+ PyObject *object_hook;
+ PyObject *pairs_hook; /* compared against Py_None; exposed as object_pairs_hook */
+ PyObject *parse_float;
+ PyObject *parse_int;
+ PyObject *parse_constant;
+ PyObject *memo; /* dict used to reuse equal object keys while parsing */
+} PyScannerObject;
+
+/* Read-only Python attributes backed by PyScannerObject fields. The
+ pairs_hook field is published as "object_pairs_hook"; memo has no entry in
+ this table. */
+static PyMemberDef scanner_members[] = {
+ {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
+ {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
+ {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
+ {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
+ {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
+ {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
+ {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
+ {NULL} /* sentinel */
+};
+
+/* Instance layout of the C encoder object. The PyObject * fields are
+ published read-only through encoder_members; the three int fields have no
+ entry there. */
+typedef struct _PyEncoderObject {
+ PyObject_HEAD
+ PyObject *markers;
+ PyObject *defaultfn; /* exposed to Python under the name "default" */
+ PyObject *encoder;
+ PyObject *indent;
+ PyObject *key_separator;
+ PyObject *item_separator;
+ PyObject *sort_keys;
+ PyObject *skipkeys;
+ PyObject *key_memo;
+ int fast_encode;
+ int allow_nan;
+ int use_decimal;
+} PyEncoderObject;
+
+/* Read-only Python attributes backed by PyEncoderObject fields. The
+ defaultfn field is published as "default". */
+static PyMemberDef encoder_members[] = {
+ {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
+ {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
+ {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
+ {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
+ {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
+ {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
+ {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
+ {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
+ {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
+ {NULL} /* sentinel */
+};
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
+static PyObject *
+ascii_escape_unicode(PyObject *pystr);
+static PyObject *
+ascii_escape_str(PyObject *pystr);
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
+void init_speedups(void);
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+scanner_dealloc(PyObject *self);
+static int
+scanner_clear(PyObject *self);
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
+static void
+encoder_dealloc(PyObject *self);
+static int
+encoder_clear(PyObject *self);
+static int
+encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
+static int
+encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
+static int
+encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
+static PyObject *
+_encoded_const(PyObject *obj);
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj);
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj);
+
+/* True when c can be copied into ASCII-only JSON output unescaped: a
+   character from ' ' through '~' other than backslash and double quote.
+   The argument is parenthesized so that expression arguments parse as
+   intended; note that it is still evaluated more than once. */
+#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
+/* True for space, tab, line feed and carriage return */
+#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
+
+/* Output bytes one escaped input character may need: 6 for "\uXXXX", and
+   twice that on wide-unicode builds, where a character can become an escaped
+   surrogate pair "\uXXXX\uXXXX". */
+#define MIN_EXPANSION 6
+#ifdef Py_UNICODE_WIDE
+#define MAX_EXPANSION (2 * MIN_EXPANSION)
+#else
+#define MAX_EXPANSION MIN_EXPANSION
+#endif
+
+static int
+_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
+{
+    /* "O&" converter for argument parsing: store o as a Py_ssize_t in
+       *size_ptr.  Returns 1 on success and 0 when the conversion raised. */
+    Py_ssize_t converted = PyInt_AsSsize_t(o);
+
+    *size_ptr = converted;
+    /* -1 is only an error indicator when an exception is actually pending */
+    return !(converted == -1 && PyErr_Occurred());
+}
+
+static PyObject *
+_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
+{
+    /* "O&" converter for building call arguments: wrap the Py_ssize_t that
+       size_ptr points at in a new Python int object. */
+    Py_ssize_t value = *size_ptr;
+
+    return PyInt_FromSsize_t(value);
+}
+
+static Py_ssize_t
+ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
+{
+    /* Write the ASCII JSON escape sequence for code point c into output,
+       starting at index chars, and return the index just past what was
+       written.  output must have at least 12 unused bytes so that an
+       escaped surrogate pair "\uXXXX\uXXXX" fits. */
+    static const char hex_digits[] = "0123456789abcdef";
+    int shift;
+
+    output[chars++] = '\\';
+    switch (c) {
+        /* Characters with a two-byte escape */
+        case '\\':
+        case '"':
+            output[chars++] = (char)c;
+            return chars;
+        case '\b':
+            output[chars++] = 'b';
+            return chars;
+        case '\f':
+            output[chars++] = 'f';
+            return chars;
+        case '\n':
+            output[chars++] = 'n';
+            return chars;
+        case '\r':
+            output[chars++] = 'r';
+            return chars;
+        case '\t':
+            output[chars++] = 't';
+            return chars;
+        default:
+            break;
+    }
+
+    /* Everything else becomes \uXXXX */
+#ifdef Py_UNICODE_WIDE
+    if (c >= 0x10000) {
+        /* Split into a UTF-16 surrogate pair; emit the high half here and
+           leave the low half in c for the common code below */
+        Py_UNICODE offset = c - 0x10000;
+        Py_UNICODE high = 0xd800 | ((offset >> 10) & 0x3ff);
+
+        output[chars++] = 'u';
+        for (shift = 12; shift >= 0; shift -= 4) {
+            output[chars++] = hex_digits[(high >> shift) & 0xf];
+        }
+        output[chars++] = '\\';
+        c = 0xdc00 | (offset & 0x3ff);
+    }
+#endif
+    output[chars++] = 'u';
+    for (shift = 12; shift >= 0; shift -= 4) {
+        output[chars++] = hex_digits[(c >> shift) & 0xf];
+    }
+    return chars;
+}
+
+static PyObject *
+ascii_escape_unicode(PyObject *pystr)
+{
+    /* Return a new PyString holding the double-quoted, ASCII-only JSON
+       escaping of the PyUnicode pystr, or NULL if allocating or resizing
+       the result fails. */
+    const Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
+    Py_UNICODE *text = PyUnicode_AS_UNICODE(pystr);
+    /* Two quotes, the input itself, and room for four escaped characters */
+    Py_ssize_t capacity = 2 + (MIN_EXPANSION * 4) + length;
+    /* Upper bound: two quotes plus every character fully expanded */
+    const Py_ssize_t capacity_limit = 2 + (length * MAX_EXPANSION);
+    Py_ssize_t written = 0;
+    Py_ssize_t pos;
+    PyObject *result;
+    char *out;
+
+    result = PyString_FromStringAndSize(NULL, capacity);
+    if (result == NULL) {
+        return NULL;
+    }
+    out = PyString_AS_STRING(result);
+    out[written++] = '"';
+
+    for (pos = 0; pos < length; pos++) {
+        Py_UNICODE c = text[pos];
+        Py_ssize_t grown;
+
+        if (S_CHAR(c)) {
+            out[written++] = (char)c;
+        }
+        else {
+            written = ascii_escape_char(c, out, written);
+        }
+
+        /* Keep room for one more worst-case character plus the end quote */
+        if (capacity - written >= (1 + MAX_EXPANSION)) {
+            continue;
+        }
+        grown = capacity * 2;
+        if (grown > capacity_limit) {
+            grown = capacity_limit;
+        }
+        /* Nothing to do when the buffer is already at the chosen size */
+        if (grown == capacity) {
+            continue;
+        }
+        capacity = grown;
+        if (_PyString_Resize(&result, capacity) == -1) {
+            return NULL;
+        }
+        out = PyString_AS_STRING(result);
+    }
+
+    out[written++] = '"';
+    /* Trim the string to the bytes actually written */
+    if (_PyString_Resize(&result, written) == -1) {
+        return NULL;
+    }
+    return result;
+}
+
+static PyObject *
+ascii_escape_str(PyObject *pystr)
+{
+    /* Return a new PyString holding the double-quoted, ASCII-only JSON
+       escaping of the PyString pystr.  Input containing a byte above 0x7f
+       is decoded as UTF-8 and handed to ascii_escape_unicode.  Returns NULL
+       on failure. */
+    const Py_ssize_t length = PyString_GET_SIZE(pystr);
+    char *bytes = PyString_AS_STRING(pystr);
+    Py_ssize_t safe_len;
+    Py_ssize_t pos;
+    Py_ssize_t capacity;
+    Py_ssize_t written;
+    PyObject *result;
+    char *out;
+
+    /* Measure the leading run of bytes that need no escaping at all */
+    for (safe_len = 0; safe_len < length; safe_len++) {
+        Py_UNICODE c = (Py_UNICODE)(unsigned char)bytes[safe_len];
+        if (!S_CHAR(c)) {
+            break;
+        }
+    }
+
+    /* If anything after that run is non-ASCII, switch to unicode mode */
+    for (pos = safe_len; pos < length; pos++) {
+        if ((unsigned char)bytes[pos] > 0x7f) {
+            PyObject *decoded = PyUnicode_DecodeUTF8(bytes, length, "strict");
+            if (decoded == NULL) {
+                return NULL;
+            }
+            result = ascii_escape_unicode(decoded);
+            Py_DECREF(decoded);
+            return result;
+        }
+    }
+
+    /* Exact size when nothing needs escaping; otherwise leave room for four
+       escaped characters to start with */
+    capacity = (safe_len == length)
+        ? 2 + length
+        : 2 + (MIN_EXPANSION * 4) + length;
+    result = PyString_FromStringAndSize(NULL, capacity);
+    if (result == NULL) {
+        return NULL;
+    }
+    out = PyString_AS_STRING(result);
+    out[0] = '"';
+
+    /* The safe prefix is copied verbatim right after the opening quote */
+    memcpy(&out[1], bytes, safe_len);
+    written = safe_len + 1;
+
+    for (pos = safe_len; pos < length; pos++) {
+        Py_UNICODE c = (Py_UNICODE)(unsigned char)bytes[pos];
+
+        if (S_CHAR(c)) {
+            out[written++] = (char)c;
+        }
+        else {
+            written = ascii_escape_char(c, out, written);
+        }
+        /* ASCII input never produces a surrogate pair, so MIN_EXPANSION is
+           enough headroom here */
+        if (capacity - written < (1 + MIN_EXPANSION)) {
+            capacity *= 2;
+            if (capacity > 2 + (length * MIN_EXPANSION)) {
+                capacity = 2 + (length * MIN_EXPANSION);
+            }
+            if (_PyString_Resize(&result, capacity) == -1) {
+                return NULL;
+            }
+            out = PyString_AS_STRING(result);
+        }
+    }
+
+    out[written++] = '"';
+    /* Trim the string to the bytes actually written */
+    if (_PyString_Resize(&result, written) == -1) {
+        return NULL;
+    }
+    return result;
+}
+
+static void
+raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
+{
+    /* Set simplejson.decoder.JSONDecodeError, constructed from
+       (msg, s, end), as the current exception.  The exception class is
+       looked up on first use and cached in a static.  If the lookup or the
+       construction fails, this function returns without setting the
+       JSONDecodeError. */
+    static PyObject *JSONDecodeError = NULL;
+    PyObject *exc;
+
+    if (JSONDecodeError == NULL) {
+        PyObject *decoder_module = PyImport_ImportModule("simplejson.decoder");
+        if (decoder_module == NULL) {
+            return;
+        }
+        JSONDecodeError = PyObject_GetAttrString(decoder_module, "JSONDecodeError");
+        Py_DECREF(decoder_module);
+        if (JSONDecodeError == NULL) {
+            return;
+        }
+    }
+
+    exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
+    if (exc == NULL) {
+        return;
+    }
+    PyErr_SetObject(JSONDecodeError, exc);
+    Py_DECREF(exc);
+}
+
+static PyObject *
+join_list_unicode(PyObject *lst)
+{
+    /* Equivalent of u''.join(lst).  The bound join method of an empty
+       unicode string is created on first use and kept in a static. */
+    static PyObject *joinfn = NULL;
+
+    if (joinfn == NULL) {
+        PyObject *empty = PyUnicode_FromUnicode(NULL, 0);
+        if (empty != NULL) {
+            joinfn = PyObject_GetAttrString(empty, "join");
+            Py_DECREF(empty);
+        }
+        if (joinfn == NULL) {
+            return NULL;
+        }
+    }
+    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
+}
+
+static PyObject *
+join_list_string(PyObject *lst)
+{
+    /* Equivalent of ''.join(lst).  The bound join method of an empty
+       string is created on first use and kept in a static. */
+    static PyObject *joinfn = NULL;
+
+    if (joinfn == NULL) {
+        PyObject *empty = PyString_FromStringAndSize(NULL, 0);
+        if (empty != NULL) {
+            joinfn = PyObject_GetAttrString(empty, "join");
+            Py_DECREF(empty);
+        }
+        if (joinfn == NULL) {
+            return NULL;
+        }
+    }
+    return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
+}
+
+static PyObject *
+_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
+    /* Return the tuple (rval, idx).  The caller's reference to rval is
+       stolen: it ends up owned by the tuple, or is released when building
+       the tuple fails.  A NULL rval is passed straight through as NULL. */
+    PyObject *index_obj;
+    PyObject *pair = NULL;
+
+    if (rval == NULL) {
+        return NULL;
+    }
+    index_obj = PyInt_FromSsize_t(idx);
+    if (index_obj != NULL) {
+        pair = PyTuple_New(2);
+    }
+    if (pair == NULL) {
+        /* Either allocation failed: drop whatever we hold */
+        Py_XDECREF(index_obj);
+        Py_DECREF(rval);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(pair, 0, rval);
+    PyTuple_SET_ITEM(pair, 1, index_obj);
+    return pair;
+}
+
+/* Append the pending chunk (if any) to the chunks list, creating the list on
+ first use, then reset chunk to NULL. The expansion relies on the enclosing
+ function declaring PyObject *chunk and PyObject *chunks and providing a
+ bail label, which is jumped to when creating the list or appending fails. */
+#define APPEND_OLD_CHUNK \
+ if (chunk != NULL) { \
+ if (chunks == NULL) { \
+ chunks = PyList_New(0); \
+ if (chunks == NULL) { \
+ goto bail; \
+ } \
+ } \
+ if (PyList_Append(chunks, chunk)) { \
+ goto bail; \
+ } \
+ Py_CLEAR(chunk); \
+ }
+
+static PyObject *
+scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
+{
+    /* Read the JSON string from PyString pystr.
+    end is the index of the first character after the quote.
+    encoding is the encoding of pystr (must be an ASCII superset)
+    if strict is zero then literal control characters are allowed
+    *next_end_ptr is a return-by-reference index of the character
+        after the end quote
+
+    Return value is a new PyString (if ASCII-only) or PyUnicode.
+    On failure NULL is returned, with *next_end_ptr set to -1.
+    */
+    PyObject *rval;
+    Py_ssize_t len = PyString_GET_SIZE(pystr);
+    Py_ssize_t begin = end - 1;
+    Py_ssize_t next = begin;
+    int has_unicode = 0;
+    char *buf = PyString_AS_STRING(pystr);
+    /* chunk is the most recent piece; chunks collects earlier pieces and is
+       only created once a second piece shows up (see APPEND_OLD_CHUNK) */
+    PyObject *chunks = NULL;
+    PyObject *chunk = NULL;
+
+    if (end < 0 || len <= end) {
+        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
+        goto bail;
+    }
+    while (1) {
+        /* Find the end of the string or the next escape */
+        Py_UNICODE c = 0;
+        for (next = end; next < len; next++) {
+            c = (unsigned char)buf[next];
+            if (c == '"' || c == '\\') {
+                break;
+            }
+            else if (strict && c <= 0x1f) {
+                raise_errmsg("Invalid control character at", pystr, next);
+                goto bail;
+            }
+            else if (c > 0x7f) {
+                has_unicode = 1;
+            }
+        }
+        if (!(c == '"' || c == '\\')) {
+            raise_errmsg("Unterminated string starting at", pystr, begin);
+            goto bail;
+        }
+        /* Pick up this chunk if it's not zero length */
+        if (next != end) {
+            PyObject *strchunk;
+            APPEND_OLD_CHUNK
+            strchunk = PyString_FromStringAndSize(&buf[end], next - end);
+            if (strchunk == NULL) {
+                goto bail;
+            }
+            if (has_unicode) {
+                /* Non-ASCII bytes were seen: decode the run using encoding */
+                chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
+                Py_DECREF(strchunk);
+                if (chunk == NULL) {
+                    goto bail;
+                }
+            }
+            else {
+                chunk = strchunk;
+            }
+        }
+        next++;
+        if (c == '"') {
+            end = next;
+            break;
+        }
+        if (next == len) {
+            raise_errmsg("Unterminated string starting at", pystr, begin);
+            goto bail;
+        }
+        c = buf[next];
+        if (c != 'u') {
+            /* Non-unicode backslash escapes */
+            end = next + 1;
+            switch (c) {
+                case '"': break;
+                case '\\': break;
+                case '/': break;
+                case 'b': c = '\b'; break;
+                case 'f': c = '\f'; break;
+                case 'n': c = '\n'; break;
+                case 'r': c = '\r'; break;
+                case 't': c = '\t'; break;
+                default: c = 0;
+            }
+            if (c == 0) {
+                raise_errmsg("Invalid \\escape", pystr, end - 2);
+                goto bail;
+            }
+        }
+        else {
+            c = 0;
+            next++;
+            end = next + 4;
+            if (end >= len) {
+                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
+                goto bail;
+            }
+            /* Decode 4 hex digits */
+            for (; next < end; next++) {
+                Py_UNICODE digit = buf[next];
+                c <<= 4;
+                switch (digit) {
+                    case '0': case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7': case '8': case '9':
+                        c |= (digit - '0'); break;
+                    case 'a': case 'b': case 'c': case 'd': case 'e':
+                    case 'f':
+                        c |= (digit - 'a' + 10); break;
+                    case 'A': case 'B': case 'C': case 'D': case 'E':
+                    case 'F':
+                        c |= (digit - 'A' + 10); break;
+                    default:
+                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+                        goto bail;
+                }
+            }
+#ifdef Py_UNICODE_WIDE
+            /* Surrogate pair */
+            if ((c & 0xfc00) == 0xd800) {
+                Py_UNICODE c2 = 0;
+                if (end + 6 >= len) {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                if (buf[next++] != '\\' || buf[next++] != 'u') {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                end += 6;
+                /* Decode 4 hex digits */
+                for (; next < end; next++) {
+                    /* Declaration comes before the first statement so the
+                       block is valid C89, as in the loop above */
+                    Py_UNICODE digit = buf[next];
+                    c2 <<= 4;
+                    switch (digit) {
+                        case '0': case '1': case '2': case '3': case '4':
+                        case '5': case '6': case '7': case '8': case '9':
+                            c2 |= (digit - '0'); break;
+                        case 'a': case 'b': case 'c': case 'd': case 'e':
+                        case 'f':
+                            c2 |= (digit - 'a' + 10); break;
+                        case 'A': case 'B': case 'C': case 'D': case 'E':
+                        case 'F':
+                            c2 |= (digit - 'A' + 10); break;
+                        default:
+                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+                            goto bail;
+                    }
+                }
+                if ((c2 & 0xfc00) != 0xdc00) {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                /* Combine the two halves into one code point */
+                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
+            }
+            else if ((c & 0xfc00) == 0xdc00) {
+                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
+                goto bail;
+            }
+#endif
+        }
+        if (c > 0x7f) {
+            has_unicode = 1;
+        }
+        APPEND_OLD_CHUNK
+        if (has_unicode) {
+            chunk = PyUnicode_FromUnicode(&c, 1);
+            if (chunk == NULL) {
+                goto bail;
+            }
+        }
+        else {
+            char c_char = Py_CHARMASK(c);
+            chunk = PyString_FromStringAndSize(&c_char, 1);
+            if (chunk == NULL) {
+                goto bail;
+            }
+        }
+    }
+
+    if (chunks == NULL) {
+        /* Zero or one piece: no join needed */
+        if (chunk != NULL)
+            rval = chunk;
+        else
+            rval = PyString_FromStringAndSize("", 0);
+    }
+    else {
+        APPEND_OLD_CHUNK
+        rval = join_list_string(chunks);
+        if (rval == NULL) {
+            goto bail;
+        }
+        Py_CLEAR(chunks);
+    }
+
+    *next_end_ptr = end;
+    return rval;
+bail:
+    *next_end_ptr = -1;
+    Py_XDECREF(chunk);
+    Py_XDECREF(chunks);
+    return NULL;
+}
+
+
+static PyObject *
+scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
+{
+    /* Read the JSON string from PyUnicode pystr.
+    end is the index of the first character after the quote.
+    if strict is zero then literal control characters are allowed
+    *next_end_ptr is a return-by-reference index of the character
+        after the end quote
+
+    Return value is a new PyUnicode.
+    On failure NULL is returned, with *next_end_ptr set to -1.
+    */
+    PyObject *rval;
+    Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
+    Py_ssize_t begin = end - 1;
+    Py_ssize_t next = begin;
+    const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
+    /* chunk is the most recent piece; chunks collects earlier pieces and is
+       only created once a second piece shows up (see APPEND_OLD_CHUNK) */
+    PyObject *chunks = NULL;
+    PyObject *chunk = NULL;
+
+    if (end < 0 || len <= end) {
+        PyErr_SetString(PyExc_ValueError, "end is out of bounds");
+        goto bail;
+    }
+    while (1) {
+        /* Find the end of the string or the next escape */
+        Py_UNICODE c = 0;
+        for (next = end; next < len; next++) {
+            c = buf[next];
+            if (c == '"' || c == '\\') {
+                break;
+            }
+            else if (strict && c <= 0x1f) {
+                raise_errmsg("Invalid control character at", pystr, next);
+                goto bail;
+            }
+        }
+        if (!(c == '"' || c == '\\')) {
+            raise_errmsg("Unterminated string starting at", pystr, begin);
+            goto bail;
+        }
+        /* Pick up this chunk if it's not zero length */
+        if (next != end) {
+            APPEND_OLD_CHUNK
+            chunk = PyUnicode_FromUnicode(&buf[end], next - end);
+            if (chunk == NULL) {
+                goto bail;
+            }
+        }
+        next++;
+        if (c == '"') {
+            end = next;
+            break;
+        }
+        if (next == len) {
+            raise_errmsg("Unterminated string starting at", pystr, begin);
+            goto bail;
+        }
+        c = buf[next];
+        if (c != 'u') {
+            /* Non-unicode backslash escapes */
+            end = next + 1;
+            switch (c) {
+                case '"': break;
+                case '\\': break;
+                case '/': break;
+                case 'b': c = '\b'; break;
+                case 'f': c = '\f'; break;
+                case 'n': c = '\n'; break;
+                case 'r': c = '\r'; break;
+                case 't': c = '\t'; break;
+                default: c = 0;
+            }
+            if (c == 0) {
+                raise_errmsg("Invalid \\escape", pystr, end - 2);
+                goto bail;
+            }
+        }
+        else {
+            c = 0;
+            next++;
+            end = next + 4;
+            if (end >= len) {
+                raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
+                goto bail;
+            }
+            /* Decode 4 hex digits */
+            for (; next < end; next++) {
+                Py_UNICODE digit = buf[next];
+                c <<= 4;
+                switch (digit) {
+                    case '0': case '1': case '2': case '3': case '4':
+                    case '5': case '6': case '7': case '8': case '9':
+                        c |= (digit - '0'); break;
+                    case 'a': case 'b': case 'c': case 'd': case 'e':
+                    case 'f':
+                        c |= (digit - 'a' + 10); break;
+                    case 'A': case 'B': case 'C': case 'D': case 'E':
+                    case 'F':
+                        c |= (digit - 'A' + 10); break;
+                    default:
+                        raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+                        goto bail;
+                }
+            }
+#ifdef Py_UNICODE_WIDE
+            /* Surrogate pair */
+            if ((c & 0xfc00) == 0xd800) {
+                Py_UNICODE c2 = 0;
+                if (end + 6 >= len) {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                if (buf[next++] != '\\' || buf[next++] != 'u') {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                end += 6;
+                /* Decode 4 hex digits */
+                for (; next < end; next++) {
+                    /* Declaration comes before the first statement so the
+                       block is valid C89, as in the loop above */
+                    Py_UNICODE digit = buf[next];
+                    c2 <<= 4;
+                    switch (digit) {
+                        case '0': case '1': case '2': case '3': case '4':
+                        case '5': case '6': case '7': case '8': case '9':
+                            c2 |= (digit - '0'); break;
+                        case 'a': case 'b': case 'c': case 'd': case 'e':
+                        case 'f':
+                            c2 |= (digit - 'a' + 10); break;
+                        case 'A': case 'B': case 'C': case 'D': case 'E':
+                        case 'F':
+                            c2 |= (digit - 'A' + 10); break;
+                        default:
+                            raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
+                            goto bail;
+                    }
+                }
+                if ((c2 & 0xfc00) != 0xdc00) {
+                    raise_errmsg("Unpaired high surrogate", pystr, end - 5);
+                    goto bail;
+                }
+                /* Combine the two halves into one code point */
+                c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
+            }
+            else if ((c & 0xfc00) == 0xdc00) {
+                raise_errmsg("Unpaired low surrogate", pystr, end - 5);
+                goto bail;
+            }
+#endif
+        }
+        APPEND_OLD_CHUNK
+        chunk = PyUnicode_FromUnicode(&c, 1);
+        if (chunk == NULL) {
+            goto bail;
+        }
+    }
+
+    if (chunks == NULL) {
+        /* Zero or one piece: no join needed */
+        if (chunk != NULL)
+            rval = chunk;
+        else
+            rval = PyUnicode_FromUnicode(NULL, 0);
+    }
+    else {
+        APPEND_OLD_CHUNK
+        rval = join_list_unicode(chunks);
+        if (rval == NULL) {
+            goto bail;
+        }
+        Py_CLEAR(chunks);
+    }
+    *next_end_ptr = end;
+    return rval;
+bail:
+    *next_end_ptr = -1;
+    Py_XDECREF(chunk);
+    Py_XDECREF(chunks);
+    return NULL;
+}
+
+PyDoc_STRVAR(pydoc_scanstring,
+ "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
+ "\n"
+ "Scan the string s for a JSON string. End is the index of the\n"
+ "character in s after the quote that started the JSON string.\n"
+ "Unescapes all valid JSON string escape sequences and raises ValueError\n"
+ "on attempt to decode an invalid string. If strict is False then literal\n"
+ "control characters are allowed in the string.\n"
+ "\n"
+ "Returns a tuple of the decoded string and the index of the character in s\n"
+ "after the end quote."
+);
+
+static PyObject *
+py_scanstring(PyObject* self UNUSED, PyObject *args)
+{
+    /* Python entry point for scanstring: parse (string, end[, encoding
+       [, strict]]), dispatch on the string type and return the tuple
+       (decoded string, index after the end quote).  A missing or None
+       encoding falls back to DEFAULT_ENCODING. */
+    PyObject *pystr;
+    PyObject *decoded;
+    Py_ssize_t end;
+    Py_ssize_t next_end = -1;
+    char *encoding = NULL;
+    int strict = 1;
+
+    if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
+        return NULL;
+    }
+    /* Reject anything that is neither str nor unicode up front */
+    if (!PyString_Check(pystr) && !PyUnicode_Check(pystr)) {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(pystr)->tp_name);
+        return NULL;
+    }
+    if (PyString_Check(pystr)) {
+        decoded = scanstring_str(pystr, end,
+                                 encoding != NULL ? encoding : DEFAULT_ENCODING,
+                                 strict, &next_end);
+    }
+    else {
+        decoded = scanstring_unicode(pystr, end, strict, &next_end);
+    }
+    /* A NULL result is passed through as NULL by the tuple builder */
+    return _build_rval_index_tuple(decoded, next_end);
+}
+
+PyDoc_STRVAR(pydoc_encode_basestring_ascii,
+ "encode_basestring_ascii(basestring) -> str\n"
+ "\n"
+ "Return an ASCII-only JSON representation of a Python string"
+);
+
+static PyObject *
+py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
+{
+    /* Python entry point (METH_O): return the ASCII-only JSON
+       representation of a str or unicode object, raising TypeError for any
+       other argument type. */
+    if (PyString_Check(pystr)) {
+        return ascii_escape_str(pystr);
+    }
+    if (PyUnicode_Check(pystr)) {
+        return ascii_escape_unicode(pystr);
+    }
+    PyErr_Format(PyExc_TypeError,
+                 "first argument must be a string, not %.80s",
+                 Py_TYPE(pystr)->tp_name);
+    return NULL;
+}
+
+static void
+scanner_dealloc(PyObject *self)
+{
+ /* Deallocate scanner object: release every owned reference through
+ scanner_clear, then free the instance with the type's tp_free. */
+ /* NOTE(review): the type has a traverse function, so it is presumably
+ GC-enabled; if so, CPython's guidance is to call PyObject_GC_UnTrack
+ before clearing fields -- confirm against the type's tp_flags. */
+ scanner_clear(self);
+ Py_TYPE(self)->tp_free(self);
+}
+
+static int
+scanner_traverse(PyObject *self, visitproc visit, void *arg)
+{
+ /* Traversal function: visit every object reference held by the scanner.
+ Py_VISIT returns early from this function if visit reports a non-zero
+ result; otherwise 0 is returned. */
+ PyScannerObject *s;
+ assert(PyScanner_Check(self));
+ s = (PyScannerObject *)self;
+ Py_VISIT(s->encoding);
+ Py_VISIT(s->strict);
+ Py_VISIT(s->object_hook);
+ Py_VISIT(s->pairs_hook);
+ Py_VISIT(s->parse_float);
+ Py_VISIT(s->parse_int);
+ Py_VISIT(s->parse_constant);
+ Py_VISIT(s->memo);
+ return 0;
+}
+
+static int
+scanner_clear(PyObject *self)
+{
+ /* Drop every object reference held by the scanner, leaving each field
+ NULL. Also used by scanner_dealloc. Always returns 0. */
+ PyScannerObject *s;
+ assert(PyScanner_Check(self));
+ s = (PyScannerObject *)self;
+ Py_CLEAR(s->encoding);
+ Py_CLEAR(s->strict);
+ Py_CLEAR(s->object_hook);
+ Py_CLEAR(s->pairs_hook);
+ Py_CLEAR(s->parse_float);
+ Py_CLEAR(s->parse_int);
+ Py_CLEAR(s->parse_constant);
+ Py_CLEAR(s->memo);
+ return 0;
+}
+
+static PyObject *
+_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON object from PyString pystr.
+ idx is the index of the first character after the opening curly brace.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing curly brace; it is written only on success.
+
+ Returns a new reference:
+ - the result of s->pairs_hook(list of (key, value) tuples) when pairs_hook
+ is not None (object_hook is then not consulted),
+ - otherwise the result of s->object_hook(dict) when object_hook is not None,
+ - otherwise the dict itself.
+ Returns NULL on failure, after releasing every intermediate reference.
+ */
+ char *str = PyString_AS_STRING(pystr);
+ /* end_idx is the index of the last character, not one past the end */
+ Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+ PyObject *rval = NULL;
+ PyObject *pairs = NULL;
+ PyObject *item;
+ PyObject *key = NULL;
+ PyObject *val = NULL;
+ char *encoding = PyString_AS_STRING(s->encoding);
+ /* NOTE(review): a -1 error result from PyObject_IsTrue is not checked here */
+ int strict = PyObject_IsTrue(s->strict);
+ int has_pairs_hook = (s->pairs_hook != Py_None);
+ Py_ssize_t next_idx;
+ /* only one container is allocated: a list of pairs or a dict */
+ if (has_pairs_hook) {
+ pairs = PyList_New(0);
+ if (pairs == NULL)
+ return NULL;
+ }
+ else {
+ rval = PyDict_New();
+ if (rval == NULL)
+ return NULL;
+ }
+
+ /* skip whitespace after { */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* only loop if the object is non-empty */
+ if (idx <= end_idx && str[idx] != '}') {
+ while (idx <= end_idx) {
+ PyObject *memokey;
+
+ /* read key */
+ if (str[idx] != '"') {
+ raise_errmsg("Expecting property name", pystr, idx);
+ goto bail;
+ }
+ key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
+ if (key == NULL)
+ goto bail;
+ /* reuse an equal key already stored in s->memo so that repeated
+ keys share a single object; PyDict_GetItem returns a borrowed
+ reference, hence the INCREF before it replaces key */
+ memokey = PyDict_GetItem(s->memo, key);
+ if (memokey != NULL) {
+ Py_INCREF(memokey);
+ Py_DECREF(key);
+ key = memokey;
+ }
+ else {
+ /* first occurrence: remember the key under itself */
+ if (PyDict_SetItem(s->memo, key, key) < 0)
+ goto bail;
+ }
+ idx = next_idx;
+
+ /* skip whitespace between key and : delimiter, read :, skip whitespace */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ if (idx > end_idx || str[idx] != ':') {
+ raise_errmsg("Expecting : delimiter", pystr, idx);
+ goto bail;
+ }
+ idx++;
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* read any JSON data type */
+ val = scan_once_str(s, pystr, idx, &next_idx);
+ if (val == NULL)
+ goto bail;
+
+ if (has_pairs_hook) {
+ /* the tuple holds its own references to key and val, so ours
+ are dropped as soon as it has been built */
+ item = PyTuple_Pack(2, key, val);
+ if (item == NULL)
+ goto bail;
+ Py_CLEAR(key);
+ Py_CLEAR(val);
+ if (PyList_Append(pairs, item) == -1) {
+ Py_DECREF(item);
+ goto bail;
+ }
+ Py_DECREF(item);
+ }
+ else {
+ if (PyDict_SetItem(rval, key, val) < 0)
+ goto bail;
+ /* key and val are reset to NULL so the bail path cannot
+ release them a second time */
+ Py_CLEAR(key);
+ Py_CLEAR(val);
+ }
+ idx = next_idx;
+
+ /* skip whitespace before } or , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* bail if the object is closed or we didn't get the , delimiter */
+ if (idx > end_idx) break;
+ if (str[idx] == '}') {
+ break;
+ }
+ else if (str[idx] != ',') {
+ raise_errmsg("Expecting , delimiter", pystr, idx);
+ goto bail;
+ }
+ idx++;
+
+ /* skip whitespace after , delimiter */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ }
+ }
+ /* verify that idx <= end_idx, str[idx] should be '}' */
+ if (idx > end_idx || str[idx] != '}') {
+ raise_errmsg("Expecting object", pystr, end_idx);
+ goto bail;
+ }
+
+ /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
+ if (s->pairs_hook != Py_None) {
+ val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
+ if (val == NULL)
+ goto bail;
+ Py_DECREF(pairs);
+ *next_idx_ptr = idx + 1;
+ return val;
+ }
+
+ /* if object_hook is not None: rval = object_hook(rval) */
+ if (s->object_hook != Py_None) {
+ val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
+ if (val == NULL)
+ goto bail;
+ /* the hook result replaces the dict as the return value */
+ Py_DECREF(rval);
+ rval = val;
+ val = NULL;
+ }
+ *next_idx_ptr = idx + 1;
+ return rval;
+bail:
+ /* error exit: release whatever is still owned (NULL-safe) */
+ Py_XDECREF(rval);
+ Py_XDECREF(key);
+ Py_XDECREF(val);
+ Py_XDECREF(pairs);
+ return NULL;
+}
+
+static PyObject *
+_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON object from PyUnicode pystr.
+ idx is the index of the first character after the opening curly brace.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing curly brace; it is written only on success.
+
+ Returns a new reference:
+ - the result of s->pairs_hook(list of (key, value) tuples) when pairs_hook
+ is not None (object_hook is then not consulted),
+ - otherwise the result of s->object_hook(dict) when object_hook is not None,
+ - otherwise the dict itself.
+ Returns NULL on failure, after releasing every intermediate reference.
+ */
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+ /* end_idx is the index of the last character, not one past the end */
+ Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ PyObject *rval = NULL;
+ PyObject *pairs = NULL;
+ PyObject *item;
+ PyObject *key = NULL;
+ PyObject *val = NULL;
+ /* NOTE(review): a -1 error result from PyObject_IsTrue is not checked here */
+ int strict = PyObject_IsTrue(s->strict);
+ int has_pairs_hook = (s->pairs_hook != Py_None);
+ Py_ssize_t next_idx;
+
+ /* only one container is allocated: a list of pairs or a dict */
+ if (has_pairs_hook) {
+ pairs = PyList_New(0);
+ if (pairs == NULL)
+ return NULL;
+ }
+ else {
+ rval = PyDict_New();
+ if (rval == NULL)
+ return NULL;
+ }
+
+ /* skip whitespace after { */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* only loop if the object is non-empty */
+ if (idx <= end_idx && str[idx] != '}') {
+ while (idx <= end_idx) {
+ PyObject *memokey;
+
+ /* read key */
+ if (str[idx] != '"') {
+ raise_errmsg("Expecting property name", pystr, idx);
+ goto bail;
+ }
+ key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
+ if (key == NULL)
+ goto bail;
+ /* reuse an equal key already stored in s->memo so that repeated
+ keys share a single object; PyDict_GetItem returns a borrowed
+ reference, hence the INCREF before it replaces key */
+ memokey = PyDict_GetItem(s->memo, key);
+ if (memokey != NULL) {
+ Py_INCREF(memokey);
+ Py_DECREF(key);
+ key = memokey;
+ }
+ else {
+ /* first occurrence: remember the key under itself */
+ if (PyDict_SetItem(s->memo, key, key) < 0)
+ goto bail;
+ }
+ idx = next_idx;
+
+ /* skip whitespace between key and : delimiter, read :, skip whitespace */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ if (idx > end_idx || str[idx] != ':') {
+ raise_errmsg("Expecting : delimiter", pystr, idx);
+ goto bail;
+ }
+ idx++;
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* read any JSON term */
+ val = scan_once_unicode(s, pystr, idx, &next_idx);
+ if (val == NULL)
+ goto bail;
+
+ if (has_pairs_hook) {
+ /* the tuple holds its own references to key and val, so ours
+ are dropped as soon as it has been built */
+ item = PyTuple_Pack(2, key, val);
+ if (item == NULL)
+ goto bail;
+ Py_CLEAR(key);
+ Py_CLEAR(val);
+ if (PyList_Append(pairs, item) == -1) {
+ Py_DECREF(item);
+ goto bail;
+ }
+ Py_DECREF(item);
+ }
+ else {
+ if (PyDict_SetItem(rval, key, val) < 0)
+ goto bail;
+ /* key and val are reset to NULL so the bail path cannot
+ release them a second time */
+ Py_CLEAR(key);
+ Py_CLEAR(val);
+ }
+ idx = next_idx;
+
+ /* skip whitespace before } or , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* bail if the object is closed or we didn't get the , delimiter */
+ if (idx > end_idx) break;
+ if (str[idx] == '}') {
+ break;
+ }
+ else if (str[idx] != ',') {
+ raise_errmsg("Expecting , delimiter", pystr, idx);
+ goto bail;
+ }
+ idx++;
+
+ /* skip whitespace after , delimiter */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ }
+ }
+
+ /* verify that idx <= end_idx, str[idx] should be '}' */
+ if (idx > end_idx || str[idx] != '}') {
+ raise_errmsg("Expecting object", pystr, end_idx);
+ goto bail;
+ }
+
+ /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
+ if (s->pairs_hook != Py_None) {
+ val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
+ if (val == NULL)
+ goto bail;
+ Py_DECREF(pairs);
+ *next_idx_ptr = idx + 1;
+ return val;
+ }
+
+ /* if object_hook is not None: rval = object_hook(rval) */
+ if (s->object_hook != Py_None) {
+ val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
+ if (val == NULL)
+ goto bail;
+ /* the hook result replaces the dict as the return value */
+ Py_DECREF(rval);
+ rval = val;
+ val = NULL;
+ }
+ *next_idx_ptr = idx + 1;
+ return rval;
+bail:
+ /* error exit: release whatever is still owned (NULL-safe) */
+ Py_XDECREF(rval);
+ Py_XDECREF(key);
+ Py_XDECREF(val);
+ Py_XDECREF(pairs);
+ return NULL;
+}
+
+static PyObject *
+_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON array from PyString pystr.
+ idx is the index of the first character after the opening square bracket.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing square bracket; it is written only on success.
+
+ Returns a new PyList, or NULL on failure (the partially built list is
+ released).
+ */
+ char *str = PyString_AS_STRING(pystr);
+ /* end_idx is the index of the last character, not one past the end */
+ Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+ PyObject *val = NULL;
+ PyObject *rval = PyList_New(0);
+ Py_ssize_t next_idx;
+ if (rval == NULL)
+ return NULL;
+
+ /* skip whitespace after [ */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* only loop if the array is non-empty */
+ if (idx <= end_idx && str[idx] != ']') {
+ while (idx <= end_idx) {
+
+ /* read any JSON term; next_idx receives the index just past it */
+ val = scan_once_str(s, pystr, idx, &next_idx);
+ if (val == NULL) {
+ /* a StopIteration from the term scanner is replaced by a
+ positioned error message; other exceptions pass through */
+ if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
+ PyErr_Clear();
+ raise_errmsg("Expecting object", pystr, idx);
+ }
+ goto bail;
+ }
+
+ if (PyList_Append(rval, val) == -1)
+ goto bail;
+
+ /* the list now holds its own reference to the element */
+ Py_CLEAR(val);
+ idx = next_idx;
+
+ /* skip whitespace between term and , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* bail if the array is closed or we didn't get the , delimiter */
+ if (idx > end_idx) break;
+ if (str[idx] == ']') {
+ break;
+ }
+ else if (str[idx] != ',') {
+ raise_errmsg("Expecting , delimiter", pystr, idx);
+ goto bail;
+ }
+ idx++;
+
+ /* skip whitespace after , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ }
+ }
+
+ /* verify that idx <= end_idx, str[idx] should be ']' */
+ if (idx > end_idx || str[idx] != ']') {
+ raise_errmsg("Expecting object", pystr, end_idx);
+ goto bail;
+ }
+ *next_idx_ptr = idx + 1;
+ return rval;
+bail:
+ /* error exit: val may be NULL here, rval never is */
+ Py_XDECREF(val);
+ Py_DECREF(rval);
+ return NULL;
+}
+
+static PyObject *
+_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON array from PyUnicode pystr.
+ idx is the index of the first character after the opening square bracket.
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the closing square bracket; it is written only on success.
+
+ Returns a new PyList, or NULL on failure (the partially built list is
+ released).
+ */
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+ /* end_idx is the index of the last character, not one past the end */
+ Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ PyObject *val = NULL;
+ PyObject *rval = PyList_New(0);
+ Py_ssize_t next_idx;
+ if (rval == NULL)
+ return NULL;
+
+ /* skip whitespace after [ */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* only loop if the array is non-empty */
+ if (idx <= end_idx && str[idx] != ']') {
+ while (idx <= end_idx) {
+
+ /* read any JSON term; next_idx receives the index just past it */
+ val = scan_once_unicode(s, pystr, idx, &next_idx);
+ if (val == NULL) {
+ /* a StopIteration from the term scanner is replaced by a
+ positioned error message; other exceptions pass through */
+ if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
+ PyErr_Clear();
+ raise_errmsg("Expecting object", pystr, idx);
+ }
+ goto bail;
+ }
+
+ if (PyList_Append(rval, val) == -1)
+ goto bail;
+
+ /* the list now holds its own reference to the element */
+ Py_CLEAR(val);
+ idx = next_idx;
+
+ /* skip whitespace between term and , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+
+ /* bail if the array is closed or we didn't get the , delimiter */
+ if (idx > end_idx) break;
+ if (str[idx] == ']') {
+ break;
+ }
+ else if (str[idx] != ',') {
+ raise_errmsg("Expecting , delimiter", pystr, idx);
+ goto bail;
+ }
+ idx++;
+
+ /* skip whitespace after , */
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
+ }
+ }
+
+ /* verify that idx <= end_idx, str[idx] should be ']' */
+ if (idx > end_idx || str[idx] != ']') {
+ raise_errmsg("Expecting object", pystr, end_idx);
+ goto bail;
+ }
+ *next_idx_ptr = idx + 1;
+ return rval;
+bail:
+ /* error exit: val may be NULL here, rval never is */
+ Py_XDECREF(val);
+ Py_DECREF(rval);
+ return NULL;
+}
+
+static PyObject *
+_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
+    /* Convert a named JSON constant through the user's parse_constant hook.
+       constant is the text that was matched ("NaN", "Infinity" or
+       "-Infinity") and idx is the index of its first character.
+       *next_idx_ptr receives the index of the first character after the
+       constant; it is written whenever the hook was invoked, even when the
+       hook itself failed.
+
+       Returns whatever s->parse_constant(constant) returns (NULL if the
+       call raised), or NULL when the constant name could not be turned
+       into a Python string.
+    */
+    PyObject *name = PyString_InternFromString(constant);
+    PyObject *result;
+
+    if (name == NULL)
+        return NULL;
+
+    /* result = parse_constant(name) */
+    result = PyObject_CallFunctionObjArgs(s->parse_constant, name, NULL);
+
+    /* the scan position advances by the length of the constant's text */
+    *next_idx_ptr = idx + PyString_GET_SIZE(name);
+    Py_DECREF(name);
+    return result;
+}
+
+static PyObject *
+_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+    /* Read a JSON number from PyString pystr.
+       start is the index of the first character of the number.
+       *next_idx_ptr is a return-by-reference index to the first character
+       after the number; it is left untouched when no number is matched at
+       start.
+
+       Returns a new PyObject representation of that number:
+       PyInt, PyLong, or PyFloat.
+       May return other types if parse_int or parse_float are set.
+       Returns NULL with StopIteration set when the text at start is not a
+       number, and NULL with the converter's exception when conversion fails.
+    */
+    char *str = PyString_AS_STRING(pystr);
+    /* end_idx is the index of the last character, not one past the end */
+    Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
+    Py_ssize_t idx = start;
+    int is_float = 0;
+    PyObject *rval;
+    PyObject *numstr;
+
+    /* read a sign if it's there, make sure it's not the end of the string */
+    if (str[idx] == '-') {
+        idx++;
+        if (idx > end_idx) {
+            PyErr_SetNone(PyExc_StopIteration);
+            return NULL;
+        }
+    }
+
+    /* read as many integer digits as we find as long as it doesn't start with 0 */
+    if (str[idx] >= '1' && str[idx] <= '9') {
+        idx++;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+    /* if it starts with 0 we only expect one integer digit */
+    else if (str[idx] == '0') {
+        idx++;
+    }
+    /* no integer digits, error */
+    else {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+
+    /* if the next char is '.' followed by a digit then read all float digits */
+    if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+        is_float = 1;
+        idx += 2;
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+    }
+
+    /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+    if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+
+        /* save the index of the 'e' or 'E' just in case we need to backtrack */
+        Py_ssize_t e_start = idx;
+        idx++;
+
+        /* read an exponent sign if present */
+        if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+
+        /* read all digits */
+        while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+
+        /* if we got a digit, then parse as float. if not, backtrack */
+        if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+            is_float = 1;
+        }
+        else {
+            idx = e_start;
+        }
+    }
+
+    /* copy the section we determined to be a number */
+    numstr = PyString_FromStringAndSize(&str[start], idx - start);
+    if (numstr == NULL)
+        return NULL;
+    if (is_float) {
+        /* parse as a float using a fast path if available, otherwise call user defined method */
+        if (s->parse_float != (PyObject *)&PyFloat_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+        }
+        else {
+            double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
+                                             NULL, NULL);
+            if (d == -1.0 && PyErr_Occurred()) {
+                /* numstr is still owned here; release it before bailing
+                   out so the temporary string is not leaked */
+                Py_DECREF(numstr);
+                return NULL;
+            }
+            rval = PyFloat_FromDouble(d);
+        }
+    }
+    else {
+        /* parse as an int using a fast path if available, otherwise call user defined method */
+        if (s->parse_int != (PyObject *)&PyInt_Type) {
+            rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+        }
+        else {
+            rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
+        }
+    }
+    Py_DECREF(numstr);
+    *next_idx_ptr = idx;
+    return rval;
+}
+
+static PyObject *
+_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
+ /* Read a JSON number from PyUnicode pystr.
+ start is the index of the first character of the number
+ *next_idx_ptr is a return-by-reference index to the first character after
+ the number; it is left untouched when no number is matched at start.
+
+ Returns a new PyObject representation of that number:
+ PyInt, PyLong, or PyFloat.
+ May return other types if parse_int or parse_float are set.
+ Returns NULL with StopIteration set when the text at start is not a number.
+ */
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+ /* end_idx is the index of the last character, not one past the end */
+ Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
+ Py_ssize_t idx = start;
+ int is_float = 0;
+ PyObject *rval;
+ PyObject *numstr;
+
+ /* read a sign if it's there, make sure it's not the end of the string */
+ if (str[idx] == '-') {
+ idx++;
+ if (idx > end_idx) {
+ PyErr_SetNone(PyExc_StopIteration);
+ return NULL;
+ }
+ }
+
+ /* read as many integer digits as we find as long as it doesn't start with 0 */
+ if (str[idx] >= '1' && str[idx] <= '9') {
+ idx++;
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ }
+ /* if it starts with 0 we only expect one integer digit */
+ else if (str[idx] == '0') {
+ idx++;
+ }
+ /* no integer digits, error */
+ else {
+ PyErr_SetNone(PyExc_StopIteration);
+ return NULL;
+ }
+
+ /* if the next char is '.' followed by a digit then read all float digits */
+ if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
+ is_float = 1;
+ idx += 2;
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+ }
+
+ /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
+ if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
+ /* save the index of the 'e' or 'E' just in case we need to backtrack */
+ Py_ssize_t e_start = idx;
+ idx++;
+
+ /* read an exponent sign if present */
+ if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
+
+ /* read all digits */
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
+
+ /* if we got a digit, then parse as float. if not, backtrack */
+ if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
+ is_float = 1;
+ }
+ else {
+ idx = e_start;
+ }
+ }
+
+ /* copy the section we determined to be a number */
+ numstr = PyUnicode_FromUnicode(&str[start], idx - start);
+ if (numstr == NULL)
+ return NULL;
+ if (is_float) {
+ /* parse as a float using a fast path if available, otherwise call user defined method */
+ if (s->parse_float != (PyObject *)&PyFloat_Type) {
+ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
+ }
+ else {
+ rval = PyFloat_FromString(numstr, NULL);
+ }
+ }
+ else {
+ /* no fast path for unicode -> int, just call */
+ rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
+ }
+ /* rval may be NULL here; the index is still reported to the caller */
+ Py_DECREF(numstr);
+ *next_idx_ptr = idx;
+ return rval;
+}
+
+static PyObject *
+scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read one JSON term (of any kind) from PyString pystr.
+       idx is the index of the first character of the term.
+       *next_idx_ptr is a return-by-reference index to the first character
+       after the term.
+
+       Returns a new PyObject representation of the term, or NULL on error.
+       StopIteration is set when idx lies outside the string (negative or
+       past the end).
+    */
+    char *str = PyString_AS_STRING(pystr);
+    Py_ssize_t length = PyString_GET_SIZE(pystr);
+
+    /* idx comes straight from the Python caller; a negative value would
+       make str[idx] read memory in front of the buffer, so it is rejected
+       the same way as an index past the end */
+    if (idx < 0 || idx >= length) {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+    switch (str[idx]) {
+        case '"':
+            /* string */
+            return scanstring_str(pystr, idx + 1,
+                PyString_AS_STRING(s->encoding),
+                PyObject_IsTrue(s->strict),
+                next_idx_ptr);
+        case '{':
+            /* object */
+            return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
+        case '[':
+            /* array */
+            return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
+        case 'n':
+            /* null */
+            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
+                Py_INCREF(Py_None);
+                *next_idx_ptr = idx + 4;
+                return Py_None;
+            }
+            break;
+        case 't':
+            /* true */
+            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
+                Py_INCREF(Py_True);
+                *next_idx_ptr = idx + 4;
+                return Py_True;
+            }
+            break;
+        case 'f':
+            /* false */
+            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
+                Py_INCREF(Py_False);
+                *next_idx_ptr = idx + 5;
+                return Py_False;
+            }
+            break;
+        case 'N':
+            /* NaN */
+            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
+                return _parse_constant(s, "NaN", idx, next_idx_ptr);
+            }
+            break;
+        case 'I':
+            /* Infinity */
+            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
+                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
+            }
+            break;
+        case '-':
+            /* -Infinity; any other '-' falls through to the number matcher */
+            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
+                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
+            }
+            break;
+    }
+    /* Didn't find a string, object, array, or named constant. Look for a number. */
+    return _match_number_str(s, pystr, idx, next_idx_ptr);
+}
+
+static PyObject *
+scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
+{
+    /* Read one JSON term (of any kind) from PyUnicode pystr.
+       idx is the index of the first character of the term.
+       *next_idx_ptr is a return-by-reference index to the first character
+       after the term.
+
+       Returns a new PyObject representation of the term, or NULL on error.
+       StopIteration is set when idx lies outside the string (negative or
+       past the end).
+    */
+    Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
+    Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
+
+    /* idx comes straight from the Python caller; a negative value would
+       make str[idx] read memory in front of the buffer, so it is rejected
+       the same way as an index past the end */
+    if (idx < 0 || idx >= length) {
+        PyErr_SetNone(PyExc_StopIteration);
+        return NULL;
+    }
+    switch (str[idx]) {
+        case '"':
+            /* string */
+            return scanstring_unicode(pystr, idx + 1,
+                PyObject_IsTrue(s->strict),
+                next_idx_ptr);
+        case '{':
+            /* object */
+            return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
+        case '[':
+            /* array */
+            return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
+        case 'n':
+            /* null */
+            if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
+                Py_INCREF(Py_None);
+                *next_idx_ptr = idx + 4;
+                return Py_None;
+            }
+            break;
+        case 't':
+            /* true */
+            if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
+                Py_INCREF(Py_True);
+                *next_idx_ptr = idx + 4;
+                return Py_True;
+            }
+            break;
+        case 'f':
+            /* false */
+            if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
+                Py_INCREF(Py_False);
+                *next_idx_ptr = idx + 5;
+                return Py_False;
+            }
+            break;
+        case 'N':
+            /* NaN */
+            if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
+                return _parse_constant(s, "NaN", idx, next_idx_ptr);
+            }
+            break;
+        case 'I':
+            /* Infinity */
+            if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
+                return _parse_constant(s, "Infinity", idx, next_idx_ptr);
+            }
+            break;
+        case '-':
+            /* -Infinity; any other '-' falls through to the number matcher */
+            if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
+                return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
+            }
+            break;
+    }
+    /* Didn't find a string, object, array, or named constant. Look for a number. */
+    return _match_number_unicode(s, pystr, idx, next_idx_ptr);
+}
+
+static PyObject *
+scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* tp_call for Scanner: scan_once(string, idx).
+       Dispatches to scan_once_str or scan_once_unicode depending on the
+       type of the first argument, empties the key memo afterwards, and
+       passes the scanned term together with the end index to
+       _build_rval_index_tuple to produce the result.
+       A first argument that is neither str nor unicode raises TypeError. */
+    static char *kwlist[] = {"string", "idx", NULL};
+    PyScannerObject *scanner = (PyScannerObject *)self;
+    PyObject *text;
+    PyObject *term;
+    Py_ssize_t begin;
+    Py_ssize_t after = -1;
+
+    assert(PyScanner_Check(self));
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist,
+                                     &text, _convertPyInt_AsSsize_t, &begin))
+        return NULL;
+
+    /* reject unsupported input types up front */
+    if (!PyString_Check(text) && !PyUnicode_Check(text)) {
+        PyErr_Format(PyExc_TypeError,
+                     "first argument must be a string, not %.80s",
+                     Py_TYPE(text)->tp_name);
+        return NULL;
+    }
+
+    term = PyString_Check(text)
+        ? scan_once_str(scanner, text, begin, &after)
+        : scan_once_unicode(scanner, text, begin, &after);
+
+    /* the memo only lives for the duration of one top-level scan */
+    PyDict_Clear(scanner->memo);
+    return _build_rval_index_tuple(term, after);
+}
+
+static PyObject *
+scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    /* tp_new for Scanner: allocate the object and start every owned
+       reference out as NULL; the real values are filled in by scanner_init.
+       args and kwds are not used here.
+       Returns the new object, or NULL if allocation failed. */
+    PyScannerObject *s;
+    s = (PyScannerObject *)type->tp_alloc(type, 0);
+    if (s != NULL) {
+        s->encoding = NULL;
+        s->strict = NULL;
+        s->object_hook = NULL;
+        s->pairs_hook = NULL;
+        s->parse_float = NULL;
+        s->parse_int = NULL;
+        s->parse_constant = NULL;
+        /* scanner_init creates the memo only while it is still NULL, and
+           traverse/clear visit it, so initialise it like the other fields */
+        s->memo = NULL;
+    }
+    return (PyObject *)s;
+}
+
+static int
+scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* tp_init for Scanner: make_scanner(context).
+       Copies the decoding configuration from attributes of context:
+       encoding, strict, object_hook, object_pairs_hook, parse_float,
+       parse_int and parse_constant, and creates the key memo dict if the
+       object does not have one yet.
+
+       Returns 0 on success. Returns -1 with an exception set on failure,
+       after resetting every configuration member to NULL (the memo dict is
+       kept). */
+    PyObject *ctx;
+    static char *kwlist[] = {"context", NULL};
+    PyScannerObject *s;
+
+    assert(PyScanner_Check(self));
+    s = (PyScannerObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
+        return -1;
+
+    if (s->memo == NULL) {
+        s->memo = PyDict_New();
+        if (s->memo == NULL)
+            goto bail;
+    }
+
+    /* PyString_AS_STRING is used on encoding, so it must end up a PyString */
+    s->encoding = PyObject_GetAttrString(ctx, "encoding");
+    if (s->encoding == NULL)
+        goto bail;
+    if (s->encoding == Py_None) {
+        /* None selects the default encoding */
+        Py_DECREF(Py_None);
+        s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
+    }
+    else if (PyUnicode_Check(s->encoding)) {
+        /* a unicode encoding name is converted to a byte string */
+        PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
+        Py_DECREF(s->encoding);
+        s->encoding = tmp;
+    }
+    if (s->encoding == NULL)
+        goto bail;
+    if (!PyString_Check(s->encoding)) {
+        /* nothing above raised for this case, so set the error here;
+           returning -1 without an exception would surface as SystemError */
+        PyErr_SetString(PyExc_TypeError, "encoding must be a string");
+        goto bail;
+    }
+
+    /* The remaining attributes are stored as-is; only their presence on
+       the context object is verified */
+    s->strict = PyObject_GetAttrString(ctx, "strict");
+    if (s->strict == NULL)
+        goto bail;
+    s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
+    if (s->object_hook == NULL)
+        goto bail;
+    s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
+    if (s->pairs_hook == NULL)
+        goto bail;
+    s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
+    if (s->parse_float == NULL)
+        goto bail;
+    s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
+    if (s->parse_int == NULL)
+        goto bail;
+    s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
+    if (s->parse_constant == NULL)
+        goto bail;
+
+    return 0;
+
+bail:
+    /* release whatever was fetched before the failure (NULL-safe) */
+    Py_CLEAR(s->encoding);
+    Py_CLEAR(s->strict);
+    Py_CLEAR(s->object_hook);
+    Py_CLEAR(s->pairs_hook);
+    Py_CLEAR(s->parse_float);
+    Py_CLEAR(s->parse_int);
+    Py_CLEAR(s->parse_constant);
+    return -1;
+}
+
+/* Docstring exposed as Scanner.__doc__ through the tp_doc slot below */
+PyDoc_STRVAR(scanner_doc, "JSON scanner object");
+
+/* Type object for simplejson._speedups.Scanner.
+ Instances are callable (tp_call = scanner_call) and take part in cyclic
+ garbage collection (Py_TPFLAGS_HAVE_GC with tp_traverse / tp_clear).
+ The slots left at 0 with a commented-out name next to them (tp_getattro,
+ tp_setattro, tp_alloc, tp_free) are not set statically here.
+ NOTE(review): presumably they are assigned or inherited during module
+ initialisation, which is outside this part of the file - confirm. */
+static
+PyTypeObject PyScannerType = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* ob_size */
+ "simplejson._speedups.Scanner", /* tp_name */
+ sizeof(PyScannerObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ scanner_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ scanner_call, /* tp_call */
+ 0, /* tp_str */
+ 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
+ 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
+ scanner_doc, /* tp_doc */
+ scanner_traverse, /* tp_traverse */
+ scanner_clear, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ 0, /* tp_methods */
+ scanner_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ scanner_init, /* tp_init */
+ 0,/* PyType_GenericAlloc, */ /* tp_alloc */
+ scanner_new, /* tp_new */
+ 0,/* PyObject_GC_Del, */ /* tp_free */
+};
+
+static PyObject *
+encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    /* tp_new for Encoder: allocate the object and reset every owned
+       reference to NULL; encoder_init fills them in afterwards.
+       Returns the new object, or NULL if allocation failed. */
+    PyEncoderObject *enc = (PyEncoderObject *)type->tp_alloc(type, 0);
+
+    if (enc == NULL)
+        return NULL;
+
+    enc->markers = NULL;
+    enc->defaultfn = NULL;
+    enc->encoder = NULL;
+    enc->indent = NULL;
+    enc->key_separator = NULL;
+    enc->item_separator = NULL;
+    enc->sort_keys = NULL;
+    enc->skipkeys = NULL;
+    enc->key_memo = NULL;
+
+    return (PyObject *)enc;
+}
+
+static int
+encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* tp_init for Encoder: make_encoder(markers, default, encoder, indent,
+       key_separator, item_separator, sort_keys, skipkeys, allow_nan,
+       key_memo, use_decimal).
+
+       Stores a new reference to each object argument on the encoder and
+       records allow_nan / use_decimal as C truth values. fast_encode is set
+       when encoder is the C function py_encode_basestring_ascii, so strings
+       can be encoded without going through a Python-level call.
+
+       Returns 0 on success, -1 with an exception set on failure. */
+    static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", NULL};
+
+    PyEncoderObject *s;
+    PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
+    PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal;
+    int allow_nan_flag;
+    int use_decimal_flag;
+
+    assert(PyEncoder_Check(self));
+    s = (PyEncoderObject *)self;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOO:make_encoder", kwlist,
+        &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
+        &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal))
+        return -1;
+
+    /* PyObject_IsTrue returns -1 when the truth test itself raises; check
+       before touching the object so that the failure is reported instead
+       of being stored as a "true" flag with an exception left pending */
+    allow_nan_flag = PyObject_IsTrue(allow_nan);
+    if (allow_nan_flag < 0)
+        return -1;
+    use_decimal_flag = PyObject_IsTrue(use_decimal);
+    if (use_decimal_flag < 0)
+        return -1;
+
+    s->markers = markers;
+    s->defaultfn = defaultfn;
+    s->encoder = encoder;
+    s->indent = indent;
+    s->key_separator = key_separator;
+    s->item_separator = item_separator;
+    s->sort_keys = sort_keys;
+    s->skipkeys = skipkeys;
+    s->key_memo = key_memo;
+    s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
+    s->allow_nan = allow_nan_flag;
+    s->use_decimal = use_decimal_flag;
+
+    /* the parsed arguments are borrowed; take ownership of each one */
+    Py_INCREF(s->markers);
+    Py_INCREF(s->defaultfn);
+    Py_INCREF(s->encoder);
+    Py_INCREF(s->indent);
+    Py_INCREF(s->key_separator);
+    Py_INCREF(s->item_separator);
+    Py_INCREF(s->sort_keys);
+    Py_INCREF(s->skipkeys);
+    Py_INCREF(s->key_memo);
+    return 0;
+}
+
+static PyObject *
+encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    /* tp_call for Encoder: _iterencode(obj, _current_indent_level).
+       Encodes obj through encoder_listencode_obj into a fresh list and
+       returns that list, or NULL if argument parsing, allocation or
+       encoding failed. */
+    static char *kwlist[] = {"obj", "_current_indent_level", NULL};
+    PyEncoderObject *enc = (PyEncoderObject *)self;
+    PyObject *target;
+    PyObject *chunks;
+    Py_ssize_t level;
+
+    assert(PyEncoder_Check(self));
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
+                                     &target, _convertPyInt_AsSsize_t, &level))
+        return NULL;
+
+    chunks = PyList_New(0);
+    if (chunks != NULL && encoder_listencode_obj(enc, chunks, target, level)) {
+        /* encoding failed: discard the partial output */
+        Py_DECREF(chunks);
+        chunks = NULL;
+    }
+    return chunks;
+}
+
+static PyObject *
+_encoded_const(PyObject *obj)
+{
+    /* Return the JSON string representation of None, True, False
+       ("null", "true", "false") as a new reference.
+       The three strings are created on first use and cached in function
+       statics for the lifetime of the process.
+       Returns NULL with ValueError set for any other object, or NULL with
+       the allocation error set if the string could not be created. */
+    static PyObject *s_null = NULL;
+    static PyObject *s_true = NULL;
+    static PyObject *s_false = NULL;
+    PyObject **cached;
+    const char *text;
+
+    if (obj == Py_None) {
+        cached = &s_null;
+        text = "null";
+    }
+    else if (obj == Py_True) {
+        cached = &s_true;
+        text = "true";
+    }
+    else if (obj == Py_False) {
+        cached = &s_false;
+        text = "false";
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError, "not a const");
+        return NULL;
+    }
+
+    if (*cached == NULL) {
+        *cached = PyString_InternFromString(text);
+        /* creation can fail (e.g. out of memory); bail out instead of
+           calling Py_INCREF on a NULL pointer */
+        if (*cached == NULL)
+            return NULL;
+    }
+    Py_INCREF(*cached);
+    return *cached;
+}
+
+static PyObject *
+encoder_encode_float(PyEncoderObject *s, PyObject *obj)
+{
+    /* Return the JSON representation of a PyFloat as a new string.
+       Finite values use the float's repr(). Non-finite values become
+       "Infinity", "-Infinity" or "NaN" when s->allow_nan is set; otherwise
+       ValueError is raised and NULL returned. */
+    double value = PyFloat_AS_DOUBLE(obj);
+
+    /* common case first. Use a better float format here? */
+    if (Py_IS_FINITE(value))
+        return PyObject_Repr(obj);
+
+    if (!s->allow_nan) {
+        PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
+        return NULL;
+    }
+
+    if (value > 0)
+        return PyString_FromString("Infinity");
+    if (value < 0)
+        return PyString_FromString("-Infinity");
+
+    /* neither greater nor less than zero: not a number */
+    return PyString_FromString("NaN");
+}
+
+static PyObject *
+encoder_encode_string(PyEncoderObject *s, PyObject *obj)
+{
+    /* Return the JSON representation of a string.
+       Goes through the configured s->encoder callable unless fast_encode
+       is set, in which case the C implementation is invoked directly. */
+    if (!s->fast_encode)
+        return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
+
+    return py_encode_basestring_ascii(NULL, obj);
+}
+
+static int
+_steal_list_append(PyObject *lst, PyObject *stolen)
+{
+    /* Append stolen to lst and consume the caller's reference to it,
+       whether or not the append succeeded.
+       Returns the result of PyList_Append. */
+    int status = PyList_Append(lst, stolen);
+
+    Py_DECREF(stolen);
+    return status;
+}
+
+static int
+encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
+{
+ /* Encode Python object obj to a JSON term, rval is a PyList.
+ The encoded text is appended to rval as one or more strings.
+ Type checks run in a fixed order: constants, strings, ints/longs,
+ floats, lists/tuples, dicts, Decimal (only when s->use_decimal is set);
+ any other object is converted with s->defaultfn and the result is
+ encoded recursively.
+ Returns 0 on success and non-zero on failure.
+ */
+ PyObject *newobj;
+ int rv;
+
+ if (obj == Py_None || obj == Py_True || obj == Py_False) {
+ PyObject *cstr = _encoded_const(obj);
+ if (cstr == NULL)
+ return -1;
+ return _steal_list_append(rval, cstr);
+ }
+ else if (PyString_Check(obj) || PyUnicode_Check(obj))
+ {
+ PyObject *encoded = encoder_encode_string(s, obj);
+ if (encoded == NULL)
+ return -1;
+ return _steal_list_append(rval, encoded);
+ }
+ else if (PyInt_Check(obj) || PyLong_Check(obj)) {
+ /* integers are emitted as their str() form */
+ PyObject *encoded = PyObject_Str(obj);
+ if (encoded == NULL)
+ return -1;
+ return _steal_list_append(rval, encoded);
+ }
+ else if (PyFloat_Check(obj)) {
+ PyObject *encoded = encoder_encode_float(s, obj);
+ if (encoded == NULL)
+ return -1;
+ return _steal_list_append(rval, encoded);
+ }
+ else if (PyList_Check(obj) || PyTuple_Check(obj)) {
+ return encoder_listencode_list(s, rval, obj, indent_level);
+ }
+ else if (PyDict_Check(obj)) {
+ return encoder_listencode_dict(s, rval, obj, indent_level);
+ }
+ else if (s->use_decimal && Decimal_Check(obj)) {
+ /* Decimal values are emitted as their str() form */
+ PyObject *encoded = PyObject_Str(obj);
+ if (encoded == NULL)
+ return -1;
+ return _steal_list_append(rval, encoded);
+ }
+ else {
+ /* unknown type: convert with the user's default function */
+ PyObject *ident = NULL;
+ if (s->markers != Py_None) {
+ /* circular reference check: the object's address is used as
+ the key in s->markers while the object is being encoded */
+ int has_key;
+ ident = PyLong_FromVoidPtr(obj);
+ if (ident == NULL)
+ return -1;
+ has_key = PyDict_Contains(s->markers, ident);
+ if (has_key) {
+ /* has_key == -1 means the lookup itself failed and already
+ set an exception */
+ if (has_key != -1)
+ PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+ Py_DECREF(ident);
+ return -1;
+ }
+ if (PyDict_SetItem(s->markers, ident, obj)) {
+ Py_DECREF(ident);
+ return -1;
+ }
+ }
+ newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
+ if (newobj == NULL) {
+ Py_XDECREF(ident);
+ return -1;
+ }
+ /* encode whatever the default function returned */
+ rv = encoder_listencode_obj(s, rval, newobj, indent_level);
+ Py_DECREF(newobj);
+ if (rv) {
+ Py_XDECREF(ident);
+ return -1;
+ }
+ if (ident != NULL) {
+ /* done with this object: remove its marker again */
+ if (PyDict_DelItem(s->markers, ident)) {
+ Py_XDECREF(ident);
+ return -1;
+ }
+ Py_XDECREF(ident);
+ }
+ return rv;
+ }
+}
+
+static int
+encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
+{
+ /* Encode Python dict dct to a JSON term, rval is a PyList */
+ static PyObject *open_dict = NULL;
+ static PyObject *close_dict = NULL;
+ static PyObject *empty_dict = NULL;
+ static PyObject *iteritems = NULL;
+ PyObject *kstr = NULL;
+ PyObject *ident = NULL;
+ PyObject *key, *value;
+ PyObject *iter = NULL;
+ PyObject *item = NULL;
+ PyObject *encoded = NULL;
+ int skipkeys;
+ Py_ssize_t idx;
+
+ if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) {
+ open_dict = PyString_InternFromString("{");
+ close_dict = PyString_InternFromString("}");
+ empty_dict = PyString_InternFromString("{}");
+ iteritems = PyString_InternFromString("iteritems");
+ if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL)
+ return -1;
+ }
+ if (PyDict_Size(dct) == 0)
+ return PyList_Append(rval, empty_dict);
+
+ if (s->markers != Py_None) {
+ int has_key;
+ ident = PyLong_FromVoidPtr(dct);
+ if (ident == NULL)
+ goto bail;
+ has_key = PyDict_Contains(s->markers, ident);
+ if (has_key) {
+ if (has_key != -1)
+ PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+ goto bail;
+ }
+ if (PyDict_SetItem(s->markers, ident, dct)) {
+ goto bail;
+ }
+ }
+
+ if (PyList_Append(rval, open_dict))
+ goto bail;
+
+ if (s->indent != Py_None) {
+ /* TODO: DOES NOT RUN */
+ indent_level += 1;
+ /*
+ newline_indent = '\n' + (_indent * _current_indent_level)
+ separator = _item_separator + newline_indent
+ buf += newline_indent
+ */
+ }
+
+ /* TODO: C speedup not implemented for sort_keys */
+
+ skipkeys = PyObject_IsTrue(s->skipkeys);
+ idx = 0;
+ iter = PyObject_CallMethodObjArgs(dct, iteritems, NULL);
+ if (iter == NULL)
+ goto bail;
+ while ((item = PyIter_Next(iter))) {
+
+ key = PyTuple_GetItem(item, 0);
+ if (key == NULL)
+ goto bail;
+ value = PyTuple_GetItem(item, 1);
+ if (value == NULL)
+ goto bail;
+
+ encoded = PyDict_GetItem(s->key_memo, key);
+ if (encoded != NULL) {
+ Py_INCREF(encoded);
+ }
+ else if (PyString_Check(key) || PyUnicode_Check(key)) {
+ Py_INCREF(key);
+ kstr = key;
+ }
+ else if (PyFloat_Check(key)) {
+ kstr = encoder_encode_float(s, key);
+ if (kstr == NULL)
+ goto bail;
+ }
+ else if (PyInt_Check(key) || PyLong_Check(key)) {
+ kstr = PyObject_Str(key);
+ if (kstr == NULL)
+ goto bail;
+ }
+ else if (key == Py_True || key == Py_False || key == Py_None) {
+ kstr = _encoded_const(key);
+ if (kstr == NULL)
+ goto bail;
+ }
+ else if (skipkeys) {
+ Py_DECREF(item);
+ continue;
+ }
+ else {
+ /* TODO: include repr of key */
+ PyErr_SetString(PyExc_ValueError, "keys must be a string");
+ goto bail;
+ }
+
+ if (idx) {
+ if (PyList_Append(rval, s->item_separator))
+ goto bail;
+ }
+
+ if (encoded == NULL) {
+ encoded = encoder_encode_string(s, kstr);
+ Py_CLEAR(kstr);
+ if (encoded == NULL)
+ goto bail;
+ if (PyDict_SetItem(s->key_memo, key, encoded))
+ goto bail;
+ }
+ if (PyList_Append(rval, encoded)) {
+ goto bail;
+ }
+ Py_CLEAR(encoded);
+ if (PyList_Append(rval, s->key_separator))
+ goto bail;
+ if (encoder_listencode_obj(s, rval, value, indent_level))
+ goto bail;
+ Py_CLEAR(item);
+ idx += 1;
+ }
+ Py_CLEAR(iter);
+ if (PyErr_Occurred())
+ goto bail;
+ if (ident != NULL) {
+ if (PyDict_DelItem(s->markers, ident))
+ goto bail;
+ Py_CLEAR(ident);
+ }
+ if (s->indent != Py_None) {
+ /* TODO: DOES NOT RUN */
+ indent_level -= 1;
+ /*
+ yield '\n' + (_indent * _current_indent_level)
+ */
+ }
+ if (PyList_Append(rval, close_dict))
+ goto bail;
+ return 0;
+
+bail:
+ Py_XDECREF(encoded);
+ Py_XDECREF(item);
+ Py_XDECREF(iter);
+ Py_XDECREF(kstr);
+ Py_XDECREF(ident);
+ return -1;
+}
+
+
+static int
+encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
+{
+ /* Encode Python list seq to a JSON term, rval is a PyList */
+ static PyObject *open_array = NULL;
+ static PyObject *close_array = NULL;
+ static PyObject *empty_array = NULL;
+ PyObject *ident = NULL;
+ PyObject *iter = NULL;
+ PyObject *obj = NULL;
+ int is_true;
+ int i = 0;
+
+ if (open_array == NULL || close_array == NULL || empty_array == NULL) {
+ open_array = PyString_InternFromString("[");
+ close_array = PyString_InternFromString("]");
+ empty_array = PyString_InternFromString("[]");
+ if (open_array == NULL || close_array == NULL || empty_array == NULL)
+ return -1;
+ }
+ ident = NULL;
+ is_true = PyObject_IsTrue(seq);
+ if (is_true == -1)
+ return -1;
+ else if (is_true == 0)
+ return PyList_Append(rval, empty_array);
+
+ if (s->markers != Py_None) {
+ int has_key;
+ ident = PyLong_FromVoidPtr(seq);
+ if (ident == NULL)
+ goto bail;
+ has_key = PyDict_Contains(s->markers, ident);
+ if (has_key) {
+ if (has_key != -1)
+ PyErr_SetString(PyExc_ValueError, "Circular reference detected");
+ goto bail;
+ }
+ if (PyDict_SetItem(s->markers, ident, seq)) {
+ goto bail;
+ }
+ }
+
+ iter = PyObject_GetIter(seq);
+ if (iter == NULL)
+ goto bail;
+
+ if (PyList_Append(rval, open_array))
+ goto bail;
+ if (s->indent != Py_None) {
+ /* TODO: DOES NOT RUN */
+ indent_level += 1;
+ /*
+ newline_indent = '\n' + (_indent * _current_indent_level)
+ separator = _item_separator + newline_indent
+ buf += newline_indent
+ */
+ }
+ while ((obj = PyIter_Next(iter))) {
+ if (i) {
+ if (PyList_Append(rval, s->item_separator))
+ goto bail;
+ }
+ if (encoder_listencode_obj(s, rval, obj, indent_level))
+ goto bail;
+ i++;
+ Py_CLEAR(obj);
+ }
+ Py_CLEAR(iter);
+ if (PyErr_Occurred())
+ goto bail;
+ if (ident != NULL) {
+ if (PyDict_DelItem(s->markers, ident))
+ goto bail;
+ Py_CLEAR(ident);
+ }
+ if (s->indent != Py_None) {
+ /* TODO: DOES NOT RUN */
+ indent_level -= 1;
+ /*
+ yield '\n' + (_indent * _current_indent_level)
+ */
+ }
+ if (PyList_Append(rval, close_array))
+ goto bail;
+ return 0;
+
+bail:
+ Py_XDECREF(obj);
+ Py_XDECREF(iter);
+ Py_XDECREF(ident);
+ return -1;
+}
+
+static void
+encoder_dealloc(PyObject *self)
+{
+    PyObject_GC_UnTrack(self); /* Deallocate Encoder: the type has Py_TPFLAGS_HAVE_GC, so untrack before any field is cleared */
+    encoder_clear(self);
+    Py_TYPE(self)->tp_free(self);
+}
+
+static int
+encoder_traverse(PyObject *self, visitproc visit, void *arg)
+{
+ PyEncoderObject *s; /* tp_traverse slot: hand each object-valued field to visit through Py_VISIT */
+ assert(PyEncoder_Check(self));
+ s = (PyEncoderObject *)self;
+ Py_VISIT(s->markers);
+ Py_VISIT(s->defaultfn);
+ Py_VISIT(s->encoder);
+ Py_VISIT(s->indent);
+ Py_VISIT(s->key_separator);
+ Py_VISIT(s->item_separator);
+ Py_VISIT(s->sort_keys);
+ Py_VISIT(s->skipkeys);
+ Py_VISIT(s->key_memo);
+ return 0;
+}
+
+static int
+encoder_clear(PyObject *self)
+{
+ /* tp_clear slot: release every object-valued field with Py_CLEAR; the storage itself is not freed here */
+ PyEncoderObject *s;
+ assert(PyEncoder_Check(self));
+ s = (PyEncoderObject *)self;
+ Py_CLEAR(s->markers);
+ Py_CLEAR(s->defaultfn);
+ Py_CLEAR(s->encoder);
+ Py_CLEAR(s->indent);
+ Py_CLEAR(s->key_separator);
+ Py_CLEAR(s->item_separator);
+ Py_CLEAR(s->sort_keys);
+ Py_CLEAR(s->skipkeys);
+ Py_CLEAR(s->key_memo);
+ return 0;
+}
+
+PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
+
+static
+PyTypeObject PyEncoderType = {
+ PyObject_HEAD_INIT(NULL)
+ 0, /* tp_internal */
+ "simplejson._speedups.Encoder", /* tp_name */
+ sizeof(PyEncoderObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ encoder_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ encoder_call, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
+ encoder_doc, /* tp_doc */
+ encoder_traverse, /* tp_traverse */
+ encoder_clear, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ 0, /* tp_methods */
+ encoder_members, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ encoder_init, /* tp_init */
+ 0, /* tp_alloc */
+ encoder_new, /* tp_new */
+ 0, /* tp_free */
+};
+
+static PyMethodDef speedups_methods[] = {
+ {"encode_basestring_ascii",
+ (PyCFunction)py_encode_basestring_ascii,
+ METH_O,
+ pydoc_encode_basestring_ascii},
+ {"scanstring",
+ (PyCFunction)py_scanstring,
+ METH_VARARGS,
+ pydoc_scanstring},
+ {NULL, NULL, 0, NULL}
+};
+
+PyDoc_STRVAR(module_doc,
+"simplejson speedups\n");
+
+void
+init_speedups(void)
+{
+    PyObject *m, *decimal; /* Python 2 module init: every early return leaves the pending exception set */
+    PyScannerType.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&PyScannerType) < 0)
+        return;
+    PyEncoderType.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&PyEncoderType) < 0)
+        return;
+    decimal = PyImport_ImportModule("decimal");
+    if (decimal == NULL)
+        return;
+    DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal"); /* kept in the file-level pointer */
+    Py_DECREF(decimal);
+    if (DecimalTypePtr == NULL)
+        return;
+    m = Py_InitModule3("_speedups", speedups_methods, module_doc);
+    if (m == NULL) /* module creation failed: PyModule_AddObject must not be handed NULL */
+        return;
+    Py_INCREF((PyObject*)&PyScannerType);
+    PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
+    Py_INCREF((PyObject*)&PyEncoderType);
+    PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
+}
diff --git a/tablib/packages/simplejson/decoder.py b/tablib/packages/simplejson/decoder.py
new file mode 100644
index 0000000..4cf4015
--- /dev/null
+++ b/tablib/packages/simplejson/decoder.py
@@ -0,0 +1,421 @@
+"""Implementation of JSONDecoder
+"""
+import re
+import sys
+import struct
+
+from simplejson.scanner import make_scanner
+def _import_c_scanstring():
+ try:
+ from simplejson._speedups import scanstring
+ return scanstring
+ except ImportError:
+ return None
+c_scanstring = _import_c_scanstring()
+
+__all__ = ['JSONDecoder']
+
+FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
+
+def _floatconstants():
+ _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
+ # The struct module in Python 2.4 would get frexp() out of range here
+ # when an endian is specified in the format string. Fixed in Python 2.5+
+ if sys.byteorder != 'big':
+ _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
+ nan, inf = struct.unpack('dd', _BYTES)
+ return nan, inf, -inf
+
+NaN, PosInf, NegInf = _floatconstants()
+
+
+class JSONDecodeError(ValueError):
+    """Subclass of ValueError with the following additional properties:
+
+    msg: The unformatted error message
+    doc: The JSON document being parsed
+    pos: The start index of doc where parsing failed
+    end: The end index of doc where parsing failed (may be None)
+    lineno: The line corresponding to pos
+    colno: The column corresponding to pos
+    endlineno: The line corresponding to end (may be None)
+    endcolno: The column corresponding to end (may be None)
+
+    """
+    def __init__(self, msg, doc, pos, end=None):
+        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
+        self.msg = msg
+        self.doc = doc
+        self.pos = pos
+        self.end = end
+        self.lineno, self.colno = linecol(doc, pos)
+        if end is not None:
+            self.endlineno, self.endcolno = linecol(doc, end)  # from end, not pos, as errmsg() does
+        else:
+            self.endlineno, self.endcolno = None, None
+
+
+def linecol(doc, pos):  # Return the (lineno, colno) pair for index pos in doc.
+ lineno = doc.count('\n', 0, pos) + 1  # 1-based: newlines before pos, plus one
+ if lineno == 1:
+ colno = pos  # first line: the column is the raw index, so it starts at 0
+ else:
+ colno = pos - doc.rindex('\n', 0, pos)  # later lines: distance from the preceding newline, so it starts at 1
+ return lineno, colno
+
+
+def errmsg(msg, doc, pos, end=None):
+ # Format msg with the line/column of pos, plus those of end when end is given. Note that this function is called from _speedups
+ lineno, colno = linecol(doc, pos)
+ if end is None:
+ #fmt = '{0}: line {1} column {2} (char {3})'
+ #return fmt.format(msg, lineno, colno, pos)
+ fmt = '%s: line %d column %d (char %d)'
+ return fmt % (msg, lineno, colno, pos)
+ endlineno, endcolno = linecol(doc, end)
+ #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
+ #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
+ fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
+ return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
+
+
+_CONSTANTS = {
+ '-Infinity': NegInf,
+ 'Infinity': PosInf,
+ 'NaN': NaN,
+}
+
+STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
+BACKSLASH = {
+ '"': u'"', '\\': u'\\', '/': u'/',
+ 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
+}
+
+DEFAULT_ENCODING = "utf-8"
+
+def py_scanstring(s, end, encoding=None, strict=True,
+ _b=BACKSLASH, _m=STRINGCHUNK.match):
+ """Scan the string s for a JSON string. End is the index of the
+ character in s after the quote that started the JSON string.
+ Unescapes all valid JSON string escape sequences and raises ValueError
+ on attempt to decode an invalid string. If strict is False then literal
+ control characters are allowed in the string.
+
+ Returns a tuple of the decoded string and the index of the character in s
+ after the end quote."""
+ if encoding is None:
+ encoding = DEFAULT_ENCODING
+ chunks = []
+ _append = chunks.append
+ begin = end - 1
+ while 1:
+ chunk = _m(s, end)
+ if chunk is None:
+ raise JSONDecodeError(
+ "Unterminated string starting at", s, begin)
+ end = chunk.end()
+ content, terminator = chunk.groups()
+ # Content contains zero or more unescaped string characters
+ if content:
+ if not isinstance(content, unicode):
+ content = unicode(content, encoding)
+ _append(content)
+ # Terminator is the end of string, a literal control character,
+ # or a backslash denoting that an escape sequence follows
+ if terminator == '"':
+ break
+ elif terminator != '\\':
+ if strict:
+ msg = "Invalid control character %r at" % (terminator,)
+ #msg = "Invalid control character {0!r} at".format(terminator)
+ raise JSONDecodeError(msg, s, end)
+ else:
+ _append(terminator)
+ continue
+ try:
+ esc = s[end]
+ except IndexError:
+ raise JSONDecodeError(
+ "Unterminated string starting at", s, begin)
+ # If not a unicode escape sequence, must be in the lookup table
+ if esc != 'u':
+ try:
+ char = _b[esc]
+ except KeyError:
+ msg = "Invalid \\escape: " + repr(esc)
+ raise JSONDecodeError(msg, s, end)
+ end += 1
+ else:
+ # Unicode escape sequence
+ esc = s[end + 1:end + 5]
+ next_end = end + 5
+ if len(esc) != 4:
+ msg = "Invalid \\uXXXX escape"
+ raise JSONDecodeError(msg, s, end)
+ uni = int(esc, 16)
+ # Check for surrogate pair on UCS-4 systems
+ if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+ msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+ if not s[end + 5:end + 7] == '\\u':
+ raise JSONDecodeError(msg, s, end)
+ esc2 = s[end + 7:end + 11]
+ if len(esc2) != 4:
+ raise JSONDecodeError(msg, s, end)
+ uni2 = int(esc2, 16)
+ uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+ next_end += 6
+ char = unichr(uni)
+ end = next_end
+ # Append the unescaped character
+ _append(char)
+ return u''.join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject((s, end), encoding, strict, scan_once, object_hook,
+        object_pairs_hook, memo=None,
+        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    """Parse the JSON object whose body starts at s[end]; return (result, index just past '}')."""
+    # Backwards compatibility
+    if memo is None:
+        memo = {}
+    memo_get = memo.setdefault
+    pairs = []
+    # Use a slice to prevent IndexError from being raised, the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            if object_pairs_hook is not None:
+                result = object_pairs_hook(pairs)
+                return result, end + 1  # step past '}' exactly like the object_hook path below
+            pairs = {}
+            if object_hook is not None:
+                pairs = object_hook(pairs)
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise JSONDecodeError("Expecting property name", s, end)
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+        key = memo_get(key, key)
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
+        if s[end:end + 1] != ':':
+            end = _w(s, end).end()
+            if s[end:end + 1] != ':':
+                raise JSONDecodeError("Expecting : delimiter", s, end)
+
+        end += 1
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+        try:
+            value, end = scan_once(s, end)
+        except StopIteration:
+            raise JSONDecodeError("Expecting object", s, end)
+        pairs.append((key, value))
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end = _w(s, end + 1).end()
+                nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+        end += 1
+
+        if nextchar == '}':
+            break
+        elif nextchar != ',':
+            raise JSONDecodeError("Expecting , delimiter", s, end - 1)
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end += 1
+                nextchar = s[end]
+                if nextchar in _ws:
+                    end = _w(s, end + 1).end()
+                    nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+        end += 1
+        if nextchar != '"':
+            raise JSONDecodeError("Expecting property name", s, end - 1)
+
+    if object_pairs_hook is not None:
+        result = object_pairs_hook(pairs)
+        return result, end
+    pairs = dict(pairs)
+    if object_hook is not None:
+        pairs = object_hook(pairs)
+    return pairs, end
+
+def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    """Parse the JSON array whose body starts at s[end]; return (list, index just past ']')."""
+    values = []
+    nextchar = s[end:end + 1]
+    if nextchar in _ws:
+        end = _w(s, end + 1).end()
+        nextchar = s[end:end + 1]
+    # Look-ahead for trivial empty array
+    if nextchar == ']':
+        return values, end + 1
+    _append = values.append
+    while True:
+        try:
+            value, end = scan_once(s, end)
+        except StopIteration:
+            raise JSONDecodeError("Expecting object", s, end)
+        _append(value)
+        nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end + 1).end()
+            nextchar = s[end:end + 1]
+        end += 1
+        if nextchar == ']':
+            break
+        elif nextchar != ',':
+            raise JSONDecodeError("Expecting , delimiter", s, end - 1)  # end was already advanced past the bad character
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+    return values, end
+
+class JSONDecoder(object):
+ """Simple JSON decoder
+
+ Performs the following translations in decoding by default:
+
+ +---------------+-------------------+
+ | JSON | Python |
+ +===============+===================+
+ | object | dict |
+ +---------------+-------------------+
+ | array | list |
+ +---------------+-------------------+
+ | string | unicode |
+ +---------------+-------------------+
+ | number (int) | int, long |
+ +---------------+-------------------+
+ | number (real) | float |
+ +---------------+-------------------+
+ | true | True |
+ +---------------+-------------------+
+ | false | False |
+ +---------------+-------------------+
+ | null | None |
+ +---------------+-------------------+
+
+ It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+ their corresponding ``float`` values, which is outside the JSON spec.
+
+ """
+
+ def __init__(self, encoding=None, object_hook=None, parse_float=None,
+ parse_int=None, parse_constant=None, strict=True,
+ object_pairs_hook=None):
+ """
+ *encoding* determines the encoding used to interpret any
+ :class:`str` objects decoded by this instance (``'utf-8'`` by
+ default). It has no effect when decoding :class:`unicode` objects.
+
+ Note that currently only encodings that are a superset of ASCII work,
+ strings of other encodings should be passed in as :class:`unicode`.
+
+ *object_hook*, if specified, will be called with the result of every
+ JSON object decoded and its return value will be used in place of the
+ given :class:`dict`. This can be used to provide custom
+ deserializations (e.g. to support JSON-RPC class hinting).
+
+ *object_pairs_hook* is an optional function that will be called with
+ the result of any object literal decode with an ordered list of pairs.
+ The return value of *object_pairs_hook* will be used instead of the
+ :class:`dict`. This feature can be used to implement custom decoders
+ that rely on the order that the key and value pairs are decoded (for
+ example, :func:`collections.OrderedDict` will remember the order of
+ insertion). If *object_hook* is also defined, the *object_pairs_hook*
+ takes priority.
+
+ *parse_float*, if specified, will be called with the string of every
+ JSON float to be decoded. By default, this is equivalent to
+ ``float(num_str)``. This can be used to use another datatype or parser
+ for JSON floats (e.g. :class:`decimal.Decimal`).
+
+ *parse_int*, if specified, will be called with the string of every
+ JSON int to be decoded. By default, this is equivalent to
+ ``int(num_str)``. This can be used to use another datatype or parser
+ for JSON integers (e.g. :class:`float`).
+
+ *parse_constant*, if specified, will be called with one of the
+ following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
+ can be used to raise an exception if invalid JSON numbers are
+ encountered.
+
+ *strict* controls the parser's behavior when it encounters an
+ invalid control character in a string. The default setting of
+ ``True`` means that unescaped control characters are parse errors, if
+ ``False`` then control characters will be allowed in strings.
+
+ """
+ self.encoding = encoding
+ self.object_hook = object_hook
+ self.object_pairs_hook = object_pairs_hook
+ self.parse_float = parse_float or float
+ self.parse_int = parse_int or int
+ self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+ self.strict = strict
+ self.parse_object = JSONObject
+ self.parse_array = JSONArray
+ self.parse_string = scanstring
+ self.memo = {}
+ self.scan_once = make_scanner(self)
+
+ def decode(self, s, _w=WHITESPACE.match):
+ """Return the Python representation of ``s`` (a ``str`` or ``unicode``
+ instance containing a JSON document)
+
+ """
+ obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+ end = _w(s, end).end()
+ if end != len(s):
+ raise JSONDecodeError("Extra data", s, end, len(s))
+ return obj
+
+ def raw_decode(self, s, idx=0):
+ """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
+ beginning with a JSON document) and return a 2-tuple of the Python
+ representation and the index in ``s`` where the document ended.
+
+ This can be used to decode a JSON document from a string that may
+ have extraneous data at the end.
+
+ """
+ try:
+ obj, end = self.scan_once(s, idx)
+ except StopIteration:
+ raise JSONDecodeError("No JSON object could be decoded", s, idx)
+ return obj, end
diff --git a/tablib/packages/simplejson/encoder.py b/tablib/packages/simplejson/encoder.py
new file mode 100644
index 0000000..cab8456
--- /dev/null
+++ b/tablib/packages/simplejson/encoder.py
@@ -0,0 +1,501 @@
+"""Implementation of JSONEncoder
+"""
+import re
+from decimal import Decimal
+
+def _import_speedups():
+ try:
+ from simplejson import _speedups
+ return _speedups.encode_basestring_ascii, _speedups.make_encoder
+ except ImportError:
+ return None, None
+c_encode_basestring_ascii, c_make_encoder = _import_speedups()
+
+from simplejson.decoder import PosInf
+
+ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
+ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
+HAS_UTF8 = re.compile(r'[\x80-\xff]')
+ESCAPE_DCT = {
+ '\\': '\\\\',
+ '"': '\\"',
+ '\b': '\\b',
+ '\f': '\\f',
+ '\n': '\\n',
+ '\r': '\\r',
+ '\t': '\\t',
+}
+for i in range(0x20):
+ #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
+ ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+
+FLOAT_REPR = repr
+
+def encode_basestring(s):
+ """Return a JSON representation of a Python string
+
+ """
+ if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+ s = s.decode('utf-8')
+ def replace(match):
+ return ESCAPE_DCT[match.group(0)]
+ return u'"' + ESCAPE.sub(replace, s) + u'"'
+
+
+def py_encode_basestring_ascii(s):
+ """Return an ASCII-only JSON representation of a Python string
+
+ """
+ if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+ s = s.decode('utf-8')
+ def replace(match):
+ s = match.group(0)
+ try:
+ return ESCAPE_DCT[s]
+ except KeyError:
+ n = ord(s)
+ if n < 0x10000:
+ #return '\\u{0:04x}'.format(n)
+ return '\\u%04x' % (n,)
+ else:
+ # surrogate pair
+ n -= 0x10000
+ s1 = 0xd800 | ((n >> 10) & 0x3ff)
+ s2 = 0xdc00 | (n & 0x3ff)
+ #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
+ return '\\u%04x\\u%04x' % (s1, s2)
+ return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+
+
+encode_basestring_ascii = (
+ c_encode_basestring_ascii or py_encode_basestring_ascii)
+
+class JSONEncoder(object):
+ """Extensible JSON encoder for Python data structures.
+
+ Supports the following objects and types by default:
+
+ +-------------------+---------------+
+ | Python | JSON |
+ +===================+===============+
+ | dict | object |
+ +-------------------+---------------+
+ | list, tuple | array |
+ +-------------------+---------------+
+ | str, unicode | string |
+ +-------------------+---------------+
+ | int, long, float | number |
+ +-------------------+---------------+
+ | True | true |
+ +-------------------+---------------+
+ | False | false |
+ +-------------------+---------------+
+ | None | null |
+ +-------------------+---------------+
+
+ To extend this to recognize other objects, subclass and implement a
+ ``.default()`` method with another method that returns a serializable
+ object for ``o`` if possible, otherwise it should call the superclass
+ implementation (to raise ``TypeError``).
+
+ """
+ item_separator = ', '
+ key_separator = ': '
+ def __init__(self, skipkeys=False, ensure_ascii=True,
+ check_circular=True, allow_nan=True, sort_keys=False,
+ indent=None, separators=None, encoding='utf-8', default=None,
+ use_decimal=False):
+ """Constructor for JSONEncoder, with sensible defaults.
+
+ If skipkeys is false, then it is a TypeError to attempt
+ encoding of keys that are not str, int, long, float or None. If
+ skipkeys is True, such items are simply skipped.
+
+ If ensure_ascii is true, the output is guaranteed to be str
+ objects with all incoming unicode characters escaped. If
+ ensure_ascii is false, the output will be unicode object.
+
+ If check_circular is true, then lists, dicts, and custom encoded
+ objects will be checked for circular references during encoding to
+ prevent an infinite recursion (which would cause an OverflowError).
+ Otherwise, no such check takes place.
+
+ If allow_nan is true, then NaN, Infinity, and -Infinity will be
+ encoded as such. This behavior is not JSON specification compliant,
+ but is consistent with most JavaScript based encoders and decoders.
+ Otherwise, it will be a ValueError to encode such floats.
+
+ If sort_keys is true, then the output of dictionaries will be
+ sorted by key; this is useful for regression tests to ensure
+ that JSON serializations can be compared on a day-to-day basis.
+
+ If indent is a string, then JSON array elements and object members
+ will be pretty-printed with a newline followed by that string repeated
+ for each level of nesting. ``None`` (the default) selects the most compact
+ representation without any newlines. For backwards compatibility with
+ versions of simplejson earlier than 2.1.0, an integer is also accepted
+ and is converted to a string with that many spaces.
+
+ If specified, separators should be a (item_separator, key_separator)
+ tuple. The default is (', ', ': '). To get the most compact JSON
+ representation you should specify (',', ':') to eliminate whitespace.
+
+ If specified, default is a function that gets called for objects
+ that can't otherwise be serialized. It should return a JSON encodable
+ version of the object or raise a ``TypeError``.
+
+ If encoding is not None, then all input strings will be
+ transformed into unicode using that encoding prior to JSON-encoding.
+ The default is UTF-8.
+
+ If use_decimal is true (not the default), ``decimal.Decimal`` will
+ be supported directly by the encoder. For the inverse, decode JSON
+ with ``parse_float=decimal.Decimal``.
+
+ """
+
+ self.skipkeys = skipkeys
+ self.ensure_ascii = ensure_ascii
+ self.check_circular = check_circular
+ self.allow_nan = allow_nan
+ self.sort_keys = sort_keys
+ self.use_decimal = use_decimal
+ if isinstance(indent, (int, long)):
+ indent = ' ' * indent
+ self.indent = indent
+ if separators is not None:
+ self.item_separator, self.key_separator = separators
+ if default is not None:
+ self.default = default
+ self.encoding = encoding
+
+ def default(self, o):
+ """Implement this method in a subclass such that it returns
+ a serializable object for ``o``, or calls the base implementation
+ (to raise a ``TypeError``).
+
+ For example, to support arbitrary iterators, you could
+ implement default like this::
+
+ def default(self, o):
+ try:
+ iterable = iter(o)
+ except TypeError:
+ pass
+ else:
+ return list(iterable)
+ return JSONEncoder.default(self, o)
+
+ """
+ raise TypeError(repr(o) + " is not JSON serializable")
+
+ def encode(self, o):
+ """Return a JSON string representation of a Python data structure.
+
+ >>> from simplejson import JSONEncoder
+ >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
+ '{"foo": ["bar", "baz"]}'
+
+ """
+ # This is for extremely simple cases and benchmarks.
+ if isinstance(o, basestring):
+ if isinstance(o, str):
+ _encoding = self.encoding
+ if (_encoding is not None
+ and not (_encoding == 'utf-8')):
+ o = o.decode(_encoding)
+ if self.ensure_ascii:
+ return encode_basestring_ascii(o)
+ else:
+ return encode_basestring(o)
+ # This doesn't pass the iterator directly to ''.join() because the
+ # exceptions aren't as detailed. The list call should be roughly
+ # equivalent to the PySequence_Fast that ''.join() would do.
+ chunks = self.iterencode(o, _one_shot=True)
+ if not isinstance(chunks, (list, tuple)):
+ chunks = list(chunks)
+ if self.ensure_ascii:
+ return ''.join(chunks)
+ else:
+ return u''.join(chunks)
+
+ def iterencode(self, o, _one_shot=False):
+ """Encode the given object and yield each string
+ representation as available.
+
+ For example::
+
+ for chunk in JSONEncoder().iterencode(bigobject):
+ mysocket.write(chunk)
+
+ """
+ if self.check_circular:
+ markers = {}
+ else:
+ markers = None
+ if self.ensure_ascii:
+ _encoder = encode_basestring_ascii
+ else:
+ _encoder = encode_basestring
+ if self.encoding != 'utf-8':
+ def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
+ if isinstance(o, str):
+ o = o.decode(_encoding)
+ return _orig_encoder(o)
+
+ def floatstr(o, allow_nan=self.allow_nan,
+ _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
+ # Check for specials. Note that this type of test is processor
+ # and/or platform-specific, so do tests which don't depend on
+ # the internals.
+
+ if o != o:
+ text = 'NaN'
+ elif o == _inf:
+ text = 'Infinity'
+ elif o == _neginf:
+ text = '-Infinity'
+ else:
+ return _repr(o)
+
+ if not allow_nan:
+ raise ValueError(
+ "Out of range float values are not JSON compliant: " +
+ repr(o))
+
+ return text
+
+
+ key_memo = {}
+ if (_one_shot and c_make_encoder is not None
+ and not self.indent and not self.sort_keys):
+ _iterencode = c_make_encoder(
+ markers, self.default, _encoder, self.indent,
+ self.key_separator, self.item_separator, self.sort_keys,
+ self.skipkeys, self.allow_nan, key_memo, self.use_decimal)
+ else:
+ _iterencode = _make_iterencode(
+ markers, self.default, _encoder, self.indent, floatstr,
+ self.key_separator, self.item_separator, self.sort_keys,
+ self.skipkeys, _one_shot, self.use_decimal)
+ try:
+ return _iterencode(o, 0)
+ finally:
+ key_memo.clear()
+
+
+class JSONEncoderForHTML(JSONEncoder):
+ """An encoder that produces JSON safe to embed in HTML.
+
+ To embed JSON content in, say, a script tag on a web page, the
+ characters &, < and > should be escaped. They cannot be escaped
+ with the usual entities (e.g. &) because they are not expanded
+ within '
+ self.assertEqual(
+ r'"\u003c/script\u003e\u003cscript\u003e'
+ r'alert(\"gotcha\")\u003c/script\u003e"',
+ self.encoder.encode(bad_string))
+ self.assertEqual(
+ bad_string, self.decoder.decode(
+ self.encoder.encode(bad_string)))
diff --git a/tablib/packages/simplejson/tests/test_fail.py b/tablib/packages/simplejson/tests/test_fail.py
new file mode 100644
index 0000000..646c0f4
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_fail.py
@@ -0,0 +1,91 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# Fri Dec 30 18:57:26 2005
+JSONDOCS = [
+ # http://json.org/JSON_checker/test/fail1.json
+ '"A JSON payload should be an object or array, not a string."',
+ # http://json.org/JSON_checker/test/fail2.json
+ '["Unclosed array"',
+ # http://json.org/JSON_checker/test/fail3.json
+ '{unquoted_key: "keys must be quoted}',
+ # http://json.org/JSON_checker/test/fail4.json
+ '["extra comma",]',
+ # http://json.org/JSON_checker/test/fail5.json
+ '["double extra comma",,]',
+ # http://json.org/JSON_checker/test/fail6.json
+ '[ , "<-- missing value"]',
+ # http://json.org/JSON_checker/test/fail7.json
+ '["Comma after the close"],',
+ # http://json.org/JSON_checker/test/fail8.json
+ '["Extra close"]]',
+ # http://json.org/JSON_checker/test/fail9.json
+ '{"Extra comma": true,}',
+ # http://json.org/JSON_checker/test/fail10.json
+ '{"Extra value after close": true} "misplaced quoted value"',
+ # http://json.org/JSON_checker/test/fail11.json
+ '{"Illegal expression": 1 + 2}',
+ # http://json.org/JSON_checker/test/fail12.json
+ '{"Illegal invocation": alert()}',
+ # http://json.org/JSON_checker/test/fail13.json
+ '{"Numbers cannot have leading zeroes": 013}',
+ # http://json.org/JSON_checker/test/fail14.json
+ '{"Numbers cannot be hex": 0x14}',
+ # http://json.org/JSON_checker/test/fail15.json
+ '["Illegal backslash escape: \\x15"]',
+ # http://json.org/JSON_checker/test/fail16.json
+ '["Illegal backslash escape: \\\'"]',
+ # http://json.org/JSON_checker/test/fail17.json
+ '["Illegal backslash escape: \\017"]',
+ # http://json.org/JSON_checker/test/fail18.json
+ '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]',
+ # http://json.org/JSON_checker/test/fail19.json
+ '{"Missing colon" null}',
+ # http://json.org/JSON_checker/test/fail20.json
+ '{"Double colon":: null}',
+ # http://json.org/JSON_checker/test/fail21.json
+ '{"Comma instead of colon", null}',
+ # http://json.org/JSON_checker/test/fail22.json
+ '["Colon instead of comma": false]',
+ # http://json.org/JSON_checker/test/fail23.json
+ '["Bad value", truth]',
+ # http://json.org/JSON_checker/test/fail24.json
+ "['single quote']",
+ # http://code.google.com/p/simplejson/issues/detail?id=3
+ u'["A\u001FZ control characters in string"]',
+]
+
+SKIPS = {
+ 1: "why not have a string payload?",
+ 18: "spec doesn't specify any nesting limitations",
+}
+
+class TestFail(TestCase):
+ def test_failures(self):
+ for idx, doc in enumerate(JSONDOCS):
+ idx = idx + 1
+ if idx in SKIPS:
+ json.loads(doc)
+ continue
+ try:
+ json.loads(doc)
+ except json.JSONDecodeError:
+ pass
+ else:
+ #self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc))
+ self.fail("Expected failure for fail%d.json: %r" % (idx, doc))
+
+ def test_array_decoder_issue46(self):
+ # http://code.google.com/p/simplejson/issues/detail?id=46
+ for doc in [u'[,]', '[,]']:
+ try:
+ json.loads(doc)
+ except json.JSONDecodeError, e:
+ self.assertEquals(e.pos, 1)
+ self.assertEquals(e.lineno, 1)
+ self.assertEquals(e.colno, 1)
+ except Exception, e:
+ self.fail("Unexpected exception raised %r %s" % (e, e))
+ else:
+ self.fail("Unexpected success parsing '[,]'")
\ No newline at end of file
diff --git a/tablib/packages/simplejson/tests/test_float.py b/tablib/packages/simplejson/tests/test_float.py
new file mode 100644
index 0000000..94502c6
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_float.py
@@ -0,0 +1,19 @@
+import math
+from unittest import TestCase
+
+import simplejson as json
+
+class TestFloat(TestCase):
+ def test_floats(self):
+ for num in [1617161771.7650001, math.pi, math.pi**100,
+ math.pi**-100, 3.1]:
+ self.assertEquals(float(json.dumps(num)), num)
+ self.assertEquals(json.loads(json.dumps(num)), num)
+ self.assertEquals(json.loads(unicode(json.dumps(num))), num)
+
+ def test_ints(self):
+ for num in [1, 1L, 1<<32, 1<<64]:
+ self.assertEquals(json.dumps(num), str(num))
+ self.assertEquals(int(json.dumps(num)), num)
+ self.assertEquals(json.loads(json.dumps(num)), num)
+ self.assertEquals(json.loads(unicode(json.dumps(num))), num)
diff --git a/tablib/packages/simplejson/tests/test_indent.py b/tablib/packages/simplejson/tests/test_indent.py
new file mode 100644
index 0000000..985831b
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_indent.py
@@ -0,0 +1,53 @@
+from unittest import TestCase
+
+import simplejson as json
+import textwrap
+
+class TestIndent(TestCase):
+ def test_indent(self):
+ h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh',
+ 'i-vhbjkhnth',
+ {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+ expect = textwrap.dedent("""\
+ [
+ \t[
+ \t\t"blorpie"
+ \t],
+ \t[
+ \t\t"whoops"
+ \t],
+ \t[],
+ \t"d-shtaeou",
+ \t"d-nthiouh",
+ \t"i-vhbjkhnth",
+ \t{
+ \t\t"nifty": 87
+ \t},
+ \t{
+ \t\t"field": "yes",
+ \t\t"morefield": false
+ \t}
+ ]""")
+
+
+ d1 = json.dumps(h)
+ d2 = json.dumps(h, indent='\t', sort_keys=True, separators=(',', ': '))
+ d3 = json.dumps(h, indent=' ', sort_keys=True, separators=(',', ': '))
+ d4 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': '))
+
+ h1 = json.loads(d1)
+ h2 = json.loads(d2)
+ h3 = json.loads(d3)
+ h4 = json.loads(d4)
+
+ self.assertEquals(h1, h)
+ self.assertEquals(h2, h)
+ self.assertEquals(h3, h)
+ self.assertEquals(h4, h)
+ self.assertEquals(d3, expect.replace('\t', ' '))
+ self.assertEquals(d4, expect.replace('\t', ' '))
+ # NOTE: Python 2.4 textwrap.dedent converts tabs to spaces,
+ # so the following is expected to fail. Python 2.4 is not a
+ # supported platform in simplejson 2.1.0+.
+ self.assertEquals(d2, expect)
diff --git a/tablib/packages/simplejson/tests/test_pass1.py b/tablib/packages/simplejson/tests/test_pass1.py
new file mode 100644
index 0000000..c3d6302
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_pass1.py
@@ -0,0 +1,76 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass1.json
+JSON = r'''
+[
+ "JSON Test Pattern pass1",
+ {"object with 1 member":["array with 1 element"]},
+ {},
+ [],
+ -42,
+ true,
+ false,
+ null,
+ {
+ "integer": 1234567890,
+ "real": -9876.543210,
+ "e": 0.123456789e-12,
+ "E": 1.234567890E+34,
+ "": 23456789012E666,
+ "zero": 0,
+ "one": 1,
+ "space": " ",
+ "quote": "\"",
+ "backslash": "\\",
+ "controls": "\b\f\n\r\t",
+ "slash": "/ & \/",
+ "alpha": "abcdefghijklmnopqrstuvwyz",
+ "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+ "digit": "0123456789",
+ "special": "`1~!@#$%^&*()_+-={':[,]}|;.>?",
+ "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+ "true": true,
+ "false": false,
+ "null": null,
+ "array":[ ],
+ "object":{ },
+ "address": "50 St. James Street",
+ "url": "http://www.JSON.org/",
+ "comment": "// /* */": " ",
+ " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5 , 6 ,7 ],
+ "compact": [1,2,3,4,5,6,7],
+ "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+ "quotes": "" \u0022 %22 0x22 034 "",
+ "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+ },
+ 0.5 ,98.6
+,
+99.44
+,
+
+1066
+
+
+,"rosebud"]
+'''
+
+class TestPass1(TestCase):
+ def test_parse(self):
+ # test in/out equivalence and parsing
+ res = json.loads(JSON)
+ out = json.dumps(res)
+ self.assertEquals(res, json.loads(out))
+ try:
+ json.dumps(res, allow_nan=False)
+ except ValueError:
+ pass
+ else:
+ self.fail("23456789012E666 should be out of range")
diff --git a/tablib/packages/simplejson/tests/test_pass2.py b/tablib/packages/simplejson/tests/test_pass2.py
new file mode 100644
index 0000000..de4ee00
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_pass2.py
@@ -0,0 +1,14 @@
+from unittest import TestCase
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass2.json
+# Deeply nested arrays around a single string; parsed by TestPass2 below.
+JSON = r'''
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
+'''
+
+class TestPass2(TestCase):
+ def test_parse(self):
+ # test in/out equivalence and parsing
+ res = json.loads(JSON)
+ out = json.dumps(res)
+ self.assertEquals(res, json.loads(out))
diff --git a/tablib/packages/simplejson/tests/test_pass3.py b/tablib/packages/simplejson/tests/test_pass3.py
new file mode 100644
index 0000000..f591aba
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_pass3.py
@@ -0,0 +1,20 @@
+from unittest import TestCase
+
+import simplejson as json
+
+# from http://json.org/JSON_checker/test/pass3.json
+# A document whose outermost value is an object; parsed by TestPass3 below.
+JSON = r'''
+{
+ "JSON Test Pattern pass3": {
+ "The outermost value": "must be an object or array.",
+ "In this test": "It is an object."
+ }
+}
+'''
+
+class TestPass3(TestCase):
+ def test_parse(self):
+ # test in/out equivalence and parsing
+ res = json.loads(JSON)
+ out = json.dumps(res)
+ self.assertEquals(res, json.loads(out))
diff --git a/tablib/packages/simplejson/tests/test_recursion.py b/tablib/packages/simplejson/tests/test_recursion.py
new file mode 100644
index 0000000..97422a6
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_recursion.py
@@ -0,0 +1,67 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class JSONTestObject:
+ pass
+
+
+class RecursiveJSONEncoder(json.JSONEncoder):
+ recurse = False
+ def default(self, o):
+ if o is JSONTestObject:
+ if self.recurse:
+ return [JSONTestObject]
+ else:
+ return 'JSONTestObject'
+ return json.JSONEncoder.default(o)
+
+
+class TestRecursion(TestCase):
+ def test_listrecursion(self):
+ x = []
+ x.append(x)
+ try:
+ json.dumps(x)
+ except ValueError:
+ pass
+ else:
+ self.fail("didn't raise ValueError on list recursion")
+ x = []
+ y = [x]
+ x.append(y)
+ try:
+ json.dumps(x)
+ except ValueError:
+ pass
+ else:
+ self.fail("didn't raise ValueError on alternating list recursion")
+ y = []
+ x = [y, y]
+ # ensure that the marker is cleared
+ json.dumps(x)
+
+ def test_dictrecursion(self):
+ x = {}
+ x["test"] = x
+ try:
+ json.dumps(x)
+ except ValueError:
+ pass
+ else:
+ self.fail("didn't raise ValueError on dict recursion")
+ x = {}
+ y = {"a": x, "b": x}
+ # ensure that the marker is cleared
+ json.dumps(x)
+
+ def test_defaultrecursion(self):
+ enc = RecursiveJSONEncoder()
+ self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"')
+ enc.recurse = True
+ try:
+ enc.encode(JSONTestObject)
+ except ValueError:
+ pass
+ else:
+ self.fail("didn't raise ValueError on default recursion")
diff --git a/tablib/packages/simplejson/tests/test_scanstring.py b/tablib/packages/simplejson/tests/test_scanstring.py
new file mode 100644
index 0000000..a7fcd46
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_scanstring.py
@@ -0,0 +1,117 @@
+import sys
+from unittest import TestCase
+
+import simplejson as json
+import simplejson.decoder
+
+class TestScanString(TestCase):
+ def test_py_scanstring(self):
+ self._test_scanstring(simplejson.decoder.py_scanstring)
+
+ def test_c_scanstring(self):
+ if not simplejson.decoder.c_scanstring:
+ return
+ self._test_scanstring(simplejson.decoder.c_scanstring)
+
+ def _test_scanstring(self, scanstring):
+ self.assertEquals(
+ scanstring('"z\\ud834\\udd20x"', 1, None, True),
+ (u'z\U0001d120x', 16))
+
+ if sys.maxunicode == 65535:
+ self.assertEquals(
+ scanstring(u'"z\U0001d120x"', 1, None, True),
+ (u'z\U0001d120x', 6))
+ else:
+ self.assertEquals(
+ scanstring(u'"z\U0001d120x"', 1, None, True),
+ (u'z\U0001d120x', 5))
+
+ self.assertEquals(
+ scanstring('"\\u007b"', 1, None, True),
+ (u'{', 8))
+
+ self.assertEquals(
+ scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True),
+ (u'A JSON payload should be an object or array, not a string.', 60))
+
+ self.assertEquals(
+ scanstring('["Unclosed array"', 2, None, True),
+ (u'Unclosed array', 17))
+
+ self.assertEquals(
+ scanstring('["extra comma",]', 2, None, True),
+ (u'extra comma', 14))
+
+ self.assertEquals(
+ scanstring('["double extra comma",,]', 2, None, True),
+ (u'double extra comma', 21))
+
+ self.assertEquals(
+ scanstring('["Comma after the close"],', 2, None, True),
+ (u'Comma after the close', 24))
+
+ self.assertEquals(
+ scanstring('["Extra close"]]', 2, None, True),
+ (u'Extra close', 14))
+
+ self.assertEquals(
+ scanstring('{"Extra comma": true,}', 2, None, True),
+ (u'Extra comma', 14))
+
+ self.assertEquals(
+ scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True),
+ (u'Extra value after close', 26))
+
+ self.assertEquals(
+ scanstring('{"Illegal expression": 1 + 2}', 2, None, True),
+ (u'Illegal expression', 21))
+
+ self.assertEquals(
+ scanstring('{"Illegal invocation": alert()}', 2, None, True),
+ (u'Illegal invocation', 21))
+
+ self.assertEquals(
+ scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True),
+ (u'Numbers cannot have leading zeroes', 37))
+
+ self.assertEquals(
+ scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True),
+ (u'Numbers cannot be hex', 24))
+
+ self.assertEquals(
+ scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True),
+ (u'Too deep', 30))
+
+ self.assertEquals(
+ scanstring('{"Missing colon" null}', 2, None, True),
+ (u'Missing colon', 16))
+
+ self.assertEquals(
+ scanstring('{"Double colon":: null}', 2, None, True),
+ (u'Double colon', 15))
+
+ self.assertEquals(
+ scanstring('{"Comma instead of colon", null}', 2, None, True),
+ (u'Comma instead of colon', 25))
+
+ self.assertEquals(
+ scanstring('["Colon instead of comma": false]', 2, None, True),
+ (u'Colon instead of comma', 25))
+
+ self.assertEquals(
+ scanstring('["Bad value", truth]', 2, None, True),
+ (u'Bad value', 12))
+
+ def test_issue3623(self):
+ self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
+ "xxx")
+ self.assertRaises(UnicodeDecodeError,
+ json.encoder.encode_basestring_ascii, "xx\xff")
+
+ def test_overflow(self):
+ # Python 2.5 does not have maxsize
+ maxsize = getattr(sys, 'maxsize', sys.maxint)
+ self.assertRaises(OverflowError, json.decoder.scanstring, "xxx",
+ maxsize + 1)
+
diff --git a/tablib/packages/simplejson/tests/test_separators.py b/tablib/packages/simplejson/tests/test_separators.py
new file mode 100644
index 0000000..cbda93c
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_separators.py
@@ -0,0 +1,42 @@
+import textwrap
+from unittest import TestCase
+
+import simplejson as json
+
+
+class TestSeparators(TestCase):
+ def test_separators(self):
+ h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth',
+ {'nifty': 87}, {'field': 'yes', 'morefield': False} ]
+
+ expect = textwrap.dedent("""\
+ [
+ [
+ "blorpie"
+ ] ,
+ [
+ "whoops"
+ ] ,
+ [] ,
+ "d-shtaeou" ,
+ "d-nthiouh" ,
+ "i-vhbjkhnth" ,
+ {
+ "nifty" : 87
+ } ,
+ {
+ "field" : "yes" ,
+ "morefield" : false
+ }
+ ]""")
+
+
+ d1 = json.dumps(h)
+ d2 = json.dumps(h, indent=' ', sort_keys=True, separators=(' ,', ' : '))
+
+ h1 = json.loads(d1)
+ h2 = json.loads(d2)
+
+ self.assertEquals(h1, h)
+ self.assertEquals(h2, h)
+ self.assertEquals(d2, expect)
diff --git a/tablib/packages/simplejson/tests/test_speedups.py b/tablib/packages/simplejson/tests/test_speedups.py
new file mode 100644
index 0000000..4bf0875
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_speedups.py
@@ -0,0 +1,21 @@
+import decimal
+from unittest import TestCase
+
+from simplejson import decoder, encoder, scanner
+
+def has_speedups():
+ return encoder.c_make_encoder is not None
+
+class TestDecode(TestCase):
+ def test_make_scanner(self):
+ if not has_speedups():
+ return
+ self.assertRaises(AttributeError, scanner.c_make_scanner, 1)
+
+ def test_make_encoder(self):
+ if not has_speedups():
+ return
+ self.assertRaises(TypeError, encoder.c_make_encoder,
+ None,
+ "\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75",
+ None)
diff --git a/tablib/packages/simplejson/tests/test_unicode.py b/tablib/packages/simplejson/tests/test_unicode.py
new file mode 100644
index 0000000..f73e5bf
--- /dev/null
+++ b/tablib/packages/simplejson/tests/test_unicode.py
@@ -0,0 +1,99 @@
+from unittest import TestCase
+
+import simplejson as json
+
+class TestUnicode(TestCase):
+ def test_encoding1(self):
+ encoder = json.JSONEncoder(encoding='utf-8')
+ u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+ s = u.encode('utf-8')
+ ju = encoder.encode(u)
+ js = encoder.encode(s)
+ self.assertEquals(ju, js)
+
+ def test_encoding2(self):
+ u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+ s = u.encode('utf-8')
+ ju = json.dumps(u, encoding='utf-8')
+ js = json.dumps(s, encoding='utf-8')
+ self.assertEquals(ju, js)
+
+ def test_encoding3(self):
+ u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+ j = json.dumps(u)
+ self.assertEquals(j, '"\\u03b1\\u03a9"')
+
+ def test_encoding4(self):
+ u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+ j = json.dumps([u])
+ self.assertEquals(j, '["\\u03b1\\u03a9"]')
+
+ def test_encoding5(self):
+ u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+ j = json.dumps(u, ensure_ascii=False)
+ self.assertEquals(j, u'"' + u + u'"')
+
+ def test_encoding6(self):
+ u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+ j = json.dumps([u], ensure_ascii=False)
+ self.assertEquals(j, u'["' + u + u'"]')
+
+ def test_big_unicode_encode(self):
+ u = u'\U0001d120'
+ self.assertEquals(json.dumps(u), '"\\ud834\\udd20"')
+ self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"')
+
+ def test_big_unicode_decode(self):
+ u = u'z\U0001d120x'
+ self.assertEquals(json.loads('"' + u + '"'), u)
+ self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u)
+
+ def test_unicode_decode(self):
+ for i in range(0, 0xd7ff):
+ u = unichr(i)
+ #s = '"\\u{0:04x}"'.format(i)
+ s = '"\\u%04x"' % (i,)
+ self.assertEquals(json.loads(s), u)
+
+ def test_object_pairs_hook_with_unicode(self):
+ s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
+ p = [(u"xkd", 1), (u"kcw", 2), (u"art", 3), (u"hxm", 4),
+ (u"qrt", 5), (u"pad", 6), (u"hoy", 7)]
+ self.assertEqual(json.loads(s), eval(s))
+ self.assertEqual(json.loads(s, object_pairs_hook=lambda x: x), p)
+ od = json.loads(s, object_pairs_hook=json.OrderedDict)
+ self.assertEqual(od, json.OrderedDict(p))
+ self.assertEqual(type(od), json.OrderedDict)
+ # the object_pairs_hook takes priority over the object_hook
+ self.assertEqual(json.loads(s,
+ object_pairs_hook=json.OrderedDict,
+ object_hook=lambda x: None),
+ json.OrderedDict(p))
+
+
+ def test_default_encoding(self):
+ self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')),
+ {'a': u'\xe9'})
+
+ def test_unicode_preservation(self):
+ self.assertEquals(type(json.loads(u'""')), unicode)
+ self.assertEquals(type(json.loads(u'"a"')), unicode)
+ self.assertEquals(type(json.loads(u'["a"]')[0]), unicode)
+
+ def test_ensure_ascii_false_returns_unicode(self):
+ # http://code.google.com/p/simplejson/issues/detail?id=48
+ self.assertEquals(type(json.dumps([], ensure_ascii=False)), unicode)
+ self.assertEquals(type(json.dumps(0, ensure_ascii=False)), unicode)
+ self.assertEquals(type(json.dumps({}, ensure_ascii=False)), unicode)
+ self.assertEquals(type(json.dumps("", ensure_ascii=False)), unicode)
+
+ def test_ensure_ascii_false_bytestring_encoding(self):
+ # http://code.google.com/p/simplejson/issues/detail?id=48
+ doc1 = {u'quux': 'Arr\xc3\xaat sur images'}
+ doc2 = {u'quux': u'Arr\xeat sur images'}
+ doc_ascii = '{"quux": "Arr\\u00eat sur images"}'
+ doc_unicode = u'{"quux": "Arr\xeat sur images"}'
+ self.assertEquals(json.dumps(doc1), doc_ascii)
+ self.assertEquals(json.dumps(doc2), doc_ascii)
+ self.assertEquals(json.dumps(doc1, ensure_ascii=False), doc_unicode)
+ self.assertEquals(json.dumps(doc2, ensure_ascii=False), doc_unicode)
diff --git a/tablib/packages/simplejson/tool.py b/tablib/packages/simplejson/tool.py
new file mode 100644
index 0000000..73370db
--- /dev/null
+++ b/tablib/packages/simplejson/tool.py
@@ -0,0 +1,39 @@
+r"""Command-line tool to validate and pretty-print JSON
+
+Usage::
+
+ $ echo '{"json":"obj"}' | python -m simplejson.tool
+ {
+ "json": "obj"
+ }
+ $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+ Expecting property name: line 1 column 2 (char 2)
+
+"""
+import sys
+import simplejson as json
+
+def main():
+ if len(sys.argv) == 1:
+ infile = sys.stdin
+ outfile = sys.stdout
+ elif len(sys.argv) == 2:
+ infile = open(sys.argv[1], 'rb')
+ outfile = sys.stdout
+ elif len(sys.argv) == 3:
+ infile = open(sys.argv[1], 'rb')
+ outfile = open(sys.argv[2], 'wb')
+ else:
+ raise SystemExit(sys.argv[0] + " [infile [outfile]]")
+ try:
+ obj = json.load(infile,
+ object_pairs_hook=json.OrderedDict,
+ use_decimal=True)
+ except ValueError, e:
+ raise SystemExit(e)
+ json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True)
+ outfile.write('\n')
+
+
+# Run the tool only when this module is executed as a script.
+if __name__ == '__main__':
+ main()
diff --git a/tablib/packages/xlwt/BIFFRecords.py b/tablib/packages/xlwt/BIFFRecords.py
new file mode 100644
index 0000000..4660c03
--- /dev/null
+++ b/tablib/packages/xlwt/BIFFRecords.py
@@ -0,0 +1,2393 @@
+# -*- coding: cp1252 -*-
+from struct import pack
+from UnicodeUtils import upack1, upack2
+import sys
+
+class SharedStringTable(object):
+ _SST_ID = 0x00FC
+ _CONTINUE_ID = 0x003C
+
+ def __init__(self, encoding):
+ self.encoding = encoding
+ self._str_indexes = {}
+ self._tally = []
+ self._add_calls = 0
+ # Following 3 attrs are used for temporary storage in the
+ # get_biff_record() method and methods called by it. The pseudo-
+ # initialisation here is for documentation purposes only.
+ self._sst_record = None
+ self._continues = None
+ self._current_piece = None
+
+ def add_str(self, s):
+ if self.encoding != 'ascii' and not isinstance(s, unicode):
+ s = unicode(s, self.encoding)
+ self._add_calls += 1
+ if s not in self._str_indexes:
+ idx = len(self._str_indexes)
+ self._str_indexes[s] = idx
+ self._tally.append(1)
+ else:
+ idx = self._str_indexes[s]
+ self._tally[idx] += 1
+ return idx
+
+ def del_str(self, idx):
+ # This is called when we are replacing the contents of a string cell.
+ assert self._tally[idx] > 0
+ self._tally[idx] -= 1
+ self._add_calls -= 1
+
+ def str_index(self, s):
+ return self._str_indexes[s]
+
+ def get_biff_record(self):
+ self._sst_record = ''
+ self._continues = [None, None]
+ self._current_piece = pack(' 0x2020: # limit for BIFF7/8
+ chunks = []
+ pos = 0
+ while pos < len(data):
+ chunk_pos = pos + 0x2020
+ chunk = data[pos:chunk_pos]
+ chunks.append(chunk)
+ pos = chunk_pos
+ continues = pack('<2H', self._REC_ID, len(chunks[0])) + chunks[0]
+ for chunk in chunks[1:]:
+ continues += pack('<2H%ds'%len(chunk), 0x003C, len(chunk), chunk)
+ # 0x003C -- CONTINUE record id
+ return continues
+ else:
+ return self.get_rec_header() + data
+
+
+class Biff8BOFRecord(BiffRecord):
+ """
+ Offset Size Contents
+ 0 2 Version, contains 0600H for BIFF8 and BIFF8X
+ 2 2 Type of the following data:
+ 0005H = Workbook globals
+ 0006H = Visual Basic module
+ 0010H = Worksheet
+ 0020H = Chart
+ 0040H = Macro sheet
+ 0100H = Workspace file
+ 4 2 Build identifier
+ 6 2 Build year
+ 8 4 File history flags
+ 12 4 Lowest Excel version that can read all records in this file
+ """
+ _REC_ID = 0x0809
+ # stream types
+ BOOK_GLOBAL = 0x0005
+ VB_MODULE = 0x0006
+ WORKSHEET = 0x0010
+ CHART = 0x0020
+ MACROSHEET = 0x0040
+ WORKSPACE = 0x0100
+
+ def __init__(self, rec_type):
+ version = 0x0600
+ build = 0x0DBB
+ year = 0x07CC
+ file_hist_flags = 0x00L
+ ver_can_read = 0x06L
+
+ self._rec_data = pack('<4H2I', version, rec_type, build, year, file_hist_flags, ver_can_read)
+
+
+class InteraceHdrRecord(BiffRecord):
+ _REC_ID = 0x00E1
+
+ def __init__(self):
+ self._rec_data = pack('BB', 0xB0, 0x04)
+
+
+class InteraceEndRecord(BiffRecord):
+ _REC_ID = 0x00E2
+
+ def __init__(self):
+ self._rec_data = ''
+
+
+class MMSRecord(BiffRecord):
+ _REC_ID = 0x00C1
+
+ def __init__(self):
+ self._rec_data = pack('> 15
+ c = low_15 | high_15
+ passwd_hash ^= c
+ passwd_hash ^= len(plaintext)
+ passwd_hash ^= 0xCE4B
+ return passwd_hash
+
+ def __init__(self, passwd = ""):
+ self._rec_data = pack('=8
+ 2 var. List of OFFSET structures for all portions. Each OFFSET contains the following data:
+ Offset Size Contents
+ 0 4 Absolute stream position of first string of the portion
+ 4 2 Position of first string of the portion inside of current record,
+ including record header. This counter restarts at zero, if the SST
+ record is continued with a CONTINUE record.
+ 6 2 Not used
+ """
+ _REC_ID = 0x00FF
+
+ def __init__(self, sst_stream_pos, str_placement, portions_len):
+ extsst = {}
+ abs_stream_pos = sst_stream_pos
+ str_counter = 0
+ portion_counter = 0
+ while str_counter < len(str_placement):
+ str_chunk_num, pos_in_chunk = str_placement[str_counter]
+ if str_chunk_num <> portion_counter:
+ portion_counter = str_chunk_num
+ abs_stream_pos += portions_len[portion_counter-1]
+ #print hex(abs_stream_pos)
+ str_stream_pos = abs_stream_pos + pos_in_chunk + 4 # header
+ extsst[str_counter] = (pos_in_chunk, str_stream_pos)
+ str_counter += 1
+
+ exsst_str_count_delta = max(8, len(str_placement)*8/0x2000) # maybe smth else?
+ self._rec_data = pack(' last_used_row or first_used_col > last_used_col:
+ # Special case: empty worksheet
+ first_used_row = first_used_col = 0
+ last_used_row = last_used_col = -1
+ self._rec_data = pack('<2L3H',
+ first_used_row, last_used_row + 1,
+ first_used_col, last_used_col + 1,
+ 0x00)
+
+
+class Window2Record(BiffRecord):
+ """
+ Record WINDOW2, BIFF8:
+
+ Offset Size Contents
+ 0 2 Option flags (see below)
+ 2 2 Index to first visible row
+ 4 2 Index to first visible column
+ 6 2 Colour index of grid line colour. Note that in BIFF2-BIFF7 an RGB colour is
+ written instead.
+ 8 2 Not used
+ 10 2 Cached magnification factor in page break preview (in percent); 0 = Default (60%)
+ 12 2 Cached magnification factor in normal view (in percent); 0 = Default (100%)
+ 14 4 Not used
+
+ In BIFF8 this record stores used magnification factors for page break
+ preview and normal view. These values are used to restore the
+ magnification, when the view is changed. The real magnification of the
+ currently active view is stored in the SCL record. The type of the
+ active view is stored in the option flags field (see below).
+
+ 0 0001H 0 = Show formula results 1 = Show formulas
+ 1 0002H 0 = Do not show grid lines 1 = Show grid lines
+ 2 0004H 0 = Do not show sheet headers 1 = Show sheet headers
+ 3 0008H 0 = Panes are not frozen 1 = Panes are frozen (freeze)
+ 4 0010H 0 = Show zero values as empty cells 1 = Show zero values
+ 5 0020H 0 = Manual grid line colour 1 = Automatic grid line colour
+ 6 0040H 0 = Columns from left to right 1 = Columns from right to left
+ 7 0080H 0 = Do not show outline symbols 1 = Show outline symbols
+ 8 0100H 0 = Keep splits if pane freeze is removed 1 = Remove splits if pane freeze is removed
+ 9 0200H 0 = Sheet not selected 1 = Sheet selected (BIFF5-BIFF8)
+ 10 0400H 0 = Sheet not visible 1 = Sheet visible (BIFF5-BIFF8)
+ 11 0800H 0 = Show in normal view 1 = Show in page break preview (BIFF8)
+
+ The freeze flag specifies, if a following PANE record describes unfrozen or frozen panes.
+
+ *** This class appends the optional SCL record ***
+
+ Record SCL, BIFF4-BIFF8:
+
+ This record stores the magnification of the active view of the current worksheet.
+ In BIFF8 this can be either the normal view or the page break preview.
+ This is determined in the WINDOW2 record. The SCL record is part of the
+ Sheet View Settings Block.
+
+ Offset Size Contents
+ 0 2 Numerator of the view magnification fraction (num)
+ 2 2 Denumerator [denominator] of the view magnification fraction (den)
+ The magnification is stored as reduced fraction. The magnification results from num/den.
+
+ SJM note: Excel expresses (e.g.) 25% in reduced form i.e. 1/4. Reason unknown. This code
+ writes 25/100, and Excel is happy with that.
+
+ """
+ _REC_ID = 0x023E
+
+ def __init__(self, options, first_visible_row, first_visible_col,
+ grid_colour, preview_magn, normal_magn, scl_magn):
+ self._rec_data = pack('<7HL', options,
+ first_visible_row, first_visible_col,
+ grid_colour,
+ 0x00,
+ preview_magn, normal_magn,
+ 0x00L)
+ if scl_magn:
+ self._scl_rec = pack('<4H', 0x00A0, 4, scl_magn, 100)
+ else:
+ self._scl_rec = ''
+
+ def get(self):
+ return self.get_rec_header() + self._rec_data + self._scl_rec
+
+
+class PanesRecord(BiffRecord):
+ """
+ This record stores the position of window panes. It is part of the Sheet
+ View Settings Block. If the sheet does not contain any splits, this
+ record will not occur.
+ A sheet can be split in two different ways, with unfrozen panes or with
+ frozen panes. A flag in the WINDOW2 record specifies, if the panes are
+ frozen, which affects the contents of this record.
+
+ Record PANE, BIFF2-BIFF8:
+ Offset Size Contents
+ 0 2 Position of the vertical split
+ (px, 0 = No vertical split):
+ Unfrozen pane: Width of the left pane(s)
+ (in twips = 1/20 of a point)
+ Frozen pane: Number of visible
+ columns in left pane(s)
+ 2 2 Position of the horizontal split
+ (py, 0 = No horizontal split):
+ Unfrozen pane: Height of the top pane(s)
+ (in twips = 1/20 of a point)
+ Frozen pane: Number of visible
+ rows in top pane(s)
+ 4 2 Index to first visible row
+ in bottom pane(s)
+ 6 2 Index to first visible column
+ in right pane(s)
+ 8 1 Identifier of pane with active
+ cell cursor
+ [9] 1 Not used (BIFF5-BIFF8 only, not written
+ in BIFF2-BIFF4)
+
+ If the panes are frozen, pane 0 is always active, regardless
+ of the cursor position. The correct identifiers for all possible
+ combinations of visible panes are shown in the following pictures.
+
+ px = 0, py = 0 px = 0, py > 0
+ -------------------------- ------------|-------------
+ | | | |
+ | | | 3 |
+ | | | |
+ - 3 - --------------------------
+ | | | |
+ | | | 2 |
+ | | | |
+ -------------------------- ------------|-------------
+
+ px > 0, py = 0 px > 0, py > 0
+ ------------|------------- ------------|-------------
+ | | | | | |
+ | | | | 3 | 2 |
+ | | | | | |
+ - 3 | 1 - --------------------------
+ | | | | | |
+ | | | | 1 | 0 |
+ | | | | | |
+ ------------|------------- ------------|-------------
+ """
+ _REC_ID = 0x0041
+ def __init__(self, px, py, first_row_bottom, first_col_right, active_pane):
+ self._rec_data = pack('<5H',
+ px, py,
+ first_row_bottom, first_col_right,
+ active_pane)
+
+
+class RowRecord(BiffRecord):
+ """
+ This record contains the properties of a single row in a sheet. Rows
+ and cells in a sheet are divided into blocks of 32 rows.
+
+ Record ROW, BIFF3-BIFF8:
+
+ Offset Size Contents
+ 0 2 Index of this row
+ 2 2 Index to column of the first cell which is described by a cell record
+ 4 2 Index to column of the last cell which is described by a cell record,
+ increased by 1
+ 6 2 Bit Mask Contents
+ 14-0 7FFFH Height of the row, in twips = 1/20 of a point
+ 15 8000H 0 = Row has custom height; 1 = Row has default height
+ 8 2 Not used
+ 10 2 In BIFF3-BIFF4 this field contains a relative offset
+ to calculate stream position of the first cell record
+ for this row. In BIFF5-BIFF8 this field is not used
+ anymore, but the DBCELL record instead.
+ 12 4 Option flags and default row formatting:
+ Bit Mask Contents
+ 2-0 00000007H Outline level of the row
+ 4 00000010H 1 = Outline group starts or ends here (depending
+ on where the outline buttons are located,
+ see WSBOOL record), and is collapsed
+ 5 00000020H 1 = Row is hidden (manually, or by a filter or outline group)
+ 6 00000040H 1 = Row height and default font height do not match
+ 7 00000080H 1 = Row has explicit default format (fl)
+ 8 00000100H Always 1
+ 27-16 0FFF0000H If fl=1: Index to default XF record
+ 28 10000000H 1 = Additional space above the row. This flag is set,
+ if the upper border of at least one cell in this row
+ or if the lower border of at least one cell in the row
+ above is formatted with a thick line style.
+ Thin and medium line styles are not taken into account.
+ 29 20000000H 1 = Additional space below the row. This flag is set,
+ if the lower border of at least one cell in this row
+ or if the upper border of at least one cell in the row
+ below is formatted with a medium or thick line style.
+ Thin line styles are not taken into account.
+ """
+
+ _REC_ID = 0x0208
+
+ def __init__(self, index, first_col, last_col, height_options, options):
+ # Payload follows the layout documented above: six little-endian
+ # 16-bit words (row index, first column, last column + 1, the
+ # height/flag word, and two zero words for the fields at offsets 8
+ # and 10) followed by the 32-bit option flags.
+ # The "+ 1" on last_col is applied here, so the caller passes the
+ # value before that increment.
+ self._rec_data = pack('<6HL', index, first_col, last_col + 1,
+ height_options,
+ 0x00, 0x00,
+ options)
+
+class LabelSSTRecord(BiffRecord):
+ """
+ This record represents a cell that contains a string. It replaces the
+ LABEL record and RSTRING record used in BIFF2-BIFF7.
+
+ The payload built here is row, column and XF index as 16-bit words,
+ followed by sst_idx as a 32-bit value, all little-endian.
+ """
+ _REC_ID = 0x00FD
+
+ def __init__(self, row, col, xf_idx, sst_idx):
+ # '<3HL': three unsigned 16-bit fields, then one unsigned 32-bit field.
+ self._rec_data = pack('<3HL', row, col, xf_idx, sst_idx)
+
+
+class MergedCellsRecord(BiffRecord):
+ """
+ This record contains all merged cell ranges of the current sheet.
+
+ Record MERGEDCELLS, BIFF8:
+
+ Offset Size Contents
+ 0 var. Cell range address list with all merged ranges
+
+ ------------------------------------------------------------------
+
+ A cell range address list consists of a field with the number of ranges
+ and the list of the range addresses.
+
+ Cell range address list, BIFF8:
+
+ Offset Size Contents
+ 0 2 Number of following cell range addresses (nm)
+ 2 8*nm List of nm cell range addresses
+
+ ---------------------------------------------------------------------
+ Cell range address, BIFF8:
+
+ Offset Size Contents
+ 0 2 Index to first row
+ 2 2 Index to last row
+ 4 2 Index to first column
+ 6 2 Index to last column
+
+ """
+ _REC_ID = 0x00E5
+
+ def __init__(self, merged_list):
+ # merged_list is indexed and each entry is unpacked as
+ # (r1, r2, c1, c2); entries are consumed from the last to the first.
+ # NOTE(review): "+=" below needs self._rec_data to exist already --
+ # presumably a default defined on BiffRecord; confirm.
+ i = len(merged_list) - 1
+ while i >= 0:
+ j = 0
+ merged = ''
+ # Collect at most 0x403 (1027) ranges for one chunk.
+ while (i >= 0) and (j < 0x403):
+ r1, r2, c1, c2 = merged_list[i]
+ merged += pack('<4H', r1, r2, c1, c2)
+ i -= 1
+ j += 1
+ # Every chunk carries its own header: record id, byte length (the
+ # packed ranges plus the 2-byte range count) and the range count j.
+ self._rec_data += pack('<3H', self._REC_ID, len(merged) + 2, j) + \
+ merged
+
+ # for some reason Excel doesn't use CONTINUE
+ def get(self):
+ # Return the buffer built in __init__ unchanged; the record headers
+ # are already embedded in it.
+ return self._rec_data
+
+class MulBlankRecord(BiffRecord):
+ """
+ This record represents a cell range of empty cells. All cells are
+ located in the same row.
+
+ Record MULBLANK, BIFF5-BIFF8:
+
+ Offset Size Contents
+ 0 2 Index to row
+ 2 2 Index to first column (fc)
+ 4 2*nc List of nc=lc-fc+1 16-bit indexes to XF records
+ 4+2*nc 2 Index to last column (lc)
+ """
+ _REC_ID = 0x00BE
+
+ def __init__(self, row, first_col, last_col, xf_index):
+ blanks_count = last_col-first_col+1
+ self._rec_data = pack('%dH' % blanks_count, *([xf_index]*blanks_count))
+ self._rec_data = pack('<2H', row, first_col) + self._rec_data + pack('" Set new font
+ &","
+ Set new font with specified style .
+ The style is in most cases one of
+ "Regular", "Bold", "Italic", or "Bold Italic".
+ But this setting is dependent on the used font,
+ it may differ (localised style names, or "Standard",
+ "Oblique", ...). (BIFF5-BIFF8)
+ & Set font height in points ( is a decimal value).
+ If this command is followed by a plain number to be printed
+ in the header, it will be separated from the font height
+ with a space character.
+
+ """
+ _REC_ID = 0x0014
+
+ def __init__(self, header_str):
+ # The payload is header_str as encoded by upack2 (helper defined
+ # outside this view; its exact encoding is not visible here).
+ self._rec_data = upack2(header_str)
+
+class FooterRecord(BiffRecord):
+ """
+ The semantics are the same as for the HEADER record; the payload is
+ footer_str passed through upack2.
+ """
+ _REC_ID = 0x0015
+
+ def __init__(self, footer_str):
+ self._rec_data = upack2(footer_str)
+
+
+class HCenterRecord(BiffRecord):
+ """
+ This record is part of the Page Settings Block. It specifies if the
+ sheet is centred horizontally when printed.
+
+ Record HCENTER, BIFF3-BIFF8:
+
+ Offset Size Contents
+ 0 2 0 = Print sheet left aligned
+ 1 = Print sheet centred horizontally
+
+ """
+ _REC_ID = 0x0083
+
+ def __init__(self, is_horz_center):
+ self._rec_data = pack(' 0) Menu text (Unicode string without length field, 3.4)
+ [var.] var. (optional, only if ld > 0) Description text (Unicode string without length field, 3.4)
+ [var.] var. (optional, only if lh > 0) Help topic text (Unicode string without length field, 3.4)
+ [var.] var. (optional, only if ls > 0) Status bar text (Unicode string without length field, 3.4)
+ """
+ _REC_ID = 0x0018
+
+ def __init__(self, options, keyboard_shortcut, name, sheet_index, rpn, menu_text='', desc_text='', help_text='', status_text=''):
+ if type(name) == int:
+ uname = chr(name)
+ else:
+ uname = upack1(name)[1:]
+ uname_len = len(uname)
+
+ #~ self._rec_data = pack('", see 3.9.1)
+
+ """
+
+ def __init__(self, num_sheets):
+ self._rec_data = pack('
+# Portions are Copyright (c) 2002-2004 John McNamara (Perl Spreadsheet::WriteExcel)
+
+from BIFFRecords import BiffRecord
+from struct import *
+
+
+def _size_col(sheet, col):
+ # Thin wrapper: delegates to sheet.col_width(col). _position_image
+ # treats the result as a width in pixels (presumably; confirm units).
+ return sheet.col_width(col)
+
+
+def _size_row(sheet, row):
+ # Thin wrapper: delegates to sheet.row_height(row). _position_image
+ # treats the result as a height in pixels (presumably; confirm units).
+ return sheet.row_height(row)
+
+
+def _position_image(sheet, row_start, col_start, x1, y1, width, height):
+ """Calculate the vertices that define the position of the image as required by
+ the OBJ record.
+
+ +------------+------------+
+ | A | B |
+ +-----+------------+------------+
+ | |(x1,y1) | |
+ | 1 |(A1)._______|______ |
+ | | | | |
+ | | | | |
+ +-----+----| BITMAP |-----+
+ | | | | |
+ | 2 | |______________. |
+ | | | (B2)|
+ | | | (x2,y2)|
+ +---- +------------+------------+
+
+ Example of a bitmap that covers some of the area from cell A1 to cell B2.
+
+ Based on the width and height of the bitmap we need to calculate 8 vars:
+ col_start, row_start, col_end, row_end, x1, y1, x2, y2.
+ The width and height of the cells are also variable and have to be taken into
+ account.
+ The values of col_start and row_start are passed in from the calling
+ function. The values of col_end and row_end are calculated by subtracting
+ the width and height of the bitmap from the width and height of the
+ underlying cells.
+ The vertices are expressed as a percentage of the underlying cell width as
+ follows (rhs values are in pixels):
+
+ x1 = X / W *1024
+ y1 = Y / H *256
+ x2 = (X-1) / W *1024
+ y2 = (Y-1) / H *256
+
+ Where: X is distance from the left side of the underlying cell
+ Y is distance from the top of the underlying cell
+ W is the width of the cell
+ H is the height of the cell
+
+ Note: the SDK incorrectly states that the height should be expressed as a
+ percentage of 1024.
+
+ col_start - Col containing upper left corner of object
+ row_start - Row containing top left corner of object
+ x1 - Distance to left side of object
+ y1 - Distance to top of object
+ width - Width of image frame
+ height - Height of image frame
+
+ Returns the tuple
+ (col_start, x1, row_start, y1, col_end, x2, row_end, y2),
+ or None when the start or end cell has zero width or height.
+
+ """
+ # Adjust start column for offsets that are greater than the col width
+ while x1 >= _size_col(sheet, col_start):
+ x1 -= _size_col(sheet, col_start)
+ col_start += 1
+ # Adjust start row for offsets that are greater than the row height
+ while y1 >= _size_row(sheet, row_start):
+ y1 -= _size_row(sheet, row_start)
+ row_start += 1
+ # Initialise end cell to the same as the start cell
+ row_end = row_start # Row containing bottom right corner of object
+ col_end = col_start # Col containing lower right corner of object
+ # From here on width/height are measured from the left/top edge of the
+ # start cell to the last pixel of the image (hence the "- 1").
+ width = width + x1 - 1
+ height = height + y1 - 1
+ # Subtract the underlying cell widths to find the end cell of the image
+ while (width >= _size_col(sheet, col_end)):
+ width -= _size_col(sheet, col_end)
+ col_end += 1
+ # Subtract the underlying cell heights to find the end cell of the image
+ while (height >= _size_row(sheet, row_end)):
+ height -= _size_row(sheet, row_end)
+ row_end += 1
+ # Bitmap isn't allowed to start or finish in a hidden cell, i.e. a cell
+ # with zero height or width.
+ # The bare return yields None, so callers must handle that case before
+ # unpacking the result.
+ if ((_size_col(sheet, col_start) == 0) or (_size_col(sheet, col_end) == 0)
+ or (_size_row(sheet, row_start) == 0) or (_size_row(sheet, row_end) == 0)):
+ return
+ # Convert the pixel values to the percentage value expected by Excel
+ # (1/1024 of the cell width horizontally, 1/256 of the cell height
+ # vertically; int() truncates the fraction).
+ x1 = int(float(x1) / _size_col(sheet, col_start) * 1024)
+ y1 = int(float(y1) / _size_row(sheet, row_start) * 256)
+ # Distance to right side of object
+ x2 = int(float(width) / _size_col(sheet, col_end) * 1024)
+ # Distance to bottom of object
+ y2 = int(float(height) / _size_row(sheet, row_end) * 256)
+ return (col_start, x1, row_start, y1, col_end, x2, row_end, y2)
+
+
+class ObjBmpRecord(BiffRecord):
+ _REC_ID = 0x005D # Record identifier
+
+ def __init__(self, row, col, sheet, im_data_bmp, x, y, scale_x, scale_y):
+ # Scale the frame of the image.
+ width = im_data_bmp.width * scale_x
+ height = im_data_bmp.height * scale_y
+
+ # Calculate the vertices of the image and write the OBJ record
+ coordinates = _position_image(sheet, row, col, x, y, width, height)
+ # print coordinates
+ col_start, x1, row_start, y1, col_end, x2, row_end, y2 = coordinates
+
+ """Store the OBJ record that precedes an IMDATA record. This could be generalise
+ to support other Excel objects.
+
+ """
+ cObj = 0x0001 # Count of objects in file (set to 1)
+ OT = 0x0008 # Object type. 8 = Picture
+ id = 0x0001 # Object ID
+ grbit = 0x0614 # Option flags
+ colL = col_start # Col containing upper left corner of object
+ dxL = x1 # Distance from left side of cell
+ rwT = row_start # Row containing top left corner of object
+ dyT = y1 # Distance from top of cell
+ colR = col_end # Col containing lower right corner of object
+ dxR = x2 # Distance from right of cell
+ rwB = row_end # Row containing bottom right corner of object
+ dyB = y2 # Distance from bottom of cell
+ cbMacro = 0x0000 # Length of FMLA structure
+ Reserved1 = 0x0000 # Reserved
+ Reserved2 = 0x0000 # Reserved
+ icvBack = 0x09 # Background colour
+ icvFore = 0x09 # Foreground colour
+ fls = 0x00 # Fill pattern
+ fAuto = 0x00 # Automatic fill
+ icv = 0x08 # Line colour
+ lns = 0xff # Line style
+ lnw = 0x01 # Line weight
+ fAutoB = 0x00 # Automatic border
+ frs = 0x0000 # Frame style
+ cf = 0x0009 # Image format, 9 = bitmap
+ Reserved3 = 0x0000 # Reserved
+ cbPictFmla = 0x0000 # Length of FMLA structure
+ Reserved4 = 0x0000 # Reserved
+ grbit2 = 0x0001 # Option flags
+ Reserved5 = 0x0000 # Reserved
+
+ data = pack(" 0xFFFF):
+ raise Exception("bitmap: largest image width supported is 65k.")
+ if (height > 0xFFFF):
+ raise Exception("bitmap: largest image height supported is 65k.")
+ # Read and remove the bitmap planes and bpp data. Verify them.
+ planes, bitcount = unpack(" 0:
+ self.__build_SSAT()
+ else:
+ if self.dump and (self.total_ssat_sectors != 0 or self.ssat_start_sid != -2):
+ print 'NOTE: header says that must be', self.total_ssat_sectors, 'short sectors'
+ print 'NOTE: starting at', self.ssat_start_sid, 'sector'
+ print 'NOTE: but file does not contains data in short sectors'
+ self.ssat_start_sid = -2
+ self.total_ssat_sectors = 0
+ self.SSAT = [-2]
+
+ for dentry in self.dir_entry_list[1:]:
+ (did,
+ sz, name,
+ t, c,
+ did_left, did_right, did_root,
+ dentry_start_sid,
+ stream_size
+ ) = dentry
+ stream_data = ''
+ if stream_size > 0:
+ if stream_size >= self.min_stream_size:
+ args = (self.data, self.SAT, dentry_start_sid, self.sect_size)
+ else:
+ args = (self.short_sectors_data, self.SSAT, dentry_start_sid, self.short_sect_size)
+ stream_data = self.get_stream_data(*args)
+
+ if name != '':
+ # BAD IDEA: names may be equal. NEED use full paths...
+ self.STREAMS[name] = stream_data
+
+
+ def __build_header(self):
+ self.doc_magic = self.header[0:8]
+
+ if self.doc_magic != '\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1':
+ raise Exception, 'Not an OLE file.'
+
+ self.file_uid = self.header[8:24]
+ self.rev_num = self.header[24:26]
+ self.ver_num = self.header[26:28]
+ self.byte_order = self.header[28:30]
+ self.log2_sect_size, = struct.unpack(' 0:
+ msat_sector = struct.unpack('<128l', self.data[next*self.sect_size:(next+1)*self.sect_size])
+ self.MSAT.extend(msat_sector[:127])
+ next = msat_sector[-1]
+
+ if self.dump:
+ print 'MSAT (header part): \n', self.MSAT[:109]
+ print 'additional MSAT sectors: \n', self.MSAT[109:]
+
+
+ def __build_SAT(self):
+ # Build self.SAT from the sectors listed in self.MSAT.
+ # Entries of self.MSAT that are negative are skipped; the remaining
+ # sectors are concatenated in MSAT order.
+ sat_stream = ''.join([self.data[i*self.sect_size:(i+1)*self.sect_size] for i in self.MSAT if i >= 0])
+
+ # Each SAT entry is 4 bytes, so the entry count is the byte length / 4.
+ sat_sids_count = len(sat_stream) >> 2
+ # Entries are decoded as little-endian signed 32-bit integers.
+ self.SAT = struct.unpack('<%dl' % sat_sids_count, sat_stream) # SIDs tuple
+
+ if self.dump:
+ print 'SAT sid count:\n', sat_sids_count
+ print 'SAT content:\n', self.SAT
+
+
+ def __build_SSAT(self):
+ # Build self.SSAT by reading the sector chain that starts at
+ # self.ssat_start_sid through the regular SAT.
+ ssat_stream = self.get_stream_data(self.data, self.SAT, self.ssat_start_sid, self.sect_size)
+
+ # 4 bytes per entry; decoded as little-endian signed 32-bit integers.
+ ssids_count = len(ssat_stream) >> 2
+ self.SSAT = struct.unpack('<%dl' % ssids_count, ssat_stream)
+
+ if self.dump:
+ print 'SSID count:', ssids_count
+ print 'SSAT content:\n', self.SSAT
+
+
+ def __build_directory(self):
+ dir_stream = self.get_stream_data(self.data, self.SAT, self.dir_start_sid, self.sect_size)
+
+ self.dir_entry_list = []
+
+ i = 0
+ while i < len(dir_stream):
+ dentry = dir_stream[i:i+128] # 128 -- dir entry size
+ i += 128
+
+ did = len(self.dir_entry_list)
+ sz, = struct.unpack(' 0 :
+ name = dentry[0:sz-2].decode('utf_16_le', 'replace')
+ else:
+ name = u''
+ t, = struct.unpack('B', dentry[66])
+ c, = struct.unpack('B', dentry[67])
+ did_left , = struct.unpack('= self.min_stream_size:
+ print 'stream stored as normal stream'
+ else:
+ print 'stream stored as short-stream'
+
+
+ def __build_short_sectors_data(self):
+ # Load the short-stream container into self.short_sectors_data.
+ # It is described by the first directory entry, which must be of
+ # type 0x05; only the type, start sector and size fields are used.
+ (did, sz, name, t, c,
+ did_left, did_right, did_root,
+ dentry_start_sid, stream_size) = self.dir_entry_list[0]
+ assert t == 0x05 # Short-Stream Container Stream (SSCS) resides in Root Storage
+ if stream_size == 0:
+ self.short_sectors_data = ''
+ else:
+ # The container is read through the regular SAT with regular
+ # sector size.
+ self.short_sectors_data = self.get_stream_data(self.data, self.SAT, dentry_start_sid, self.sect_size)
+
+
+ def get_stream_data(self, data, SAT, start_sid, sect_size):
+ # Return the concatenated bytes of the sector chain starting at
+ # start_sid. SAT[sid] gives the next sector id of the chain; any
+ # negative value ends it. Sector n is read from
+ # data[n*sect_size:(n+1)*sect_size].
+ # Runs of consecutive sector ids are merged into (first, last) pairs
+ # so that each run is copied with a single slice.
+ # NOTE(review): there is no guard against a chain that loops back on
+ # itself; such a SAT would keep the while loop running.
+ sid = start_sid
+ chunks = [(sid, sid)]
+ stream_data = ''
+
+ while SAT[sid] >= 0:
+ next_in_chain = SAT[sid]
+ last_chunk_start, last_chunk_finish = chunks[-1]
+ if next_in_chain == last_chunk_finish + 1:
+ chunks[-1] = last_chunk_start, next_in_chain
+ else:
+ chunks.extend([(next_in_chain, next_in_chain)])
+ sid = next_in_chain
+ for s, f in chunks:
+ stream_data += data[s*sect_size:(f+1)*sect_size]
+ #print chunks
+ return stream_data
+
+
+def print_bin_data(data):
+ # Hex-dump helper: writes every item of data to stdout as '0xNN '
+ # (ord() of the item, two upper-case hex digits), at most 16 per row.
+ # NOTE(review): indentation is lost in this view. The bare `print`
+ # presumably ends each row inside the outer loop, and the `i == 0`
+ # check presumably runs once after it (i.e. only for empty data) --
+ # confirm against the original file.
+ i = 0
+ while i < len(data):
+ j = 0
+ while (i < len(data)) and (j < 16):
+ c = '0x%02X' % ord(data[i])
+ sys.stdout.write(c)
+ sys.stdout.write(' ')
+ i += 1
+ j += 1
+ print
+ if i == 0:
+ print ''
+
+
+
+# This implementation writes only 'Root Entry', 'Workbook' streams
+# and 2 empty streams for aligning directory stream on sector boundary
+#
+# LAYOUT:
+# 0 header
+# 76 MSAT (1st part: 109 SID)
+# 512 workbook stream
+# ... additional MSAT sectors if streams' size > about 7 Mb == (109*512 * 128)
+# ... SAT
+# ... directory stream
+#
+# NOTE: this layout is "ad hoc". It can be more general. RTFM
+
+class XlsDoc:
+ SECTOR_SIZE = 0x0200
+ MIN_LIMIT = 0x1000
+
+ SID_FREE_SECTOR = -1
+ SID_END_OF_CHAIN = -2
+ SID_USED_BY_SAT = -3
+ SID_USED_BY_MSAT = -4
+
+ def __init__(self):
+ # Start with empty buffers and sector lists; they are filled by the
+ # private __build_* methods of this class.
+ #self.book_stream = '' # padded
+ self.book_stream_sect = []
+
+ self.dir_stream = ''
+ self.dir_stream_sect = []
+
+ self.packed_SAT = ''
+ self.SAT_sect = []
+
+ # MSAT is kept in two parts: the 109 entries packed by __build_sat
+ # as the first part, and any additional MSAT sectors.
+ self.packed_MSAT_1st = ''
+ self.packed_MSAT_2nd = ''
+ self.MSAT_sect_2nd = []
+
+ self.header = ''
+
+ def __build_directory(self): # align on sector boundary
+ # Build self.dir_stream from four directory entries: 'Root Entry',
+ # 'Workbook' and two empty padding entries.
+ # Each entry is packed with '<64s H 2B 3l 9L l L L', which is
+ # 64 + 2 + 2 + 12 + 36 + 4 + 4 + 4 = 128 bytes, so the four entries
+ # fill exactly 512 bytes.
+ # Reads self.book_stream_len, which is set outside this method.
+ self.dir_stream = ''
+
+ # Name field: every character (including the terminating NUL) is
+ # followed by a zero byte; dentry_name_sz is the resulting byte count
+ # and the field is zero-padded to 64 bytes.
+ dentry_name = '\x00'.join('Root Entry\x00') + '\x00'
+ dentry_name_sz = len(dentry_name)
+ dentry_name_pad = '\x00'*(64 - dentry_name_sz)
+ dentry_type = 0x05 # root storage
+ dentry_colour = 0x01 # black
+ dentry_did_left = -1
+ dentry_did_right = -1
+ dentry_did_root = 1
+ dentry_start_sid = -2
+ dentry_stream_sz = 0
+
+ self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
+ dentry_name + dentry_name_pad,
+ dentry_name_sz,
+ dentry_type,
+ dentry_colour,
+ dentry_did_left,
+ dentry_did_right,
+ dentry_did_root,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ dentry_start_sid,
+ dentry_stream_sz,
+ 0
+ )
+
+ # Second entry: the workbook stream, starting at sector 0 with the
+ # length taken from self.book_stream_len.
+ dentry_name = '\x00'.join('Workbook\x00') + '\x00'
+ dentry_name_sz = len(dentry_name)
+ dentry_name_pad = '\x00'*(64 - dentry_name_sz)
+ dentry_type = 0x02 # user stream
+ dentry_colour = 0x01 # black
+ dentry_did_left = -1
+ dentry_did_right = -1
+ dentry_did_root = -1
+ dentry_start_sid = 0
+ dentry_stream_sz = self.book_stream_len
+
+ self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
+ dentry_name + dentry_name_pad,
+ dentry_name_sz,
+ dentry_type,
+ dentry_colour,
+ dentry_did_left,
+ dentry_did_right,
+ dentry_did_root,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ dentry_start_sid,
+ dentry_stream_sz,
+ 0
+ )
+
+ # padding
+ # One empty entry is packed and appended twice (the "* 2" below).
+ dentry_name = ''
+ dentry_name_sz = len(dentry_name)
+ dentry_name_pad = '\x00'*(64 - dentry_name_sz)
+ dentry_type = 0x00 # empty
+ dentry_colour = 0x01 # black
+ dentry_did_left = -1
+ dentry_did_right = -1
+ dentry_did_root = -1
+ dentry_start_sid = -2
+ dentry_stream_sz = 0
+
+ self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
+ dentry_name + dentry_name_pad,
+ dentry_name_sz,
+ dentry_type,
+ dentry_colour,
+ dentry_did_left,
+ dentry_did_right,
+ dentry_did_root,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ dentry_start_sid,
+ dentry_stream_sz,
+ 0
+ ) * 2
+
+ def __build_sat(self):
+ # Assign sector numbers in the order: workbook stream, additional
+ # MSAT sectors, SAT sectors, directory stream; then pack the SAT and
+ # both MSAT parts that describe this layout.
+ # Reads self.book_stream_len and self.dir_stream; appends to the
+ # *_sect lists and sets packed_SAT, packed_MSAT_1st, packed_MSAT_2nd.
+ # Build SAT
+ # ">> 9" counts whole 512-byte sectors; any remainder is dropped --
+ # presumably both streams are already padded to a sector multiple;
+ # confirm.
+ book_sect_count = self.book_stream_len >> 9
+ dir_sect_count = len(self.dir_stream) >> 9
+
+ total_sect_count = book_sect_count + dir_sect_count
+ SAT_sect_count = 0
+ MSAT_sect_count = 0
+ SAT_sect_count_limit = 109
+ # Grow the SAT until it can describe every sector, counting the SAT
+ # sectors themselves (128 entries per SAT sector). Once more than
+ # SAT_sect_count_limit SAT sectors are needed, one MSAT sector is
+ # added and the limit is raised by 127.
+ # NOTE(review): nesting is inferred because indentation is lost in
+ # this view; the `if` is presumably inside the while loop.
+ while total_sect_count > 128*SAT_sect_count or SAT_sect_count > SAT_sect_count_limit:
+ SAT_sect_count += 1
+ total_sect_count += 1
+ if SAT_sect_count > SAT_sect_count_limit:
+ MSAT_sect_count += 1
+ total_sect_count += 1
+ SAT_sect_count_limit += 127
+
+
+ SAT = [self.SID_FREE_SECTOR]*128*SAT_sect_count
+
+ # Workbook stream: each sector points to the next one, the last one
+ # is marked as end of chain.
+ sect = 0
+ while sect < book_sect_count - 1:
+ self.book_stream_sect.append(sect)
+ SAT[sect] = sect + 1
+ sect += 1
+ self.book_stream_sect.append(sect)
+ SAT[sect] = self.SID_END_OF_CHAIN
+ sect += 1
+
+ # Additional MSAT sectors.
+ while sect < book_sect_count + MSAT_sect_count:
+ self.MSAT_sect_2nd.append(sect)
+ SAT[sect] = self.SID_USED_BY_MSAT
+ sect += 1
+
+ # Sectors holding the SAT itself.
+ while sect < book_sect_count + MSAT_sect_count + SAT_sect_count:
+ self.SAT_sect.append(sect)
+ SAT[sect] = self.SID_USED_BY_SAT
+ sect += 1
+
+ # Directory stream, chained like the workbook stream.
+ while sect < book_sect_count + MSAT_sect_count + SAT_sect_count + dir_sect_count - 1:
+ self.dir_stream_sect.append(sect)
+ SAT[sect] = sect + 1
+ sect += 1
+ self.dir_stream_sect.append(sect)
+ SAT[sect] = self.SID_END_OF_CHAIN
+ sect += 1
+
+ self.packed_SAT = struct.pack('<%dl' % (SAT_sect_count*128), *SAT)
+
+ # First MSAT part: up to 109 SAT sector ids, unused slots stay free.
+ MSAT_1st = [self.SID_FREE_SECTOR]*109
+ for i, SAT_sect_num in zip(range(0, 109), self.SAT_sect):
+ MSAT_1st[i] = SAT_sect_num
+ self.packed_MSAT_1st = struct.pack('<109l', *MSAT_1st)
+
+ # Second MSAT part: 128 slots per additional MSAT sector; the very
+ # last slot is preset to end-of-chain.
+ MSAT_2nd = [self.SID_FREE_SECTOR]*128*MSAT_sect_count
+ if MSAT_sect_count > 0:
+ MSAT_2nd[- 1] = self.SID_END_OF_CHAIN
+
+ # Fill with the SAT sector ids from index 109 onwards. Every 128th
+ # slot is a link to the next MSAT sector instead of a SAT sector id
+ # and is only written when such a next sector exists.
+ i = 109
+ msat_sect = 0
+ sid_num = 0
+ while i < SAT_sect_count:
+ if (sid_num + 1) % 128 == 0:
+ #print 'link: ',
+ msat_sect += 1
+ if msat_sect < len(self.MSAT_sect_2nd):
+ MSAT_2nd[sid_num] = self.MSAT_sect_2nd[msat_sect]
+ else:
+ #print 'sid: ',
+ MSAT_2nd[sid_num] = self.SAT_sect[i]
+ i += 1
+ #print sid_num, MSAT_2nd[sid_num]
+ sid_num += 1
+
+ self.packed_MSAT_2nd = struct.pack('<%dl' % (MSAT_sect_count*128), *MSAT_2nd)
+
+ #print vars()
+ #print zip(range(0, sect), SAT)
+ #print self.book_stream_sect
+ #print self.MSAT_sect_2nd
+ #print MSAT_2nd
+ #print self.SAT_sect
+ #print self.dir_stream_sect
+
+
+ def __build_header(self):
+ doc_magic = '\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1'
+ file_uid = '\x00'*16
+ rev_num = '\x3E\x00'
+ ver_num = '\x03\x00'
+ byte_order = '\xFE\xFF'
+ log_sect_size = struct.pack('"
+ge_pattern = r">="
+le_pattern = r"<="
+
+# (pattern, token type) pairs for every token the lexer recognises with a
+# regular expression. The order matters: the patterns are joined into one
+# alternation below and alternatives are tried in tuple order.
+pattern_type_tuples = (
+ (flt_const_pattern, ExcelFormulaParser.NUM_CONST),
+ (int_const_pattern, ExcelFormulaParser.INT_CONST),
+ (str_const_pattern, ExcelFormulaParser.STR_CONST),
+# (range2d_pattern , ExcelFormulaParser.RANGE2D),
+ (ref2d_r1c1_pattern, ExcelFormulaParser.REF2D_R1C1),
+ (ref2d_pattern , ExcelFormulaParser.REF2D),
+ (true_pattern , ExcelFormulaParser.TRUE_CONST),
+ (false_pattern , ExcelFormulaParser.FALSE_CONST),
+ (if_pattern , ExcelFormulaParser.FUNC_IF),
+ (choose_pattern , ExcelFormulaParser.FUNC_CHOOSE),
+ (name_pattern , ExcelFormulaParser.NAME),
+ (quotename_pattern, ExcelFormulaParser.QUOTENAME),
+ (ne_pattern, ExcelFormulaParser.NE),
+ (ge_pattern, ExcelFormulaParser.GE),
+ (le_pattern, ExcelFormulaParser.LE),
+)
+
+# One combined regex: each pattern is wrapped in its own capturing group,
+# so the group number of a match identifies the entry that matched.
+# NOTE(review): the lookup through m.lastindex assumes the individual
+# patterns (defined above this view) contain no capturing groups of their
+# own that would shift the group numbering -- confirm.
+_re = recompile(
+ '(' + ')|('.join([i[0] for i in pattern_type_tuples]) + ')',
+ VERBOSE+LOCALE+IGNORECASE)
+
+_toktype = [None] + [i[1] for i in pattern_type_tuples]
+# need dummy at start because re.MatchObject.lastindex counts from 1
+
+# Token types for single-character tokens; consulted by Lexer.nextToken
+# only when the combined regex does not match at the current position.
+single_char_lookup = {
+ '=': ExcelFormulaParser.EQ,
+ '<': ExcelFormulaParser.LT,
+ '>': ExcelFormulaParser.GT,
+ '+': ExcelFormulaParser.ADD,
+ '-': ExcelFormulaParser.SUB,
+ '*': ExcelFormulaParser.MUL,
+ '/': ExcelFormulaParser.DIV,
+ ':': ExcelFormulaParser.COLON,
+ ';': ExcelFormulaParser.SEMICOLON,
+ ',': ExcelFormulaParser.COMMA,
+ '(': ExcelFormulaParser.LP,
+ ')': ExcelFormulaParser.RP,
+ '&': ExcelFormulaParser.CONCAT,
+ '%': ExcelFormulaParser.PERCENT,
+ '^': ExcelFormulaParser.POWER,
+ '!': ExcelFormulaParser.BANG,
+ }
+
+class Lexer(TokenStream):
+ # Hand-written tokenizer for formula text. It keeps a copy of the text
+ # and a current offset (_pos); nextToken() returns one Tok per call and
+ # a Tok of type EOF once the text is exhausted.
+ def __init__(self, text):
+ self._text = text[:]
+ self._pos = 0
+ self._line = 0
+
+ def isEOF(self):
+ # True once the offset has reached or passed the end of the text.
+ return len(self._text) <= self._pos
+
+ def curr_ch(self):
+ # Character at the current offset; raises IndexError at end of text,
+ # so callers check isEOF() first.
+ return self._text[self._pos]
+
+ def next_ch(self, n = 1):
+ # Advance the offset by n characters.
+ self._pos += n
+
+ def is_whitespace(self):
+ return self.curr_ch() in " \t\n\r\f\v"
+
+ def match_pattern(self):
+ # Try the combined regex at the current offset. On success the offset
+ # moves past the match and a Tok is returned whose type is looked up
+ # by the number of the group that matched and whose col is 1-based.
+ # Returns None when nothing matches.
+ m = _re.match(self._text, self._pos)
+ if not m:
+ return None
+ self._pos = m.end(0)
+ return Tok(type = _toktype[m.lastindex], text = m.group(0), col = m.start(0) + 1)
+
+ def nextToken(self):
+ # skip whitespace
+ while not self.isEOF() and self.is_whitespace():
+ self.next_ch()
+ if self.isEOF():
+ return Tok(type = EOF)
+ # first, try to match token with 2 or more chars
+ t = self.match_pattern()
+ if t:
+ return t
+ # second, we want 1-char tokens
+ te = self.curr_ch()
+ try:
+ ty = single_char_lookup[te]
+ except KeyError:
+ # NOTE(review): the column reported here is the 0-based offset,
+ # whereas the col of returned tokens is 1-based -- confirm
+ # whether that difference is intended.
+ raise TokenStreamException(
+ "Unexpected char %r in column %u." % (self.curr_ch(), self._pos))
+ # After advancing, _pos equals the 1-based column of the character
+ # just consumed.
+ self.next_ch()
+ return Tok(type=ty, text=te, col=self._pos)
+
+if __name__ == '__main__':
+ # Manual smoke test: tokenise a sample string that contains one example
+ # of several token kinds and print every token; a TokenStreamException
+ # is printed instead of propagating.
+ try:
+ for t in Lexer(""" 1.23 456 "abcd" R2C2 a1 iv65536 true false if choose a_name 'qname' <> >= <= """):
+ print t
+ except TokenStreamException, e:
+ print "error:", e
diff --git a/tablib/packages/xlwt/ExcelFormulaParser.py b/tablib/packages/xlwt/ExcelFormulaParser.py
new file mode 100644
index 0000000..000a8a0
--- /dev/null
+++ b/tablib/packages/xlwt/ExcelFormulaParser.py
@@ -0,0 +1,677 @@
+### $ANTLR 2.7.7 (20060930): "xlwt/excel-formula.g" -> "ExcelFormulaParser.py"$
+### import antlr and other modules ..
+import sys
+import antlr
+
+# Compatibility shim from the generated header: defines False / True when
+# the interpreter's version string compares below '2.2.1' / '2.3'.
+# The comparison is a plain string comparison, not a numeric one.
+version = sys.version.split()[0]
+if version < '2.2.1':
+ False = 0
+if version < '2.3':
+ True = not False
+### header action >>>
+import struct
+import Utils
+from UnicodeUtils import upack1
+from ExcelMagic import *
+
+_RVAdelta = {"R": 0, "V": 0x20, "A": 0x40}
+_RVAdeltaRef = {"R": 0, "V": 0x20, "A": 0x40, "D": 0x20}
+_RVAdeltaArea = {"R": 0, "V": 0x20, "A": 0x40, "D": 0}
+
+
+class FormulaParseException(Exception):
+ """
+ An exception indicating that a Formula could not be successfully parsed.
+
+ Plain subclass of Exception; it adds no attributes or behaviour.
+ """
+### header action <<<
+### preamble action>>>
+
+### preamble action <<<
+
+### import antlr.Token
+from antlr import Token
+### >>>The Known Token Types <<<
+SKIP = antlr.SKIP
+INVALID_TYPE = antlr.INVALID_TYPE
+EOF_TYPE = antlr.EOF_TYPE
+EOF = antlr.EOF
+NULL_TREE_LOOKAHEAD = antlr.NULL_TREE_LOOKAHEAD
+MIN_USER_TYPE = antlr.MIN_USER_TYPE
+TRUE_CONST = 4
+FALSE_CONST = 5
+STR_CONST = 6
+NUM_CONST = 7
+INT_CONST = 8
+FUNC_IF = 9
+FUNC_CHOOSE = 10
+NAME = 11
+QUOTENAME = 12
+EQ = 13
+NE = 14
+GT = 15
+LT = 16
+GE = 17
+LE = 18
+ADD = 19
+SUB = 20
+MUL = 21
+DIV = 22
+POWER = 23
+PERCENT = 24
+LP = 25
+RP = 26
+LB = 27
+RB = 28
+COLON = 29
+COMMA = 30
+SEMICOLON = 31
+REF2D = 32
+REF2D_R1C1 = 33
+BANG = 34
+CONCAT = 35
+
+class Parser(antlr.LLkParser):
+ ### user action >>>
+ ### user action <<<
+
+ def __init__(self, *args, **kwargs):
+ # All arguments are forwarded unchanged to antlr.LLkParser.
+ antlr.LLkParser.__init__(self, *args, **kwargs)
+ self.tokenNames = _tokenNames
+ ### __init__ header action >>>
+ # rpn accumulates the packed formula bytes that the rule methods
+ # append while parsing.
+ self.rpn = ""
+ # Both lists start empty; the code that fills them is outside this
+ # method.
+ self.sheet_references = []
+ self.xcall_references = []
+ ### __init__ header action <<<
+
+ def formula(self):
+ # Start rule: parses one expression with arg_type "V" ("V" is one of
+ # the keys of the module-level _RVAdelta tables).
+
+ pass
+ self.expr("V")
+
+ def expr(self,
+ arg_type
+ ):
+ # Comparison level: prec0_expr ((EQ|NE|GT|LT|GE|LE) prec0_expr)*
+ # The operator byte is appended to self.rpn after both operands have
+ # been parsed, so the output is in postfix order.
+ # The range test below relies on the token types EQ..LE being
+ # numbered contiguously (13..18 in this module).
+ # self.LT(1) in the error branch is the parser's lookahead-token
+ # accessor (presumably inherited from antlr.LLkParser), not the LT
+ # token type.
+
+ pass
+ self.prec0_expr(arg_type)
+ while True:
+ if ((self.LA(1) >= EQ and self.LA(1) <= LE)):
+ pass
+ la1 = self.LA(1)
+ if False:
+ pass
+ elif la1 and la1 in [EQ]:
+ pass
+ self.match(EQ)
+ op = struct.pack('B', ptgEQ)
+ elif la1 and la1 in [NE]:
+ pass
+ self.match(NE)
+ op = struct.pack('B', ptgNE)
+ elif la1 and la1 in [GT]:
+ pass
+ self.match(GT)
+ op = struct.pack('B', ptgGT)
+ elif la1 and la1 in [LT]:
+ pass
+ self.match(LT)
+ op = struct.pack('B', ptgLT)
+ elif la1 and la1 in [GE]:
+ pass
+ self.match(GE)
+ op = struct.pack('B', ptgGE)
+ elif la1 and la1 in [LE]:
+ pass
+ self.match(LE)
+ op = struct.pack('B', ptgLE)
+ else:
+ raise antlr.NoViableAltException(self.LT(1), self.getFilename())
+
+ self.prec0_expr(arg_type)
+ self.rpn += op
+ else:
+ break
+
+
+ def prec0_expr(self,
+ arg_type
+ ):
+ # Concatenation level: prec1_expr (CONCAT prec1_expr)*
+ # The ptgConcat byte is appended after the right operand (postfix).
+
+ pass
+ self.prec1_expr(arg_type)
+ while True:
+ if (self.LA(1)==CONCAT):
+ pass
+ pass
+ self.match(CONCAT)
+ op = struct.pack('B', ptgConcat)
+ self.prec1_expr(arg_type)
+ self.rpn += op
+ else:
+ break
+
+
+ def prec1_expr(self,
+ arg_type
+ ):
+ # Additive level: prec2_expr ((ADD|SUB) prec2_expr)*
+ # The operator byte (ptgAdd / ptgSub) is appended after the right
+ # operand (postfix).
+
+ pass
+ self.prec2_expr(arg_type)
+ while True:
+ if (self.LA(1)==ADD or self.LA(1)==SUB):
+ pass
+ la1 = self.LA(1)
+ if False:
+ pass
+ elif la1 and la1 in [ADD]:
+ pass
+ self.match(ADD)
+ op = struct.pack('B', ptgAdd)
+ elif la1 and la1 in [SUB]:
+ pass
+ self.match(SUB)
+ op = struct.pack('B', ptgSub)
+ else:
+ raise antlr.NoViableAltException(self.LT(1), self.getFilename())
+
+ self.prec2_expr(arg_type)
+ self.rpn += op;
+ # print "**prec1_expr4 %s" % arg_type
+ else:
+ break
+
+
+ def prec2_expr(self,
+ arg_type
+ ):
+ # Multiplicative level: prec3_expr ((MUL|DIV) prec3_expr)*
+ # The operator byte (ptgMul / ptgDiv) is appended after the right
+ # operand (postfix).
+
+ pass
+ self.prec3_expr(arg_type)
+ while True:
+ if (self.LA(1)==MUL or self.LA(1)==DIV):
+ pass
+ la1 = self.LA(1)
+ if False:
+ pass
+ elif la1 and la1 in [MUL]:
+ pass
+ self.match(MUL)
+ op = struct.pack('B', ptgMul)
+ elif la1 and la1 in [DIV]:
+ pass
+ self.match(DIV)
+ op = struct.pack('B', ptgDiv)
+ else:
+ raise antlr.NoViableAltException(self.LT(1), self.getFilename())
+
+ self.prec3_expr(arg_type)
+ self.rpn += op
+ else:
+ break
+
+
+ def prec3_expr(self,
+ arg_type
+ ):
+ # Power level: prec4_expr (POWER prec4_expr)*
+ # Parsed as a loop, so a chain a^b^c is emitted left to right, with
+ # ptgPower appended after each right operand.
+
+ pass
+ self.prec4_expr(arg_type)
+ while True:
+ if (self.LA(1)==POWER):
+ pass
+ pass
+ self.match(POWER)
+ op = struct.pack('B', ptgPower)
+ self.prec4_expr(arg_type)
+ self.rpn += op
+ else:
+ break
+
+
+ def prec4_expr(self,
+ arg_type
+ ):
+ # Postfix percent level: prec5_expr followed by an optional PERCENT.
+ # Without PERCENT the next token must be one of the listed tokens
+ # that may follow an operand; anything else raises
+ # NoViableAltException.
+
+ pass
+ self.prec5_expr(arg_type)
+ la1 = self.LA(1)
+ if False:
+ pass
+ elif la1 and la1 in [PERCENT]:
+ pass
+ self.match(PERCENT)
+ self.rpn += struct.pack('B', ptgPercent)
+ elif la1 and la1 in [EOF,EQ,NE,GT,LT,GE,LE,ADD,SUB,MUL,DIV,POWER,RP,COMMA,SEMICOLON,CONCAT]:
+ pass
+ else:
+ raise antlr.NoViableAltException(self.LT(1), self.getFilename())
+
+
+ def prec5_expr(self,
+ arg_type
+ ):
+ # Unary level: either a primary, or SUB followed by a primary, in
+ # which case ptgUminus is appended after the operand.
+ # Any other lookahead token raises NoViableAltException.
+
+ la1 = self.LA(1)
+ if False:
+ pass
+ elif la1 and la1 in [TRUE_CONST,FALSE_CONST,STR_CONST,NUM_CONST,INT_CONST,FUNC_IF,FUNC_CHOOSE,NAME,QUOTENAME,LP,REF2D]:
+ pass
+ self.primary(arg_type)
+ elif la1 and la1 in [SUB]:
+ pass
+ self.match(SUB)
+ self.primary(arg_type)
+ self.rpn += struct.pack('B', ptgUminus)
+ else:
+ raise antlr.NoViableAltException(self.LT(1), self.getFilename())
+
+
+ def primary(self,
+ arg_type
+ ):
+
+ str_tok = None
+ int_tok = None
+ num_tok = None
+ ref2d_tok = None
+ ref2d1_tok = None
+ ref2d2_tok = None
+ ref3d_ref2d = None
+ ref3d_ref2d2 = None
+ name_tok = None
+ func_tok = None
+ la1 = self.LA(1)
+ if False:
+ pass
+ elif la1 and la1 in [TRUE_CONST]:
+ pass
+ self.match(TRUE_CONST)
+ self.rpn += struct.pack("2B", ptgBool, 1)
+ elif la1 and la1 in [FALSE_CONST]:
+ pass
+ self.match(FALSE_CONST)
+ self.rpn += struct.pack("2B", ptgBool, 0)
+ elif la1 and la1 in [STR_CONST]:
+ pass
+ str_tok = self.LT(1)
+ self.match(STR_CONST)
+ self.rpn += struct.pack("B", ptgStr) + upack1(str_tok.text[1:-1].replace("\"\"", "\""))
+ elif la1 and la1 in [NUM_CONST]:
+ pass
+ num_tok = self.LT(1)
+ self.match(NUM_CONST)
+ self.rpn += struct.pack(" max_argc or arg_count < min_argc:
+ raise Exception, "%d parameters for function: %s" % (arg_count, func_tok.text)
+ if xcall:
+ func_ptg = ptgFuncVarR + _RVAdelta[func_type]
+ self.rpn += struct.pack("<2BH", func_ptg, arg_count + 1, 255) # 255 is magic XCALL function
+ elif min_argc == max_argc:
+ func_ptg = ptgFuncR + _RVAdelta[func_type]
+ self.rpn += struct.pack("",
+ "EOF",
+ "<2>",
+ "NULL_TREE_LOOKAHEAD",
+ "TRUE_CONST",
+ "FALSE_CONST",
+ "STR_CONST",
+ "NUM_CONST",
+ "INT_CONST",
+ "FUNC_IF",
+ "FUNC_CHOOSE",
+ "NAME",
+ "QUOTENAME",
+ "EQ",
+ "NE",
+ "GT",
+ "LT",
+ "GE",
+ "LE",
+ "ADD",
+ "SUB",
+ "MUL",
+ "DIV",
+ "POWER",
+ "PERCENT",
+ "LP",
+ "RP",
+ "LB",
+ "RB",
+ "COLON",
+ "COMMA",
+ "SEMICOLON",
+ "REF2D",
+ "REF2D_R1C1",
+ "BANG",
+ "CONCAT"
+]
+
+
+### generate bit set
+def mk_tokenSet_0():
+ # Returns the raw words for the generated token set.
+ # 37681618946 has bits 1, 13-24, 26, 30, 31 and 35 set. If bit i stands
+ # for token type i (presumably, per antlr.BitSet -- confirm), that is
+ # EOF, EQ..PERCENT, RP, COMMA, SEMICOLON and CONCAT.
+ ### var1
+ data = [ 37681618946L, 0L]
+ return data
+_tokenSet_0 = antlr.BitSet(mk_tokenSet_0())
+
diff --git a/tablib/packages/xlwt/ExcelMagic.py b/tablib/packages/xlwt/ExcelMagic.py
new file mode 100644
index 0000000..a49ae1f
--- /dev/null
+++ b/tablib/packages/xlwt/ExcelMagic.py
@@ -0,0 +1,862 @@
+# -*- coding: ascii -*-
+"""
+lots of Excel Magic Numbers
+"""
+
+# Boundaries BIFF8+
+
+MAX_ROW = 65536
+MAX_COL = 256
+
+
+biff_records = {
+ 0x0000: "DIMENSIONS",
+ 0x0001: "BLANK",
+ 0x0002: "INTEGER",
+ 0x0003: "NUMBER",
+ 0x0004: "LABEL",
+ 0x0005: "BOOLERR",
+ 0x0006: "FORMULA",
+ 0x0007: "STRING",
+ 0x0008: "ROW",
+ 0x0009: "BOF",
+ 0x000A: "EOF",
+ 0x000B: "INDEX",
+ 0x000C: "CALCCOUNT",
+ 0x000D: "CALCMODE",
+ 0x000E: "PRECISION",
+ 0x000F: "REFMODE",
+ 0x0010: "DELTA",
+ 0x0011: "ITERATION",
+ 0x0012: "PROTECT",
+ 0x0013: "PASSWORD",
+ 0x0014: "HEADER",
+ 0x0015: "FOOTER",
+ 0x0016: "EXTERNCOUNT",
+ 0x0017: "EXTERNSHEET",
+ 0x0018: "NAME",
+ 0x0019: "WINDOWPROTECT",
+ 0x001A: "VERTICALPAGEBREAKS",
+ 0x001B: "HORIZONTALPAGEBREAKS",
+ 0x001C: "NOTE",
+ 0x001D: "SELECTION",
+ 0x001E: "FORMAT",
+ 0x001F: "FORMATCOUNT",
+ 0x0020: "COLUMNDEFAULT",
+ 0x0021: "ARRAY",
+ 0x0022: "1904",
+ 0x0023: "EXTERNNAME",
+ 0x0024: "COLWIDTH",
+ 0x0025: "DEFAULTROWHEIGHT",
+ 0x0026: "LEFTMARGIN",
+ 0x0027: "RIGHTMARGIN",
+ 0x0028: "TOPMARGIN",
+ 0x0029: "BOTTOMMARGIN",
+ 0x002A: "PRINTHEADERS",
+ 0x002B: "PRINTGRIDLINES",
+ 0x002F: "FILEPASS",
+ 0x0031: "FONT",
+ 0x0036: "TABLE",
+ 0x003C: "CONTINUE",
+ 0x003D: "WINDOW1",
+ 0x003E: "WINDOW2",
+ 0x0040: "BACKUP",
+ 0x0041: "PANE",
+ 0x0042: "CODEPAGE",
+ 0x0043: "XF",
+ 0x0044: "IXFE",
+ 0x0045: "EFONT",
+ 0x004D: "PLS",
+ 0x0050: "DCON",
+ 0x0051: "DCONREF",
+ 0x0053: "DCONNAME",
+ 0x0055: "DEFCOLWIDTH",
+ 0x0056: "BUILTINFMTCNT",
+ 0x0059: "XCT",
+ 0x005A: "CRN",
+ 0x005B: "FILESHARING",
+ 0x005C: "WRITEACCESS",
+ 0x005D: "OBJ",
+ 0x005E: "UNCALCED",
+ 0x005F: "SAFERECALC",
+ 0x0060: "TEMPLATE",
+ 0x0063: "OBJPROTECT",
+ 0x007D: "COLINFO",
+ 0x007E: "RK",
+ 0x007F: "IMDATA",
+ 0x0080: "GUTS",
+ 0x0081: "WSBOOL",
+ 0x0082: "GRIDSET",
+ 0x0083: "HCENTER",
+ 0x0084: "VCENTER",
+ 0x0085: "BOUNDSHEET",
+ 0x0086: "WRITEPROT",
+ 0x0087: "ADDIN",
+ 0x0088: "EDG",
+ 0x0089: "PUB",
+ 0x008C: "COUNTRY",
+ 0x008D: "HIDEOBJ",
+ 0x008E: "BUNDLESOFFSET",
+ 0x008F: "BUNDLEHEADER",
+ 0x0090: "SORT",
+ 0x0091: "SUB",
+ 0x0092: "PALETTE",
+ 0x0093: "STYLE",
+ 0x0094: "LHRECORD",
+ 0x0095: "LHNGRAPH",
+ 0x0096: "SOUND",
+ 0x0098: "LPR",
+ 0x0099: "STANDARDWIDTH",
+ 0x009A: "FNGROUPNAME",
+ 0x009B: "FILTERMODE",
+ 0x009C: "FNGROUPCOUNT",
+ 0x009D: "AUTOFILTERINFO",
+ 0x009E: "AUTOFILTER",
+ 0x00A0: "SCL",
+ 0x00A1: "SETUP",
+ 0x00A9: "COORDLIST",
+ 0x00AB: "GCW",
+ 0x00AE: "SCENMAN",
+ 0x00AF: "SCENARIO",
+ 0x00B0: "SXVIEW",
+ 0x00B1: "SXVD",
+ 0x00B2: "SXVI",
+ 0x00B4: "SXIVD",
+ 0x00B5: "SXLI",
+ 0x00B6: "SXPI",
+ 0x00B8: "DOCROUTE",
+ 0x00B9: "RECIPNAME",
+ 0x00BC: "SHRFMLA",
+ 0x00BD: "MULRK",
+ 0x00BE: "MULBLANK",
+ 0x00C1: "MMS",
+ 0x00C2: "ADDMENU",
+ 0x00C3: "DELMENU",
+ 0x00C5: "SXDI",
+ 0x00C6: "SXDB",
+ 0x00C7: "SXFIELD",
+ 0x00C8: "SXINDEXLIST",
+ 0x00C9: "SXDOUBLE",
+ 0x00CD: "SXSTRING",
+ 0x00CE: "SXDATETIME",
+ 0x00D0: "SXTBL",
+ 0x00D1: "SXTBRGITEM",
+ 0x00D2: "SXTBPG",
+ 0x00D3: "OBPROJ",
+ 0x00D5: "SXIDSTM",
+ 0x00D6: "RSTRING",
+ 0x00D7: "DBCELL",
+ 0x00DA: "BOOKBOOL",
+ 0x00DC: "SXEXT|PARAMQRY",
+ 0x00DD: "SCENPROTECT",
+ 0x00DE: "OLESIZE",
+ 0x00DF: "UDDESC",
+ 0x00E0: "XF",
+ 0x00E1: "INTERFACEHDR",
+ 0x00E2: "INTERFACEEND",
+ 0x00E3: "SXVS",
+ 0x00E5: "MERGEDCELLS",
+ 0x00E9: "BITMAP",
+ 0x00EB: "MSODRAWINGGROUP",
+ 0x00EC: "MSODRAWING",
+ 0x00ED: "MSODRAWINGSELECTION",
+ 0x00F0: "SXRULE",
+ 0x00F1: "SXEX",
+ 0x00F2: "SXFILT",
+ 0x00F6: "SXNAME",
+ 0x00F7: "SXSELECT",
+ 0x00F8: "SXPAIR",
+ 0x00F9: "SXFMLA",
+ 0x00FB: "SXFORMAT",
+ 0x00FC: "SST",
+ 0x00FD: "LABELSST",
+ 0x00FF: "EXTSST",
+ 0x0100: "SXVDEX",
+ 0x0103: "SXFORMULA",
+ 0x0122: "SXDBEX",
+ 0x0137: "CHTRINSERT",
+ 0x0138: "CHTRINFO",
+ 0x013B: "CHTRCELLCONTENT",
+ 0x013D: "TABID",
+ 0x0140: "CHTRMOVERANGE",
+ 0x014D: "CHTRINSERTTAB",
+ 0x015F: "LABELRANGES",
+ 0x0160: "USESELFS",
+ 0x0161: "DSF",
+ 0x0162: "XL5MODIFY",
+ 0x0196: "CHTRHEADER",
+ 0x01A9: "USERBVIEW",
+ 0x01AA: "USERSVIEWBEGIN",
+ 0x01AB: "USERSVIEWEND",
+ 0x01AD: "QSI",
+ 0x01AE: "SUPBOOK",
+ 0x01AF: "PROT4REV",
+ 0x01B0: "CONDFMT",
+ 0x01B1: "CF",
+ 0x01B2: "DVAL",
+ 0x01B5: "DCONBIN",
+ 0x01B6: "TXO",
+ 0x01B7: "REFRESHALL",
+ 0x01B8: "HLINK",
+ 0x01BA: "CODENAME",
+ 0x01BB: "SXFDBTYPE",
+ 0x01BC: "PROT4REVPASS",
+ 0x01BE: "DV",
+ 0x01C0: "XL9FILE",
+ 0x01C1: "RECALCID",
+ 0x0200: "DIMENSIONS",
+ 0x0201: "BLANK",
+ 0x0203: "NUMBER",
+ 0x0204: "LABEL",
+ 0x0205: "BOOLERR",
+ 0x0206: "FORMULA",
+ 0x0207: "STRING",
+ 0x0208: "ROW",
+ 0x0209: "BOF",
+ 0x020B: "INDEX",
+ 0x0218: "NAME",
+ 0x0221: "ARRAY",
+ 0x0223: "EXTERNNAME",
+ 0x0225: "DEFAULTROWHEIGHT",
+ 0x0231: "FONT",
+ 0x0236: "TABLE",
+ 0x023E: "WINDOW2",
+ 0x0243: "XF",
+ 0x027E: "RK",
+ 0x0293: "STYLE",
+ 0x0406: "FORMULA",
+ 0x0409: "BOF",
+ 0x041E: "FORMAT",
+ 0x0443: "XF",
+ 0x04BC: "SHRFMLA",
+ 0x0800: "SCREENTIP",
+ 0x0803: "WEBQRYSETTINGS",
+ 0x0804: "WEBQRYTABLES",
+ 0x0809: "BOF",
+ 0x0862: "SHEETLAYOUT",
+ 0x0867: "SHEETPROTECTION",
+ 0x1001: "UNITS",
+ 0x1002: "ChartChart",
+ 0x1003: "ChartSeries",
+ 0x1006: "ChartDataformat",
+ 0x1007: "ChartLineformat",
+ 0x1009: "ChartMarkerformat",
+ 0x100A: "ChartAreaformat",
+ 0x100B: "ChartPieformat",
+ 0x100C: "ChartAttachedlabel",
+ 0x100D: "ChartSeriestext",
+ 0x1014: "ChartChartformat",
+ 0x1015: "ChartLegend",
+ 0x1016: "ChartSerieslist",
+ 0x1017: "ChartBar",
+ 0x1018: "ChartLine",
+ 0x1019: "ChartPie",
+ 0x101A: "ChartArea",
+ 0x101B: "ChartScatter",
+ 0x101C: "ChartChartline",
+ 0x101D: "ChartAxis",
+ 0x101E: "ChartTick",
+ 0x101F: "ChartValuerange",
+ 0x1020: "ChartCatserrange",
+ 0x1021: "ChartAxislineformat",
+ 0x1022: "ChartFormatlink",
+ 0x1024: "ChartDefaulttext",
+ 0x1025: "ChartText",
+ 0x1026: "ChartFontx",
+ 0x1027: "ChartObjectLink",
+ 0x1032: "ChartFrame",
+ 0x1033: "BEGIN",
+ 0x1034: "END",
+ 0x1035: "ChartPlotarea",
+ 0x103A: "Chart3D",
+ 0x103C: "ChartPicf",
+ 0x103D: "ChartDropbar",
+ 0x103E: "ChartRadar",
+ 0x103F: "ChartSurface",
+ 0x1040: "ChartRadararea",
+ 0x1041: "ChartAxisparent",
+ 0x1043: "ChartLegendxn",
+ 0x1044: "ChartShtprops",
+ 0x1045: "ChartSertocrt",
+ 0x1046: "ChartAxesused",
+ 0x1048: "ChartSbaseref",
+ 0x104A: "ChartSerparent",
+ 0x104B: "ChartSerauxtrend",
+ 0x104E: "ChartIfmt",
+ 0x104F: "ChartPos",
+ 0x1050: "ChartAlruns",
+ 0x1051: "ChartAI",
+ 0x105B: "ChartSerauxerrbar",
+ 0x105D: "ChartSerfmt",
+ 0x105F: "Chart3DDataFormat",
+ 0x1060: "ChartFbi",
+ 0x1061: "ChartBoppop",
+ 0x1062: "ChartAxcext",
+ 0x1063: "ChartDat",
+ 0x1064: "ChartPlotgrowth",
+ 0x1065: "ChartSiindex",
+ 0x1066: "ChartGelframe",
+ 0x1067: "ChartBoppcustom",
+ 0xFFFF: ""
+}
+
+
+all_funcs_by_name = {
+ # Includes Analysis ToolPak (aka ATP, add-in, or xcall) functions,
+ # which are distinguished by a negative opcode (-1).
+ # name: (opcode, min # args, max # args, func return type, func arg types)
+ # A trailing '+' in func arg types means the preceding type repeats.
+ 'ABS' : ( 24, 1, 1, 'V', 'V'),
+ 'ACCRINT' : ( -1, 6, 7, 'V', 'VVVVVVV'),
+ 'ACCRINTM' : ( -1, 3, 5, 'V', 'VVVVV'),
+ 'ACOS' : ( 99, 1, 1, 'V', 'V'),
+ 'ACOSH' : (233, 1, 1, 'V', 'V'),
+ 'ADDRESS' : (219, 2, 5, 'V', 'VVVVV'),
+ 'AMORDEGRC' : ( -1, 7, 7, 'V', 'VVVVVVV'),
+ 'AMORLINC' : ( -1, 7, 7, 'V', 'VVVVVVV'),
+ 'AND' : ( 36, 1, 30, 'V', 'D+'),
+ 'AREAS' : ( 75, 1, 1, 'V', 'R'),
+ 'ASC' : (214, 1, 1, 'V', 'V'),
+ 'ASIN' : ( 98, 1, 1, 'V', 'V'),
+ 'ASINH' : (232, 1, 1, 'V', 'V'),
+ 'ATAN' : ( 18, 1, 1, 'V', 'V'),
+ 'ATAN2' : ( 97, 2, 2, 'V', 'VV'),
+ 'ATANH' : (234, 1, 1, 'V', 'V'),
+ 'AVEDEV' : (269, 1, 30, 'V', 'D+'),
+ 'AVERAGE' : ( 5, 1, 30, 'V', 'D+'),
+ 'AVERAGEA' : (361, 1, 30, 'V', 'D+'),
+ 'BAHTTEXT' : (368, 1, 1, 'V', 'V'),
+ 'BESSELI' : ( -1, 2, 2, 'V', 'VV'),
+ 'BESSELJ' : ( -1, 2, 2, 'V', 'VV'),
+ 'BESSELK' : ( -1, 2, 2, 'V', 'VV'),
+ 'BESSELY' : ( -1, 2, 2, 'V', 'VV'),
+ 'BETADIST' : (270, 3, 5, 'V', 'VVVVV'),
+ 'BETAINV' : (272, 3, 5, 'V', 'VVVVV'),
+ 'BIN2DEC' : ( -1, 1, 1, 'V', 'V'),
+ 'BIN2HEX' : ( -1, 1, 2, 'V', 'VV'),
+ 'BIN2OCT' : ( -1, 1, 2, 'V', 'VV'),
+ 'BINOMDIST' : (273, 4, 4, 'V', 'VVVV'),
+ 'CEILING' : (288, 2, 2, 'V', 'VV'),
+ 'CELL' : (125, 1, 2, 'V', 'VR'),
+ 'CHAR' : (111, 1, 1, 'V', 'V'),
+ 'CHIDIST' : (274, 2, 2, 'V', 'VV'),
+ 'CHIINV' : (275, 2, 2, 'V', 'VV'),
+ 'CHITEST' : (306, 2, 2, 'V', 'AA'),
+ 'CHOOSE' : (100, 2, 30, 'R', 'VR+'),
+ 'CLEAN' : (162, 1, 1, 'V', 'V'),
+ 'CODE' : (121, 1, 1, 'V', 'V'),
+ 'COLUMN' : ( 9, 0, 1, 'V', 'R'),
+ 'COLUMNS' : ( 77, 1, 1, 'V', 'R'),
+ 'COMBIN' : (276, 2, 2, 'V', 'VV'),
+ 'COMPLEX' : ( -1, 2, 3, 'V', 'VVV'),
+ 'CONCATENATE' : (336, 1, 30, 'V', 'V+'),
+ 'CONFIDENCE' : (277, 3, 3, 'V', 'VVV'),
+ 'CONVERT' : ( -1, 3, 3, 'V', 'VVV'),
+ 'CORREL' : (307, 2, 2, 'V', 'AA'),
+ 'COS' : ( 16, 1, 1, 'V', 'V'),
+ 'COSH' : (230, 1, 1, 'V', 'V'),
+ 'COUNT' : ( 0, 1, 30, 'V', 'D+'),
+ 'COUNTA' : (169, 1, 30, 'V', 'D+'),
+ 'COUNTBLANK' : (347, 1, 1, 'V', 'R'),
+ 'COUNTIF' : (346, 2, 2, 'V', 'RV'),
+ 'COUPDAYBS' : ( -1, 3, 5, 'V', 'VVVVV'),
+ 'COUPDAYS' : ( -1, 3, 5, 'V', 'VVVVV'),
+ 'COUPDAYSNC' : ( -1, 3, 5, 'V', 'VVVVV'),
+ 'COUPNCD' : ( -1, 3, 5, 'V', 'VVVVV'),
+ 'COUPNUM' : ( -1, 3, 5, 'V', 'VVVVV'),
+ 'COUPPCD' : ( -1, 3, 5, 'V', 'VVVVV'),
+ 'COVAR' : (308, 2, 2, 'V', 'AA'),
+ 'CRITBINOM' : (278, 3, 3, 'V', 'VVV'),
+ 'CUMIPMT' : ( -1, 6, 6, 'V', 'VVVVVV'),
+ 'CUMPRINC' : ( -1, 6, 6, 'V', 'VVVVVV'),
+ 'DATE' : ( 65, 3, 3, 'V', 'VVV'),
+ 'DATEDIF' : (351, 3, 3, 'V', 'VVV'),
+ 'DATEVALUE' : (140, 1, 1, 'V', 'V'),
+ 'DAVERAGE' : ( 42, 3, 3, 'V', 'RRR'),
+ 'DAY' : ( 67, 1, 1, 'V', 'V'),
+ 'DAYS360' : (220, 2, 3, 'V', 'VVV'),
+ 'DB' : (247, 4, 5, 'V', 'VVVVV'),
+ 'DBCS' : (215, 1, 1, 'V', 'V'),
+ 'DCOUNT' : ( 40, 3, 3, 'V', 'RRR'),
+ 'DCOUNTA' : (199, 3, 3, 'V', 'RRR'),
+ 'DDB' : (144, 4, 5, 'V', 'VVVVV'),
+ 'DEC2BIN' : ( -1, 1, 2, 'V', 'VV'),
+ 'DEC2HEX' : ( -1, 1, 2, 'V', 'VV'),
+ 'DEC2OCT' : ( -1, 1, 2, 'V', 'VV'),
+ 'DEGREES' : (343, 1, 1, 'V', 'V'),
+ 'DELTA' : ( -1, 1, 2, 'V', 'VV'),
+ 'DEVSQ' : (318, 1, 30, 'V', 'D+'),
+ 'DGET' : (235, 3, 3, 'V', 'RRR'),
+ 'DISC' : ( -1, 4, 5, 'V', 'VVVVV'),
+ 'DMAX' : ( 44, 3, 3, 'V', 'RRR'),
+ 'DMIN' : ( 43, 3, 3, 'V', 'RRR'),
+ 'DOLLAR' : ( 13, 1, 2, 'V', 'VV'),
+ 'DOLLARDE' : ( -1, 2, 2, 'V', 'VV'),
+ 'DOLLARFR' : ( -1, 2, 2, 'V', 'VV'),
+ 'DPRODUCT' : (189, 3, 3, 'V', 'RRR'),
+ 'DSTDEV' : ( 45, 3, 3, 'V', 'RRR'),
+ 'DSTDEVP' : (195, 3, 3, 'V', 'RRR'),
+ 'DSUM' : ( 41, 3, 3, 'V', 'RRR'),
+ 'DURATION' : ( -1, 5, 6, 'V', 'VVVVVV'),
+ 'DVAR' : ( 47, 3, 3, 'V', 'RRR'),
+ 'DVARP' : (196, 3, 3, 'V', 'RRR'),
+ 'EDATE' : ( -1, 2, 2, 'V', 'VV'),
+ 'EFFECT' : ( -1, 2, 2, 'V', 'VV'),
+ 'EOMONTH' : ( -1, 1, 2, 'V', 'VV'),
+ 'ERF' : ( -1, 1, 2, 'V', 'VV'),
+ 'ERFC' : ( -1, 1, 1, 'V', 'V'),
+ 'ERROR.TYPE' : (261, 1, 1, 'V', 'V'),
+ 'EVEN' : (279, 1, 1, 'V', 'V'),
+ 'EXACT' : (117, 2, 2, 'V', 'VV'),
+ 'EXP' : ( 21, 1, 1, 'V', 'V'),
+ 'EXPONDIST' : (280, 3, 3, 'V', 'VVV'),
+ 'FACT' : (184, 1, 1, 'V', 'V'),
+ 'FACTDOUBLE' : ( -1, 1, 1, 'V', 'V'),
+ 'FALSE' : ( 35, 0, 0, 'V', '-'),
+ 'FDIST' : (281, 3, 3, 'V', 'VVV'),
+ 'FIND' : (124, 2, 3, 'V', 'VVV'),
+ 'FINDB' : (205, 2, 3, 'V', 'VVV'),
+ 'FINV' : (282, 3, 3, 'V', 'VVV'),
+ 'FISHER' : (283, 1, 1, 'V', 'V'),
+ 'FISHERINV' : (284, 1, 1, 'V', 'V'),
+ 'FIXED' : ( 14, 2, 3, 'V', 'VVV'),
+ 'FLOOR' : (285, 2, 2, 'V', 'VV'),
+ 'FORECAST' : (309, 3, 3, 'V', 'VAA'),
+ 'FREQUENCY' : (252, 2, 2, 'A', 'RR'),
+ 'FTEST' : (310, 2, 2, 'V', 'AA'),
+ 'FV' : ( 57, 3, 5, 'V', 'VVVVV'),
+ 'FVSCHEDULE' : ( -1, 2, 2, 'V', 'VA'),
+ 'GAMMADIST' : (286, 4, 4, 'V', 'VVVV'),
+ 'GAMMAINV' : (287, 3, 3, 'V', 'VVV'),
+ 'GAMMALN' : (271, 1, 1, 'V', 'V'),
+ 'GCD' : ( -1, 1, 29, 'V', 'V+'),
+ 'GEOMEAN' : (319, 1, 30, 'V', 'D+'),
+ 'GESTEP' : ( -1, 1, 2, 'V', 'VV'),
+ 'GETPIVOTDATA': (358, 2, 30, 'A', 'VAV+'),
+ 'GROWTH' : ( 52, 1, 4, 'A', 'RRRV'),
+ 'HARMEAN' : (320, 1, 30, 'V', 'D+'),
+ 'HEX2BIN' : ( -1, 1, 2, 'V', 'VV'),
+ 'HEX2DEC' : ( -1, 1, 1, 'V', 'V'),
+ 'HEX2OCT' : ( -1, 1, 2, 'V', 'VV'),
+ 'HLOOKUP' : (101, 3, 4, 'V', 'VRRV'),
+ 'HOUR' : ( 71, 1, 1, 'V', 'V'),
+ 'HYPERLINK' : (359, 1, 2, 'V', 'VV'),
+ 'HYPGEOMDIST' : (289, 4, 4, 'V', 'VVVV'),
+ 'IF' : ( 1, 2, 3, 'R', 'VRR'),
+ 'IMABS' : ( -1, 1, 1, 'V', 'V'),
+ 'IMAGINARY' : ( -1, 1, 1, 'V', 'V'),
+ 'IMARGUMENT' : ( -1, 1, 1, 'V', 'V'),
+ 'IMCONJUGATE' : ( -1, 1, 1, 'V', 'V'),
+ 'IMCOS' : ( -1, 1, 1, 'V', 'V'),
+ 'IMDIV' : ( -1, 2, 2, 'V', 'VV'),
+ 'IMEXP' : ( -1, 1, 1, 'V', 'V'),
+ 'IMLN' : ( -1, 1, 1, 'V', 'V'),
+ 'IMLOG10' : ( -1, 1, 1, 'V', 'V'),
+ 'IMLOG2' : ( -1, 1, 1, 'V', 'V'),
+ 'IMPOWER' : ( -1, 2, 2, 'V', 'VV'),
+ 'IMPRODUCT' : ( -1, 2, 2, 'V', 'VV'),
+ 'IMREAL' : ( -1, 1, 1, 'V', 'V'),
+ 'IMSIN' : ( -1, 1, 1, 'V', 'V'),
+ 'IMSQRT' : ( -1, 1, 1, 'V', 'V'),
+ 'IMSUB' : ( -1, 2, 2, 'V', 'VV'),
+ 'IMSUM' : ( -1, 1, 29, 'V', 'V+'),
+ 'INDEX' : ( 29, 2, 4, 'R', 'RVVV'),
+ 'INDIRECT' : (148, 1, 2, 'R', 'VV'),
+ 'INFO' : (244, 1, 1, 'V', 'V'),
+ 'INT' : ( 25, 1, 1, 'V', 'V'),
+ 'INTERCEPT' : (311, 2, 2, 'V', 'AA'),
+ 'INTRATE' : ( -1, 4, 5, 'V', 'VVVVV'),
+ 'IPMT' : (167, 4, 6, 'V', 'VVVVVV'),
+ 'IRR' : ( 62, 1, 2, 'V', 'RV'),
+ 'ISBLANK' : (129, 1, 1, 'V', 'V'),
+ 'ISERR' : (126, 1, 1, 'V', 'V'),
+ 'ISERROR' : ( 3, 1, 1, 'V', 'V'),
+ 'ISEVEN' : ( -1, 1, 1, 'V', 'V'),
+ 'ISLOGICAL' : (198, 1, 1, 'V', 'V'),
+ 'ISNA' : ( 2, 1, 1, 'V', 'V'),
+ 'ISNONTEXT' : (190, 1, 1, 'V', 'V'),
+ 'ISNUMBER' : (128, 1, 1, 'V', 'V'),
+ 'ISODD' : ( -1, 1, 1, 'V', 'V'),
+ 'ISPMT' : (350, 4, 4, 'V', 'VVVV'),
+ 'ISREF' : (105, 1, 1, 'V', 'R'),
+ 'ISTEXT' : (127, 1, 1, 'V', 'V'),
+ 'KURT' : (322, 1, 30, 'V', 'D+'),
+ 'LARGE' : (325, 2, 2, 'V', 'RV'),
+ 'LCM' : ( -1, 1, 29, 'V', 'V+'),
+ 'LEFT' : (115, 1, 2, 'V', 'VV'),
+ 'LEFTB' : (208, 1, 2, 'V', 'VV'),
+ 'LEN' : ( 32, 1, 1, 'V', 'V'),
+ 'LENB' : (211, 1, 1, 'V', 'V'),
+ 'LINEST' : ( 49, 1, 4, 'A', 'RRVV'),
+ 'LN' : ( 22, 1, 1, 'V', 'V'),
+ 'LOG' : (109, 1, 2, 'V', 'VV'),
+ 'LOG10' : ( 23, 1, 1, 'V', 'V'),
+ 'LOGEST' : ( 51, 1, 4, 'A', 'RRVV'),
+ 'LOGINV' : (291, 3, 3, 'V', 'VVV'),
+ 'LOGNORMDIST' : (290, 3, 3, 'V', 'VVV'),
+ 'LOOKUP' : ( 28, 2, 3, 'V', 'VRR'),
+ 'LOWER' : (112, 1, 1, 'V', 'V'),
+ 'MATCH' : ( 64, 2, 3, 'V', 'VRR'),
+ 'MAX' : ( 7, 1, 30, 'V', 'D+'),
+ 'MAXA' : (362, 1, 30, 'V', 'D+'),
+ 'MDETERM' : (163, 1, 1, 'V', 'A'),
+ 'MDURATION' : ( -1, 5, 6, 'V', 'VVVVVV'),
+ 'MEDIAN' : (227, 1, 30, 'V', 'D+'),
+ 'MID' : ( 31, 3, 3, 'V', 'VVV'),
+ 'MIDB' : (210, 3, 3, 'V', 'VVV'),
+ 'MIN' : ( 6, 1, 30, 'V', 'D+'),
+ 'MINA' : (363, 1, 30, 'V', 'D+'),
+ 'MINUTE' : ( 72, 1, 1, 'V', 'V'),
+ 'MINVERSE' : (164, 1, 1, 'A', 'A'),
+ 'MIRR' : ( 61, 3, 3, 'V', 'RVV'),
+ 'MMULT' : (165, 2, 2, 'A', 'AA'),
+ 'MOD' : ( 39, 2, 2, 'V', 'VV'),
+ 'MODE' : (330, 1, 30, 'V', 'A+'), ################ weird #################
+ 'MONTH' : ( 68, 1, 1, 'V', 'V'),
+ 'MROUND' : ( -1, 2, 2, 'V', 'VV'),
+ 'MULTINOMIAL' : ( -1, 1, 29, 'V', 'V+'),
+ 'N' : (131, 1, 1, 'V', 'R'),
+ 'NA' : ( 10, 0, 0, 'V', '-'),
+ 'NEGBINOMDIST': (292, 3, 3, 'V', 'VVV'),
+ 'NETWORKDAYS' : ( -1, 2, 3, 'V', 'VVR'),
+ 'NOMINAL' : ( -1, 2, 2, 'V', 'VV'),
+ 'NORMDIST' : (293, 4, 4, 'V', 'VVVV'),
+ 'NORMINV' : (295, 3, 3, 'V', 'VVV'),
+ 'NORMSDIST' : (294, 1, 1, 'V', 'V'),
+ 'NORMSINV' : (296, 1, 1, 'V', 'V'),
+ 'NOT' : ( 38, 1, 1, 'V', 'V'),
+ 'NOW' : ( 74, 0, 0, 'V', '-'),
+ 'NPER' : ( 58, 3, 5, 'V', 'VVVVV'),
+ 'NPV' : ( 11, 2, 30, 'V', 'VD+'),
+ 'OCT2BIN' : ( -1, 1, 2, 'V', 'VV'),
+ 'OCT2DEC' : ( -1, 1, 1, 'V', 'V'),
+ 'OCT2HEX' : ( -1, 1, 2, 'V', 'VV'),
+ 'ODD' : (298, 1, 1, 'V', 'V'),
+ 'ODDFPRICE' : ( -1, 9, 9, 'V', 'VVVVVVVVV'),
+ 'ODDFYIELD' : ( -1, 9, 9, 'V', 'VVVVVVVVV'),
+ 'ODDLPRICE' : ( -1, 8, 8, 'V', 'VVVVVVVV'),
+ 'ODDLYIELD' : ( -1, 8, 8, 'V', 'VVVVVVVV'),
+ 'OFFSET' : ( 78, 3, 5, 'R', 'RVVVV'),
+ 'OR' : ( 37, 1, 30, 'V', 'D+'),
+ 'PEARSON' : (312, 2, 2, 'V', 'AA'),
+ 'PERCENTILE' : (328, 2, 2, 'V', 'RV'),
+ 'PERCENTRANK' : (329, 2, 3, 'V', 'RVV'),
+ 'PERMUT' : (299, 2, 2, 'V', 'VV'),
+ 'PHONETIC' : (360, 1, 1, 'V', 'R'),
+ 'PI' : ( 19, 0, 0, 'V', '-'),
+ 'PMT' : ( 59, 3, 5, 'V', 'VVVVV'),
+ 'POISSON' : (300, 3, 3, 'V', 'VVV'),
+ 'POWER' : (337, 2, 2, 'V', 'VV'),
+ 'PPMT' : (168, 4, 6, 'V', 'VVVVVV'),
+ 'PRICE' : ( -1, 6, 7, 'V', 'VVVVVVV'),
+ 'PRICEDISC' : ( -1, 4, 5, 'V', 'VVVVV'),
+ 'PRICEMAT' : ( -1, 5, 6, 'V', 'VVVVVV'),
+ 'PROB' : (317, 3, 4, 'V', 'AAVV'),
+ 'PRODUCT' : (183, 1, 30, 'V', 'D+'),
+ 'PROPER' : (114, 1, 1, 'V', 'V'),
+ 'PV' : ( 56, 3, 5, 'V', 'VVVVV'),
+ 'QUARTILE' : (327, 2, 2, 'V', 'RV'),
+ 'QUOTIENT' : ( -1, 2, 2, 'V', 'VV'),
+ 'RADIANS' : (342, 1, 1, 'V', 'V'),
+ 'RAND' : ( 63, 0, 0, 'V', '-'),
+ 'RANDBETWEEN' : ( -1, 2, 2, 'V', 'VV'),
+ 'RANK' : (216, 2, 3, 'V', 'VRV'),
+ 'RATE' : ( 60, 3, 6, 'V', 'VVVVVV'),
+ 'RECEIVED' : ( -1, 4, 5, 'V', 'VVVVV'),
+ 'REPLACE' : (119, 4, 4, 'V', 'VVVV'),
+ 'REPLACEB' : (207, 4, 4, 'V', 'VVVV'),
+ 'REPT' : ( 30, 2, 2, 'V', 'VV'),
+ 'RIGHT' : (116, 1, 2, 'V', 'VV'),
+ 'RIGHTB' : (209, 1, 2, 'V', 'VV'),
+ 'ROMAN' : (354, 1, 2, 'V', 'VV'),
+ 'ROUND' : ( 27, 2, 2, 'V', 'VV'),
+ 'ROUNDDOWN' : (213, 2, 2, 'V', 'VV'),
+ 'ROUNDUP' : (212, 2, 2, 'V', 'VV'),
+ 'ROW' : ( 8, 0, 1, 'V', 'R'),
+ 'ROWS' : ( 76, 1, 1, 'V', 'R'),
+ 'RSQ' : (313, 2, 2, 'V', 'AA'),
+ 'RTD' : (379, 3, 30, 'A', 'VVV+'),
+ 'SEARCH' : ( 82, 2, 3, 'V', 'VVV'),
+ 'SEARCHB' : (206, 2, 3, 'V', 'VVV'),
+ 'SECOND' : ( 73, 1, 1, 'V', 'V'),
+ 'SERIESSUM' : ( -1, 4, 4, 'V', 'VVVA'),
+ 'SIGN' : ( 26, 1, 1, 'V', 'V'),
+ 'SIN' : ( 15, 1, 1, 'V', 'V'),
+ 'SINH' : (229, 1, 1, 'V', 'V'),
+ 'SKEW' : (323, 1, 30, 'V', 'D+'),
+ 'SLN' : (142, 3, 3, 'V', 'VVV'),
+ 'SLOPE' : (315, 2, 2, 'V', 'AA'),
+ 'SMALL' : (326, 2, 2, 'V', 'RV'),
+ 'SQRT' : ( 20, 1, 1, 'V', 'V'),
+ 'SQRTPI' : ( -1, 1, 1, 'V', 'V'),
+ 'STANDARDIZE' : (297, 3, 3, 'V', 'VVV'),
+ 'STDEV' : ( 12, 1, 30, 'V', 'D+'),
+ 'STDEVA' : (366, 1, 30, 'V', 'D+'),
+ 'STDEVP' : (193, 1, 30, 'V', 'D+'),
+ 'STDEVPA' : (364, 1, 30, 'V', 'D+'),
+ 'STEYX' : (314, 2, 2, 'V', 'AA'),
+ 'SUBSTITUTE' : (120, 3, 4, 'V', 'VVVV'),
+ 'SUBTOTAL' : (344, 2, 30, 'V', 'VR+'),
+ 'SUM' : ( 4, 1, 30, 'V', 'D+'),
+ 'SUMIF' : (345, 2, 3, 'V', 'RVR'),
+ 'SUMPRODUCT' : (228, 1, 30, 'V', 'A+'),
+ 'SUMSQ' : (321, 1, 30, 'V', 'D+'),
+ 'SUMX2MY2' : (304, 2, 2, 'V', 'AA'),
+ 'SUMX2PY2' : (305, 2, 2, 'V', 'AA'),
+ 'SUMXMY2' : (303, 2, 2, 'V', 'AA'),
+ 'SYD' : (143, 4, 4, 'V', 'VVVV'),
+ 'T' : (130, 1, 1, 'V', 'R'),
+ 'TAN' : ( 17, 1, 1, 'V', 'V'),
+ 'TANH' : (231, 1, 1, 'V', 'V'),
+ 'TBILLEQ' : ( -1, 3, 3, 'V', 'VVV'),
+ 'TBILLPRICE' : ( -1, 3, 3, 'V', 'VVV'),
+ 'TBILLYIELD' : ( -1, 3, 3, 'V', 'VVV'),
+ 'TDIST' : (301, 3, 3, 'V', 'VVV'),
+ 'TEXT' : ( 48, 2, 2, 'V', 'VV'),
+ 'TIME' : ( 66, 3, 3, 'V', 'VVV'),
+ 'TIMEVALUE' : (141, 1, 1, 'V', 'V'),
+ 'TINV' : (332, 2, 2, 'V', 'VV'),
+ 'TODAY' : (221, 0, 0, 'V', '-'),
+ 'TRANSPOSE' : ( 83, 1, 1, 'A', 'A'),
+ 'TREND' : ( 50, 1, 4, 'A', 'RRRV'),
+ 'TRIM' : (118, 1, 1, 'V', 'V'),
+ 'TRIMMEAN' : (331, 2, 2, 'V', 'RV'),
+ 'TRUE' : ( 34, 0, 0, 'V', '-'),
+ 'TRUNC' : (197, 1, 2, 'V', 'VV'),
+ 'TTEST' : (316, 4, 4, 'V', 'AAVV'),
+ 'TYPE' : ( 86, 1, 1, 'V', 'V'),
+ 'UPPER' : (113, 1, 1, 'V', 'V'),
+ 'USDOLLAR' : (204, 1, 2, 'V', 'VV'),
+ 'VALUE' : ( 33, 1, 1, 'V', 'V'),
+ 'VAR' : ( 46, 1, 30, 'V', 'D+'),
+ 'VARA' : (367, 1, 30, 'V', 'D+'),
+ 'VARP' : (194, 1, 30, 'V', 'D+'),
+ 'VARPA' : (365, 1, 30, 'V', 'D+'),
+ 'VDB' : (222, 5, 7, 'V', 'VVVVVVV'),
+ 'VLOOKUP' : (102, 3, 4, 'V', 'VRRV'),
+ 'WEEKDAY' : ( 70, 1, 2, 'V', 'VV'),
+ 'WEEKNUM' : ( -1, 1, 2, 'V', 'VV'),
+ 'WEIBULL' : (302, 4, 4, 'V', 'VVVV'),
+ 'WORKDAY' : ( -1, 2, 3, 'V', 'VVR'),
+ 'XIRR' : ( -1, 2, 3, 'V', 'AAV'),
+ 'XNPV' : ( -1, 3, 3, 'V', 'VAA'),
+ 'YEAR' : ( 69, 1, 1, 'V', 'V'),
+ 'YEARFRAC' : ( -1, 2, 3, 'V', 'VVV'),
+ 'YIELD' : ( -1, 6, 7, 'V', 'VVVVVVV'),
+ 'YIELDDISC' : ( -1, 4, 5, 'V', 'VVVVV'),
+ 'YIELDMAT' : ( -1, 5, 6, 'V', 'VVVVVV'),
+ 'ZTEST' : (324, 2, 3, 'V', 'RVV'),
+ }
+
+# Formulas Parse things
+
+ptgExp = 0x01
+ptgTbl = 0x02
+ptgAdd = 0x03
+ptgSub = 0x04
+ptgMul = 0x05
+ptgDiv = 0x06
+ptgPower = 0x07
+ptgConcat = 0x08
+ptgLT = 0x09
+ptgLE = 0x0a
+ptgEQ = 0x0b
+ptgGE = 0x0c
+ptgGT = 0x0d
+ptgNE = 0x0e
+ptgIsect = 0x0f
+ptgUnion = 0x10
+ptgRange = 0x11
+ptgUplus = 0x12
+ptgUminus = 0x13
+ptgPercent = 0x14
+ptgParen = 0x15
+ptgMissArg = 0x16
+ptgStr = 0x17
+ptgExtend = 0x18
+ptgAttr = 0x19
+ptgSheet = 0x1a
+ptgEndSheet = 0x1b
+ptgErr = 0x1c
+ptgBool = 0x1d
+ptgInt = 0x1e
+ptgNum = 0x1f
+
+ptgArrayR = 0x20
+ptgFuncR = 0x21
+ptgFuncVarR = 0x22
+ptgNameR = 0x23
+ptgRefR = 0x24
+ptgAreaR = 0x25
+ptgMemAreaR = 0x26
+ptgMemErrR = 0x27
+ptgMemNoMemR = 0x28
+ptgMemFuncR = 0x29
+ptgRefErrR = 0x2a
+ptgAreaErrR = 0x2b
+ptgRefNR = 0x2c
+ptgAreaNR = 0x2d
+ptgMemAreaNR = 0x2e
+ptgMemNoMemNR = 0x2f
+ptgNameXR = 0x39
+ptgRef3dR = 0x3a
+ptgArea3dR = 0x3b
+ptgRefErr3dR = 0x3c
+ptgAreaErr3dR = 0x3d
+
+ptgArrayV = 0x40
+ptgFuncV = 0x41
+ptgFuncVarV = 0x42
+ptgNameV = 0x43
+ptgRefV = 0x44
+ptgAreaV = 0x45
+ptgMemAreaV = 0x46
+ptgMemErrV = 0x47
+ptgMemNoMemV = 0x48
+ptgMemFuncV = 0x49
+ptgRefErrV = 0x4a
+ptgAreaErrV = 0x4b
+ptgRefNV = 0x4c
+ptgAreaNV = 0x4d
+ptgMemAreaNV = 0x4e
+ptgMemNoMemNV = 0x4f
+ptgFuncCEV = 0x58
+ptgNameXV = 0x59
+ptgRef3dV = 0x5a
+ptgArea3dV = 0x5b
+ptgRefErr3dV = 0x5c
+ptgAreaErr3dV = 0x5d
+
+ptgArrayA = 0x60
+ptgFuncA = 0x61
+ptgFuncVarA = 0x62
+ptgNameA = 0x63
+ptgRefA = 0x64
+ptgAreaA = 0x65
+ptgMemAreaA = 0x66
+ptgMemErrA = 0x67
+ptgMemNoMemA = 0x68
+ptgMemFuncA = 0x69
+ptgRefErrA = 0x6a
+ptgAreaErrA = 0x6b
+ptgRefNA = 0x6c
+ptgAreaNA = 0x6d
+ptgMemAreaNA = 0x6e
+ptgMemNoMemNA = 0x6f
+ptgFuncCEA = 0x78
+ptgNameXA = 0x79
+ptgRef3dA = 0x7a
+ptgArea3dA = 0x7b
+ptgRefErr3dA = 0x7c
+ptgAreaErr3dA = 0x7d
+
+
+PtgNames = {
+ ptgExp : "ptgExp",
+ ptgTbl : "ptgTbl",
+ ptgAdd : "ptgAdd",
+ ptgSub : "ptgSub",
+ ptgMul : "ptgMul",
+ ptgDiv : "ptgDiv",
+ ptgPower : "ptgPower",
+ ptgConcat : "ptgConcat",
+ ptgLT : "ptgLT",
+ ptgLE : "ptgLE",
+ ptgEQ : "ptgEQ",
+ ptgGE : "ptgGE",
+ ptgGT : "ptgGT",
+ ptgNE : "ptgNE",
+ ptgIsect : "ptgIsect",
+ ptgUnion : "ptgUnion",
+ ptgRange : "ptgRange",
+ ptgUplus : "ptgUplus",
+ ptgUminus : "ptgUminus",
+ ptgPercent : "ptgPercent",
+ ptgParen : "ptgParen",
+ ptgMissArg : "ptgMissArg",
+ ptgStr : "ptgStr",
+ ptgExtend : "ptgExtend",
+ ptgAttr : "ptgAttr",
+ ptgSheet : "ptgSheet",
+ ptgEndSheet : "ptgEndSheet",
+ ptgErr : "ptgErr",
+ ptgBool : "ptgBool",
+ ptgInt : "ptgInt",
+ ptgNum : "ptgNum",
+ ptgArrayR : "ptgArrayR",
+ ptgFuncR : "ptgFuncR",
+ ptgFuncVarR : "ptgFuncVarR",
+ ptgNameR : "ptgNameR",
+ ptgRefR : "ptgRefR",
+ ptgAreaR : "ptgAreaR",
+ ptgMemAreaR : "ptgMemAreaR",
+ ptgMemErrR : "ptgMemErrR",
+ ptgMemNoMemR : "ptgMemNoMemR",
+ ptgMemFuncR : "ptgMemFuncR",
+ ptgRefErrR : "ptgRefErrR",
+ ptgAreaErrR : "ptgAreaErrR",
+ ptgRefNR : "ptgRefNR",
+ ptgAreaNR : "ptgAreaNR",
+ ptgMemAreaNR : "ptgMemAreaNR",
+ ptgMemNoMemNR : "ptgMemNoMemNR",
+ ptgNameXR : "ptgNameXR",
+ ptgRef3dR : "ptgRef3dR",
+ ptgArea3dR : "ptgArea3dR",
+ ptgRefErr3dR : "ptgRefErr3dR",
+ ptgAreaErr3dR : "ptgAreaErr3dR",
+ ptgArrayV : "ptgArrayV",
+ ptgFuncV : "ptgFuncV",
+ ptgFuncVarV : "ptgFuncVarV",
+ ptgNameV : "ptgNameV",
+ ptgRefV : "ptgRefV",
+ ptgAreaV : "ptgAreaV",
+ ptgMemAreaV : "ptgMemAreaV",
+ ptgMemErrV : "ptgMemErrV",
+ ptgMemNoMemV : "ptgMemNoMemV",
+ ptgMemFuncV : "ptgMemFuncV",
+ ptgRefErrV : "ptgRefErrV",
+ ptgAreaErrV : "ptgAreaErrV",
+ ptgRefNV : "ptgRefNV",
+ ptgAreaNV : "ptgAreaNV",
+ ptgMemAreaNV : "ptgMemAreaNV",
+ ptgMemNoMemNV : "ptgMemNoMemNV",
+ ptgFuncCEV : "ptgFuncCEV",
+ ptgNameXV : "ptgNameXV",
+ ptgRef3dV : "ptgRef3dV",
+ ptgArea3dV : "ptgArea3dV",
+ ptgRefErr3dV : "ptgRefErr3dV",
+ ptgAreaErr3dV : "ptgAreaErr3dV",
+ ptgArrayA : "ptgArrayA",
+ ptgFuncA : "ptgFuncA",
+ ptgFuncVarA : "ptgFuncVarA",
+ ptgNameA : "ptgNameA",
+ ptgRefA : "ptgRefA",
+ ptgAreaA : "ptgAreaA",
+ ptgMemAreaA : "ptgMemAreaA",
+ ptgMemErrA : "ptgMemErrA",
+ ptgMemNoMemA : "ptgMemNoMemA",
+ ptgMemFuncA : "ptgMemFuncA",
+ ptgRefErrA : "ptgRefErrA",
+ ptgAreaErrA : "ptgAreaErrA",
+ ptgRefNA : "ptgRefNA",
+ ptgAreaNA : "ptgAreaNA",
+ ptgMemAreaNA : "ptgMemAreaNA",
+ ptgMemNoMemNA : "ptgMemNoMemNA",
+ ptgFuncCEA : "ptgFuncCEA",
+ ptgNameXA : "ptgNameXA",
+ ptgRef3dA : "ptgRef3dA",
+ ptgArea3dA : "ptgArea3dA",
+ ptgRefErr3dA : "ptgRefErr3dA",
+ ptgAreaErr3dA : "ptgAreaErr3dA"
+}
+
+
+error_msg_by_code = {
+ 0x00: u"#NULL!", # intersection of two cell ranges is empty
+ 0x07: u"#DIV/0!", # division by zero
+ 0x0F: u"#VALUE!", # wrong type of operand
+ 0x17: u"#REF!", # illegal or deleted cell reference
+ 0x1D: u"#NAME?", # wrong function or range name
+ 0x24: u"#NUM!", # value range overflow
+ 0x2A: u"#N/A!" # argument or function not available
+}
diff --git a/tablib/packages/xlwt/Formatting.py b/tablib/packages/xlwt/Formatting.py
new file mode 100644
index 0000000..76b8e8f
--- /dev/null
+++ b/tablib/packages/xlwt/Formatting.py
@@ -0,0 +1,261 @@
+#!/usr/bin/env python
+'''
+The XF record is able to store explicit cell formatting attributes or the
+attributes of a cell style. Explicit formatting includes the reference to
+a cell style XF record. This allows a defined cell style to be extended with
+some explicit attributes. The formatting attributes are divided into
+6 groups:
+
+Group Attributes
+-------------------------------------
+Number format Number format index (index to FORMAT record)
+Font Font index (index to FONT record)
+Alignment Horizontal and vertical alignment, text wrap, indentation,
+ orientation/rotation, text direction
+Border Border line styles and colours
+Background Background area style and colours
+Protection Cell locked, formula hidden
+
+For each group a flag in the cell XF record specifies whether to use the
+attributes contained in that XF record or in the referenced style
+XF record. In style XF records, these flags specify whether the attributes
+will overwrite explicit cell formatting when the style is applied to
+a cell. Changing a cell style (without applying this style to a cell) will
+change all cells which already use that style and do not contain explicit
+cell attributes for the changed style attributes. If a cell XF record does
+not contain explicit attributes in a group (if the attribute group flag
+is not set), it repeats the attributes of its style XF record.
+
+'''
+
+import BIFFRecords
+
+class Font(object):
+
+ ESCAPEMENT_NONE = 0x00
+ ESCAPEMENT_SUPERSCRIPT = 0x01
+ ESCAPEMENT_SUBSCRIPT = 0x02
+
+ UNDERLINE_NONE = 0x00
+ UNDERLINE_SINGLE = 0x01
+ UNDERLINE_SINGLE_ACC = 0x21
+ UNDERLINE_DOUBLE = 0x02
+ UNDERLINE_DOUBLE_ACC = 0x22
+
+ FAMILY_NONE = 0x00
+ FAMILY_ROMAN = 0x01
+ FAMILY_SWISS = 0x02
+ FAMILY_MODERN = 0x03
+ FAMILY_SCRIPT = 0x04
+ FAMILY_DECORATIVE = 0x05
+
+ CHARSET_ANSI_LATIN = 0x00
+ CHARSET_SYS_DEFAULT = 0x01
+ CHARSET_SYMBOL = 0x02
+ CHARSET_APPLE_ROMAN = 0x4D
+ CHARSET_ANSI_JAP_SHIFT_JIS = 0x80
+ CHARSET_ANSI_KOR_HANGUL = 0x81
+ CHARSET_ANSI_KOR_JOHAB = 0x82
+ CHARSET_ANSI_CHINESE_GBK = 0x86
+ CHARSET_ANSI_CHINESE_BIG5 = 0x88
+ CHARSET_ANSI_GREEK = 0xA1
+ CHARSET_ANSI_TURKISH = 0xA2
+ CHARSET_ANSI_VIETNAMESE = 0xA3
+ CHARSET_ANSI_HEBREW = 0xB1
+ CHARSET_ANSI_ARABIC = 0xB2
+ CHARSET_ANSI_BALTIC = 0xBA
+ CHARSET_ANSI_CYRILLIC = 0xCC
+ CHARSET_ANSI_THAI = 0xDE
+ CHARSET_ANSI_LATIN_II = 0xEE
+ CHARSET_OEM_LATIN_I = 0xFF
+
+ def __init__(self):
+ # twip = 1/20 of a point = 1/1440 of an inch
+ # the resolution is usually 96 pixels per inch
+ # (rarely 120 pixels per inch or some other value)
+
+ self.height = 0x00C8 # 200: this is font with height 10 points
+ self.italic = False
+ self.struck_out = False
+ self.outline = False
+ self.shadow = False
+ self.colour_index = 0x7FFF
+ self.bold = False
+ self._weight = 0x0190 # 0x02BC gives bold font
+ self.escapement = self.ESCAPEMENT_NONE
+ self.underline = self.UNDERLINE_NONE
+ self.family = self.FAMILY_NONE
+ self.charset = self.CHARSET_SYS_DEFAULT
+ self.name = 'Arial'
+
+ def get_biff_record(self):
+ height = self.height
+
+ options = 0x00
+ if self.bold:
+ options |= 0x01
+ self._weight = 0x02BC
+ if self.italic:
+ options |= 0x02
+ if self.underline != self.UNDERLINE_NONE:
+ options |= 0x04
+ if self.struck_out:
+ options |= 0x08
+ if self.outline:
+ options |= 0x010
+ if self.shadow:
+ options |= 0x020
+
+ colour_index = self.colour_index
+ weight = self._weight
+ escapement = self.escapement
+ underline = self.underline
+ family = self.family
+ charset = self.charset
+ name = self.name
+
+ return BIFFRecords.FontRecord(height, options, colour_index, weight, escapement,
+ underline, family, charset,
+ name)
+
+ def _search_key(self):
+ return (
+ self.height,
+ self.italic,
+ self.struck_out,
+ self.outline,
+ self.shadow,
+ self.colour_index,
+ self.bold,
+ self._weight,
+ self.escapement,
+ self.underline,
+ self.family,
+ self.charset,
+ self.name,
+ )
+
+class Alignment(object):
+ HORZ_GENERAL = 0x00
+ HORZ_LEFT = 0x01
+ HORZ_CENTER = 0x02
+ HORZ_RIGHT = 0x03
+ HORZ_FILLED = 0x04
+ HORZ_JUSTIFIED = 0x05 # BIFF4-BIFF8X
+ HORZ_CENTER_ACROSS_SEL = 0x06 # Centred across selection (BIFF4-BIFF8X)
+ HORZ_DISTRIBUTED = 0x07 # Distributed (BIFF8X)
+
+ VERT_TOP = 0x00
+ VERT_CENTER = 0x01
+ VERT_BOTTOM = 0x02
+ VERT_JUSTIFIED = 0x03 # Justified (BIFF5-BIFF8X)
+ VERT_DISTRIBUTED = 0x04 # Distributed (BIFF8X)
+
+ DIRECTION_GENERAL = 0x00 # BIFF8X
+ DIRECTION_LR = 0x01
+ DIRECTION_RL = 0x02
+
+ ORIENTATION_NOT_ROTATED = 0x00
+ ORIENTATION_STACKED = 0x01
+ ORIENTATION_90_CC = 0x02
+ ORIENTATION_90_CW = 0x03
+
+ ROTATION_0_ANGLE = 0x00
+ ROTATION_STACKED = 0xFF
+
+ WRAP_AT_RIGHT = 0x01
+ NOT_WRAP_AT_RIGHT = 0x00
+
+ SHRINK_TO_FIT = 0x01
+ NOT_SHRINK_TO_FIT = 0x00
+
+ def __init__(self):
+ self.horz = self.HORZ_GENERAL
+ self.vert = self.VERT_BOTTOM
+ self.dire = self.DIRECTION_GENERAL
+ self.orie = self.ORIENTATION_NOT_ROTATED
+ self.rota = self.ROTATION_0_ANGLE
+ self.wrap = self.NOT_WRAP_AT_RIGHT
+ self.shri = self.NOT_SHRINK_TO_FIT
+ self.inde = 0
+ self.merg = 0
+
+ def _search_key(self):
+ return (
+ self.horz, self.vert, self.dire, self.orie, self.rota,
+ self.wrap, self.shri, self.inde, self.merg,
+ )
+
+class Borders(object):
+ NO_LINE = 0x00
+ THIN = 0x01
+ MEDIUM = 0x02
+ DASHED = 0x03
+ DOTTED = 0x04
+ THICK = 0x05
+ DOUBLE = 0x06
+ HAIR = 0x07
+ # The following are for BIFF8
+ MEDIUM_DASHED = 0x08
+ THIN_DASH_DOTTED = 0x09
+ MEDIUM_DASH_DOTTED = 0x0A
+ THIN_DASH_DOT_DOTTED = 0x0B
+ MEDIUM_DASH_DOT_DOTTED = 0x0C
+ SLANTED_MEDIUM_DASH_DOTTED = 0x0D
+
+ NEED_DIAG1 = 0x01
+ NEED_DIAG2 = 0x01
+ NO_NEED_DIAG1 = 0x00
+ NO_NEED_DIAG2 = 0x00
+
+ def __init__(self):
+ self.left = self.NO_LINE
+ self.right = self.NO_LINE
+ self.top = self.NO_LINE
+ self.bottom = self.NO_LINE
+ self.diag = self.NO_LINE
+
+ self.left_colour = 0x40
+ self.right_colour = 0x40
+ self.top_colour = 0x40
+ self.bottom_colour = 0x40
+ self.diag_colour = 0x40
+
+ self.need_diag1 = self.NO_NEED_DIAG1
+ self.need_diag2 = self.NO_NEED_DIAG2
+
+ def _search_key(self):
+ return (
+ self.left, self.right, self.top, self.bottom, self.diag,
+ self.left_colour, self.right_colour, self.top_colour,
+ self.bottom_colour, self.diag_colour,
+ self.need_diag1, self.need_diag2,
+ )
+
+class Pattern(object):
+ # patterns 0x00 - 0x12
+ NO_PATTERN = 0x00
+ SOLID_PATTERN = 0x01
+
+ def __init__(self):
+ self.pattern = self.NO_PATTERN
+ self.pattern_fore_colour = 0x40
+ self.pattern_back_colour = 0x41
+
+ def _search_key(self):
+ return (
+ self.pattern,
+ self.pattern_fore_colour,
+ self.pattern_back_colour,
+ )
+
+class Protection(object):
+ def __init__(self):
+ self.cell_locked = 1
+ self.formula_hidden = 0
+
+ def _search_key(self):
+ return (
+ self.cell_locked,
+ self.formula_hidden,
+ )
diff --git a/tablib/packages/xlwt/Row.py b/tablib/packages/xlwt/Row.py
new file mode 100644
index 0000000..a834ea1
--- /dev/null
+++ b/tablib/packages/xlwt/Row.py
@@ -0,0 +1,253 @@
+# -*- coding: windows-1252 -*-
+
+import BIFFRecords
+import Style
+from Cell import StrCell, BlankCell, NumberCell, FormulaCell, MulBlankCell, BooleanCell, ErrorCell, \
+ _get_cells_biff_data_mul
+import ExcelFormula
+import datetime as dt
+try:
+ from decimal import Decimal
+except ImportError:
+ # Python 2.3: decimal not supported; create dummy Decimal class
+ class Decimal(object):
+ pass
+
+
+class Row(object):
+    """A single worksheet row: its cells, row formatting and BIFF output.
+
+    Cells are kept in a dict keyed by column index.  A ``None`` entry marks
+    a column covered by a multi-column cell (see insert_mulcells); such
+    entries are skipped when the cell records are generated.
+    """
+
+    __slots__ = [# private variables
+                 "__idx",
+                 "__parent",
+                 "__parent_wb",
+                 "__cells",
+                 "__min_col_idx",
+                 "__max_col_idx",
+                 "__xf_index",
+                 "__has_default_xf_index",
+                 "__height_in_pixels",
+                 # public variables
+                 "height",
+                 "has_default_height",
+                 "height_mismatch",
+                 "level",
+                 "collapse",
+                 "hidden",
+                 "space_above",
+                 "space_below"]
+
+    def __init__(self, rowx, parent_sheet):
+        """Create row number *rowx* belonging to *parent_sheet*.
+
+        Raises ValueError when rowx is not an int in 0..65535.
+        """
+        if not (isinstance(rowx, int) and 0 <= rowx <= 65535):
+            raise ValueError("row index (%r) not an int in range(65536)" % rowx)
+        self.__idx = rowx
+        self.__parent = parent_sheet
+        self.__parent_wb = parent_sheet.get_parent()
+        self.__cells = {}
+        self.__min_col_idx = 0
+        self.__max_col_idx = 0
+        self.__xf_index = 0x0F
+        self.__has_default_xf_index = 0
+        self.__height_in_pixels = 0x11
+
+        self.height = 0x00FF
+        self.has_default_height = 0x00
+        self.height_mismatch = 0
+        self.level = 0
+        self.collapse = 0
+        self.hidden = 0
+        self.space_above = 0
+        self.space_below = 0
+
+    def __adjust_height(self, style):
+        """Grow the recorded pixel height if *style*'s font needs more room."""
+        twips = style.font.height
+        points = float(twips)/20.0
+        # Cell height in pixels can be calculated by the following approx. formula:
+        # cell height in pixels = font height in points * 83/50 + 2/5
+        # It works when screen resolution is 96 dpi
+        pix = int(round(points*83.0/50.0 + 2.0/5.0))
+        if pix > self.__height_in_pixels:
+            self.__height_in_pixels = pix
+
+    def __adjust_bound_col_idx(self, *args):
+        """Validate column indexes and widen the row/sheet column bounds.
+
+        Raises ValueError for any argument that is not an integral value
+        in 0..255.
+        """
+        for arg in args:
+            iarg = int(arg)
+            if not ((0 <= iarg <= 255) and arg == iarg):
+                raise ValueError("column index (%r) not an int in range(256)" % arg)
+            sheet = self.__parent
+            if iarg < self.__min_col_idx:
+                self.__min_col_idx = iarg
+            if iarg > self.__max_col_idx:
+                self.__max_col_idx = iarg
+            if iarg < sheet.first_used_col:
+                sheet.first_used_col = iarg
+            if iarg > sheet.last_used_col:
+                sheet.last_used_col = iarg
+
+    def __excel_date_dt(self, date):
+        """Convert a date, datetime or time object to an Excel serial number.
+
+        The integer part counts days; the fraction is the time of day.
+        """
+        if isinstance(date, dt.date) and (not isinstance(date, dt.datetime)):
+            epoch = dt.date(1899, 12, 31)
+        elif isinstance(date, dt.time):
+            # Anchor a bare time on the epoch day so that only the day
+            # fraction survives the subtraction below.
+            date = dt.datetime.combine(dt.datetime(1900, 1, 1), date)
+            epoch = dt.datetime(1900, 1, 1, 0, 0, 0)
+        else:
+            epoch = dt.datetime(1899, 12, 31, 0, 0, 0)
+        delta = date - epoch
+        xldate = delta.days + float(delta.seconds) / (24*60*60)
+        # Excel counts a 29 Feb 1900 that never existed, so every serial
+        # after day 59 is one higher than the true day count.
+        if xldate > 59:
+            xldate += 1
+        return xldate
+
+    def get_height_in_pixels(self):
+        """Return the largest pixel height required by any style used so far."""
+        return self.__height_in_pixels
+
+    def set_style(self, style):
+        """Make *style* the row's default format."""
+        self.__adjust_height(style)
+        self.__xf_index = self.__parent_wb.add_style(style)
+        self.__has_default_xf_index = 1
+
+    def get_xf_index(self):
+        """Return the XF index of the row's default format."""
+        return self.__xf_index
+
+    def get_cells_count(self):
+        """Return the number of cell entries, placeholders included."""
+        return len(self.__cells)
+
+    def get_min_col(self):
+        """Return the lowest column index recorded for this row."""
+        return self.__min_col_idx
+
+    def get_max_col(self):
+        """Return the highest column index recorded for this row."""
+        return self.__max_col_idx
+
+    def get_row_biff_data(self):
+        """Return the packed ROW record describing this row."""
+        # Low 15 bits: height; bit 15: "uses default height" flag.
+        height_options = (self.height & 0x07FFF)
+        height_options |= (self.has_default_height & 0x01) << 15
+
+        options = (self.level & 0x07) << 0
+        options |= (self.collapse & 0x01) << 4
+        options |= (self.hidden & 0x01) << 5
+        options |= (self.height_mismatch & 0x01) << 6
+        options |= (self.__has_default_xf_index & 0x01) << 7
+        options |= (0x01 & 0x01) << 8  # bit 8 is always written as 1
+        options |= (self.__xf_index & 0x0FFF) << 16
+        options |= (self.space_above & 1) << 28
+        options |= (self.space_below & 1) << 29
+
+        return BIFFRecords.RowRecord(self.__idx, self.__min_col_idx,
+            self.__max_col_idx, height_options, options).get()
+
+    def insert_cell(self, col_index, cell_obj):
+        """Store *cell_obj* at *col_index*.
+
+        Raises Exception when the column is already occupied and the parent
+        sheet does not allow overwriting.  When overwriting is allowed, the
+        shared-string reference of a replaced cell (if any) is released.
+        """
+        if col_index in self.__cells:
+            if not self.__parent._cell_overwrite_ok:
+                msg = "Attempt to overwrite cell: sheetname=%r rowx=%d colx=%d" \
+                    % (self.__parent.name, self.__idx, col_index)
+                raise Exception(msg)
+            prev_cell_obj = self.__cells[col_index]
+            sst_idx = getattr(prev_cell_obj, 'sst_idx', None)
+            if sst_idx is not None:
+                self.__parent_wb.del_str(sst_idx)
+        self.__cells[col_index] = cell_obj
+
+    def insert_mulcells(self, colx1, colx2, cell_obj):
+        """Store a cell spanning colx1..colx2 (inclusive).
+
+        The cell object goes in the first column; the remaining columns get
+        ``None`` placeholders so they still count as occupied.
+        """
+        self.insert_cell(colx1, cell_obj)
+        for col_index in xrange(colx1+1, colx2+1):
+            self.insert_cell(col_index, None)
+
+    def get_cells_biff_data(self):
+        """Return the packed records of all real cells, in column order."""
+        cell_items = [item for item in self.__cells.iteritems() if item[1] is not None]
+        cell_items.sort() # in column order
+        return _get_cells_biff_data_mul(self.__idx, cell_items)
+
+    def get_index(self):
+        """Return this row's index."""
+        return self.__idx
+
+    def set_cell_text(self, colx, value, style=Style.default_style):
+        """Put the string *value* into column *colx*."""
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(colx)
+        xf_index = self.__parent_wb.add_style(style)
+        self.insert_cell(colx, StrCell(self.__idx, colx, xf_index, self.__parent_wb.add_str(value)))
+
+    def set_cell_blank(self, colx, style=Style.default_style):
+        """Put a formatted blank cell into column *colx*."""
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(colx)
+        xf_index = self.__parent_wb.add_style(style)
+        self.insert_cell(colx, BlankCell(self.__idx, colx, xf_index))
+
+    def set_cell_mulblanks(self, first_colx, last_colx, style=Style.default_style):
+        """Put one run of formatted blank cells into first_colx..last_colx."""
+        assert 0 <= first_colx <= last_colx <= 255
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(first_colx, last_colx)
+        xf_index = self.__parent_wb.add_style(style)
+        self.insert_mulcells(first_colx, last_colx, MulBlankCell(self.__idx, first_colx, last_colx, xf_index))
+
+    def set_cell_number(self, colx, number, style=Style.default_style):
+        """Put the numeric value *number* into column *colx*."""
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(colx)
+        xf_index = self.__parent_wb.add_style(style)
+        self.insert_cell(colx, NumberCell(self.__idx, colx, xf_index, number))
+
+    def set_cell_date(self, colx, datetime_obj, style=Style.default_style):
+        """Put a date/datetime/time into column *colx* as an Excel serial number."""
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(colx)
+        xf_index = self.__parent_wb.add_style(style)
+        self.insert_cell(colx,
+            NumberCell(self.__idx, colx, xf_index, self.__excel_date_dt(datetime_obj)))
+
+    def set_cell_formula(self, colx, formula, style=Style.default_style, calc_flags=0):
+        """Put *formula* into column *colx*.
+
+        *calc_flags* is handed on to FormulaCell unchanged.
+        """
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(colx)
+        xf_index = self.__parent_wb.add_style(style)
+        self.__parent_wb.add_sheet_reference(formula)
+        # Forward the caller's flags; a literal 0 here used to discard them.
+        self.insert_cell(colx, FormulaCell(self.__idx, colx, xf_index, formula, calc_flags=calc_flags))
+
+    def set_cell_boolean(self, colx, value, style=Style.default_style):
+        """Put ``bool(value)`` into column *colx*."""
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(colx)
+        xf_index = self.__parent_wb.add_style(style)
+        self.insert_cell(colx, BooleanCell(self.__idx, colx, xf_index, bool(value)))
+
+    def set_cell_error(self, colx, error_string_or_code, style=Style.default_style):
+        """Put an error value into column *colx*."""
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(colx)
+        xf_index = self.__parent_wb.add_style(style)
+        self.insert_cell(colx, ErrorCell(self.__idx, colx, xf_index, error_string_or_code))
+
+    def write(self, col, label, style=Style.default_style):
+        """Put *label* into column *col*, choosing the cell type from its type.
+
+        Strings become text cells (blank when empty), bools boolean cells,
+        numbers and Decimals number cells, date/time objects serial-number
+        cells, None a blank cell and ExcelFormula.Formula a formula cell.
+        Raises Exception for any other type.
+        """
+        self.__adjust_height(style)
+        self.__adjust_bound_col_idx(col)
+        style_index = self.__parent_wb.add_style(style)
+        if isinstance(label, basestring):
+            if len(label) > 0:
+                self.insert_cell(col,
+                    StrCell(self.__idx, col, style_index, self.__parent_wb.add_str(label))
+                    )
+            else:
+                self.insert_cell(col, BlankCell(self.__idx, col, style_index))
+        elif isinstance(label, bool): # bool is subclass of int; test bool first
+            self.insert_cell(col, BooleanCell(self.__idx, col, style_index, label))
+        elif isinstance(label, (float, int, long, Decimal)):
+            self.insert_cell(col, NumberCell(self.__idx, col, style_index, label))
+        elif isinstance(label, (dt.datetime, dt.date, dt.time)):
+            date_number = self.__excel_date_dt(label)
+            self.insert_cell(col, NumberCell(self.__idx, col, style_index, date_number))
+        elif label is None:
+            self.insert_cell(col, BlankCell(self.__idx, col, style_index))
+        elif isinstance(label, ExcelFormula.Formula):
+            self.__parent_wb.add_sheet_reference(label)
+            self.insert_cell(col, FormulaCell(self.__idx, col, style_index, label))
+        else:
+            raise Exception("Unexpected data type %r" % type(label))
+
+    write_blanks = set_cell_mulblanks
+
+
+
diff --git a/tablib/packages/xlwt/Style.py b/tablib/packages/xlwt/Style.py
new file mode 100644
index 0000000..bf5fb4c
--- /dev/null
+++ b/tablib/packages/xlwt/Style.py
@@ -0,0 +1,592 @@
+# -*- coding: windows-1252 -*-
+
+import Formatting
+from BIFFRecords import *
+
+# Index given to the first number format that is not in the built-in table;
+# StyleCollection numbers further custom formats upward from here.
+FIRST_USER_DEFINED_NUM_FORMAT_IDX = 164
+
+class XFStyle(object):
+ """Complete cell format: number format string plus the five settings objects."""
+
+ def __init__(self):
+ """Create a style with format 'General' and default-constructed settings."""
+ self.num_format_str = 'General'
+ self.font = Formatting.Font()
+ self.alignment = Formatting.Alignment()
+ self.borders = Formatting.Borders()
+ self.pattern = Formatting.Pattern()
+ self.protection = Formatting.Protection()
+
+# Module-level style instance, created once at import time.
+default_style = XFStyle()
+
+class StyleCollection(object):
+    """Registry of the fonts, number formats and XF records of a workbook.
+
+    Styles are registered through add(); get_biff_data() serialises
+    everything registered so far.
+
+    style_compression controls how duplicates are merged:
+      0 -- fonts and XFs are matched by object identity only;
+      any true value -- fonts with equal _search_key() share one index;
+      2 -- additionally, XFs whose components have equal values share one
+           index.
+
+    stats counts, in order: font identity hits, font value hits, new fonts,
+    XF identity hits, XF value hits, new XFs.
+    """
+
+    # The first 23 entries are registered as format indexes 0..22 and the
+    # remaining 13 as indexes 37..49 (see __init__).
+    _std_num_fmt_list = [
+        'general',
+        '0',
+        '0.00',
+        '#,##0',
+        '#,##0.00',
+        '"$"#,##0_);("$"#,##',
+        '"$"#,##0_);[Red]("$"#,##',
+        '"$"#,##0.00_);("$"#,##',
+        '"$"#,##0.00_);[Red]("$"#,##',
+        '0%',
+        '0.00%',
+        '0.00E+00',
+        '# ?/?',
+        '# ??/??',
+        'M/D/YY',
+        'D-MMM-YY',
+        'D-MMM',
+        'MMM-YY',
+        'h:mm AM/PM',
+        'h:mm:ss AM/PM',
+        'h:mm',
+        'h:mm:ss',
+        'M/D/YY h:mm',
+        '_(#,##0_);(#,##0)',
+        '_(#,##0_);[Red](#,##0)',
+        '_(#,##0.00_);(#,##0.00)',
+        '_(#,##0.00_);[Red](#,##0.00)',
+        '_("$"* #,##0_);_("$"* (#,##0);_("$"* "-"_);_(@_)',
+        '_(* #,##0_);_(* (#,##0);_(* "-"_);_(@_)',
+        '_("$"* #,##0.00_);_("$"* (#,##0.00);_("$"* "-"??_);_(@_)',
+        '_(* #,##0.00_);_(* (#,##0.00);_(* "-"??_);_(@_)',
+        'mm:ss',
+        '[h]:mm:ss',
+        'mm:ss.0',
+        '##0.0E+0',
+        '@'
+        ]
+
+    def __init__(self, style_compression=0):
+        """Set up the lookup tables, the built-in fonts/formats and the default XF."""
+        self.style_compression = style_compression
+        self.stats = [0, 0, 0, 0, 0, 0]
+        self._font_id2x = {}   # font object -> font index
+        self._font_x2id = {}   # font index -> font object
+        self._font_val2x = {}  # font _search_key() -> font index
+
+        for x in (0, 1, 2, 3, 5): # The font with index 4 is omitted in all BIFF versions
+            font = Formatting.Font()
+            search_key = font._search_key()
+            self._font_id2x[font] = x
+            self._font_x2id[x] = font
+            self._font_val2x[search_key] = x
+
+        self._xf_id2x = {}     # XF tuple -> XF index
+        self._xf_x2id = {}     # XF index -> XF tuple
+        self._xf_val2x = {}    # XF value key -> XF index
+
+        self._num_formats = {}
+        for fmtidx, fmtstr in zip(range(0, 23), StyleCollection._std_num_fmt_list[0:23]):
+            self._num_formats[fmtstr] = fmtidx
+        for fmtidx, fmtstr in zip(range(37, 50), StyleCollection._std_num_fmt_list[23:]):
+            self._num_formats[fmtstr] = fmtidx
+
+        self.default_style = XFStyle()
+        self._default_xf = self._add_style(self.default_style)[0]
+
+    def add(self, style):
+        """Register *style* and return its XF index (0x10 when style is None)."""
+        if style is None:
+            return 0x10
+        return self._add_style(style)[1]
+
+    def _add_style(self, style):
+        """Register *style*; return ``(xf_tuple, xf_index)``.
+
+        Raises ValueError when the XF index would reach 0xFFF.
+        """
+        num_format_str = style.num_format_str
+        if num_format_str in self._num_formats:
+            num_format_idx = self._num_formats[num_format_str]
+        else:
+            # NOTE(review): the lookup is case-sensitive, so XFStyle's default
+            # 'General' does not match the built-in 'general' and is numbered
+            # as a user-defined format -- confirm this is intended.
+            num_format_idx = (
+                FIRST_USER_DEFINED_NUM_FORMAT_IDX
+                + len(self._num_formats)
+                - len(StyleCollection._std_num_fmt_list)
+                )
+            self._num_formats[num_format_str] = num_format_idx
+
+        font = style.font
+        if font in self._font_id2x:
+            font_idx = self._font_id2x[font]
+            self.stats[0] += 1
+        elif self.style_compression:
+            search_key = font._search_key()
+            font_idx = self._font_val2x.get(search_key)
+            if font_idx is not None:
+                self._font_id2x[font] = font_idx
+                self.stats[1] += 1
+            else:
+                font_idx = len(self._font_x2id) + 1 # Why plus 1? Font 4 is missing
+                self._font_id2x[font] = font_idx
+                self._font_val2x[search_key] = font_idx
+                self._font_x2id[font_idx] = font
+                self.stats[2] += 1
+        else:
+            font_idx = len(self._font_id2x) + 1
+            self._font_id2x[font] = font_idx
+            self.stats[2] += 1
+
+        gof = (style.alignment, style.borders, style.pattern, style.protection)
+        xf = (font_idx, num_format_idx) + gof
+        if xf in self._xf_id2x:
+            xf_index = self._xf_id2x[xf]
+            self.stats[3] += 1
+        elif self.style_compression == 2:
+            xf_key = (font_idx, num_format_idx) + tuple([obj._search_key() for obj in gof])
+            xf_index = self._xf_val2x.get(xf_key)
+            if xf_index is not None:
+                self._xf_id2x[xf] = xf_index
+                self.stats[4] += 1
+            else:
+                xf_index = 0x10 + len(self._xf_x2id)
+                self._xf_id2x[xf] = xf_index
+                self._xf_val2x[xf_key] = xf_index
+                self._xf_x2id[xf_index] = xf
+                self.stats[5] += 1
+        else:
+            xf_index = 0x10 + len(self._xf_id2x)
+            self._xf_id2x[xf] = xf_index
+            self.stats[5] += 1
+
+        if xf_index >= 0xFFF:
+            # 12 bits allowed, 0xFFF is a sentinel value
+            raise ValueError("More than 4094 XFs (styles)")
+
+        return xf, xf_index
+
+    def get_biff_data(self):
+        """Return the font, number-format, XF and style records, concatenated."""
+        return ''.join([
+            self._all_fonts(),
+            self._all_num_formats(),
+            self._all_cell_styles(),
+            self._all_styles(),
+            ])
+
+    def _all_fonts(self):
+        """Return every font record, ordered by font index."""
+        if self.style_compression:
+            alist = self._font_x2id.items()
+        else:
+            alist = [(x, o) for o, x in self._font_id2x.items()]
+        return ''.join([font.get_biff_record().get() for font_idx, font in sorted(alist)])
+
+    def _all_num_formats(self):
+        """Return the records of the user-defined number formats, ordered by index."""
+        alist = [
+            (v, k)
+            for k, v in self._num_formats.items()
+            if v >= FIRST_USER_DEFINED_NUM_FORMAT_IDX
+            ]
+        return ''.join([NumberFormatRecord(fmtidx, fmtstr).get() for fmtidx, fmtstr in sorted(alist)])
+
+    def _all_cell_styles(self):
+        """Return 16 copies of the default style XF followed by all registered XFs."""
+        records = [XFRecord(self._default_xf, 'style').get() for i in range(0, 16)]
+        if self.style_compression == 2:
+            alist = self._xf_x2id.items()
+        else:
+            alist = [(x, o) for o, x in self._xf_id2x.items()]
+        # XF indexes are unique, so sorting never compares the XF tuples themselves.
+        records.extend([XFRecord(xf).get() for xf_idx, xf in sorted(alist)])
+        return ''.join(records)
+
+    def _all_styles(self):
+        """Return the single STYLE record."""
+        return StyleRecord().get()
+
+# easyxf and its supporting objects ###################################
+
+class EasyXFException(Exception):
+ """Base class of the errors raised while parsing an easyxf string."""
+ pass
+
+class EasyXFCallerError(EasyXFException):
+ """The string supplied by the caller is malformed or uses unknown names/values."""
+ pass
+
+class EasyXFAuthorError(EasyXFException):
+ """The parse dictionary itself is inconsistent (a defect in this module's tables)."""
+ pass
+
+class IntULim(object):
+ # If astring represents a valid unsigned integer ('123', '0xabcd', etc)
+ # and it is <= limit, return the int value; otherwise return None.
+
+ def __init__(self, limit):
+ """Remember the largest value that __call__ will accept."""
+ self.limit = limit
+
+ def __call__(self, astring):
+ """Return int(astring) if it parses and lies in 0..limit, else None."""
+ try:
+ # Base 0 lets int() honour prefixes such as '0x'.
+ value = int(astring, 0)
+ except ValueError:
+ return None
+ if not 0 <= value <= self.limit:
+ return None
+ return value
+
+# Keys are matched against the lower-cased value text (see _parse_strg_to_obj).
+bool_map = {
+ # Text values for all Boolean attributes
+ '1': 1, 'yes': 1, 'true': 1, 'on': 1,
+ '0': 0, 'no': 0, 'false': 0, 'off': 0,
+ }
+
+# Names for the border line style codes 0x00..0x0d.
+border_line_map = {
+ # Text values for these borders attributes:
+ # left, right, top, bottom and diag
+ 'no_line': 0x00,
+ 'thin': 0x01,
+ 'medium': 0x02,
+ 'dashed': 0x03,
+ 'dotted': 0x04,
+ 'thick': 0x05,
+ 'double': 0x06,
+ 'hair': 0x07,
+ 'medium_dashed': 0x08,
+ 'thin_dash_dotted': 0x09,
+ 'medium_dash_dotted': 0x0a,
+ 'thin_dash_dot_dotted': 0x0b,
+ 'medium_dash_dot_dotted': 0x0c,
+ 'slanted_medium_dash_dotted': 0x0d,
+ }
+
+# Names for the character-set codes accepted by the font section.
+charset_map = {
+ # Text values for font.charset
+ 'ansi_latin': 0x00,
+ 'sys_default': 0x01,
+ 'symbol': 0x02,
+ 'apple_roman': 0x4d,
+ 'ansi_jap_shift_jis': 0x80,
+ 'ansi_kor_hangul': 0x81,
+ 'ansi_kor_johab': 0x82,
+ 'ansi_chinese_gbk': 0x86,
+ 'ansi_chinese_big5': 0x88,
+ 'ansi_greek': 0xa1,
+ 'ansi_turkish': 0xa2,
+ 'ansi_vietnamese': 0xa3,
+ 'ansi_hebrew': 0xb1,
+ 'ansi_arabic': 0xb2,
+ 'ansi_baltic': 0xba,
+ 'ansi_cyrillic': 0xcc,
+ 'ansi_thai': 0xde,
+ 'ansi_latin_ii': 0xee,
+ 'oem_latin_i': 0xff,
+ }
+
+
+# Text values for colour indices. "grey" is a synonym of "gray".
+# The names are those given by Microsoft Excel 2003 to the colours
+# in the default palette. There is no great correspondence with
+# any W3C name-to-RGB mapping.
+# Each line of the table is "<name> <index>"; several names share an index.
+_colour_map_text = """\
+aqua 0x31
+black 0x08
+blue 0x0C
+blue_gray 0x36
+bright_green 0x0B
+brown 0x3C
+coral 0x1D
+cyan_ega 0x0F
+dark_blue 0x12
+dark_blue_ega 0x12
+dark_green 0x3A
+dark_green_ega 0x11
+dark_purple 0x1C
+dark_red 0x10
+dark_red_ega 0x10
+dark_teal 0x38
+dark_yellow 0x13
+gold 0x33
+gray_ega 0x17
+gray25 0x16
+gray40 0x37
+gray50 0x17
+gray80 0x3F
+green 0x11
+ice_blue 0x1F
+indigo 0x3E
+ivory 0x1A
+lavender 0x2E
+light_blue 0x30
+light_green 0x2A
+light_orange 0x34
+light_turquoise 0x29
+light_yellow 0x2B
+lime 0x32
+magenta_ega 0x0E
+ocean_blue 0x1E
+olive_ega 0x13
+olive_green 0x3B
+orange 0x35
+pale_blue 0x2C
+periwinkle 0x18
+pink 0x0E
+plum 0x3D
+purple_ega 0x14
+red 0x0A
+rose 0x2D
+sea_green 0x39
+silver_ega 0x16
+sky_blue 0x28
+tan 0x2F
+teal 0x15
+teal_ega 0x15
+turquoise 0x0F
+violet 0x14
+white 0x09
+yellow 0x0D"""
+
+# Build the name -> index dict from the table above, registering a "grey"
+# spelling alongside every name that contains "gray".
+colour_map = {}
+for _line in _colour_map_text.splitlines():
+ _name, _num = _line.split()
+ _num = int(_num, 0)
+ colour_map[_name] = _num
+ if 'gray' in _name:
+ colour_map[_name.replace('gray', 'grey')] = _num
+# Remove the scratch names so only colour_map stays in the module namespace.
+del _colour_map_text, _line, _name, _num
+
+
+# Names for the fill pattern codes 0..16; 'none' and the 'solid*' names are aliases.
+pattern_map = {
+ # Text values for pattern.pattern
+ # xlwt/doc/pattern_examples.xls showcases all of these patterns.
+ 'no_fill': 0,
+ 'none': 0,
+ 'solid': 1,
+ 'solid_fill': 1,
+ 'solid_pattern': 1,
+ 'fine_dots': 2,
+ 'alt_bars': 3,
+ 'sparse_dots': 4,
+ 'thick_horz_bands': 5,
+ 'thick_vert_bands': 6,
+ 'thick_backward_diag': 7,
+ 'thick_forward_diag': 8,
+ 'big_spots': 9,
+ 'bricks': 10,
+ 'thin_horz_bands': 11,
+ 'thin_vert_bands': 12,
+ 'thin_backward_diag': 13,
+ 'thin_forward_diag': 14,
+ 'squares': 15,
+ 'diamonds': 16,
+ }
+
+def any_str_func(s):
+ """Value rule that accepts any text: return *s* without surrounding whitespace."""
+ return s.strip()
+
+def colour_index_func(s, maxval=0x7F):
+ """Parse *s* as a colour index; return it if in 0..maxval, else None."""
+ try:
+ # Base 0 lets int() honour prefixes such as '0x'.
+ value = int(s, 0)
+ except ValueError:
+ return None
+ if not (0 <= value <= maxval):
+ return None
+ return value
+
+# Variant with the default 0x7F limit, named to pair with colour_index_func_15.
+colour_index_func_7 = colour_index_func
+
+def colour_index_func_15(s):
+ """Parse *s* as a colour index with the wider limit 0x7FFF; None if invalid."""
+ return colour_index_func(s, maxval=0x7FFF)
+
+def rotation_func(s):
+ """Parse a rotation in degrees and return its encoded value.
+
+ Returns None when *s* is not an integer literal.  Values 0..90 are
+ returned unchanged; -1..-90 are returned as 91..180.
+ Raises EasyXFCallerError when the value is outside -90..+90.
+ """
+ try:
+ value = int(s, 0)
+ except ValueError:
+ return None
+ if not (-90 <= value <= 90):
+ raise EasyXFCallerError("rotation %d: should be -90 to +90 degrees" % value)
+ if value < 0:
+ value = 90 - value # encode as 91 to 180 (clockwise)
+ return value
+
+# Parse dictionary used by easyxf().
+#
+# Top level: section name -> dict of attributes, or a str naming the section
+# it is a synonym of.  Inside a section: attribute name -> value rule(s), or
+# a str naming the attribute it is a synonym of.  A value rule is a dict
+# (value text -> number) or a callable returning the value or None; a list
+# of rules is tried in order.  The resolved attribute name must be an
+# attribute of the corresponding settings object, and synonyms must point
+# directly at a real entry (no synonym of a synonym).
+xf_dict = {
+    'align': 'alignment', # synonym
+    'alignment': {
+        'dire': {
+            'general': 0,
+            'lr': 1,
+            'rl': 2,
+            },
+        'direction': 'dire',
+        'horiz': 'horz',
+        'horizontal': 'horz',
+        'horz': {
+            'general': 0,
+            'left': 1,
+            'center': 2,
+            'centre': 2, # "align: horiz centre" means xf.alignment.horz is set to 2
+            'right': 3,
+            'filled': 4,
+            'justified': 5,
+            'center_across_selection': 6,
+            'centre_across_selection': 6,
+            'distributed': 7,
+            },
+        'inde': IntULim(15), # restriction: 0 <= value <= 15
+        'indent': 'inde',
+        'rota': [{'stacked': 255, 'none': 0, }, rotation_func],
+        'rotation': 'rota',
+        'shri': bool_map,
+        'shrink': 'shri',
+        'shrink_to_fit': 'shri',
+        'vert': {
+            'top': 0,
+            'center': 1,
+            'centre': 1,
+            'bottom': 2,
+            'justified': 3,
+            'distributed': 4,
+            },
+        'vertical': 'vert',
+        'wrap': bool_map,
+        },
+    'border': 'borders',
+    'borders': {
+        'left': [border_line_map, IntULim(0x0d)],
+        'right': [border_line_map, IntULim(0x0d)],
+        'top': [border_line_map, IntULim(0x0d)],
+        'bottom': [border_line_map, IntULim(0x0d)],
+        'diag': [border_line_map, IntULim(0x0d)],
+        'top_colour': [colour_map, colour_index_func_7],
+        'bottom_colour': [colour_map, colour_index_func_7],
+        'left_colour': [colour_map, colour_index_func_7],
+        'right_colour': [colour_map, colour_index_func_7],
+        'diag_colour': [colour_map, colour_index_func_7],
+        'top_color': 'top_colour',
+        'bottom_color': 'bottom_colour',
+        'left_color': 'left_colour',
+        'right_color': 'right_colour',
+        # Was 'diag-colour', a key that does not exist in this dict.
+        'diag_color': 'diag_colour',
+        # The Borders attributes are need_diag1 / need_diag2; the spellings
+        # with the extra underscore are kept as synonyms for existing callers.
+        'need_diag1': bool_map,
+        'need_diag2': bool_map,
+        'need_diag_1': 'need_diag1',
+        'need_diag_2': 'need_diag2',
+        },
+    'font': {
+        'bold': bool_map,
+        'charset': charset_map,
+        'color': 'colour_index',
+        'color_index': 'colour_index',
+        'colour': 'colour_index',
+        'colour_index': [colour_map, colour_index_func_15],
+        'escapement': {'none': 0, 'superscript': 1, 'subscript': 2},
+        'family': {'none': 0, 'roman': 1, 'swiss': 2, 'modern': 3, 'script': 4, 'decorative': 5, },
+        'height': IntULim(0xFFFF), # practical limits are much narrower e.g. 160 to 1440 (8pt to 72pt)
+        'italic': bool_map,
+        'name': any_str_func,
+        'outline': bool_map,
+        'shadow': bool_map,
+        'struck_out': bool_map,
+        'underline': [bool_map, {'none': 0, 'single': 1, 'single_acc': 0x21, 'double': 2, 'double_acc': 0x22, }],
+        },
+    'pattern': {
+        'back_color': 'pattern_back_colour',
+        'back_colour': 'pattern_back_colour',
+        'fore_color': 'pattern_fore_colour',
+        'fore_colour': 'pattern_fore_colour',
+        'pattern': [pattern_map, IntULim(16)],
+        'pattern_back_color': 'pattern_back_colour',
+        'pattern_back_colour': [colour_map, colour_index_func_7],
+        'pattern_fore_color': 'pattern_fore_colour',
+        'pattern_fore_colour': [colour_map, colour_index_func_7],
+        },
+    'protection': {
+        'cell_locked' : bool_map,
+        'formula_hidden': bool_map,
+        },
+    }
+
+def _esplit(s, split_char, esc_char="\\"):
+ """Split *s* on *split_char*, honouring *esc_char* as an escape.
+
+ The character after an escape is copied literally (so an escaped
+ split_char does not split) and the escape itself is dropped.  Always
+ returns at least one item; an escape at the very end of *s* is
+ discarded.
+ """
+ escaped = False
+ olist = ['']
+ for c in s:
+ if escaped:
+ olist[-1] += c
+ escaped = False
+ elif c == esc_char:
+ escaped = True
+ elif c == split_char:
+ olist.append('')
+ else:
+ olist[-1] += c
+ return olist
+
+def _parse_strg_to_obj(strg, obj, parse_dict,
+ field_sep=",", line_sep=";", intro_sep=":", esc_char="\\", debug=False):
+ """Apply the settings described by *strg* to the sub-objects of *obj*.
+
+ *strg* is a sequence of "section: attr value, attr value" lines joined by
+ *line_sep*.  *parse_dict* maps section names to attribute dictionaries and
+ attribute names to value rules; string entries at either level are
+ synonyms.  Each accepted value is stored with setattr on the attribute of
+ *obj* named after the section.
+
+ Raises EasyXFCallerError for problems in *strg* (bad line shape, unknown
+ section or attribute, missing or unacceptable value) and EasyXFAuthorError
+ for problems in *parse_dict* or a mismatch between it and *obj*.
+ """
+ for line in _esplit(strg, line_sep, esc_char):
+ line = line.strip()
+ # NOTE(review): an empty segment ends parsing, so anything after ";;" is
+ # silently ignored -- confirm that "break" rather than "continue" is intended.
+ if not line:
+ break
+ split_line = _esplit(line, intro_sep, esc_char)
+ if len(split_line) != 2:
+ raise EasyXFCallerError('line %r should have exactly 1 "%c"' % (line, intro_sep))
+ section, item_str = split_line
+ section = section.strip().lower()
+ # Resolve the section name, following at most one synonym; the loop's
+ # else-branch fires when a synonym points at another synonym.
+ for counter in range(2):
+ result = parse_dict.get(section)
+ if result is None:
+ raise EasyXFCallerError('section %r is unknown' % section)
+ if isinstance(result, dict):
+ break
+ if not isinstance(result, str):
+ raise EasyXFAuthorError(
+ 'section %r should map to dict or str object; found %r' % (section, type(result)))
+ # synonym
+ old_section = section
+ section = result
+ else:
+ raise EasyXFAuthorError('Attempt to define synonym of synonym (%r: %r)' % (old_section, result))
+ section_dict = result
+ section_obj = getattr(obj, section, None)
+ if section_obj is None:
+ raise EasyXFAuthorError('instance of %s class has no attribute named %s' % (obj.__class__.__name__, section))
+ for kv_str in _esplit(item_str, field_sep, esc_char):
+ guff = kv_str.split()
+ if not guff:
+ continue
+ # First word is the attribute name ('-' is treated as '_'); the rest,
+ # re-joined with single spaces, is the value text.
+ k = guff[0].lower().replace('-', '_')
+ v = ' '.join(guff[1:])
+ if not v:
+ raise EasyXFCallerError("no value supplied for %s.%s" % (section, k))
+ # Resolve the attribute name the same way as the section name above.
+ for counter in xrange(2):
+ result = section_dict.get(k)
+ if result is None:
+ raise EasyXFCallerError('%s.%s is not a known attribute' % (section, k))
+ if not isinstance(result, basestring):
+ break
+ # synonym
+ old_k = k
+ k = result
+ else:
+ raise EasyXFAuthorError('Attempt to define synonym of synonym (%r: %r)' % (old_k, result))
+ value_info = result
+ if not isinstance(value_info, list):
+ value_info = [value_info]
+ # Try each rule in turn; the first one that yields a value wins and the
+ # loop's else-branch reports a value that no rule accepted.
+ for value_rule in value_info:
+ if isinstance(value_rule, dict):
+ # dict maps strings to integer field values
+ vl = v.lower().replace('-', '_')
+ if vl in value_rule:
+ value = value_rule[vl]
+ break
+ elif callable(value_rule):
+ value = value_rule(v)
+ if value is not None:
+ break
+ else:
+ raise EasyXFAuthorError("unknown value rule for attribute %r: %r" % (k, value_rule))
+ else:
+ raise EasyXFCallerError("unexpected value %r for %s.%s" % (v, section, k))
+ # The attribute must already exist on the target object; its old value
+ # is only used for the debug trace.
+ try:
+ orig = getattr(section_obj, k)
+ except AttributeError:
+ raise EasyXFAuthorError('%s.%s in dictionary but not in supplied object' % (section, k))
+ if debug: print "+++ %s.%s = %r # %s; was %r" % (section, k, value, v, orig)
+ setattr(section_obj, k, value)
+
+def easyxf(strg_to_parse="", num_format_str=None,
+ field_sep=",", line_sep=";", intro_sep=":", esc_char="\\", debug=False):
+ """Build an XFStyle from a compact text description.
+
+ *strg_to_parse* is parsed against xf_dict, e.g. "font: bold on; align: wrap on";
+ an empty string leaves the defaults untouched.  *num_format_str*, when not
+ None, replaces the style's number format string.  The separator and escape
+ arguments are passed through to the parser.  Returns the new XFStyle.
+ """
+ xfobj = XFStyle()
+ if num_format_str is not None:
+ xfobj.num_format_str = num_format_str
+ if strg_to_parse:
+ _parse_strg_to_obj(strg_to_parse, xfobj, xf_dict,
+ field_sep=field_sep, line_sep=line_sep, intro_sep=intro_sep, esc_char=esc_char, debug=debug)
+ return xfobj
diff --git a/tablib/packages/xlwt/UnicodeUtils.py b/tablib/packages/xlwt/UnicodeUtils.py
new file mode 100644
index 0000000..630c259
--- /dev/null
+++ b/tablib/packages/xlwt/UnicodeUtils.py
@@ -0,0 +1,81 @@
+# -*- coding: windows-1252 -*-
+
+'''
+From BIFF8 on, strings are always stored using UTF-16LE text encoding. The
+character array is a sequence of 16-bit values. Additionally it is
+possible to use a compressed format, which omits the high bytes of all
+characters, if they are all zero.
+
+The following tables describe the standard format of the entire string, but
+in many records the strings differ from this format. This will be mentioned
+separately. It is possible (but not required) to store Rich-Text formatting
+information and Asian phonetic information inside a Unicode string. This
+results in four different ways to store a string. The character array
+is not zero-terminated.
+
+The string consists of the character count (as usual an 8-bit value or
+a 16-bit value), option flags, the character array and optional formatting
+information. If the string is empty, sometimes the option flags field will
+not occur. This is mentioned at the respective place.
+
+Offset Size Contents
+0 1 or 2 Length of the string (character count, ln)
+1 or 2 1 Option flags:
+ Bit Mask Contents
+ 0 01H Character compression (ccompr):
+ 0 = Compressed (8-bit characters)
+ 1 = Uncompressed (16-bit characters)
+ 2 04H Asian phonetic settings (phonetic):
+ 0 = Does not contain Asian phonetic settings
+ 1 = Contains Asian phonetic settings
+ 3 08H Rich-Text settings (richtext):
+ 0 = Does not contain Rich-Text settings
+ 1 = Contains Rich-Text settings
+[2 or 3] 2 (optional, only if richtext=1) Number of Rich-Text formatting runs (rt)
+[var.] 4 (optional, only if phonetic=1) Size of Asian phonetic settings block (in bytes, sz)
+var. ln or
+ 2·ln Character array (8-bit characters or 16-bit characters, dependent on ccompr)
+[var.] 4·rt (optional, only if richtext=1) List of rt formatting runs
+[var.] sz (optional, only if phonetic=1) Asian Phonetic Settings Block
+'''
+
+
+from struct import pack
+
+def upack2(s, encoding='ascii'):
+ # If not unicode, make it so.
+ if isinstance(s, unicode):
+ us = s
+ else:
+ us = unicode(s, encoding)
+ # Limit is based on number of content characters
+ # (not on number of bytes in packed result)
+ len_us = len(us)
+ if len_us > 65535:
+ raise Exception('String longer than 65535 characters')
+ try:
+ encs = us.encode('latin1')
+ # Success here means all chars are in U+0000 to U+00FF
+ # inclusive, meaning that we can use "compressed format".
+ flag = 0
+ except UnicodeEncodeError:
+ encs = us.encode('utf_16_le')
+ flag = 1
+ return pack(' 255:
+ raise Exception('String longer than 255 characters')
+ try:
+ encs = us.encode('latin1')
+ flag = 0
+ except UnicodeEncodeError:
+ encs = us.encode('utf_16_le')
+ flag = 1
+ return pack('
+# Copyright (c) 2002-2004 John McNamara (Perl Spreadsheet::WriteExcel)
+#
+# This library is free software; you can redistribute it and/or modify it
+# under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 2.1 of the License, or
+# (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+# General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this library; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#----------------------------------------------------------------------------
+# This module was written/ported from PERL Spreadsheet::WriteExcel module
+# The author of the PERL Spreadsheet::WriteExcel module is John McNamara
+#
+#----------------------------------------------------------------------------
+# See the README.txt distributed with pyXLWriter for more details.
+
+# Portions are (C) Roman V. Kiseliov, 2005
+
+
+# Utilities for work with reference to cells and with sheetnames
+
+
+__rev_id__ = """$Id: Utils.py 3844 2009-05-20 01:02:54Z sjmachin $"""
+
+import re
+from struct import pack
+from ExcelMagic import MAX_ROW, MAX_COL
+
+
+# Patterns for A1-style references.  A column name is one letter, or two
+# letters with the first limited to A-I; an optional "$" precedes each part.
+# Single cell; groups: ("$" or "", column letters, "$" or "", row digits).
+_re_cell_ex = re.compile(r"(\$?)([A-I]?[A-Z])(\$?)(\d+)", re.IGNORECASE)
+# Row range such as "1:3"; groups: first row, last row.
+_re_row_range = re.compile(r"\$?(\d+):\$?(\d+)")
+# Column range such as "B:G"; groups: first column, last column.
+_re_col_range = re.compile(r"\$?([A-I]?[A-Z]):\$?([A-I]?[A-Z])", re.IGNORECASE)
+# Cell range such as "A1:B7"; groups: first cell, last cell.
+_re_cell_range = re.compile(r"\$?([A-I]?[A-Z]\$?\d+):\$?([A-I]?[A-Z]\$?\d+)", re.IGNORECASE)
+# Single cell reference; group: the cell without a leading "$".
+_re_cell_ref = re.compile(r"\$?([A-I]?[A-Z]\$?\d+)", re.IGNORECASE)
+
+
+def col_by_name(colname):
+    """Convert an upper-case column name to a zero-based column index.
+
+    The letters are read as digits 1..26 of a base-26 number, so 'A' -> 0,
+    'Z' -> 25, 'AA' -> 26 and 'IV' -> 255.  The name is not upper-cased
+    here; callers do that first.  An empty name yields -1.
+    """
+    col = 0
+    # Accumulate from the most significant letter; no local shadows pow().
+    for ch in colname:
+        col = col * 26 + (ord(ch) - ord('A') + 1)
+    return col - 1
+
+
+def cell_to_rowcol(cell):
+ """Convert an Excel cell reference string in A1 notation
+ to numeric row/col notation.
+
+ Returns: row, col, row_abs, col_abs
+
+ row and col are zero-based; row_abs / col_abs are True when the
+ corresponding part was prefixed with "$".  Raises Exception when the
+ start of *cell* is not a cell reference.
+ """
+ # match() anchors only at the start, so text after the reference is ignored.
+ m = _re_cell_ex.match(cell)
+ if not m:
+ raise Exception("Ill-formed single_cell reference: %s" % cell)
+ col_abs, col, row_abs, row = m.groups()
+ row_abs = bool(row_abs)
+ col_abs = bool(col_abs)
+ row = int(row) - 1
+ col = col_by_name(col.upper())
+ return row, col, row_abs, col_abs
+
+
+def cell_to_rowcol2(cell):
+ """Convert an Excel cell reference string in A1 notation
+ to numeric row/col notation.
+
+ Returns: row, col
+
+ Both values are zero-based; any "$" markers are accepted but not
+ reported.  Raises Exception when the start of *cell* is not a cell
+ reference.
+ """
+ m = _re_cell_ex.match(cell)
+ if not m:
+ raise Exception("Error in cell format")
+ col_abs, col, row_abs, row = m.groups()
+ # Convert base26 column string to number
+ # All your Base are belong to us.
+ row = int(row) - 1
+ col = col_by_name(col.upper())
+ return row, col
+
+
+def rowcol_to_cell(row, col, row_abs=False, col_abs=False):
+ """Convert numeric row/col notation to an Excel cell reference string in
+ A1 notation.
+
+ row and col are zero-based.  A true row_abs / col_abs puts "$" in front
+ of the row number / column letters.  Out-of-range values trip the
+ assertions below.
+ """
+ assert 0 <= row < MAX_ROW # MAX_ROW counts from 1
+ assert 0 <= col < MAX_COL # MAX_COL counts from 1
+ # Split the column into at most two letters: d selects the first, m the second.
+ d = col // 26
+ m = col % 26
+ chr1 = "" # Most significant character in AA1
+ if row_abs:
+ row_abs = '$'
+ else:
+ row_abs = ''
+ if col_abs:
+ col_abs = '$'
+ else:
+ col_abs = ''
+ if d > 0:
+ chr1 = chr(ord('A') + d - 1)
+ chr2 = chr(ord('A') + m)
+ # Zero index to 1-index
+ return col_abs + chr1 + chr2 + row_abs + str(row + 1)
+
+def rowcol_pair_to_cellrange(row1, col1, row2, col2,
+ row1_abs=False, col1_abs=False, row2_abs=False, col2_abs=False):
+ """Convert two (row,column) pairs
+ into a cell range string in A1:B2 notation.
+
+ The first pair must not lie below or to the right of the second
+ (checked with assertions).  The *_abs flags add "$" markers as in
+ rowcol_to_cell.
+
+ Returns: cell range string
+ """
+ assert row1 <= row2
+ assert col1 <= col2
+ return (
+ rowcol_to_cell(row1, col1, row1_abs, col1_abs)
+ + ":"
+ + rowcol_to_cell(row2, col2, row2_abs, col2_abs)
+ )
+
+def cellrange_to_rowcol_pair(cellrange):
+    """Convert cell range string in A1 notation to numeric row/col
+    pair.
+
+    Accepted forms, tried in this order: a row range ('1:3'), a column
+    range ('B:G'), a cell range ('A1:B7') and a single cell ('A1').  All
+    results are zero-based.  For a row range col2 is -1 and for a column
+    range row2 is -1, standing for "through the last column/row".
+
+    Returns: row1, col1, row2, col2
+
+    Raises Exception when *cellrange* matches none of the forms.
+    """
+    cellrange = cellrange.upper()
+    # Convert a row range: '1:3'
+    res = _re_row_range.match(cellrange)
+    if res:
+        row1 = int(res.group(1)) - 1
+        col1 = 0
+        row2 = int(res.group(2)) - 1
+        col2 = -1
+        return row1, col1, row2, col2
+    # Convert a column range: 'A:A' or 'B:G'.
+    # A range such as A:A is equivalent to A1:A16384, so add rows as required
+    res = _re_col_range.match(cellrange)
+    if res:
+        col1 = col_by_name(res.group(1).upper())
+        row1 = 0
+        col2 = col_by_name(res.group(2).upper())
+        row2 = -1
+        return row1, col1, row2, col2
+    # Convert a cell range: 'A1:B7'
+    res = _re_cell_range.match(cellrange)
+    if res:
+        row1, col1 = cell_to_rowcol2(res.group(1))
+        row2, col2 = cell_to_rowcol2(res.group(2))
+        return row1, col1, row2, col2
+    # Convert a cell reference: 'A1' or 'AD2000'
+    res = _re_cell_ref.match(cellrange)
+    if res:
+        row1, col1 = cell_to_rowcol2(res.group(1))
+        return row1, col1, row1, col1
+    # Report the offending text; this used to name an undefined variable
+    # and so failed with NameError instead of this message.
+    raise Exception("Unknown cell reference %s" % (cellrange))
+
+
+def cell_to_packed_rowcol(cell):
+ """ pack row and column into the required 4 byte format """
+ # Returns (row, col) with two flag bits folded into col; raises Exception
+ # when the column or row is at or beyond MAX_COL / MAX_ROW.
+ row, col, row_abs, col_abs = cell_to_rowcol(cell)
+ if col >= MAX_COL:
+ raise Exception("Column %s greater than IV in formula" % cell)
+ if row >= MAX_ROW: # this for BIFF8. for BIFF7 available 2^14
+ raise Exception("Row %s greater than %d in formula" % (cell, MAX_ROW))
+ # Bit 15 is set when the row is relative, bit 14 when the column is relative.
+ col |= int(not row_abs) << 15
+ col |= int(not col_abs) << 14
+ return row, col
+
+# === sheetname functions ===
+
+def valid_sheet_name(sheet_name):
+ """Return True if *sheet_name* is acceptable as a worksheet name.
+
+ A name is rejected when it is empty, starts with an apostrophe, is longer
+ than 31 characters, or contains any of [ ] : \\ ? / * or a NUL character.
+ """
+ if sheet_name == u"" or sheet_name[0] == u"'" or len(sheet_name) > 31:
+ return False
+ for c in sheet_name:
+ if c in u"[]:\\?/*\x00":
+ return False
+ return True
+
+def quote_sheet_name(unquoted_sheet_name):
+ """Return the sheet name wrapped in apostrophes, with inner apostrophes doubled.
+
+ Raises Exception when the name does not pass valid_sheet_name().
+ """
+ if not valid_sheet_name(unquoted_sheet_name):
+ raise Exception(
+ 'attempt to quote an invalid worksheet name %r' % unquoted_sheet_name)
+ return u"'" + unquoted_sheet_name.replace(u"'", u"''") + u"'"
diff --git a/tablib/packages/xlwt/Workbook.py b/tablib/packages/xlwt/Workbook.py
new file mode 100644
index 0000000..b24282b
--- /dev/null
+++ b/tablib/packages/xlwt/Workbook.py
@@ -0,0 +1,636 @@
+# -*- coding: windows-1252 -*-
+'''
+Record Order in BIFF8
+ Workbook Globals Substream
+ BOF Type = workbook globals
+ Interface Header
+ MMS
+ Interface End
+ WRITEACCESS
+ CODEPAGE
+ DSF
+ TABID
+ FNGROUPCOUNT
+ Workbook Protection Block
+ WINDOWPROTECT
+ PROTECT
+ PASSWORD
+ PROT4REV
+ PROT4REVPASS
+ BACKUP
+ HIDEOBJ
+ WINDOW1
+ DATEMODE
+ PRECISION
+ REFRESHALL
+ BOOKBOOL
+ FONT +
+ FORMAT *
+ XF +
+ STYLE +
+ ? PALETTE
+ USESELFS
+
+ BOUNDSHEET +
+
+ COUNTRY
+ ? Link Table
+ SST
+ ExtSST
+ EOF
+'''
+
+import BIFFRecords
+import Style
+
+class Workbook(object):
+    """BIFF8 workbook: owns the worksheets, the style collection and the shared string table; get_biff_data()/save() serialise them."""
+    #################################################################
+    ## Constructor
+    #################################################################
+    def __init__(self, encoding='ascii', style_compression=0):  # style_compression is handed to Style.StyleCollection
+        self.encoding = encoding  # decodes byte-string sheet/function names; also passed to the SST and BOUNDSHEET records
+        self.__owner = 'None'  # the string 'None', not the None object; written through WriteAccessRecord
+        self.__country_code = None # falsy => no COUNTRY record is emitted; 0x07 is Russia :-)
+        self.__wnd_protect = 0
+        self.__obj_protect = 0
+        self.__protect = 0
+        self.__backup_on_save = 0
+        # the following nine values are the arguments of the WINDOW1 record (see __window1_rec)
+        self.__hpos_twips = 0x01E0
+        self.__vpos_twips = 0x005A
+        self.__width_twips = 0x3FCF
+        self.__height_twips = 0x2A4E
+
+        self.__active_sheet = 0
+        self.__first_tab_index = 0
+        self.__selected_tabs = 0x01
+        self.__tab_width_twips = 0x0258
+
+        self.__wnd_hidden = 0  # WINDOW1 flag bit 0
+        self.__wnd_mini = 0  # WINDOW1 flag bit 1
+        self.__hscroll_visible = 1  # WINDOW1 flag bit 3
+        self.__vscroll_visible = 1  # WINDOW1 flag bit 4
+        self.__tabs_visible = 1  # WINDOW1 flag bit 5
+
+        self.__styles = Style.StyleCollection(style_compression)
+
+        self.__dates_1904 = 0  # argument of DateModeRecord
+        self.__use_cell_values = 1  # argument of PrecisionRecord
+
+        self.__sst = BIFFRecords.SharedStringTable(self.encoding)
+
+        self.__worksheets = []
+        self.__worksheet_idx_from_name = {}  # lower-cased sheet name -> index into __worksheets
+        self.__sheet_refs = {}  # (supbook index, first sheet, last sheet) -> reference index
+        self._supbook_xref = {}  # tag tuple such as ('ownbook', 0) -> supbook index
+        self._xcall_xref = {}  # external function name -> index, in first-seen order
+        self._ownbook_supbookx = None  # None until setup_ownbook() has run
+        self._ownbook_supbook_ref = None
+        self._xcall_supbookx = None  # None until setup_xcall() has run
+        self._xcall_supbook_ref = None
+
+
+
+    #################################################################
+    ## Properties, "getters", "setters"
+    #################################################################
+
+    def get_style_stats(self):
+        return self.__styles.stats[:]  # slice copy, so callers cannot mutate the collection's list
+
+    def set_owner(self, value):
+        self.__owner = value
+
+    def get_owner(self):
+        return self.__owner
+
+    owner = property(get_owner, set_owner)  # written to the file via WriteAccessRecord
+
+    #################################################################
+
+    def set_country_code(self, value):
+        self.__country_code = value
+
+    def get_country_code(self):
+        return self.__country_code
+
+    country_code = property(get_country_code, set_country_code)  # used for both CountryRecord arguments; falsy => record omitted
+
+    #################################################################
+
+    def set_wnd_protect(self, value):
+        self.__wnd_protect = int(value)
+
+    def get_wnd_protect(self):
+        return bool(self.__wnd_protect)
+
+    wnd_protect = property(get_wnd_protect, set_wnd_protect)  # stored as int, read as bool; feeds WindowProtectRecord
+
+    #################################################################
+
+    def set_obj_protect(self, value):
+        self.__obj_protect = int(value)
+
+    def get_obj_protect(self):
+        return bool(self.__obj_protect)
+
+    obj_protect = property(get_obj_protect, set_obj_protect)  # stored as int, read as bool; feeds ObjectProtectRecord
+
+    #################################################################
+
+    def set_protect(self, value):
+        self.__protect = int(value)
+
+    def get_protect(self):
+        return bool(self.__protect)
+
+    protect = property(get_protect, set_protect)  # stored as int, read as bool; feeds ProtectRecord
+
+    #################################################################
+
+    def set_backup_on_save(self, value):
+        self.__backup_on_save = int(value)
+
+    def get_backup_on_save(self):
+        return bool(self.__backup_on_save)
+
+    backup_on_save = property(get_backup_on_save, set_backup_on_save)  # stored as int, read as bool; feeds BackupRecord
+
+    #################################################################
+
+    def set_hpos(self, value):
+        self.__hpos_twips = value & 0xFFFF  # keep only the low 16 bits
+
+    def get_hpos(self):
+        return self.__hpos_twips
+
+    hpos = property(get_hpos, set_hpos)  # first argument of Window1Record
+
+    #################################################################
+
+    def set_vpos(self, value):
+        self.__vpos_twips = value & 0xFFFF  # keep only the low 16 bits
+
+    def get_vpos(self):
+        return self.__vpos_twips
+
+    vpos = property(get_vpos, set_vpos)  # second argument of Window1Record
+
+    #################################################################
+
+    def set_width(self, value):
+        self.__width_twips = value & 0xFFFF  # keep only the low 16 bits
+
+    def get_width(self):
+        return self.__width_twips
+
+    width = property(get_width, set_width)  # third argument of Window1Record
+
+    #################################################################
+
+    def set_height(self, value):
+        self.__height_twips = value & 0xFFFF  # keep only the low 16 bits
+
+    def get_height(self):
+        return self.__height_twips
+
+    height = property(get_height, set_height)  # fourth argument of Window1Record
+
+    #################################################################
+
+    def set_active_sheet(self, value):
+        self.__active_sheet = value & 0xFFFF  # keep only the low 16 bits
+        self.__first_tab_index = self.__active_sheet  # the first tab index always follows the active sheet
+
+    def get_active_sheet(self):
+        return self.__active_sheet
+
+    active_sheet = property(get_active_sheet, set_active_sheet)  # get_biff_data() marks this worksheet index as selected
+
+    #################################################################
+
+    def set_tab_width(self, value):
+        self.__tab_width_twips = value & 0xFFFF  # keep only the low 16 bits
+
+    def get_tab_width(self):
+        return self.__tab_width_twips
+
+    tab_width = property(get_tab_width, set_tab_width)  # last argument of Window1Record
+
+    #################################################################
+
+    def set_wnd_visible(self, value):
+        self.__wnd_hidden = int(not value)  # stored inverted, as a "hidden" flag
+
+    def get_wnd_visible(self):
+        return not bool(self.__wnd_hidden)
+
+    wnd_visible = property(get_wnd_visible, set_wnd_visible)  # inverse of WINDOW1 flag bit 0
+
+    #################################################################
+
+    def set_wnd_mini(self, value):
+        self.__wnd_mini = int(value)
+
+    def get_wnd_mini(self):
+        return bool(self.__wnd_mini)
+
+    wnd_mini = property(get_wnd_mini, set_wnd_mini)  # WINDOW1 flag bit 1
+
+    #################################################################
+
+    def set_hscroll_visible(self, value):
+        self.__hscroll_visible = int(value)
+
+    def get_hscroll_visible(self):
+        return bool(self.__hscroll_visible)
+
+    hscroll_visible = property(get_hscroll_visible, set_hscroll_visible)  # WINDOW1 flag bit 3
+
+    #################################################################
+
+    def set_vscroll_visible(self, value):
+        self.__vscroll_visible = int(value)
+
+    def get_vscroll_visible(self):
+        return bool(self.__vscroll_visible)
+
+    vscroll_visible = property(get_vscroll_visible, set_vscroll_visible)  # WINDOW1 flag bit 4
+
+    #################################################################
+
+    def set_tabs_visible(self, value):
+        self.__tabs_visible = int(value)
+
+    def get_tabs_visible(self):
+        return bool(self.__tabs_visible)
+
+    tabs_visible = property(get_tabs_visible, set_tabs_visible)  # WINDOW1 flag bit 5
+
+    #################################################################
+
+    def set_dates_1904(self, value):
+        self.__dates_1904 = int(value)
+
+    def get_dates_1904(self):
+        return bool(self.__dates_1904)
+
+    dates_1904 = property(get_dates_1904, set_dates_1904)  # stored as int, read as bool; feeds DateModeRecord
+
+    #################################################################
+
+    def set_use_cell_values(self, value):
+        self.__use_cell_values = int(value)
+
+    def get_use_cell_values(self):
+        return bool(self.__use_cell_values)
+
+    use_cell_values = property(get_use_cell_values, set_use_cell_values)  # stored as int, read as bool; feeds PrecisionRecord
+
+    #################################################################
+
+    def get_default_style(self):
+        return self.__styles.default_style
+
+    default_style = property(get_default_style)  # read-only: no setter is registered
+
+    ##################################################################
+    ## Methods
+    ##################################################################
+
+    def add_style(self, style):  # delegates to the style collection and returns whatever its add() returns
+        return self.__styles.add(style)
+
+    def add_str(self, s):  # delegates to the shared string table
+        return self.__sst.add_str(s)
+
+    def del_str(self, sst_idx):  # delegates to the shared string table; returns None
+        self.__sst.del_str(sst_idx)
+
+    def str_index(self, s):  # delegates to the shared string table
+        return self.__sst.str_index(s)
+
+    def add_sheet(self, sheetname, cell_overwrite_ok=False):  # create, register and return a new Worksheet
+        import Worksheet, Utils
+        if not isinstance(sheetname, unicode):
+            sheetname = sheetname.decode(self.encoding)  # byte-string names are decoded with the workbook encoding
+        if not Utils.valid_sheet_name(sheetname):
+            raise Exception("invalid worksheet name %r" % sheetname)
+        lower_name = sheetname.lower()  # duplicate detection is case-insensitive
+        if lower_name in self.__worksheet_idx_from_name:
+            raise Exception("duplicate worksheet name %r" % sheetname)
+        self.__worksheet_idx_from_name[lower_name] = len(self.__worksheets)  # index the sheet is about to get
+        self.__worksheets.append(Worksheet.Worksheet(sheetname, self, cell_overwrite_ok))
+        return self.__worksheets[-1]
+
+    def get_sheet(self, sheetnum):  # plain list indexing: negative indices work, out-of-range raises IndexError
+        return self.__worksheets[sheetnum]
+
+    def raise_bad_sheetname(self, sheetname):  # never returns; always raises
+        raise Exception("Formula: unknown sheet name %s" % sheetname)
+
+    def convert_sheetindex(self, strg_ref, n_sheets):  # digit string -> int index, validated against n_sheets
+        idx = int(strg_ref)
+        if 0 <= idx < n_sheets:
+            return idx
+        msg = "Formula: sheet index (%s) >= number of sheets (%d)" % (strg_ref, n_sheets)
+        raise Exception(msg)
+
+    def _get_supbook_index(self, tag):  # get-or-create: a new tag receives the next free index
+        if tag in self._supbook_xref:
+            return self._supbook_xref[tag]
+        self._supbook_xref[tag] = idx = len(self._supbook_xref)
+        return idx
+
+    def setup_ownbook(self):  # registers the 'ownbook' supbook and its (supbook, 0xFFFE, 0xFFFE) reference
+        self._ownbook_supbookx = self._get_supbook_index(('ownbook', 0))
+        self._ownbook_supbook_ref = None
+        reference = (self._ownbook_supbookx, 0xFFFE, 0xFFFE)
+        if reference in self.__sheet_refs:
+            raise Exception("can't happen")
+        self.__sheet_refs[reference] = self._ownbook_supbook_ref = len(self.__sheet_refs)
+
+    def setup_xcall(self):  # registers the 'xcall' supbook and its (supbook, 0xFFFE, 0xFFFE) reference
+        self._xcall_supbookx = self._get_supbook_index(('xcall', 0))
+        self._xcall_supbook_ref = None
+        reference = (self._xcall_supbookx, 0xFFFE, 0xFFFE)
+        if reference in self.__sheet_refs:
+            raise Exception("can't happen")
+        self.__sheet_refs[reference] = self._xcall_supbook_ref = len(self.__sheet_refs)
+
+    def add_sheet_reference(self, formula):  # resolves the formula's sheet/xcall references and patches the indices in
+        patches = []  # (offset, value) pairs handed to formula.patch_references at the end
+        n_sheets = len(self.__worksheets)
+        sheet_refs, xcall_refs = formula.get_references()
+
+        for ref0, ref1, offset in sheet_refs:
+            if not ref0.isdigit():  # a non-numeric reference is a sheet name
+                try:
+                    ref0n = self.__worksheet_idx_from_name[ref0.lower()]
+                except KeyError:
+                    self.raise_bad_sheetname(ref0)  # always raises, so ref0n is bound whenever execution continues
+            else:
+                ref0n = self.convert_sheetindex(ref0, n_sheets)
+            if ref1 == ref0:
+                ref1n = ref0n
+            elif not ref1.isdigit():
+                try:
+                    ref1n = self.__worksheet_idx_from_name[ref1.lower()]
+                except KeyError:
+                    self.raise_bad_sheetname(ref1)  # always raises
+            else:
+                ref1n = self.convert_sheetindex(ref1, n_sheets)
+            if ref1n < ref0n:
+                msg = "Formula: sheets out of order; %r:%r -> (%d, %d)" \
+                    % (ref0, ref1, ref0n, ref1n)
+                raise Exception(msg)
+            if self._ownbook_supbookx is None:
+                self.setup_ownbook()  # created lazily on the first sheet reference
+            reference = (self._ownbook_supbookx, ref0n, ref1n)
+            if reference in self.__sheet_refs:
+                patches.append((offset, self.__sheet_refs[reference]))  # reuse the existing reference index
+            else:
+                nrefs = len(self.__sheet_refs)  # the index the new reference would receive
+                if nrefs > 65535:
+                    raise Exception('More than 65536 inter-sheet references')
+                self.__sheet_refs[reference] = nrefs
+                patches.append((offset, nrefs))
+
+        for funcname, offset in xcall_refs:
+            if self._ownbook_supbookx is None:
+                self.setup_ownbook()  # ownbook is registered before xcall, so it gets the lower supbook index
+            if self._xcall_supbookx is None:
+                self.setup_xcall()
+            # print funcname, self._supbook_xref
+            patches.append((offset, self._xcall_supbook_ref))
+            if not isinstance(funcname, unicode):
+                funcname = funcname.decode(self.encoding)
+            if funcname in self._xcall_xref:
+                idx = self._xcall_xref[funcname]
+            else:
+                self._xcall_xref[funcname] = idx = len(self._xcall_xref)
+            patches.append((offset + 2, idx + 1))  # the patched function index is 1-based
+
+        formula.patch_references(patches)
+
+    ##################################################################
+    ## BIFF records generation
+    ##################################################################
+
+    def __bof_rec(self):
+        return BIFFRecords.Biff8BOFRecord(BIFFRecords.Biff8BOFRecord.BOOK_GLOBAL).get()
+
+    def __eof_rec(self):
+        return BIFFRecords.EOFRecord().get()
+
+    def __intf_hdr_rec(self):
+        return BIFFRecords.InteraceHdrRecord().get()
+
+    def __intf_end_rec(self):
+        return BIFFRecords.InteraceEndRecord().get()
+
+    def __intf_mms_rec(self):
+        return BIFFRecords.MMSRecord().get()
+
+    def __write_access_rec(self):
+        return BIFFRecords.WriteAccessRecord(self.__owner).get()
+
+    def __wnd_protect_rec(self):
+        return BIFFRecords.WindowProtectRecord(self.__wnd_protect).get()
+
+    def __obj_protect_rec(self):
+        return BIFFRecords.ObjectProtectRecord(self.__obj_protect).get()
+
+    def __protect_rec(self):
+        return BIFFRecords.ProtectRecord(self.__protect).get()
+
+    def __password_rec(self):
+        return BIFFRecords.PasswordRecord().get()
+
+    def __prot4rev_rec(self):
+        return BIFFRecords.Prot4RevRecord().get()
+
+    def __prot4rev_pass_rec(self):
+        return BIFFRecords.Prot4RevPassRecord().get()
+
+    def __backup_rec(self):
+        return BIFFRecords.BackupRecord(self.__backup_on_save).get()
+
+    def __hide_obj_rec(self):
+        return BIFFRecords.HideObjRecord().get()
+
+    def __window1_rec(self):
+        flags = 0
+        flags |= (self.__wnd_hidden) << 0
+        flags |= (self.__wnd_mini) << 1
+        flags |= (self.__hscroll_visible) << 3  # bit 2 is never set here
+        flags |= (self.__vscroll_visible) << 4
+        flags |= (self.__tabs_visible) << 5
+
+        return BIFFRecords.Window1Record(self.__hpos_twips, self.__vpos_twips,
+                                        self.__width_twips, self.__height_twips,
+                                        flags,
+                                        self.__active_sheet, self.__first_tab_index,
+                                        self.__selected_tabs, self.__tab_width_twips).get()
+
+    def __codepage_rec(self):
+        return BIFFRecords.CodepageBiff8Record().get()
+
+    def __country_rec(self):
+        if not self.__country_code:  # None or 0: emit nothing
+            return ''
+        return BIFFRecords.CountryRecord(self.__country_code, self.__country_code).get()
+
+    def __dsf_rec(self):
+        return BIFFRecords.DSFRecord().get()
+
+    def __tabid_rec(self):
+        return BIFFRecords.TabIDRecord(len(self.__worksheets)).get()
+
+    def __fngroupcount_rec(self):
+        return BIFFRecords.FnGroupCountRecord().get()
+
+    def __datemode_rec(self):
+        return BIFFRecords.DateModeRecord(self.__dates_1904).get()
+
+    def __precision_rec(self):
+        return BIFFRecords.PrecisionRecord(self.__use_cell_values).get()
+
+    def __refresh_all_rec(self):
+        return BIFFRecords.RefreshAllRecord().get()
+
+    def __bookbool_rec(self):
+        return BIFFRecords.BookBoolRecord().get()
+
+    def __all_fonts_num_formats_xf_styles_rec(self):
+        return self.__styles.get_biff_data()
+
+    def __palette_rec(self):  # placeholder: always contributes an empty string
+        result = ''
+        return result
+
+    def __useselfs_rec(self):
+        return BIFFRecords.UseSelfsRecord().get()
+
+    def __boundsheets_rec(self, data_len_before, data_len_after, sheet_biff_lens):
+        # Stream layout: ... globals before (data_len_before bytes) ...
+        # BOUNDSHEET0
+        # BOUNDSHEET1
+        # BOUNDSHEET2
+        # ... globals after (data_len_after bytes), then the sheet streams:
+        # WORKSHEET0
+        # WORKSHEET1
+        # WORKSHEET2
+        boundsheets_len = 0
+        for sheet in self.__worksheets:  # first pass: dummy offset 0, only the record lengths are used
+            boundsheets_len += len(BIFFRecords.BoundSheetRecord(
+                0x00L, sheet.visibility, sheet.name, self.encoding
+            ).get())
+
+        start = data_len_before + boundsheets_len + data_len_after  # stream offset of the first worksheet
+
+        result = ''
+        for sheet_biff_len, sheet in zip(sheet_biff_lens, self.__worksheets):  # second pass: real offsets
+            result += BIFFRecords.BoundSheetRecord(
+                start, sheet.visibility, sheet.name, self.encoding
+            ).get()
+            start += sheet_biff_len  # the next sheet begins right after this one
+        return result
+
+    def __all_links_rec(self):  # supbook records in index order, then one EXTERNSHEET record if any refs exist
+        pieces = []
+        temp = [(idx, tag) for tag, idx in self._supbook_xref.items()]
+        temp.sort()  # order by supbook index
+        for idx, tag in temp:
+            stype, snum = tag
+            if stype == 'ownbook':
+                rec = BIFFRecords.InternalReferenceSupBookRecord(len(self.__worksheets)).get()
+                pieces.append(rec)
+            elif stype == 'xcall':
+                rec = BIFFRecords.XcallSupBookRecord().get()
+                pieces.append(rec)
+                temp = [(idx, name) for name, idx in self._xcall_xref.items()]  # rebinds temp/idx; the outer loop keeps its own iterator
+                temp.sort()  # order by function index
+                for idx, name in temp:
+                    rec = BIFFRecords.ExternnameRecord(
+                        options=0, index=0, name=name, fmla='\x02\x00\x1c\x17').get()
+                    pieces.append(rec)
+            else:
+                raise Exception('unknown supbook stype %r' % stype)
+        if len(self.__sheet_refs) > 0:
+            # get references in index order
+            temp = [(idx, ref) for ref, idx in self.__sheet_refs.items()]
+            temp.sort()
+            temp = [ref for idx, ref in temp]
+            externsheet_record = BIFFRecords.ExternSheetRecord(temp).get()
+            pieces.append(externsheet_record)
+        return ''.join(pieces)
+
+    def __sst_rec(self):
+        return self.__sst.get_biff_record()
+
+    def __ext_sst_rec(self, abs_stream_pos):  # currently disabled: abs_stream_pos is ignored and '' is returned
+        return ''
+        #return BIFFRecords.ExtSSTRecord(abs_stream_pos, self.sst_record.str_placement,
+        #self.sst_record.portions_len).get()
+
+    def get_biff_data(self):  # returns the whole workbook stream: globals, BOUNDSHEETs, links/SST, EOF, then every sheet
+        before = ''
+        before += self.__bof_rec()
+        before += self.__intf_hdr_rec()
+        before += self.__intf_mms_rec()
+        before += self.__intf_end_rec()
+        before += self.__write_access_rec()
+        before += self.__codepage_rec()
+        before += self.__dsf_rec()
+        before += self.__tabid_rec()
+        before += self.__fngroupcount_rec()
+        before += self.__wnd_protect_rec()
+        before += self.__protect_rec()
+        before += self.__obj_protect_rec()
+        before += self.__password_rec()
+        before += self.__prot4rev_rec()
+        before += self.__prot4rev_pass_rec()
+        before += self.__backup_rec()
+        before += self.__hide_obj_rec()
+        before += self.__window1_rec()
+        before += self.__datemode_rec()
+        before += self.__precision_rec()
+        before += self.__refresh_all_rec()
+        before += self.__bookbool_rec()
+        before += self.__all_fonts_num_formats_xf_styles_rec()
+        before += self.__palette_rec()
+        before += self.__useselfs_rec()
+
+        country = self.__country_rec()
+        all_links = self.__all_links_rec()
+
+        shared_str_table = self.__sst_rec()
+        after = country + all_links + shared_str_table
+
+        ext_sst = self.__ext_sst_rec(0) # dummy position: only the record length is needed at this point
+        eof = self.__eof_rec()
+
+        self.__worksheets[self.__active_sheet].selected = True  # IndexError if there are no sheets or the index is out of range
+        sheets = ''
+        sheet_biff_lens = []
+        for sheet in self.__worksheets:
+            data = sheet.get_biff_data()
+            sheets += data
+            sheet_biff_lens.append(len(data))  # needed to compute each sheet's stream offset
+
+        bundlesheets = self.__boundsheets_rec(len(before), len(after)+len(ext_sst)+len(eof), sheet_biff_lens)
+
+        sst_stream_pos = len(before) + len(bundlesheets) + len(country) + len(all_links)  # offset at which the SST record starts
+        ext_sst = self.__ext_sst_rec(sst_stream_pos)
+
+        return before + bundlesheets + after + ext_sst + eof + sheets
+
+    def save(self, filename):  # writes get_biff_data() to filename through CompoundDoc.XlsDoc
+        import CompoundDoc
+
+        doc = CompoundDoc.XlsDoc()
+        doc.save(filename, self.get_biff_data())
+
+
diff --git a/tablib/packages/xlwt/Worksheet.py b/tablib/packages/xlwt/Worksheet.py
new file mode 100644
index 0000000..ff36f1d
--- /dev/null
+++ b/tablib/packages/xlwt/Worksheet.py
@@ -0,0 +1,1297 @@
+# -*- coding: windows-1252 -*-
+'''
+ BOF
+ UNCALCED
+ INDEX
+ Calculation Settings Block
+ PRINTHEADERS
+ PRINTGRIDLINES
+ GRIDSET
+ GUTS
+ DEFAULTROWHEIGHT
+ WSBOOL
+ Page Settings Block
+ Worksheet Protection Block
+ DEFCOLWIDTH
+ COLINFO
+ SORT
+ DIMENSIONS
+ Row Blocks
+ WINDOW2
+ SCL
+ PANE
+ SELECTION
+ STANDARDWIDTH
+ MERGEDCELLS
+ LABELRANGES
+ PHONETIC
+ Conditional Formatting Table
+ Hyperlink Table
+ Data Validity Table
+ SHEETLAYOUT (BIFF8X only)
+ SHEETPROTECTION (BIFF8X only)
+ RANGEPROTECTION (BIFF8X only)
+ EOF
+'''
+
+import BIFFRecords
+import Bitmap
+import Formatting
+import Style
+import tempfile
+
+
+class Worksheet(object):
+ from Workbook import Workbook
+
+ #################################################################
+ ## Constructor
+ #################################################################
+    def __init__(self, sheetname, parent_book, cell_overwrite_ok=False):  # sets every per-sheet default; parent_book is exposed as .parent
+        import Row  # imported at call time, not module level -- presumably to sidestep circular imports; confirm
+        self.Row = Row.Row  # the Row class itself, kept on the instance
+
+        import Column
+        self.Column = Column.Column  # the Column class itself, kept on the instance
+
+        self.__name = sheetname
+        self.__parent = parent_book
+        self._cell_overwrite_ok = cell_overwrite_ok
+
+        self.__rows = {}
+        self.__cols = {}
+        self.__merged_ranges = []
+        self.__bmp_rec = ''
+
+        self.__show_formulas = 0
+        self.__show_grid = 1
+        self.__show_headers = 1
+        self.__panes_frozen = 0
+        ### self.__show_empty_as_zero = 1 ### deprecated with extreme prejudice 2009-05-19
+        self.show_zero_values = 1  # plain public attribute (no property wrapper)
+        self.__auto_colour_grid = 1
+        self.__cols_right_to_left = 0
+        self.__show_outline = 1
+        self.__remove_splits = 0
+        self.__selected = 0
+        # RED HERRING ALERT: "sheet_visible" is a clone of the "selected" attribute.
+        # Typically a workbook created by the Excel UI will have one sheet
+        # (the sheet that was selected when the user saved it)
+        # with both bits set to 1, and all other sheets will have both
+        # bits set to 0. The true visibility of the sheet is found in the "visibility"
+        # attribute obtained from the BOUNDSHEET record.
+        self.__sheet_visible = 0
+        self.__page_preview = 0
+
+        self.__first_visible_row = 0
+        self.__first_visible_col = 0
+        self.__grid_colour = 0x40
+        self.__preview_magn = 60 # percent
+        self.__normal_magn = 100 # percent
+
+        self.visibility = 0 # from/to BOUNDSHEET record.
+
+        self.__vert_split_pos = None  # the four split values stay None until set through their properties
+        self.__horz_split_pos = None
+        self.__vert_split_first_visible = None
+        self.__horz_split_first_visible = None
+        self.__split_active_pane = None
+
+        self.__row_gut_width = 0
+        self.__col_gut_height = 0
+
+        self.__show_auto_page_breaks = 1
+        self.__dialogue_sheet = 0
+        self.__auto_style_outline = 0
+        self.__outline_below = 0
+        self.__outline_right = 0
+        self.__fit_num_pages = 0
+        self.__show_row_outline = 1
+        self.__show_col_outline = 1
+        self.__alt_expr_eval = 0
+        self.__alt_formula_entries = 0
+
+        self.__row_default_height = 0x00FF
+        self.row_default_height_mismatch = 0
+        self.row_default_hidden = 0
+        self.row_default_space_above = 0
+        self.row_default_space_below = 0
+
+        self.__col_default_width = 0x0008
+
+        self.__calc_mode = 1
+        self.__calc_count = 0x0064
+        self.__RC_ref_mode = 1
+        self.__iterations_on = 0
+        self.__delta = 0.001
+        self.__save_recalc = 0
+
+        self.__print_headers = 0
+        self.__print_grid = 0
+        self.__grid_set = 1
+        self.__vert_page_breaks = []
+        self.__horz_page_breaks = []
+        self.__header_str = '&P'
+        self.__footer_str = '&F'
+        self.__print_centered_vert = 0
+        self.__print_centered_horz = 1
+        self.__left_margin = 0.3 #0.5
+        self.__right_margin = 0.3 #0.5
+        self.__top_margin = 0.61 #1.0
+        self.__bottom_margin = 0.37 #1.0
+        self.__paper_size_code = 9 # A4
+        self.__print_scaling = 100
+        self.__start_page_number = 1
+        self.__fit_width_to_pages = 1
+        self.__fit_height_to_pages = 1
+        self.__print_in_rows = 1
+        self.__portrait = 1
+        self.__print_not_colour = 0
+        self.__print_draft = 0
+        self.__print_notes = 0
+        self.__print_notes_at_end = 0
+        self.__print_omit_errors = 0
+        self.__print_hres = 0x012C # 300 dpi
+        self.__print_vres = 0x012C # 300 dpi
+        self.__header_margin = 0.1
+        self.__footer_margin = 0.1
+        self.__copies_num = 1
+
+        self.__wnd_protect = 0
+        self.__obj_protect = 0
+        self.__protect = 0
+        self.__scen_protect = 0
+        self.__password = ''
+
+        self.last_used_row = 0  # NOTE(review): first/last start inverted -- presumably narrowed as cells are written; confirm
+        self.first_used_row = 65535
+        self.last_used_col = 0
+        self.first_used_col = 255
+        self.row_tempfile = None
+        self.__flushed_rows = {}
+        self.__row_visible_levels = 0
+
+ #################################################################
+ ## Properties, "getters", "setters"
+ #################################################################
+
+    def set_name(self, value):  # stored as given; no validation or decoding happens here
+        self.__name = value
+
+    def get_name(self):
+        return self.__name
+
+    name = property(get_name, set_name)  # initialised from the sheetname constructor argument
+
+ #################################################################
+
+    def get_parent(self):  # the parent_book object passed to the constructor
+        return self.__parent
+
+    parent = property(get_parent)  # read-only: no setter is registered
+
+ #################################################################
+
+    def get_rows(self):  # returns the internal dict itself, not a copy
+        return self.__rows
+
+    rows = property(get_rows)  # read-only: no setter is registered
+
+ #################################################################
+
+    def get_cols(self):  # returns the internal dict itself, not a copy
+        return self.__cols
+
+    cols = property(get_cols)  # read-only: no setter is registered
+
+ #################################################################
+
+    def get_merged_ranges(self):  # returns the internal list itself, not a copy
+        return self.__merged_ranges
+
+    merged_ranges = property(get_merged_ranges)  # read-only: no setter is registered
+
+ #################################################################
+
+    def get_bmp_rec(self):  # '' until something assigns the private attribute
+        return self.__bmp_rec
+
+    bmp_rec = property(get_bmp_rec)  # read-only: no setter is registered
+
+ #################################################################
+
+    def set_show_formulas(self, value):  # stored as int(value), so True/False become 1/0
+        self.__show_formulas = int(value)
+
+    def get_show_formulas(self):  # always reports a bool
+        return bool(self.__show_formulas)
+
+    show_formulas = property(get_show_formulas, set_show_formulas)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_show_grid(self, value):  # stored as int(value), so True/False become 1/0
+        self.__show_grid = int(value)
+
+    def get_show_grid(self):  # always reports a bool
+        return bool(self.__show_grid)
+
+    show_grid = property(get_show_grid, set_show_grid)  # initialised to 1 in __init__
+
+ #################################################################
+
+    def set_show_headers(self, value):  # stored as int(value), so True/False become 1/0
+        self.__show_headers = int(value)
+
+    def get_show_headers(self):  # always reports a bool
+        return bool(self.__show_headers)
+
+    show_headers = property(get_show_headers, set_show_headers)  # initialised to 1 in __init__
+
+ #################################################################
+
+    def set_panes_frozen(self, value):  # stored as int(value), so True/False become 1/0
+        self.__panes_frozen = int(value)
+
+    def get_panes_frozen(self):  # always reports a bool
+        return bool(self.__panes_frozen)
+
+    panes_frozen = property(get_panes_frozen, set_panes_frozen)  # initialised to 0 in __init__
+
+ #################################################################
+
+ ### def set_show_empty_as_zero(self, value):
+ ### self.__show_empty_as_zero = int(value)
+
+ ### def get_show_empty_as_zero(self):
+ ### return bool(self.__show_empty_as_zero)
+
+ ### show_empty_as_zero = property(get_show_empty_as_zero, set_show_empty_as_zero)
+
+ #################################################################
+
+    def set_auto_colour_grid(self, value):  # stored as int(value), so True/False become 1/0
+        self.__auto_colour_grid = int(value)
+
+    def get_auto_colour_grid(self):  # always reports a bool
+        return bool(self.__auto_colour_grid)
+
+    auto_colour_grid = property(get_auto_colour_grid, set_auto_colour_grid)  # initialised to 1 in __init__
+
+ #################################################################
+
+    def set_cols_right_to_left(self, value):  # stored as int(value), so True/False become 1/0
+        self.__cols_right_to_left = int(value)
+
+    def get_cols_right_to_left(self):  # always reports a bool
+        return bool(self.__cols_right_to_left)
+
+    cols_right_to_left = property(get_cols_right_to_left, set_cols_right_to_left)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_show_outline(self, value):  # stored as int(value), so True/False become 1/0
+        self.__show_outline = int(value)
+
+    def get_show_outline(self):  # always reports a bool
+        return bool(self.__show_outline)
+
+    show_outline = property(get_show_outline, set_show_outline)  # initialised to 1 in __init__
+
+ #################################################################
+
+    def set_remove_splits(self, value):  # stored as int(value), so True/False become 1/0
+        self.__remove_splits = int(value)
+
+    def get_remove_splits(self):  # always reports a bool
+        return bool(self.__remove_splits)
+
+    remove_splits = property(get_remove_splits, set_remove_splits)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_selected(self, value):  # stored as int(value), so True/False become 1/0
+        self.__selected = int(value)
+
+    def get_selected(self):  # always reports a bool
+        return bool(self.__selected)
+
+    selected = property(get_selected, set_selected)  # Workbook.get_biff_data() sets this to True on the active sheet
+
+ #################################################################
+
+    def set_sheet_visible(self, value):  # stored as int(value), so True/False become 1/0
+        self.__sheet_visible = int(value)
+
+    def get_sheet_visible(self):  # always reports a bool
+        return bool(self.__sheet_visible)
+
+    sheet_visible = property(get_sheet_visible, set_sheet_visible)  # not the real visibility; see the note in __init__ and .visibility
+
+ #################################################################
+
+    def set_page_preview(self, value):  # stored as int(value), so True/False become 1/0
+        self.__page_preview = int(value)
+
+    def get_page_preview(self):  # always reports a bool
+        return bool(self.__page_preview)
+
+    page_preview = property(get_page_preview, set_page_preview)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_first_visible_row(self, value):  # stored as given; no range check or coercion
+        self.__first_visible_row = value
+
+    def get_first_visible_row(self):
+        return self.__first_visible_row
+
+    first_visible_row = property(get_first_visible_row, set_first_visible_row)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_first_visible_col(self, value):  # stored as given; no range check or coercion
+        self.__first_visible_col = value
+
+    def get_first_visible_col(self):
+        return self.__first_visible_col
+
+    first_visible_col = property(get_first_visible_col, set_first_visible_col)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_grid_colour(self, value):  # stored as given; no range check or coercion
+        self.__grid_colour = value
+
+    def get_grid_colour(self):
+        return self.__grid_colour
+
+    grid_colour = property(get_grid_colour, set_grid_colour)  # initialised to 0x40 in __init__
+
+ #################################################################
+
+    def set_preview_magn(self, value):  # stored as given; no range check or coercion
+        self.__preview_magn = value
+
+    def get_preview_magn(self):
+        return self.__preview_magn
+
+    preview_magn = property(get_preview_magn, set_preview_magn)  # a percentage; initialised to 60 in __init__
+
+ #################################################################
+
+    def set_normal_magn(self, value):  # stored as given; no range check or coercion
+        self.__normal_magn = value
+
+    def get_normal_magn(self):
+        return self.__normal_magn
+
+    normal_magn = property(get_normal_magn, set_normal_magn)  # a percentage; initialised to 100 in __init__
+
+ #################################################################
+
+    def set_vert_split_pos(self, value):  # sign is dropped via abs(); None cannot be assigned (abs(None) raises TypeError)
+        self.__vert_split_pos = abs(value)
+
+    def get_vert_split_pos(self):  # None until the setter has been used
+        return self.__vert_split_pos
+
+    vert_split_pos = property(get_vert_split_pos, set_vert_split_pos)
+
+ #################################################################
+
+    def set_horz_split_pos(self, value):  # sign is dropped via abs(); None cannot be assigned (abs(None) raises TypeError)
+        self.__horz_split_pos = abs(value)
+
+    def get_horz_split_pos(self):  # None until the setter has been used
+        return self.__horz_split_pos
+
+    horz_split_pos = property(get_horz_split_pos, set_horz_split_pos)
+
+ #################################################################
+
+    def set_vert_split_first_visible(self, value):  # sign is dropped via abs(); None cannot be assigned
+        self.__vert_split_first_visible = abs(value)
+
+    def get_vert_split_first_visible(self):  # None until the setter has been used
+        return self.__vert_split_first_visible
+
+    vert_split_first_visible = property(get_vert_split_first_visible, set_vert_split_first_visible)
+
+ #################################################################
+
+    def set_horz_split_first_visible(self, value):  # sign is dropped via abs(); None cannot be assigned
+        self.__horz_split_first_visible = abs(value)
+
+    def get_horz_split_first_visible(self):  # None until the setter has been used
+        return self.__horz_split_first_visible
+
+    horz_split_first_visible = property(get_horz_split_first_visible, set_horz_split_first_visible)
+
+ #################################################################
+
+ #def set_split_active_pane(self, value):
+ # self.__split_active_pane = abs(value) & 0x03
+ #
+ #def get_split_active_pane(self):
+ # return self.__split_active_pane
+ #
+ #split_active_pane = property(get_split_active_pane, set_split_active_pane)
+
+ #################################################################
+
+ #def set_row_gut_width(self, value):
+ # self.__row_gut_width = value
+ #
+ #def get_row_gut_width(self):
+ # return self.__row_gut_width
+ #
+ #row_gut_width = property(get_row_gut_width, set_row_gut_width)
+ #
+ #################################################################
+ #
+ #def set_col_gut_height(self, value):
+ # self.__col_gut_height = value
+ #
+ #def get_col_gut_height(self):
+ # return self.__col_gut_height
+ #
+ #col_gut_height = property(get_col_gut_height, set_col_gut_height)
+ #
+ #################################################################
+
+    def set_show_auto_page_breaks(self, value):  # stored as int(value), so True/False become 1/0
+        self.__show_auto_page_breaks = int(value)
+
+    def get_show_auto_page_breaks(self):  # always reports a bool
+        return bool(self.__show_auto_page_breaks)
+
+    show_auto_page_breaks = property(get_show_auto_page_breaks, set_show_auto_page_breaks)  # initialised to 1 in __init__
+
+ #################################################################
+
+    def set_dialogue_sheet(self, value):  # stored as int(value), so True/False become 1/0
+        self.__dialogue_sheet = int(value)
+
+    def get_dialogue_sheet(self):  # always reports a bool
+        return bool(self.__dialogue_sheet)
+
+    dialogue_sheet = property(get_dialogue_sheet, set_dialogue_sheet)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_auto_style_outline(self, value):  # stored as int(value), so True/False become 1/0
+        self.__auto_style_outline = int(value)
+
+    def get_auto_style_outline(self):  # always reports a bool
+        return bool(self.__auto_style_outline)
+
+    auto_style_outline = property(get_auto_style_outline, set_auto_style_outline)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_outline_below(self, value):  # stored as int(value), so True/False become 1/0
+        self.__outline_below = int(value)
+
+    def get_outline_below(self):  # always reports a bool
+        return bool(self.__outline_below)
+
+    outline_below = property(get_outline_below, set_outline_below)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_outline_right(self, value):  # stored as int(value), so True/False become 1/0
+        self.__outline_right = int(value)
+
+    def get_outline_right(self):  # always reports a bool
+        return bool(self.__outline_right)
+
+    outline_right = property(get_outline_right, set_outline_right)  # initialised to 0 in __init__
+
+ #################################################################
+
+    def set_fit_num_pages(self, value):  # stored as given; unlike its neighbours there is no int()/bool() coercion
+        self.__fit_num_pages = value
+
+    def get_fit_num_pages(self):
+        return self.__fit_num_pages
+
+    fit_num_pages = property(get_fit_num_pages, set_fit_num_pages)  # initialised to 0 in __init__
+
+ #################################################################
+
+ def set_show_row_outline(self, value):
+ self.__show_row_outline = int(value)
+
+ def get_show_row_outline(self):
+ return bool(self.__show_row_outline)
+
+ show_row_outline = property(get_show_row_outline, set_show_row_outline)
+
+ #################################################################
+
+ def set_show_col_outline(self, value):
+ self.__show_col_outline = int(value)
+
+ def get_show_col_outline(self):
+ return bool(self.__show_col_outline)
+
+ show_col_outline = property(get_show_col_outline, set_show_col_outline)
+
+ #################################################################
+
+ def set_alt_expr_eval(self, value):
+ self.__alt_expr_eval = int(value)
+
+ def get_alt_expr_eval(self):
+ return bool(self.__alt_expr_eval)
+
+ alt_expr_eval = property(get_alt_expr_eval, set_alt_expr_eval)
+
+ #################################################################
+
+ def set_alt_formula_entries(self, value):
+ self.__alt_formula_entries = int(value)
+
+ def get_alt_formula_entries(self):
+ return bool(self.__alt_formula_entries)
+
+ alt_formula_entries = property(get_alt_formula_entries, set_alt_formula_entries)
+
+ #################################################################
+
+ def set_row_default_height(self, value):
+ self.__row_default_height = value
+
+ def get_row_default_height(self):
+ return self.__row_default_height
+
+ row_default_height = property(get_row_default_height, set_row_default_height)
+
+ #################################################################
+
+ def set_col_default_width(self, value):
+ self.__col_default_width = value
+
+ def get_col_default_width(self):
+ return self.__col_default_width
+
+ col_default_width = property(get_col_default_width, set_col_default_width)
+
+ #################################################################
+
+ def set_calc_mode(self, value):
+ self.__calc_mode = value & 0x03
+
+ def get_calc_mode(self):
+ return self.__calc_mode
+
+ calc_mode = property(get_calc_mode, set_calc_mode)
+
+ #################################################################
+
+ def set_calc_count(self, value):
+ self.__calc_count = value
+
+ def get_calc_count(self):
+ return self.__calc_count
+
+ calc_count = property(get_calc_count, set_calc_count)
+
+ #################################################################
+
+ def set_RC_ref_mode(self, value):
+ self.__RC_ref_mode = int(value)
+
+ def get_RC_ref_mode(self):
+ return bool(self.__RC_ref_mode)
+
+ RC_ref_mode = property(get_RC_ref_mode, set_RC_ref_mode)
+
+ #################################################################
+
+ def set_iterations_on(self, value):
+ self.__iterations_on = int(value)
+
+ def get_iterations_on(self):
+ return bool(self.__iterations_on)
+
+ iterations_on = property(get_iterations_on, set_iterations_on)
+
+ #################################################################
+
+ def set_delta(self, value):
+ self.__delta = value
+
+ def get_delta(self):
+ return self.__delta
+
+ delta = property(get_delta, set_delta)
+
+ #################################################################
+
+ def set_save_recalc(self, value):
+ self.__save_recalc = int(value)
+
+ def get_save_recalc(self):
+ return bool(self.__save_recalc)
+
+ save_recalc = property(get_save_recalc, set_save_recalc)
+
+ #################################################################
+
+ def set_print_headers(self, value):
+ self.__print_headers = int(value)
+
+ def get_print_headers(self):
+ return bool(self.__print_headers)
+
+ print_headers = property(get_print_headers, set_print_headers)
+
+ #################################################################
+
+ def set_print_grid(self, value):
+ self.__print_grid = int(value)
+
+ def get_print_grid(self):
+ return bool(self.__print_grid)
+
+ print_grid = property(get_print_grid, set_print_grid)
+
+ #################################################################
+ #
+ #def set_grid_set(self, value):
+ # self.__grid_set = int(value)
+ #
+ #def get_grid_set(self):
+ # return bool(self.__grid_set)
+ #
+ #grid_set = property(get_grid_set, set_grid_set)
+ #
+ #################################################################
+
+ def set_vert_page_breaks(self, value):
+ self.__vert_page_breaks = value
+
+ def get_vert_page_breaks(self):
+ return self.__vert_page_breaks
+
+ vert_page_breaks = property(get_vert_page_breaks, set_vert_page_breaks)
+
+ #################################################################
+
+ def set_horz_page_breaks(self, value):
+ self.__horz_page_breaks = value
+
+ def get_horz_page_breaks(self):
+ return self.__horz_page_breaks
+
+ horz_page_breaks = property(get_horz_page_breaks, set_horz_page_breaks)
+
+ #################################################################
+
+ def set_header_str(self, value):
+ if isinstance(value, str):
+ value = unicode(value, self.__parent.encoding)
+ self.__header_str = value
+
+ def get_header_str(self):
+ return self.__header_str
+
+ header_str = property(get_header_str, set_header_str)
+
+ #################################################################
+
+ def set_footer_str(self, value):
+ if isinstance(value, str):
+ value = unicode(value, self.__parent.encoding)
+ self.__footer_str = value
+
+ def get_footer_str(self):
+ return self.__footer_str
+
+ footer_str = property(get_footer_str, set_footer_str)
+
+ #################################################################
+
+ def set_print_centered_vert(self, value):
+ self.__print_centered_vert = int(value)
+
+ def get_print_centered_vert(self):
+ return bool(self.__print_centered_vert)
+
+ print_centered_vert = property(get_print_centered_vert, set_print_centered_vert)
+
+ #################################################################
+
+ def set_print_centered_horz(self, value):
+ self.__print_centered_horz = int(value)
+
+ def get_print_centered_horz(self):
+ return bool(self.__print_centered_horz)
+
+ print_centered_horz = property(get_print_centered_horz, set_print_centered_horz)
+
+ #################################################################
+
+ def set_left_margin(self, value):
+ self.__left_margin = value
+
+ def get_left_margin(self):
+ return self.__left_margin
+
+ left_margin = property(get_left_margin, set_left_margin)
+
+ #################################################################
+
+ def set_right_margin(self, value):
+ self.__right_margin = value
+
+ def get_right_margin(self):
+ return self.__right_margin
+
+ right_margin = property(get_right_margin, set_right_margin)
+
+ #################################################################
+
+ def set_top_margin(self, value):
+ self.__top_margin = value
+
+ def get_top_margin(self):
+ return self.__top_margin
+
+ top_margin = property(get_top_margin, set_top_margin)
+
+ #################################################################
+
+ def set_bottom_margin(self, value):
+ self.__bottom_margin = value
+
+ def get_bottom_margin(self):
+ return self.__bottom_margin
+
+ bottom_margin = property(get_bottom_margin, set_bottom_margin)
+
+ #################################################################
+
+ def set_paper_size_code(self, value):
+ self.__paper_size_code = value
+
+ def get_paper_size_code(self):
+ return self.__paper_size_code
+
+ paper_size_code = property(get_paper_size_code, set_paper_size_code)
+
+ #################################################################
+
+ def set_print_scaling(self, value):
+ self.__print_scaling = value
+
+ def get_print_scaling(self):
+ return self.__print_scaling
+
+ print_scaling = property(get_print_scaling, set_print_scaling)
+
+ #################################################################
+
+ def set_start_page_number(self, value):
+ self.__start_page_number = value
+
+ def get_start_page_number(self):
+ return self.__start_page_number
+
+ start_page_number = property(get_start_page_number, set_start_page_number)
+
+ #################################################################
+
+ def set_fit_width_to_pages(self, value):
+ self.__fit_width_to_pages = value
+
+ def get_fit_width_to_pages(self):
+ return self.__fit_width_to_pages
+
+ fit_width_to_pages = property(get_fit_width_to_pages, set_fit_width_to_pages)
+
+ #################################################################
+
+ def set_fit_height_to_pages(self, value):
+ self.__fit_height_to_pages = value
+
+ def get_fit_height_to_pages(self):
+ return self.__fit_height_to_pages
+
+ fit_height_to_pages = property(get_fit_height_to_pages, set_fit_height_to_pages)
+
+ #################################################################
+
+ def set_print_in_rows(self, value):
+ self.__print_in_rows = int(value)
+
+ def get_print_in_rows(self):
+ return bool(self.__print_in_rows)
+
+ print_in_rows = property(get_print_in_rows, set_print_in_rows)
+
+ #################################################################
+
+ def set_portrait(self, value):
+ self.__portrait = int(value)
+
+ def get_portrait(self):
+ return bool(self.__portrait)
+
+ portrait = property(get_portrait, set_portrait)
+
+ #################################################################
+
+ def set_print_colour(self, value):
+ self.__print_not_colour = int(not value)
+
+ def get_print_colour(self):
+ return not bool(self.__print_not_colour)
+
+ print_colour = property(get_print_colour, set_print_colour)
+
+ #################################################################
+
+ def set_print_draft(self, value):
+ self.__print_draft = int(value)
+
+ def get_print_draft(self):
+ return bool(self.__print_draft)
+
+ print_draft = property(get_print_draft, set_print_draft)
+
+ #################################################################
+
+ def set_print_notes(self, value):
+ self.__print_notes = int(value)
+
+ def get_print_notes(self):
+ return bool(self.__print_notes)
+
+ print_notes = property(get_print_notes, set_print_notes)
+
+ #################################################################
+
+ def set_print_notes_at_end(self, value):
+ self.__print_notes_at_end = int(value)
+
+ def get_print_notes_at_end(self):
+ return bool(self.__print_notes_at_end)
+
+ print_notes_at_end = property(get_print_notes_at_end, set_print_notes_at_end)
+
+ #################################################################
+
+ def set_print_omit_errors(self, value):
+ self.__print_omit_errors = int(value)
+
+ def get_print_omit_errors(self):
+ return bool(self.__print_omit_errors)
+
+ print_omit_errors = property(get_print_omit_errors, set_print_omit_errors)
+
+ #################################################################
+
+ def set_print_hres(self, value):
+ self.__print_hres = value
+
+ def get_print_hres(self):
+ return self.__print_hres
+
+ print_hres = property(get_print_hres, set_print_hres)
+
+ #################################################################
+
+ def set_print_vres(self, value):
+ self.__print_vres = value
+
+ def get_print_vres(self):
+ return self.__print_vres
+
+ print_vres = property(get_print_vres, set_print_vres)
+
+ #################################################################
+
+ def set_header_margin(self, value):
+ self.__header_margin = value
+
+ def get_header_margin(self):
+ return self.__header_margin
+
+ header_margin = property(get_header_margin, set_header_margin)
+
+ #################################################################
+
+ def set_footer_margin(self, value):
+ self.__footer_margin = value
+
+ def get_footer_margin(self):
+ return self.__footer_margin
+
+ footer_margin = property(get_footer_margin, set_footer_margin)
+
+ #################################################################
+
+ def set_copies_num(self, value):
+ self.__copies_num = value
+
+ def get_copies_num(self):
+ return self.__copies_num
+
+ copies_num = property(get_copies_num, set_copies_num)
+
+ ##################################################################
+
+ def set_wnd_protect(self, value):
+ self.__wnd_protect = int(value)
+
+ def get_wnd_protect(self):
+ return bool(self.__wnd_protect)
+
+ wnd_protect = property(get_wnd_protect, set_wnd_protect)
+
+ #################################################################
+
+ def set_obj_protect(self, value):
+ self.__obj_protect = int(value)
+
+ def get_obj_protect(self):
+ return bool(self.__obj_protect)
+
+ obj_protect = property(get_obj_protect, set_obj_protect)
+
+ #################################################################
+
+ def set_protect(self, value):
+ self.__protect = int(value)
+
+ def get_protect(self):
+ return bool(self.__protect)
+
+ protect = property(get_protect, set_protect)
+
+ #################################################################
+
+ def set_scen_protect(self, value):
+ self.__scen_protect = int(value)
+
+ def get_scen_protect(self):
+ return bool(self.__scen_protect)
+
+ scen_protect = property(get_scen_protect, set_scen_protect)
+
+ #################################################################
+
+ def set_password(self, value):
+ self.__password = value
+
+ def get_password(self):
+ return self.__password
+
+ password = property(get_password, set_password)
+
+ ##################################################################
+ ## Methods
+ ##################################################################
+
+ def get_parent(self):
+ return self.__parent
+
+ def write(self, r, c, label="", style=Style.default_style):
+ self.row(r).write(c, label, style)
+
+ def merge(self, r1, r2, c1, c2, style=Style.default_style):
+ # Stand-alone merge of previously written cells.
+ # Problems: (1) style to be used should be existing style of
+ # the top-left cell, not an arg.
+ # (2) should ensure that any previous data value in
+ # non-top-left cells is nobbled.
+ # Note: if a cell is set by a data record then later
+ # is referenced by a [MUL]BLANK record, Excel will blank
+ # out the cell on the screen, but OOo & Gnu will not
+ # blank it out. Need to do something better than writing
+ # multiple records. In the meantime, avoid this method and use
+ # write_merge() instead.
+ if c2 > c1:
+ self.row(r1).write_blanks(c1 + 1, c2, style)
+ for r in range(r1+1, r2+1):
+ self.row(r).write_blanks(c1, c2, style)
+ self.__merged_ranges.append((r1, r2, c1, c2))
+
+ def write_merge(self, r1, r2, c1, c2, label="", style=Style.default_style):
+ assert 0 <= c1 <= c2 <= 255
+ assert 0 <= r1 <= r2 <= 65535
+ self.write(r1, c1, label, style)
+ if c2 > c1:
+ self.row(r1).write_blanks(c1 + 1, c2, style) # skip (r1, c1)
+ for r in range(r1+1, r2+1):
+ self.row(r).write_blanks(c1, c2, style)
+ self.__merged_ranges.append((r1, r2, c1, c2))
+
+ def insert_bitmap(self, filename, row, col, x = 0, y = 0, scale_x = 1, scale_y = 1):
+ bmp = Bitmap.ImDataBmpRecord(filename)
+ obj = Bitmap.ObjBmpRecord(row, col, self, bmp, x, y, scale_x, scale_y)
+
+ self.__bmp_rec += obj.get() + bmp.get()
+
+ def col(self, indx):
+ if indx not in self.__cols:
+ self.__cols[indx] = self.Column(indx, self)
+ return self.__cols[indx]
+
+ def row(self, indx):
+ if indx not in self.__rows:
+ if indx in self.__flushed_rows:
+ raise Exception("Attempt to reuse row index %d of sheet %r after flushing" % (indx, self.__name))
+ self.__rows[indx] = self.Row(indx, self)
+ if indx > self.last_used_row:
+ self.last_used_row = indx
+ if indx < self.first_used_row:
+ self.first_used_row = indx
+ return self.__rows[indx]
+
+ def row_height(self, row): # in pixels
+ if row in self.__rows:
+ return self.__rows[row].get_height_in_pixels()
+ else:
+ return 17
+
+ def col_width(self, col): # in pixels
+ if col in self.__cols:
+ return self.__cols[col].width_in_pixels()
+ else:
+ return 64
+
+
+ ##################################################################
+ ## BIFF records generation
+ ##################################################################
+
+ def __bof_rec(self):
+ return BIFFRecords.Biff8BOFRecord(BIFFRecords.Biff8BOFRecord.WORKSHEET).get()
+
+ def __update_row_visible_levels(self):
+ if self.__rows:
+ temp = max([self.__rows[r].level for r in self.__rows]) + 1
+ self.__row_visible_levels = max(temp, self.__row_visible_levels)
+
+ def __guts_rec(self):
+ self.__update_row_visible_levels()
+ col_visible_levels = 0
+ if len(self.__cols) != 0:
+ col_visible_levels = max([self.__cols[c].level for c in self.__cols]) + 1
+ return BIFFRecords.GutsRecord(
+ self.__row_gut_width, self.__col_gut_height, self.__row_visible_levels, col_visible_levels).get()
+
+ def __defaultrowheight_rec(self):
+ options = 0x0000
+ options |= (self.row_default_height_mismatch & 1) << 0
+ options |= (self.row_default_hidden & 1) << 1
+ options |= (self.row_default_space_above & 1) << 2
+ options |= (self.row_default_space_below & 1) << 3
+ defht = self.__row_default_height
+ return BIFFRecords.DefaultRowHeightRecord(options, defht).get()
+
+ def __wsbool_rec(self):
+ options = 0x00
+ options |= (self.__show_auto_page_breaks & 0x01) << 0
+ options |= (self.__dialogue_sheet & 0x01) << 4
+ options |= (self.__auto_style_outline & 0x01) << 5
+ options |= (self.__outline_below & 0x01) << 6
+ options |= (self.__outline_right & 0x01) << 7
+ options |= (self.__fit_num_pages & 0x01) << 8
+ options |= (self.__show_row_outline & 0x01) << 10
+ options |= (self.__show_col_outline & 0x01) << 11
+ options |= (self.__alt_expr_eval & 0x01) << 14
+ options |= (self.__alt_formula_entries & 0x01) << 15
+
+ return BIFFRecords.WSBoolRecord(options).get()
+
+ def __eof_rec(self):
+ return BIFFRecords.EOFRecord().get()
+
+ def __colinfo_rec(self):
+ result = ''
+ for col in self.__cols:
+ result += self.__cols[col].get_biff_record()
+ return result
+
+ def __dimensions_rec(self):
+ return BIFFRecords.DimensionsRecord(
+ self.first_used_row, self.last_used_row,
+ self.first_used_col, self.last_used_col
+ ).get()
+
+ def __window2_rec(self):
+ # Appends SCL record.
+ options = 0
+ options |= (self.__show_formulas & 0x01) << 0
+ options |= (self.__show_grid & 0x01) << 1
+ options |= (self.__show_headers & 0x01) << 2
+ options |= (self.__panes_frozen & 0x01) << 3
+ options |= (self.show_zero_values & 0x01) << 4
+ options |= (self.__auto_colour_grid & 0x01) << 5
+ options |= (self.__cols_right_to_left & 0x01) << 6
+ options |= (self.__show_outline & 0x01) << 7
+ options |= (self.__remove_splits & 0x01) << 8
+ options |= (self.__selected & 0x01) << 9
+ options |= (self.__sheet_visible & 0x01) << 10
+ options |= (self.__page_preview & 0x01) << 11
+ if self.__page_preview:
+ scl_magn = self.__preview_magn
+ else:
+ scl_magn = self.__normal_magn
+ return BIFFRecords.Window2Record(
+ options, self.__first_visible_row, self.__first_visible_col,
+ self.__grid_colour,
+ self.__preview_magn, self.__normal_magn, scl_magn).get()
+
+ def __panes_rec(self):
+ if self.__vert_split_pos is None and self.__horz_split_pos is None:
+ return ""
+
+ if self.__vert_split_pos is None:
+ self.__vert_split_pos = 0
+ if self.__horz_split_pos is None:
+ self.__horz_split_pos = 0
+
+ if self.__panes_frozen:
+ if self.__vert_split_first_visible is None:
+ self.__vert_split_first_visible = self.__vert_split_pos
+ if self.__horz_split_first_visible is None:
+ self.__horz_split_first_visible = self.__horz_split_pos
+ else:
+ if self.__vert_split_first_visible is None:
+ self.__vert_split_first_visible = 0
+ if self.__horz_split_first_visible is None:
+ self.__horz_split_first_visible = 0
+ # inspired by pyXLWriter
+ self.__horz_split_pos = 20*self.__horz_split_pos + 255
+ self.__vert_split_pos = 113.879*self.__vert_split_pos + 390
+
+ if self.__vert_split_pos > 0 and self.__horz_split_pos > 0:
+ self.__split_active_pane = 0
+ elif self.__vert_split_pos > 0 and self.__horz_split_pos == 0:
+ self.__split_active_pane = 1
+ elif self.__vert_split_pos == 0 and self.__horz_split_pos > 0:
+ self.__split_active_pane = 2
+ else:
+ self.__split_active_pane = 3
+
+ result = BIFFRecords.PanesRecord(self.__vert_split_pos,
+ self.__horz_split_pos,
+ self.__horz_split_first_visible,
+ self.__vert_split_first_visible,
+ self.__split_active_pane).get()
+ return result
+
+ def __row_blocks_rec(self):
+ result = []
+ for row in self.__rows.itervalues():
+ result.append(row.get_row_biff_data())
+ result.append(row.get_cells_biff_data())
+ return ''.join(result)
+
+ def __merged_rec(self):
+ return BIFFRecords.MergedCellsRecord(self.__merged_ranges).get()
+
+ def __bitmaps_rec(self):
+ return self.__bmp_rec
+
+ def __calc_settings_rec(self):
+ result = ''
+ result += BIFFRecords.CalcModeRecord(self.__calc_mode & 0x01).get()
+ result += BIFFRecords.CalcCountRecord(self.__calc_count & 0xFFFF).get()
+ result += BIFFRecords.RefModeRecord(self.__RC_ref_mode & 0x01).get()
+ result += BIFFRecords.IterationRecord(self.__iterations_on & 0x01).get()
+ result += BIFFRecords.DeltaRecord(self.__delta).get()
+ result += BIFFRecords.SaveRecalcRecord(self.__save_recalc & 0x01).get()
+ return result
+
+ def __print_settings_rec(self):
+ result = ''
+ result += BIFFRecords.PrintHeadersRecord(self.__print_headers).get()
+ result += BIFFRecords.PrintGridLinesRecord(self.__print_grid).get()
+ result += BIFFRecords.GridSetRecord(self.__grid_set).get()
+ result += BIFFRecords.HorizontalPageBreaksRecord(self.__horz_page_breaks).get()
+ result += BIFFRecords.VerticalPageBreaksRecord(self.__vert_page_breaks).get()
+ result += BIFFRecords.HeaderRecord(self.__header_str).get()
+ result += BIFFRecords.FooterRecord(self.__footer_str).get()
+ result += BIFFRecords.HCenterRecord(self.__print_centered_horz).get()
+ result += BIFFRecords.VCenterRecord(self.__print_centered_vert).get()
+ result += BIFFRecords.LeftMarginRecord(self.__left_margin).get()
+ result += BIFFRecords.RightMarginRecord(self.__right_margin).get()
+ result += BIFFRecords.TopMarginRecord(self.__top_margin).get()
+ result += BIFFRecords.BottomMarginRecord(self.__bottom_margin).get()
+
+ setup_page_options = (self.__print_in_rows & 0x01) << 0
+ setup_page_options |= (self.__portrait & 0x01) << 1
+ setup_page_options |= (0x00 & 0x01) << 2
+ setup_page_options |= (self.__print_not_colour & 0x01) << 3
+ setup_page_options |= (self.__print_draft & 0x01) << 4
+ setup_page_options |= (self.__print_notes & 0x01) << 5
+ setup_page_options |= (0x00 & 0x01) << 6
+ setup_page_options |= (0x01 & 0x01) << 7
+ setup_page_options |= (self.__print_notes_at_end & 0x01) << 9
+ setup_page_options |= (self.__print_omit_errors & 0x03) << 10
+
+ result += BIFFRecords.SetupPageRecord(self.__paper_size_code,
+ self.__print_scaling,
+ self.__start_page_number,
+ self.__fit_width_to_pages,
+ self.__fit_height_to_pages,
+ setup_page_options,
+ self.__print_hres,
+ self.__print_vres,
+ self.__header_margin,
+ self.__footer_margin,
+ self.__copies_num).get()
+ return result
+
+ def __protection_rec(self):
+ result = ''
+ result += BIFFRecords.ProtectRecord(self.__protect).get()
+ result += BIFFRecords.ScenProtectRecord(self.__scen_protect).get()
+ result += BIFFRecords.WindowProtectRecord(self.__wnd_protect).get()
+ result += BIFFRecords.ObjectProtectRecord(self.__obj_protect).get()
+ result += BIFFRecords.PasswordRecord(self.__password).get()
+ return result
+
+ def get_biff_data(self):
+ result = [
+ self.__bof_rec(),
+ self.__calc_settings_rec(),
+ self.__guts_rec(),
+ self.__defaultrowheight_rec(),
+ self.__wsbool_rec(),
+ self.__colinfo_rec(),
+ self.__dimensions_rec(),
+ self.__print_settings_rec(),
+ self.__protection_rec(),
+ ]
+ if self.row_tempfile:
+ self.row_tempfile.flush()
+ self.row_tempfile.seek(0)
+ result.append(self.row_tempfile.read())
+ result.extend([
+ self.__row_blocks_rec(),
+ self.__merged_rec(),
+ self.__bitmaps_rec(),
+ self.__window2_rec(),
+ self.__panes_rec(),
+ self.__eof_rec(),
+ ])
+ return ''.join(result)
+
+ def flush_row_data(self):
+ if self.row_tempfile is None:
+ self.row_tempfile = tempfile.TemporaryFile()
+ self.row_tempfile.write(self.__row_blocks_rec())
+ for rowx in self.__rows:
+ self.__flushed_rows[rowx] = 1
+ self.__update_row_visible_levels()
+ self.__rows = {}
+
+
diff --git a/tablib/packages/xlwt/__init__.py b/tablib/packages/xlwt/__init__.py
new file mode 100644
index 0000000..dcc23f0
--- /dev/null
+++ b/tablib/packages/xlwt/__init__.py
@@ -0,0 +1,16 @@
+# -*- coding: windows-1252 -*-
+
+__VERSION__ = '0.7.2'
+
+import sys
+if sys.version_info[:2] < (2, 3):
+ print >> sys.stderr, "Sorry, xlwt requires Python 2.3 or later"
+ sys.exit(1)
+
+from Workbook import Workbook
+from Worksheet import Worksheet
+from Row import Row
+from Column import Column
+from Formatting import Font, Alignment, Borders, Pattern, Protection
+from Style import XFStyle, easyxf
+from ExcelFormula import *
diff --git a/tablib/packages/xlwt/antlr.py b/tablib/packages/xlwt/antlr.py
new file mode 100644
index 0000000..aaad447
--- /dev/null
+++ b/tablib/packages/xlwt/antlr.py
@@ -0,0 +1,2874 @@
+## This file is part of PyANTLR. See LICENSE.txt for license
+## details..........Copyright (C) Wolfgang Haefelinger, 2004.
+
+## This file was copied for use with xlwt from the 2.7.7 ANTLR distribution. Yes, it
+## says 2.7.5 below. The 2.7.5 distribution version didn't have a
+## version in it.
+
+## Here is the contents of the ANTLR 2.7.7 LICENSE.txt referred to above.
+
+# SOFTWARE RIGHTS
+#
+# ANTLR 1989-2006 Developed by Terence Parr
+# Partially supported by University of San Francisco & jGuru.com
+#
+# We reserve no legal rights to the ANTLR--it is fully in the
+# public domain. An individual or company may do whatever
+# they wish with source code distributed with ANTLR or the
+# code generated by ANTLR, including the incorporation of
+# ANTLR, or its output, into commerical software.
+#
+# We encourage users to develop software with ANTLR. However,
+# we do ask that credit is given to us for developing
+# ANTLR. By "credit", we mean that if you use ANTLR or
+# incorporate any source code into one of your programs
+# (commercial product, research project, or otherwise) that
+# you acknowledge this fact somewhere in the documentation,
+# research report, etc... If you like ANTLR and have
+# developed a nice tool with the output, please mention that
+# you developed it using ANTLR. In addition, we ask that the
+# headers remain intact in our source code. As long as these
+# guidelines are kept, we expect to continue enhancing this
+# system and expect to make other tools available as they are
+# completed.
+#
+# The primary ANTLR guy:
+#
+# Terence Parr
+# parrt@cs.usfca.edu
+# parrt@antlr.org
+
+## End of contents of the ANTLR 2.7.7 LICENSE.txt ########################
+
+## get sys module
+import sys
+
+version = sys.version.split()[0]
+if version < '2.2.1':
+ False = 0
+if version < '2.3':
+ True = not False
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### global symbols ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+### ANTLR Standard Tokens
+SKIP = -1
+INVALID_TYPE = 0
+EOF_TYPE = 1
+EOF = 1
+NULL_TREE_LOOKAHEAD = 3
+MIN_USER_TYPE = 4
+
+### ANTLR's EOF Symbol
+EOF_CHAR = ''
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### general functions ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+## Version should be automatically derived from configure.in. For now,
+## we need to bump it ourselves. Don't remove the tags.
+##
+def version():
+ r = {
+ 'major' : '2',
+ 'minor' : '7',
+ 'micro' : '5',
+ 'patch' : '' ,
+ 'version': '2.7.5'
+ }
+ return r
+##
+
+def error(fmt,*args):
+ if fmt:
+ print "error: ", fmt % tuple(args)
+
+def ifelse(cond,_then,_else):
+ if cond :
+ r = _then
+ else:
+ r = _else
+ return r
+
+def is_string_type(x):
+ # return (isinstance(x,str) or isinstance(x,unicode))
+ # Simplify; xlwt doesn't support Python < 2.3
+ return isinstance(basestring)
+
+def assert_string_type(x):
+ assert is_string_type(x)
+ pass
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### ANTLR Exceptions ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class ANTLRException(Exception):
+
+ def __init__(self, *args):
+ Exception.__init__(self, *args)
+
+
+class RecognitionException(ANTLRException):
+
+ def __init__(self, *args):
+ ANTLRException.__init__(self, *args)
+ self.fileName = None
+ self.line = -1
+ self.column = -1
+ if len(args) >= 2:
+ self.fileName = args[1]
+ if len(args) >= 3:
+ self.line = args[2]
+ if len(args) >= 4:
+ self.column = args[3]
+
+ def __str__(self):
+ buf = ['']
+ if self.fileName:
+ buf.append(self.fileName + ":")
+ if self.line != -1:
+ if not self.fileName:
+ buf.append("line ")
+ buf.append(str(self.line))
+ if self.column != -1:
+ buf.append(":" + str(self.column))
+ buf.append(":")
+ buf.append(" ")
+ return str('').join(buf)
+
+ __repr__ = __str__
+
+
+class NoViableAltException(RecognitionException):
+
+ def __init__(self, *args):
+ RecognitionException.__init__(self, *args)
+ self.token = None
+ self.node = None
+ if isinstance(args[0],AST):
+ self.node = args[0]
+ elif isinstance(args[0],Token):
+ self.token = args[0]
+ else:
+ raise TypeError("NoViableAltException requires Token or AST argument")
+
+ def __str__(self):
+ if self.token:
+ line = self.token.getLine()
+ col = self.token.getColumn()
+ text = self.token.getText()
+ return "unexpected symbol at line %s (column %s): \"%s\"" % (line,col,text)
+ if self.node == ASTNULL:
+ return "unexpected end of subtree"
+ assert self.node
+ ### hackish, we assume that an AST contains method getText
+ return "unexpected node: %s" % (self.node.getText())
+
+ __repr__ = __str__
+
+
+class NoViableAltForCharException(RecognitionException):
+
+ def __init__(self, *args):
+ self.foundChar = None
+ if len(args) == 2:
+ self.foundChar = args[0]
+ scanner = args[1]
+ RecognitionException.__init__(self, "NoViableAlt",
+ scanner.getFilename(),
+ scanner.getLine(),
+ scanner.getColumn())
+ elif len(args) == 4:
+ self.foundChar = args[0]
+ fileName = args[1]
+ line = args[2]
+ column = args[3]
+ RecognitionException.__init__(self, "NoViableAlt",
+ fileName, line, column)
+ else:
+ RecognitionException.__init__(self, "NoViableAlt",
+ '', -1, -1)
+
+ def __str__(self):
+ mesg = "unexpected char: "
+ if self.foundChar >= ' ' and self.foundChar <= '~':
+ mesg += "'" + self.foundChar + "'"
+ elif self.foundChar:
+ mesg += "0x" + hex(ord(self.foundChar)).upper()[2:]
+ else:
+ mesg += ""
+ return mesg
+
+ __repr__ = __str__
+
+
+class SemanticException(RecognitionException):
+
+ def __init__(self, *args):
+ RecognitionException.__init__(self, *args)
+
+
class MismatchedCharException(RecognitionException):
    """Raised by a lexer when the current character is not the expected one.

    The constructor is dispatched on the number and type of arguments:

    * ``(found, lower, upper, matchNot, scanner)`` -- range mismatch
    * ``(found, expected_char, matchNot, scanner)`` -- single char mismatch
    * ``(found, bitset, matchNot, scanner)`` -- set mismatch
    * anything else -- a generic "Mismatched char" error

    In the first three forms file name, line and column are taken from
    the scanner.
    """

    NONE = 0
    CHAR = 1
    NOT_CHAR = 2
    RANGE = 3
    NOT_RANGE = 4
    SET = 5
    NOT_SET = 6

    def __init__(self, *args):
        cls = MismatchedCharException
        self.args = args
        if len(args) == 5:
            # Expected range / not range
            if args[3]:
                self.mismatchType = cls.NOT_RANGE
            else:
                self.mismatchType = cls.RANGE
            self.foundChar = args[0]
            self.expecting = args[1]
            self.upper = args[2]
            self.scanner = args[4]
            RecognitionException.__init__(self, "Mismatched char range",
                                          self.scanner.getFilename(),
                                          self.scanner.getLine(),
                                          self.scanner.getColumn())
        elif len(args) == 4 and is_string_type(args[1]):
            # Expected char / not char
            if args[2]:
                self.mismatchType = cls.NOT_CHAR
            else:
                self.mismatchType = cls.CHAR
            self.foundChar = args[0]
            self.expecting = args[1]
            self.scanner = args[3]
            RecognitionException.__init__(self, "Mismatched char",
                                          self.scanner.getFilename(),
                                          self.scanner.getLine(),
                                          self.scanner.getColumn())
        elif len(args) == 4 and isinstance(args[1], BitSet):
            # Expected BitSet / not BitSet
            if args[2]:
                self.mismatchType = cls.NOT_SET
            else:
                self.mismatchType = cls.SET
            self.foundChar = args[0]
            self.set = args[1]
            self.scanner = args[3]
            RecognitionException.__init__(self, "Mismatched char set",
                                          self.scanner.getFilename(),
                                          self.scanner.getLine(),
                                          self.scanner.getColumn())
        else:
            self.mismatchType = cls.NONE
            RecognitionException.__init__(self, "Mismatched char")

    def appendCharName(self, sb, c):
        """Append a quoted, printable form of character ``c`` to list ``sb``.

        Newline, carriage return and tab are shown escaped; a false
        value or 65535 (``(char) -1``) is shown as end of input.
        """
        if not c or c == 65535:
            # 65535 = (char) -1 = EOF
            sb.append("'<EOF>'")
        elif c == '\n':
            sb.append("'\\n'")
        elif c == '\r':
            sb.append("'\\r'")
        elif c == '\t':
            sb.append("'\\t'")
        else:
            sb.append('\'' + c + '\'')

    def __str__(self):
        """Return the base error message followed by mismatch details."""
        cls = MismatchedCharException
        kind = self.mismatchType
        sb = ['']
        sb.append(RecognitionException.__str__(self))

        if kind == cls.CHAR:
            sb.append("expecting ")
            self.appendCharName(sb, self.expecting)
            sb.append(", found ")
            self.appendCharName(sb, self.foundChar)
        elif kind == cls.NOT_CHAR:
            sb.append("expecting anything but '")
            self.appendCharName(sb, self.expecting)
            sb.append("'; got it anyway")
        elif kind in [cls.RANGE, cls.NOT_RANGE]:
            sb.append("expecting char ")
            if kind == cls.NOT_RANGE:
                sb.append("NOT ")
            sb.append("in range: ")
            # appendCharName is a method; the unqualified calls that used
            # to be here raised NameError as soon as a range error was
            # printed.
            self.appendCharName(sb, self.expecting)
            sb.append("..")
            self.appendCharName(sb, self.upper)
            sb.append(", found ")
            self.appendCharName(sb, self.foundChar)
        elif kind in [cls.SET, cls.NOT_SET]:
            sb.append("expecting ")
            if kind == cls.NOT_SET:
                sb.append("NOT ")
            sb.append("one of (")
            # NOTE(review): this needs len() and indexing on self.set, but
            # the constructor only stores a BitSet here -- confirm BitSet
            # supports both before relying on this branch.
            for i in range(len(self.set)):
                self.appendCharName(sb, self.set[i])
            sb.append("), found ")
            self.appendCharName(sb, self.foundChar)

        return "".join(sb).strip()

    __repr__ = __str__
+
+
class MismatchedTokenException(RecognitionException):
    """Raised by a parser when the current token/AST node is unexpected.

    The constructor is dispatched on the number and type of arguments:

    * six arguments -- range mismatch: ``args[0]`` token names,
      ``args[1]`` offending token or AST node, ``args[2]``/``args[3]``
      lower/upper bound, ``args[4]`` matchNot flag, ``args[5]`` file name
    * ``(tokenNames, token_or_ast, expected_type, matchNot)``
    * ``(tokenNames, token_or_ast, bitset, matchNot)``
    * anything else -- a generic mismatch

    Whenever at least two arguments are given, position information is
    taken from ``args[1]`` if it is a Token or an AST node.
    """

    NONE = 0
    TOKEN = 1
    NOT_TOKEN = 2
    RANGE = 3
    NOT_RANGE = 4
    SET = 5
    NOT_SET = 6

    def __init__(self, *args):
        cls = MismatchedTokenException
        self.args = args
        self.tokenNames = []
        self.token = None
        self.tokenText = ''
        self.node = None
        if len(args) == 6:
            # Expected range / not range.  args[3] is the upper bound, so
            # the matchNot flag has to be args[4], the only slot that is
            # not consumed otherwise.
            if args[4]:
                self.mismatchType = cls.NOT_RANGE
            else:
                self.mismatchType = cls.RANGE
            self.tokenNames = args[0]
            self.expecting = args[2]
            self.upper = args[3]
            self.fileName = args[5]

        elif len(args) == 4 and isinstance(args[2], int):
            # Expected token / not token
            if args[3]:
                self.mismatchType = cls.NOT_TOKEN
            else:
                self.mismatchType = cls.TOKEN
            self.tokenNames = args[0]
            self.expecting = args[2]

        elif len(args) == 4 and isinstance(args[2], BitSet):
            # Expected BitSet / not BitSet
            if args[3]:
                self.mismatchType = cls.NOT_SET
            else:
                self.mismatchType = cls.SET
            self.tokenNames = args[0]
            self.set = args[2]

        else:
            self.mismatchType = cls.NONE
            RecognitionException.__init__(self, "Mismatched Token: expecting any AST node", "", -1, -1)

        if len(args) >= 2:
            if isinstance(args[1], Token):
                self.token = args[1]
                self.tokenText = self.token.getText()
                # The four-argument forms never assign fileName; fall back
                # to None instead of failing with AttributeError.
                RecognitionException.__init__(self, "Mismatched Token",
                                              getattr(self, 'fileName', None),
                                              self.token.getLine(),
                                              self.token.getColumn())
            elif isinstance(args[1], AST):
                self.node = args[1]
                self.tokenText = str(self.node)
                RecognitionException.__init__(self, "Mismatched Token",
                                              "",
                                              self.node.getLine(),
                                              self.node.getColumn())
            else:
                self.tokenText = ""
                RecognitionException.__init__(self, "Mismatched Token",
                                              "", -1, -1)

    def appendTokenName(self, sb, tokenType):
        """Append the display name of ``tokenType`` to the list ``sb``.

        Types outside ``self.tokenNames`` are rendered as ``<number>``.
        """
        if tokenType == INVALID_TYPE:
            sb.append("")
        elif tokenType < 0 or tokenType >= len(self.tokenNames):
            sb.append("<" + str(tokenType) + ">")
        else:
            sb.append(self.tokenNames[tokenType])

    def __str__(self):
        """Return the base error message followed by mismatch details."""
        cls = MismatchedTokenException
        kind = self.mismatchType
        sb = ['']
        sb.append(RecognitionException.__str__(self))

        if kind == cls.TOKEN:
            sb.append("expecting ")
            self.appendTokenName(sb, self.expecting)
            sb.append(", found " + self.tokenText)
        elif kind == cls.NOT_TOKEN:
            sb.append("expecting anything but '")
            self.appendTokenName(sb, self.expecting)
            sb.append("'; got it anyway")
        elif kind in [cls.RANGE, cls.NOT_RANGE]:
            sb.append("expecting token ")
            if kind == cls.NOT_RANGE:
                sb.append("NOT ")
            sb.append("in range: ")
            # appendTokenName is a method; the unqualified calls that used
            # to be here raised NameError.
            self.appendTokenName(sb, self.expecting)
            sb.append("..")
            self.appendTokenName(sb, self.upper)
            sb.append(", found " + self.tokenText)
        elif kind in [cls.SET, cls.NOT_SET]:
            sb.append("expecting ")
            if kind == cls.NOT_SET:
                sb.append("NOT ")
            sb.append("one of (")
            # NOTE(review): this needs len() and indexing on self.set, but
            # the constructor only stores a BitSet here -- confirm BitSet
            # supports both before relying on this branch.
            for i in range(len(self.set)):
                self.appendTokenName(sb, self.set[i])
            sb.append("), found " + self.tokenText)

        return "".join(sb).strip()

    __repr__ = __str__
+
+
class TokenStreamException(ANTLRException):
    """Base class of the token stream errors defined in this module."""

    def __init__(self, *args):
        # Nothing to add: all arguments go straight to the base class.
        ANTLRException.__init__(self, *args)
+
+
+# Wraps an Exception in a TokenStreamException
class TokenStreamIOException(TokenStreamException):
    """Token stream error that optionally wraps another exception.

    When the first argument is an exception it is kept in ``self.io`` and
    its string form becomes the message; otherwise the arguments are
    passed through unchanged and ``self.io`` refers to this instance.
    """

    def __init__(self, *args):
        wraps_exception = args and isinstance(args[0], Exception)
        if not wraps_exception:
            TokenStreamException.__init__(self, *args)
            self.io = self
            return
        cause = args[0]
        TokenStreamException.__init__(self, str(cause))
        self.io = cause
+
+
+# Wraps a RecognitionException in a TokenStreamException
class TokenStreamRecognitionException(TokenStreamException):
    """Token stream error that carries a RecognitionException.

    The wrapped exception is stored in ``self.recog`` and also provides
    the string form of this exception.  Raises TypeError when the first
    argument is not a RecognitionException.
    """

    def __init__(self, *args):
        if not (args and isinstance(args[0], RecognitionException)):
            raise TypeError("TokenStreamRecognitionException requires RecognitionException argument")
        wrapped = args[0]
        TokenStreamException.__init__(self, str(wrapped))
        self.recog = wrapped

    def __str__(self):
        # Delegate to the wrapped exception.
        return str(self.recog)

    __repr__ = __str__
+
+
class TokenStreamRetryException(TokenStreamException):
    """Token stream error with no behaviour beyond its base class."""

    def __init__(self, *args):
        # All arguments go straight to the base class.
        TokenStreamException.__init__(self, *args)
+
+
class CharStreamException(ANTLRException):
    """Base class of the character stream errors defined in this module."""

    def __init__(self, *args):
        # Nothing to add: all arguments go straight to the base class.
        ANTLRException.__init__(self, *args)
+
+
+# Wraps an Exception in a CharStreamException
class CharStreamIOException(CharStreamException):
    """Character stream error that optionally wraps another exception.

    When the first argument is an exception it is kept in ``self.io`` and
    its string form becomes the message; otherwise the arguments are
    passed through unchanged and ``self.io`` refers to this instance.
    """

    def __init__(self, *args):
        wraps_exception = args and isinstance(args[0], Exception)
        if not wraps_exception:
            CharStreamException.__init__(self, *args)
            self.io = self
            return
        cause = args[0]
        CharStreamException.__init__(self, str(cause))
        self.io = cause
+
+
class TryAgain(Exception):
    """Plain exception type with no state or behaviour of its own."""
+
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### Token ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class Token(object):
    """Minimal token: stores a type, ignores position and text queries.

    The position and text accessors here are placeholders: getters return
    fixed values and setters only return ``self`` so that calls can be
    chained.
    """

    SKIP = -1
    INVALID_TYPE = 0
    EOF_TYPE = 1
    EOF = 1
    NULL_TREE_LOOKAHEAD = 3
    MIN_USER_TYPE = 4

    def __init__(self, **argv):
        """Accept the optional keywords ``type`` and ``text``."""
        if 'type' in argv:
            self.type = argv['type']
        else:
            self.type = INVALID_TYPE
        if 'text' in argv:
            self.text = argv['text']
        else:
            self.text = ""

    def isEOF(self):
        return (self.type == EOF_TYPE)

    def getColumn(self):
        return 0

    def getLine(self):
        return 0

    def getFilename(self):
        return None

    def setFilename(self, name):
        return self

    def getText(self):
        return ""

    def setText(self, text):
        """Validate that ``text`` is a string; the value is not stored."""
        if not is_string_type(text):
            raise TypeError("Token.setText requires string argument")
        return self

    def setColumn(self, column):
        return self

    def setLine(self, line):
        return self

    def getType(self):
        return self.type

    def setType(self, type):
        """Store an integer token type; raise TypeError for anything else."""
        if not isinstance(type, int):
            raise TypeError("Token.setType requires integer argument")
        self.type = type
        return self

    def toString(self):
        """Return ``["text",<type>]`` using names for the reserved types."""
        type_ = self.type
        tval = type_
        reserved = (
            (3, 'NULL_TREE_LOOKAHEAD'),
            (1, 'EOF_TYPE'),
            (0, 'INVALID_TYPE'),
            (-1, 'SKIP'),
        )
        for code, label in reserved:
            if type_ == code:
                tval = label
                break
        return '["%s",<%s>]' % (self.getText(), tval)

    __str__ = toString
    __repr__ = toString
+
+### static attribute ..
### static attribute: shared token with the invalid type and empty text
Token.badToken = Token(text="", type=INVALID_TYPE)

if __name__ == "__main__":
    # Smoke test when the module is run as a script.
    print("testing ..")
    T = Token.badToken
    print(T)
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### CommonToken ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class CommonToken(Token):
    """Token that also stores its text, line and column.

    Accepts the keywords of ``Token`` plus ``line`` and ``col`` (both
    default to 0).  All setters return ``self`` so calls can be chained.
    """

    def __init__(self, **argv):
        Token.__init__(self, **argv)
        self.line = argv.get('line', 0)
        self.col = argv.get('col', 0)

    def getLine(self):
        return self.line

    def getText(self):
        return self.text

    def getColumn(self):
        return self.col

    def setLine(self, line):
        self.line = line
        return self

    def setText(self, text):
        self.text = text
        return self

    def setColumn(self, col):
        self.col = col
        return self

    def toString(self):
        """Return ``["text",<type>,line=L,col=C]`` with named reserved types."""
        type_ = self.type
        tval = type_
        reserved = (
            (3, 'NULL_TREE_LOOKAHEAD'),
            (1, 'EOF_TYPE'),
            (0, 'INVALID_TYPE'),
            (-1, 'SKIP'),
        )
        for code, label in reserved:
            if type_ == code:
                tval = label
                break
        fields = {
            'text': self.text,
            'type': tval,
            'line': self.line,
            'colm': self.col
        }
        return '["%(text)s",<%(type)s>,line=%(line)s,col=%(colm)s]' % fields

    __str__ = toString
    __repr__ = toString
+
+
if __name__ == '__main__':
    # Smoke test for CommonToken: default construction, keyword
    # construction and chained setters.
    T = CommonToken()
    print(T)
    T = CommonToken(col=15, line=1, text="some text", type=5)
    print(T)
    T = CommonToken()
    T.setLine(1).setColumn(15).setText("some text").setType(5)
    print(T)
    print(T.getLine())
    print(T.getColumn())
    print(T.getText())
    print(T.getType())
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### CommonHiddenStreamToken ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class CommonHiddenStreamToken(CommonToken):
    """CommonToken with links to the hidden tokens before and after it."""

    def __init__(self, *args, **kwargs):
        # CommonToken only understands keyword arguments, so keywords must
        # be forwarded as well; previously no field (type, text, line,
        # col) could be supplied at construction time.
        CommonToken.__init__(self, *args, **kwargs)
        self.hiddenBefore = None
        self.hiddenAfter = None

    def getHiddenAfter(self):
        return self.hiddenAfter

    def getHiddenBefore(self):
        return self.hiddenBefore

    def setHiddenAfter(self, t):
        self.hiddenAfter = t

    def setHiddenBefore(self, t):
        self.hiddenBefore = t
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### Queue ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+## Shall be a circular buffer on tokens ..
class Queue(object):
    """FIFO of items backed by a plain list (``self.buffer``)."""

    def __init__(self):
        self.buffer = list()

    def append(self, item):
        """Add ``item`` at the end of the queue."""
        self.buffer.append(item)

    def elementAt(self, index):
        """Return the item stored at position ``index``."""
        return self.buffer[index]

    def reset(self):
        """Drop all items by switching to a fresh list."""
        self.buffer = list()

    def removeFirst(self):
        """Discard the oldest item; nothing is returned."""
        self.buffer.pop(0)

    def length(self):
        """Return the number of queued items."""
        return len(self.buffer)

    def __str__(self):
        return str(self.buffer)
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### InputBuffer ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class InputBuffer(object):
    """Look-ahead buffer with support for marking and rewinding.

    Items are held in ``self.queue``.  ``consume()`` only counts; the
    queue is brought up to date lazily by ``syncConsume()``.  While at
    least one marker is outstanding (``nMarkers > 0``) consumed items stay
    in the queue and ``markerOffset`` is advanced instead, so the input
    can be rewound.

    NOTE(review): the text of this class was damaged (``getLAChars`` was
    fused with the tail of ``syncConsume``).  ``fill``, ``LA``, ``mark``,
    ``rewind``, ``reset`` and ``syncConsume`` were rebuilt from the way
    the rest of this module calls them -- verify against a pristine copy.
    """

    def __init__(self):
        self.nMarkers = 0
        self.markerOffset = 0
        self.numToConsume = 0
        self.queue = Queue()

    def __str__(self):
        return "(%s,%s,%s,%s)" % (
            self.nMarkers,
            self.markerOffset,
            self.numToConsume,
            self.queue)

    def __repr__(self):
        return str(self)

    def commit(self):
        """Give up the most recent marker without rewinding."""
        self.nMarkers -= 1

    def consume(self):
        """Schedule one item for removal (applied by ``syncConsume``)."""
        self.numToConsume += 1

    ## probably better to return a list of items
    ## because of unicode. Or return a unicode
    ## string ..
    def getLAChars(self):
        """Return the buffered items from the marker offset onwards."""
        first = self.markerOffset
        last = self.queue.length()
        return ''.join([self.queue.elementAt(i) for i in range(first, last)])

    def fill(self, k):
        """Make at least ``k`` look-ahead items available (subclass hook)."""
        raise NotImplementedError()

    def LA(self, k):
        """Return the ``k``-th look-ahead item (1-based)."""
        self.fill(k)
        return self.queue.elementAt(self.markerOffset + k - 1)

    def mark(self):
        """Start remembering input; return the position for ``rewind``."""
        self.syncConsume()
        self.nMarkers += 1
        return self.markerOffset

    def rewind(self, mark):
        """Go back to a position returned by ``mark`` and release it."""
        self.syncConsume()
        self.markerOffset = mark
        self.nMarkers -= 1

    def reset(self):
        """Forget all markers, pending consumes and buffered items."""
        self.nMarkers = 0
        self.markerOffset = 0
        self.numToConsume = 0
        self.queue.reset()

    def syncConsume(self):
        """Apply the consumes that were scheduled by ``consume``."""
        while self.numToConsume > 0:
            if self.nMarkers > 0:
                # guess mode -- leave leading characters and bump offset.
                self.markerOffset += 1
            else:
                # normal mode -- remove first character
                self.queue.removeFirst()
            self.numToConsume -= 1
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### CharBuffer ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class CharBuffer(InputBuffer):
    """InputBuffer that pulls single characters from a reader.

    ``reader`` can be any object with a ``read(int)`` method.
    """

    def __init__(self, reader):
        super(CharBuffer, self).__init__()
        self.input = reader

    def __str__(self):
        base = super(CharBuffer, self).__str__()
        # Show this buffer's reader (not the builtin ``input``) and close
        # the brace that was left open before.
        return "CharBuffer{%s,%s}" % (base, str(self.input))

    def fill(self, amount):
        """Ensure ``amount`` characters of look-ahead are buffered.

        Characters are read one at a time.  At end of input ``read``
        returns the empty string, which is buffered like any other
        character, so the queue may end up holding several EOF markers.

        Raises CharStreamIOException wrapping any exception raised while
        reading, including a failed sanity check on the value read.
        """
        import sys
        try:
            self.syncConsume()
            wanted = amount + self.markerOffset
            while self.queue.length() < wanted:
                c = self.input.read(1)
                # Either one character or the empty string (EOF) ...
                assert len(c) == 0 or len(c) == 1
                # ... and it has to be a (byte or unicode) string.
                assert is_string_type(c)
                self.queue.append(c)
        except Exception:
            # sys.exc_info() keeps this valid on every Python version.
            raise CharStreamIOException(sys.exc_info()[1])
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### LexerSharedInputState ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class LexerSharedInputState(object):
    """Position and guessing state of a lexer, bundled with its buffer.

    ``ibuf`` must be an InputBuffer (checked with an assertion).  Line and
    column numbers start at 1.
    """

    def __init__(self, ibuf):
        assert isinstance(ibuf, InputBuffer)
        self.input = ibuf
        self.column = self.line = 1
        self.tokenStartColumn = self.tokenStartLine = 1
        self.guessing = 0
        self.filename = None

    def reset(self):
        """Restore the initial position state and reset the buffer."""
        self.column = self.line = 1
        self.tokenStartColumn = self.tokenStartLine = 1
        self.guessing = 0
        self.filename = None
        self.input.reset()

    def LA(self, k):
        """Return the ``k``-th look-ahead item of the underlying buffer."""
        return self.input.LA(k)
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### TokenStream ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class TokenStream(object):
    """Base class for token sources; iterating yields tokens."""

    def nextToken(self):
        """Placeholder for subclasses; this version returns None."""
        return None

    def __iter__(self):
        return TokenStreamIterator(self)
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### TokenStreamIterator ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class TokenStreamIterator(object):
    """Iterator over the tokens of a TokenStream.

    Iteration stops at the first false token or the first token whose
    ``isEOF()`` is true.  Raises TypeError when constructed with anything
    but a TokenStream.
    """

    def __init__(self, inst):
        if isinstance(inst, TokenStream):
            self.inst = inst
            return
        raise TypeError("TokenStreamIterator requires TokenStream object")

    def __iter__(self):
        # Iterators have to be iterable themselves, otherwise iter() on
        # this object fails.
        return self

    def next(self):
        assert self.inst
        item = self.inst.nextToken()
        if not item or item.isEOF():
            raise StopIteration()
        return item

    # Same method under the name that newer interpreters look up.
    __next__ = next
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### TokenStreamSelector ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class TokenStreamSelector(TokenStream):
    """Token stream that delegates to one of several registered streams.

    Streams are registered under a key with ``addInputStream`` and made
    current with ``select``; ``push``/``pop`` maintain a stack of
    previously selected streams.
    """

    def __init__(self):
        self._input = None
        self._stmap = {}
        self._stack = []

    def addInputStream(self, stream, key):
        """Register ``stream`` under ``key``."""
        self._stmap[key] = stream

    def getCurrentStream(self):
        return self._input

    def getStream(self, sname):
        """Return the stream registered as ``sname``.

        Raises ValueError when no such stream exists.
        """
        try:
            return self._stmap[sname]
        except (KeyError, TypeError):
            # %s formatting: string concatenation failed with TypeError
            # for non-string keys and hid the intended ValueError.
            raise ValueError("TokenStream %s not found" % (sname,))

    def nextToken(self):
        """Return the next token of the current stream.

        A TokenStreamRetryException raised by the stream restarts the
        request; there is no retry limit.
        """
        while 1:
            try:
                return self._input.nextToken()
            except TokenStreamRetryException:
                ### just retry "forever"
                pass

    def pop(self):
        """Re-select and return the stream saved by the last ``push``."""
        stream = self._stack.pop()
        self.select(stream)
        return stream

    def push(self, arg):
        """Save the current stream and select ``arg``."""
        self._stack.append(self._input)
        self.select(arg)

    def retry(self):
        """Raise TokenStreamRetryException, which ``nextToken`` retries."""
        raise TokenStreamRetryException()

    def select(self, arg):
        """Make ``arg`` -- a TokenStream or a registered key -- current.

        Raises TypeError for any other argument type.
        """
        if isinstance(arg, TokenStream):
            self._input = arg
            return
        if is_string_type(arg):
            self._input = self.getStream(arg)
            return
        raise TypeError("TokenStreamSelector.select requires " +
                        "TokenStream or string argument")
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### TokenStreamBasicFilter ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class TokenStreamBasicFilter(TokenStream):
    """Token stream that drops tokens whose type is in a discard mask."""

    def __init__(self, input):
        self.input = input
        self.discardMask = BitSet()

    def discard(self, arg):
        """Discard one more token type (int) or install a new mask (BitSet).

        Raises TypeError for any other argument type.
        """
        if isinstance(arg, int):
            self.discardMask.add(arg)
            return
        if isinstance(arg, BitSet):
            # Used to assign to the misspelt attribute ``discardMark``, so
            # the mask consulted by nextToken() never changed.
            self.discardMask = arg
            return
        raise TypeError("TokenStreamBasicFilter.discard requires " +
                        "integer or BitSet argument")

    def nextToken(self):
        """Return the next token that is not in the discard mask."""
        tok = self.input.nextToken()
        while tok and self.discardMask.member(tok.getType()):
            tok = self.input.nextToken()
        return tok
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### TokenStreamHiddenTokenFilter ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class TokenStreamHiddenTokenFilter(TokenStreamBasicFilter):
    """Filter that hides tokens but keeps them reachable from neighbours.

    Tokens whose type is in ``hideMask`` are not returned by
    ``nextToken``; they are chained to the surrounding tokens through
    ``setHiddenBefore``/``setHiddenAfter``.  Tokens in ``discardMask`` are
    dropped altogether.
    """

    def __init__(self, input):
        TokenStreamBasicFilter.__init__(self, input)
        self.hideMask = BitSet()
        self.nextMonitoredToken = None
        self.lastHiddenToken = None
        self.firstHidden = None

    def consume(self):
        """Fetch the next raw token from the underlying stream."""
        self.nextMonitoredToken = self.input.nextToken()

    def consumeFirst(self):
        """Prime the filter and chain hidden tokens that precede the first
        monitored token, remembering the first of them."""
        self.consume()

        p = None
        while self.hideMask.member(self.LA(1).getType()) or \
                self.discardMask.member(self.LA(1).getType()):
            if self.hideMask.member(self.LA(1).getType()):
                if not p:
                    p = self.LA(1)
                else:
                    p.setHiddenAfter(self.LA(1))
                    self.LA(1).setHiddenBefore(p)
                    p = self.LA(1)
                self.lastHiddenToken = p
                if not self.firstHidden:
                    self.firstHidden = p
            self.consume()

    def getDiscardMask(self):
        return self.discardMask

    def getHiddenAfter(self, t):
        return t.getHiddenAfter()

    def getHiddenBefore(self, t):
        return t.getHiddenBefore()

    def getHideMask(self):
        return self.hideMask

    def getInitialHiddenToken(self):
        return self.firstHidden

    def hide(self, m):
        """Hide one more token type (int) or install a new mask (BitSet).

        Raises TypeError for any other argument type.
        """
        if isinstance(m, int):
            self.hideMask.add(m)
            return
        # Was ``isinstance(m.BitMask)``, which raised for every non-int.
        if isinstance(m, BitSet):
            self.hideMask = m
            return
        raise TypeError("TokenStreamHiddenTokenFilter.hide requires " +
                        "integer or BitSet argument")

    def LA(self, i):
        """Return the single buffered token; ``i`` is ignored."""
        return self.nextMonitoredToken

    def nextToken(self):
        """Return the next monitored token, linking hidden tokens to it."""
        if not self.LA(1):
            # first call: load the first token and leading hidden tokens
            self.consumeFirst()

        monitored = self.LA(1)

        # hidden tokens seen before this one hang off its "before" link
        monitored.setHiddenBefore(self.lastHiddenToken)
        self.lastHiddenToken = None

        self.consume()
        p = monitored

        # chain the hidden tokens that follow the monitored token
        while self.hideMask.member(self.LA(1).getType()) or \
                self.discardMask.member(self.LA(1).getType()):
            if self.hideMask.member(self.LA(1).getType()):
                p.setHiddenAfter(self.LA(1))
                if p != monitored:
                    self.LA(1).setHiddenBefore(p)
                p = self.lastHiddenToken = self.LA(1)
            self.consume()
        return monitored
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### StringBuffer ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class StringBuffer:
    """Growable text buffer stored as a list of string pieces."""

    def __init__(self, string=None):
        self.text = []
        if string:
            self.text = list(string)

    def setLength(self, sz):
        """Truncate the buffer to ``sz`` pieces; it never grows."""
        if not sz:
            self.text = []
            return
        assert sz > 0
        if sz < self.length():
            self.text = self.text[0:sz]

    def length(self):
        return len(self.text)

    def append(self, c):
        self.text.append(c)

    def getString(self, a=None, length=None):
        """Return the buffer content as one string.

        ``a`` is the start index (default 0) and ``length`` the number of
        pieces to take; a missing or zero ``length`` means "to the end".
        A start index at or beyond the end gives the empty string.
        """
        start = a
        if not start:
            start = 0
        assert start >= 0
        if start >= len(self.text):
            return ""

        if length:
            assert (start + length) <= len(self.text)
            pieces = self.text[start:start + length]
        else:
            ## no second argument
            pieces = self.text[start:]
        return "".join(pieces)

    toString = getString  ## alias

    def __str__(self):
        return str(self.text)
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### Reader ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+## When reading Japanese chars, it happens that a stream returns a
+## 'char' of length 2. This looks like a bug in the appropriate
+## codecs - but I'm rather unsure about this. Anyway, if this is
+## the case, I'm going to split this string into a list of chars
+## and put them on hold, ie. on a buffer. Next time when called
+## we read from buffer until buffer is empty.
+## wh: nov, 25th -> problem does not appear in Python 2.4.0.c1.
+
class Reader(object):
    """Wrap a stream so that ``read(1)`` never returns more than one char.

    If the stream hands back a longer string, the surplus characters are
    kept in ``self.buf`` and served by the following calls.
    """

    def __init__(self, stream):
        self.cin = stream
        self.buf = []

    def read(self, num):
        """Return the next character, or the stream's EOF value."""
        assert num == 1

        if not self.buf:
            ## Read a char - this may return a string.
            c = self.cin.read(1)
            if not c or len(c) == 1:
                return c
            # Store reversed so that pop() yields the characters in order.
            self.buf.extend(list(c)[::-1])

        return self.buf.pop()
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### CharScanner ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class CharScanner(TokenStream):
    """Base class for lexers: character look-ahead, matching, position
    tracking and token creation on top of a LexerSharedInputState."""

    ## class members
    NO_CHAR = 0
    EOF_CHAR = ''  ### EOF shall be the empty string.

    def __init__(self, *argv, **kwargs):
        super(CharScanner, self).__init__()
        self.saveConsumedInput = True
        self.tokenClass = None
        self.caseSensitive = True
        self.caseSensitiveLiterals = True
        self.literals = None
        self.tabsize = 8
        self._returnToken = None
        self.commitToPath = False
        self.traceDepth = 0
        self.text = StringBuffer()
        self.hashString = hash(self)
        self.setTokenObjectClass(CommonToken)
        self.setInput(*argv)

    def __iter__(self):
        return CharScannerIterator(self)

    def setInput(self, *argv):
        """Attach an input source, chosen by the type of the first argument.

        * no argument: standard input
        * string: a file name, opened in mode ``"rb"``
        * file object: wrapped in a CharBuffer
        * LexerSharedInputState: used as is
        * InputBuffer: wrapped in a LexerSharedInputState
        * any object with a ``read`` attribute: wrapped in Reader,
          CharBuffer and LexerSharedInputState

        Raises TypeError for anything else.
        """
        ## case 1: no argument -> standard input
        if not argv:
            import sys
            self.setInput(sys.stdin)
            return

        arg1 = argv[0]

        ## case 2: a string is taken as file name
        if is_string_type(arg1):
            f = open(arg1, "rb")
            self.setInput(f)
            self.setFilename(arg1)
            return

        ## case 3: a file is wrapped by a char buffer
        if isinstance(arg1, file):
            self.setInput(CharBuffer(arg1))
            return

        ## case 4: a shared input state is used as is
        if isinstance(arg1, LexerSharedInputState):
            self.inputState = arg1
            return

        ## case 5: an input buffer gets its own shared input state
        if isinstance(arg1, InputBuffer):
            self.setInput(LexerSharedInputState(arg1))
            return

        ## case 6: anything else that offers read()
        if getattr(arg1, 'read', None):
            rd = Reader(arg1)
            cb = CharBuffer(rd)
            self.inputState = LexerSharedInputState(cb)
            return

        ## case 7: unsupported argument
        raise TypeError(argv)

    def setTabSize(self, size):
        self.tabsize = size

    def getTabSize(self):
        return self.tabsize

    def setCaseSensitive(self, t):
        self.caseSensitive = t

    def setCommitToPath(self, commit):
        self.commitToPath = commit

    def setFilename(self, f):
        self.inputState.filename = f

    def setLine(self, line):
        self.inputState.line = line

    def setText(self, s):
        """Replace the text collected for the current token by ``s``."""
        self.resetText()
        self.text.append(s)

    def getCaseSensitive(self):
        return self.caseSensitive

    def getCaseSensitiveLiterals(self):
        return self.caseSensitiveLiterals

    def getColumn(self):
        return self.inputState.column

    def setColumn(self, c):
        self.inputState.column = c

    def getCommitToPath(self):
        return self.commitToPath

    def getFilename(self):
        return self.inputState.filename

    def getInputBuffer(self):
        return self.inputState.input

    def getInputState(self):
        return self.inputState

    def setInputState(self, state):
        assert isinstance(state, LexerSharedInputState)
        self.inputState = state

    def getLine(self):
        return self.inputState.line

    def getText(self):
        """Return the text collected for the current token."""
        # str(self.text) would give the repr of the underlying list
        # ("['a', 'b']"); getString() joins the pieces.
        return self.text.getString()

    def getTokenObject(self):
        return self._returnToken

    def LA(self, i):
        """Return look-ahead character ``i``; lower-cased when the scanner
        is case insensitive."""
        c = self.inputState.input.LA(i)
        if not self.caseSensitive:
            c = c.__class__.lower(c)
        return c

    def makeToken(self, type):
        """Create a token of the configured class at the token start
        position; calls ``panic`` when that fails."""
        try:
            assert self.tokenClass
            tok = self.tokenClass()
            tok.setType(type)
            tok.setColumn(self.inputState.tokenStartColumn)
            tok.setLine(self.inputState.tokenStartLine)
            return tok
        except Exception:
            self.panic("unable to create new token")
        return Token.badToken

    def mark(self):
        return self.inputState.input.mark()

    def _match_bitset(self, b):
        if b.member(self.LA(1)):
            self.consume()
        else:
            raise MismatchedCharException(self.LA(1), b, False, self)

    def _match_string(self, s):
        for c in s:
            if self.LA(1) == c:
                self.consume()
            else:
                raise MismatchedCharException(self.LA(1), c, False, self)

    def match(self, item):
        """Match a string (character by character) or a BitSet member.

        Raises MismatchedCharException on failure.
        """
        if is_string_type(item):
            return self._match_string(item)
        else:
            return self._match_bitset(item)

    def matchNot(self, c):
        """Consume one character unless it equals ``c``."""
        if self.LA(1) != c:
            self.consume()
        else:
            raise MismatchedCharException(self.LA(1), c, True, self)

    def matchRange(self, c1, c2):
        """Consume one character if it lies within ``c1``..``c2``."""
        if self.LA(1) < c1 or self.LA(1) > c2:
            raise MismatchedCharException(self.LA(1), c1, c2, False, self)
        else:
            self.consume()

    def newline(self):
        self.inputState.line += 1
        self.inputState.column = 1

    def tab(self):
        """Advance the column to the next tab stop."""
        c = self.getColumn()
        # floor division keeps the column an integer
        nc = (((c - 1) // self.tabsize) + 1) * self.tabsize + 1
        self.setColumn(nc)

    def panic(self, s=''):
        """Print a message and terminate the process with exit status 1."""
        import sys
        print("CharScanner: panic: " + s)
        sys.exit(1)

    def reportError(self, s):
        """Print an error message, prefixed by the file name if known."""
        # An earlier one-line definition of this method was always
        # shadowed by this one and has been dropped.
        if not self.getFilename():
            print("error: " + str(s))
        else:
            print(self.getFilename() + ": error: " + str(s))

    def reportWarning(self, s):
        """Print a warning message, prefixed by the file name if known."""
        if not self.getFilename():
            print("warning: " + str(s))
        else:
            print(self.getFilename() + ": warning: " + str(s))

    def resetText(self):
        """Clear the collected text and record where the token starts."""
        self.text.setLength(0)
        self.inputState.tokenStartColumn = self.inputState.column
        self.inputState.tokenStartLine = self.inputState.line

    def rewind(self, pos):
        self.inputState.input.rewind(pos)

    def setTokenObjectClass(self, cl):
        self.tokenClass = cl

    def testForLiteral(self, token):
        """Retype ``token`` if its text is listed in the literals table.

        Returns the resulting type; returns None for a false token, a
        reserved token type or empty text.
        """
        if not token:
            return
        assert isinstance(token, Token)

        _type = token.getType()

        ## special tokens can't be literals
        if _type in [SKIP, INVALID_TYPE, EOF_TYPE, NULL_TREE_LOOKAHEAD]:
            return

        _text = token.getText()
        if not _text:
            return

        assert is_string_type(_text)
        _type = self.testLiteralsTable(_text, _type)
        token.setType(_type)
        return _type

    def testLiteralsTable(self, *args):
        """Look up a literal; called as ``(text, default)`` or ``(default)``.

        With a single argument the text collected so far is used.  Returns
        the type registered for the text or the given default.
        """
        if is_string_type(args[0]):
            s = args[0]
            i = args[1]
        else:
            s = self.text.getString()
            i = args[0]

        ## the default has to be an integer and the table a dict
        assert isinstance(i, int)
        assert isinstance(self.literals, dict)

        if not self.caseSensitiveLiterals:
            s = s.__class__.lower(s)
        return self.literals.get(s, i)

    def toLower(self, c):
        # lower() was called on the class without its argument
        return c.__class__.lower(c)

    def traceIndent(self):
        print(' ' * self.traceDepth)

    def traceIn(self, rname):
        self.traceDepth += 1
        self.traceIndent()
        print("> lexer %s c== %s" % (rname, self.LA(1)))

    def traceOut(self, rname):
        self.traceIndent()
        print("< lexer %s c== %s" % (rname, self.LA(1)))
        self.traceDepth -= 1

    def uponEOF(self):
        """Hook called when end of input is reached; does nothing here."""
        pass

    def append(self, c):
        if self.saveConsumedInput:
            self.text.append(c)

    def commit(self):
        self.inputState.input.commit()

    def consume(self):
        """Consume one character.

        Outside guess mode the character is appended to the token text
        (in its original case) and the column is advanced.
        """
        if not self.inputState.guessing:
            c = self.LA(1)
            if not self.caseSensitive:
                # use input.LA(), not LA(), to get original case
                # CharScanner.LA() would toLower it.
                c = self.inputState.input.LA(1)
            self.append(c)

            # NOTE(review): the column update is placed inside the
            # non-guessing branch because ``c`` is only bound here --
            # the original indentation was lost, please confirm.
            if c and c in "\t":
                self.tab()
            else:
                self.inputState.column += 1
        self.inputState.input.consume()

    ## Consume chars until one matches the given char
    def consumeUntil_char(self, c):
        # EOF_CHAR is a class attribute and has to be qualified.
        while self.LA(1) != CharScanner.EOF_CHAR and self.LA(1) != c:
            self.consume()

    ## Consume chars until one matches the given set
    def consumeUntil_bitset(self, bitset):
        # Test the set that was passed in; ``self.set`` does not exist.
        while self.LA(1) != CharScanner.EOF_CHAR and \
                not bitset.member(self.LA(1)):
            self.consume()

    ### If symbol seen is EOF then generate and set token, otherwise
    ### throw exception.
    def default(self, la1):
        if not la1:
            self.uponEOF()
            self._returnToken = self.makeToken(EOF_TYPE)
        else:
            self.raise_NoViableAlt(la1)

    def filterdefault(self, la1, *args):
        """Default action of a filtering lexer.

        At EOF an EOF token is produced.  Otherwise either one character
        is skipped (no extra arguments) or ``args[0]`` is called with the
        remaining arguments; in both cases TryAgain is raised afterwards.
        """
        if not la1:
            self.uponEOF()
            self._returnToken = self.makeToken(EOF_TYPE)
            return

        if not args:
            self.consume()
            raise TryAgain()
        else:
            ### apply filter object
            import sys
            self.commit()
            try:
                func = args[0]
                args = args[1:]
                func(*args)
            except RecognitionException:
                ## catastrophic failure
                self.reportError(sys.exc_info()[1])
                self.consume()
            raise TryAgain()

    def raise_NoViableAlt(self, la1=None):
        if not la1:
            la1 = self.LA(1)
        fname = self.getFilename()
        line = self.getLine()
        col = self.getColumn()
        raise NoViableAltForCharException(la1, fname, line, col)

    def set_return_token(self, _create, _token, _ttype, _offset):
        """Store (and, if requested, first create) the token to return."""
        if _create and not _token and (not _ttype == SKIP):
            string = self.text.getString(_offset)
            _token = self.makeToken(_ttype)
            _token.setText(string)
        self._returnToken = _token
        return _token
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### CharScannerIterator ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
class CharScannerIterator:
    """Iterator over the tokens produced by a CharScanner.

    Iteration stops at the first false token or the first token whose
    ``isEOF()`` is true.  Raises TypeError when constructed with anything
    but a CharScanner.
    """

    def __init__(self, inst):
        if isinstance(inst, CharScanner):
            self.inst = inst
            return
        raise TypeError("CharScannerIterator requires CharScanner object")

    def __iter__(self):
        # Iterators have to be iterable themselves, otherwise iter() on
        # this object fails.
        return self

    def next(self):
        assert self.inst
        item = self.inst.nextToken()
        if not item or item.isEOF():
            raise StopIteration()
        return item

    # Same method under the name that newer interpreters look up.
    __next__ = next
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### BitSet ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+### I'm assuming here that a long is 64bits. It appears however, that
+### a long is of any size. That means we can use a single long as the
+### bitset (!), ie. Python would do almost all the work (TBD).
+
+class BitSet(object):
+ BITS = 64
+ NIBBLE = 4
+ LOG_BITS = 6
+ MOD_MASK = BITS -1
+
+ def __init__(self,data=None):
+ if not data:
+ BitSet.__init__(self,[long(0)])
+ return
+ if isinstance(data,int):
+ BitSet.__init__(self,[long(data)])
+ return
+ if isinstance(data,long):
+ BitSet.__init__(self,[data])
+ return
+ if not isinstance(data,list):
+ raise TypeError("BitSet requires integer, long, or " +
+ "list argument")
+ for x in data:
+ if not isinstance(x,long):
+ raise TypeError(self,"List argument item is " +
+ "not a long: %s" % (x))
+ self.data = data
+
+ def __str__(self):
+ bits = len(self.data) * BitSet.BITS
+ s = ""
+ for i in xrange(0,bits):
+ if self.at(i):
+ s += "1"
+ else:
+ s += "o"
+ if not ((i+1) % 10):
+ s += '|%s|' % (i+1)
+ return s
+
+ def __repr__(self):
+ return str(self)
+
+ def member(self,item):
+ if not item:
+ return False
+
+ if isinstance(item,int):
+ return self.at(item)
+
+ if not is_string_type(item):
+ raise TypeError(self,"char or unichar expected: %s" % (item))
+
+ ## char is a (unicode) string with at most lenght 1, ie.
+ ## a char.
+
+ if len(item) != 1:
+ raise TypeError(self,"char expected: %s" % (item))
+
+ ### handle ASCII/UNICODE char
+ num = ord(item)
+
+ ### check whether position num is in bitset
+ return self.at(num)
+
+ def wordNumber(self,bit):
+ return bit >> BitSet.LOG_BITS
+
+ def bitMask(self,bit):
+ pos = bit & BitSet.MOD_MASK ## bit mod BITS
+ return (1L << pos)
+
+ def set(self,bit,on=True):
+ # grow bitset as required (use with care!)
+ i = self.wordNumber(bit)
+ mask = self.bitMask(bit)
+ if i>=len(self.data):
+ d = i - len(self.data) + 1
+ for x in xrange(0,d):
+ self.data.append(0L)
+ assert len(self.data) == i+1
+ if on:
+ self.data[i] |= mask
+ else:
+ self.data[i] &= (~mask)
+
+ ### make add an alias for set
+ add = set
+
+ def off(self,bit,off=True):
+ self.set(bit,not off)
+
+ def at(self,bit):
+     # Return the word masked by `bit`: non-zero when the bit is set, else 0.
+     i = self.wordNumber(bit)
+     if i >= len(self.data): return 0   # past the stored words: not a member
+     return self.data[i] & self.bitMask(bit)
+
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### some further funcs ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+def illegalarg_ex(func):
+ raise ValueError(
+ "%s is only valid if parser is built for debugging" %
+ (func.func_name))
+
+def runtime_ex(func):
+ raise RuntimeException(
+ "%s is only valid if parser is built for debugging" %
+ (func.func_name))
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### TokenBuffer ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class TokenBuffer(object):
+ def __init__(self,stream):
+ self.input = stream
+ self.nMarkers = 0
+ self.markerOffset = 0
+ self.numToConsume = 0
+ self.queue = Queue()
+
+ def reset(self) :
+ self.nMarkers = 0
+ self.markerOffset = 0
+ self.numToConsume = 0
+ self.queue.reset()
+
+ def consume(self) :
+ self.numToConsume += 1
+
+ def fill(self, amount):
+ self.syncConsume()
+ while self.queue.length() < (amount + self.markerOffset):
+ self.queue.append(self.input.nextToken())
+
+ def getInput(self):
+ return self.input
+
+ def LA(self,k) :
+ self.fill(k)
+ return self.queue.elementAt(self.markerOffset + k - 1).type
+
+ def LT(self,k) :
+ self.fill(k)
+ return self.queue.elementAt(self.markerOffset + k - 1)
+
+ def mark(self) :
+ self.syncConsume()
+ self.nMarkers += 1
+ return self.markerOffset
+
+ def rewind(self,mark) :
+ self.syncConsume()
+ self.markerOffset = mark
+ self.nMarkers -= 1
+
+ def syncConsume(self) :
+ while self.numToConsume > 0:
+ if self.nMarkers > 0:
+ # guess mode -- leave leading characters and bump offset.
+ self.markerOffset += 1
+ else:
+ # normal mode -- remove first character
+ self.queue.removeFirst()
+ self.numToConsume -= 1
+
+ def __str__(self):
+ return "(%s,%s,%s,%s,%s)" % (
+ self.input,
+ self.nMarkers,
+ self.markerOffset,
+ self.numToConsume,
+ self.queue)
+
+ def __repr__(self):
+ return str(self)
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### ParserSharedInputState ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class ParserSharedInputState(object):
+
+ def __init__(self):
+ self.input = None
+ self.reset()
+
+ def reset(self):
+ self.guessing = 0
+ self.filename = None
+ if self.input:
+ self.input.reset()
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### Parser ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class Parser(object):
+
+ def __init__(self, *args, **kwargs):
+ self.tokenNames = None
+ self.returnAST = None
+ self.astFactory = None
+ self.tokenTypeToASTClassMap = {}
+ self.ignoreInvalidDebugCalls = False
+ self.traceDepth = 0
+ if not args:
+ self.inputState = ParserSharedInputState()
+ return
+ arg0 = args[0]
+ assert isinstance(arg0,ParserSharedInputState)
+ self.inputState = arg0
+ return
+
+ def getTokenTypeToASTClassMap(self):
+ return self.tokenTypeToASTClassMap
+
+
+ def addMessageListener(self, l):
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         illegalarg_ex(self.addMessageListener)   # raises ValueError naming this method
+
+ def addParserListener(self,l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         illegalarg_ex(self.addParserListener)   # raises ValueError naming this method
+
+ def addParserMatchListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         illegalarg_ex(self.addParserMatchListener)   # raises ValueError naming this method
+
+ def addParserTokenListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         illegalarg_ex(self.addParserTokenListener)   # raises ValueError naming this method
+
+ def addSemanticPredicateListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         illegalarg_ex(self.addSemanticPredicateListener)   # raises ValueError naming this method
+
+ def addSyntacticPredicateListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         illegalarg_ex(self.addSyntacticPredicateListener)   # raises ValueError naming this method
+
+ def addTraceListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         illegalarg_ex(self.addTraceListener)   # raises ValueError naming this method
+
+ def consume(self):
+ raise NotImplementedError()
+
+ def _consumeUntil_type(self,tokenType):
+ while self.LA(1) != EOF_TYPE and self.LA(1) != tokenType:
+ self.consume()
+
+ def _consumeUntil_bitset(self, set):
+ while self.LA(1) != EOF_TYPE and not set.member(self.LA(1)):
+ self.consume()
+
+ def consumeUntil(self,arg):
+ if isinstance(arg,int):
+ self._consumeUntil_type(arg)
+ else:
+ self._consumeUntil_bitset(arg)
+
+ def defaultDebuggingSetup(self):
+ pass
+
+ def getAST(self) :
+ return self.returnAST
+
+ def getASTFactory(self) :
+ return self.astFactory
+
+ def getFilename(self) :
+ return self.inputState.filename
+
+ def getInputState(self) :
+ return self.inputState
+
+ def setInputState(self, state) :
+ self.inputState = state
+
+ def getTokenName(self,num) :
+ return self.tokenNames[num]
+
+ def getTokenNames(self) :
+ return self.tokenNames
+
+ def isDebugMode(self) :
+     return False   # was `self.false`, an attribute this class never defines
+
+ def LA(self, i):
+ raise NotImplementedError()
+
+ def LT(self, i):
+ raise NotImplementedError()
+
+ def mark(self):
+ return self.inputState.input.mark()
+
+ def _match_int(self,t):
+ if (self.LA(1) != t):
+ raise MismatchedTokenException(
+ self.tokenNames, self.LT(1), t, False, self.getFilename())
+ else:
+ self.consume()
+
+ def _match_set(self, b):
+ if (not b.member(self.LA(1))):
+ raise MismatchedTokenException(
+ self.tokenNames,self.LT(1), b, False, self.getFilename())
+ else:
+ self.consume()
+
+ def match(self,set) :
+     if isinstance(set,int):   # a single token type
+         self._match_int(set)
+         return
+     if isinstance(set,BitSet):   # a set of acceptable token types
+         self._match_set(set)
+         return
+     raise TypeError("Parser.match requires integer or BitSet argument")
+
+ def matchNot(self,t):
+     if self.LA(1) == t:   # the next token must be anything but type t
+         raise MismatchedTokenException(
+             self.tokenNames, self.LT(1), t, True, self.getFilename())
+     else:
+         self.consume()
+
+ def removeMessageListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         runtime_ex(self.removeMessageListener)   # raises RuntimeException naming this method
+
+ def removeParserListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         runtime_ex(self.removeParserListener)   # raises RuntimeException naming this method
+
+ def removeParserMatchListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         runtime_ex(self.removeParserMatchListener)   # raises RuntimeException naming this method
+
+ def removeParserTokenListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         runtime_ex(self.removeParserTokenListener)   # raises RuntimeException naming this method
+
+ def removeSemanticPredicateListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         runtime_ex(self.removeSemanticPredicateListener)   # raises RuntimeException naming this method
+
+ def removeSyntacticPredicateListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         runtime_ex(self.removeSyntacticPredicateListener)   # raises RuntimeException naming this method
+
+ def removeTraceListener(self, l) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         runtime_ex(self.removeTraceListener)   # raises RuntimeException naming this method
+
+ def reportError(self,x) :
+     # Print a syntax-error message for `x` (a Token or any printable) to stderr.
+     fmt = "syntax error:"
+     f = self.getFilename()
+     if f:
+         fmt = ("%s:" % f) + fmt
+     if isinstance(x,Token):
+         line, col = x.getLine(), x.getColumn()   # the two getters were swapped
+         text = x.getText()
+         fmt = fmt + 'unexpected symbol at line %s (column %s) : "%s"'
+         print >>sys.stderr, fmt % (line,col,text)
+     else:
+         print >>sys.stderr, fmt,str(x)
+
+ def reportWarning(self,s):
+     f = self.getFilename()   # prefix the warning with the filename when one is set
+     if f:
+         print "%s:warning: %s" % (f,str(s))
+     else:
+         print "warning: %s" % (str(s))
+
+ def rewind(self, pos) :
+ self.inputState.input.rewind(pos)
+
+ def setASTFactory(self, f) :
+ self.astFactory = f
+
+ def setASTNodeClass(self, cl) :
+ self.astFactory.setASTNodeType(cl)
+
+ def setASTNodeType(self, nodeType) :
+ self.setASTNodeClass(nodeType)
+
+ def setDebugMode(self, debugMode) :
+     if not self.ignoreInvalidDebugCalls:   # only legal on a debugging parser
+         runtime_ex(self.setDebugMode)   # raises RuntimeException naming this method
+
+ def setFilename(self, f) :
+ self.inputState.filename = f
+
+ def setIgnoreInvalidDebugCalls(self, value) :
+ self.ignoreInvalidDebugCalls = value
+
+ def setTokenBuffer(self, t) :
+ self.inputState.input = t
+
+ def traceIndent(self):
+ print " " * self.traceDepth
+
+ def traceIn(self,rname):
+ self.traceDepth += 1
+ self.trace("> ", rname)
+
+ def traceOut(self,rname):
+ self.trace("< ", rname)
+ self.traceDepth -= 1
+
+ ### wh: moved from ASTFactory to Parser
+ def addASTChild(self,currentAST, child):
+ if not child:
+ return
+ if not currentAST.root:
+ currentAST.root = child
+ elif not currentAST.child:
+ currentAST.root.setFirstChild(child)
+ else:
+ currentAST.child.setNextSibling(child)
+ currentAST.child = child
+ currentAST.advanceChildToEnd()
+
+ ### wh: moved from ASTFactory to Parser
+ def makeASTRoot(self,currentAST,root) :
+ if root:
+ ### Add the current root as a child of new root
+ root.addChild(currentAST.root)
+ ### The new current child is the last sibling of the old root
+ currentAST.child = currentAST.root
+ currentAST.advanceChildToEnd()
+ ### Set the new root
+ currentAST.root = root
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### LLkParser ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class LLkParser(Parser):
+
+ def __init__(self, *args, **kwargs):
+ try:
+ arg1 = args[0]
+ except:
+ arg1 = 1
+
+ if isinstance(arg1,int):
+ super(LLkParser,self).__init__()
+ self.k = arg1
+ return
+
+ if isinstance(arg1,ParserSharedInputState):
+ super(LLkParser,self).__init__(arg1)
+ self.set_k(1,*args)
+ return
+
+ if isinstance(arg1,TokenBuffer):
+ super(LLkParser,self).__init__()
+ self.setTokenBuffer(arg1)
+ self.set_k(1,*args)
+ return
+
+ if isinstance(arg1,TokenStream):
+ super(LLkParser,self).__init__()
+ tokenBuf = TokenBuffer(arg1)
+ self.setTokenBuffer(tokenBuf)
+ self.set_k(1,*args)
+ return
+
+ ### unknown argument
+ raise TypeError("LLkParser requires integer, " +
+ "ParserSharedInputStream or TokenStream argument")
+
+ def consume(self):
+ self.inputState.input.consume()
+
+ def LA(self,i):
+ return self.inputState.input.LA(i)
+
+ def LT(self,i):
+ return self.inputState.input.LT(i)
+
+ def set_k(self,index,*args):
+ try:
+ self.k = args[index]
+ except:
+ self.k = 1
+
+ def trace(self,ee,rname):
+ print type(self)
+ self.traceIndent()
+ guess = ""
+ if self.inputState.guessing > 0:
+ guess = " [guessing]"
+ print(ee + rname + guess)
+ for i in xrange(1,self.k+1):
+ if i != 1:
+ print(", ")
+ if self.LT(i) :
+ v = self.LT(i).getText()
+ else:
+ v = "null"
+ print "LA(%s) == %s" % (i,v)
+ print("\n")
+
+ def traceIn(self,rname):
+ self.traceDepth += 1;
+ self.trace("> ", rname);
+
+ def traceOut(self,rname):
+ self.trace("< ", rname);
+ self.traceDepth -= 1;
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### TreeParserSharedInputState ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class TreeParserSharedInputState(object):
+ def __init__(self):
+ self.guessing = 0
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### TreeParser ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class TreeParser(object):
+
+ def __init__(self, *args, **kwargs):
+ self.inputState = TreeParserSharedInputState()
+ self._retTree = None
+ self.tokenNames = []
+ self.returnAST = None
+ self.astFactory = ASTFactory()
+ self.traceDepth = 0
+
+ def getAST(self):
+ return self.returnAST
+
+ def getASTFactory(self):
+ return self.astFactory
+
+ def getTokenName(self,num) :
+ return self.tokenNames[num]
+
+ def getTokenNames(self):
+ return self.tokenNames
+
+ def match(self,t,set) :
+     assert isinstance(set,int) or isinstance(set,BitSet)
+     if not t or t == ASTNULL:
+         raise MismatchedTokenException(self.getTokenNames(), t,set, False)
+     # `set` is either a single token type or a BitSet of acceptable types
+     if isinstance(set,int) and t.getType() != set:
+         raise MismatchedTokenException(self.getTokenNames(), t,set, False)
+     # call getType(): the bound method itself is not a valid member() argument
+     if isinstance(set,BitSet) and not set.member(t.getType()):
+         raise MismatchedTokenException(self.getTokenNames(), t,set, False)
+
+ def matchNot(self,t, ttype) :
+     if not t or (t == ASTNULL) or (t.getType() == ttype):   # missing node or excluded type
+         raise MismatchedTokenException(self.getTokenNames(), t, ttype, True)
+
+ def reportError(self,ex):
+ print >>sys.stderr,"error:",ex
+
+ def reportWarning(self, s):
+ print "warning:",s
+
+ def setASTFactory(self,f):
+ self.astFactory = f
+
+ def setASTNodeType(self,nodeType):
+ self.setASTNodeClass(nodeType)
+
+ def setASTNodeClass(self,nodeType):
+ self.astFactory.setASTNodeType(nodeType)
+
+ def traceIndent(self):
+ print " " * self.traceDepth
+
+ def traceIn(self,rname,t):
+ self.traceDepth += 1
+ self.traceIndent()
+ print("> " + rname + "(" +
+ ifelse(t,str(t),"null") + ")" +
+ ifelse(self.inputState.guessing>0,"[guessing]",""))
+
+ def traceOut(self,rname,t):
+ self.traceIndent()
+ print("< " + rname + "(" +
+ ifelse(t,str(t),"null") + ")" +
+ ifelse(self.inputState.guessing>0,"[guessing]",""))
+ self.traceDepth -= 1
+
+ ### wh: moved from ASTFactory to TreeParser
+ def addASTChild(self,currentAST, child):
+ if not child:
+ return
+ if not currentAST.root:
+ currentAST.root = child
+ elif not currentAST.child:
+ currentAST.root.setFirstChild(child)
+ else:
+ currentAST.child.setNextSibling(child)
+ currentAST.child = child
+ currentAST.advanceChildToEnd()
+
+ ### wh: moved from ASTFactory to TreeParser
+ def makeASTRoot(self,currentAST,root):
+ if root:
+ ### Add the current root as a child of new root
+ root.addChild(currentAST.root)
+ ### The new current child is the last sibling of the old root
+ currentAST.child = currentAST.root
+ currentAST.advanceChildToEnd()
+ ### Set the new root
+ currentAST.root = root
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### funcs to work on trees ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+def rightmost(ast):
+ if ast:
+ while(ast.right):
+ ast = ast.right
+ return ast
+
+def cmptree(s,t,partial):
+ while(s and t):
+ ### as a quick optimization, check roots first.
+ if not s.equals(t):
+ return False
+
+ ### if roots match, do full list match test on children.
+ if not cmptree(s.getFirstChild(),t.getFirstChild(),partial):
+ return False
+
+ s = s.getNextSibling()
+ t = t.getNextSibling()
+
+ r = ifelse(partial,not t,not s and not t)
+ return r
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### AST ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class AST(object):
+ def __init__(self):
+ pass
+
+ def addChild(self, c):
+ pass
+
+ def equals(self, t):
+ return False
+
+ def equalsList(self, t):
+ return False
+
+ def equalsListPartial(self, t):
+ return False
+
+ def equalsTree(self, t):
+ return False
+
+ def equalsTreePartial(self, t):
+ return False
+
+ def findAll(self, tree):
+ return None
+
+ def findAllPartial(self, subtree):
+ return None
+
+ def getFirstChild(self):
+ return self
+
+ def getNextSibling(self):
+ return self
+
+ def getText(self):
+ return ""
+
+ def getType(self):
+ return INVALID_TYPE
+
+ def getLine(self):
+ return 0
+
+ def getColumn(self):
+ return 0
+
+ def getNumberOfChildren(self):
+ return 0
+
+ def initialize(self, t, txt=None):
+     # One signature for both call forms, initialize(t) and initialize(t, txt).
+     # Python has no overloading: the former second `def initialize(self, t)`
+     # replaced the first, so the two-argument form raised TypeError.
+     pass
+
+ def setFirstChild(self, c):
+ pass
+
+ def setNextSibling(self, n):
+ pass
+
+ def setText(self, text):
+ pass
+
+ def setType(self, ttype):
+ pass
+
+ def toString(self):
+     return self.getText()   # must return a string: __str__ is aliased to this method
+
+ __str__ = toString
+
+ def toStringList(self):
+ return self.getText()
+
+ def toStringTree(self):
+ return self.getText()
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### ASTNULLType ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+### There is only one instance of this class **/
+class ASTNULLType(AST):
+ def __init__(self):
+ AST.__init__(self)
+ pass
+
+ def getText(self):
+ return ""
+
+ def getType(self):
+ return NULL_TREE_LOOKAHEAD
+
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### BaseAST ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class BaseAST(AST):
+
+ verboseStringConversion = False
+ tokenNames = None
+
+ def __init__(self):
+ self.down = None ## kid
+ self.right = None ## sibling
+
+ def addChild(self,node):
+ if node:
+ t = rightmost(self.down)
+ if t:
+ t.right = node
+ else:
+ assert not self.down
+ self.down = node
+
+ def getNumberOfChildren(self):
+ t = self.down
+ n = 0
+ while t:
+ n += 1
+ t = t.right
+ return n
+
+ def doWorkForFindAll(self,v,target,partialMatch):
+ sibling = self
+
+ while sibling:
+ c1 = partialMatch and sibling.equalsTreePartial(target)
+ if c1:
+ v.append(sibling)
+ else:
+ c2 = not partialMatch and sibling.equalsTree(target)
+ if c2:
+ v.append(sibling)
+
+ ### regardless of match or not, check any children for matches
+ if sibling.getFirstChild():
+ sibling.getFirstChild().doWorkForFindAll(v,target,partialMatch)
+
+ sibling = sibling.getNextSibling()
+
+ ### Is node t equal to 'self' in terms of token type and text?
+ def equals(self,t):
+ if not t:
+ return False
+ return self.getText() == t.getText() and self.getType() == t.getType()
+
+ ### Is t an exact structural and equals() match of this tree. The
+ ### 'self' reference is considered the start of a sibling list.
+ ###
+ def equalsList(self, t):
+ return cmptree(self, t, partial=False)
+
+ ### Is 't' a subtree of this list?
+ ### The siblings of the root are NOT ignored.
+ ###
+ def equalsListPartial(self,t):
+ return cmptree(self,t,partial=True)
+
+ ### Is tree rooted at 'self' equal to 't'? The siblings
+ ### of 'self' are ignored.
+ ###
+ def equalsTree(self, t):
+ return self.equals(t) and \
+ cmptree(self.getFirstChild(), t.getFirstChild(), partial=False)
+
+ ### Is 't' a subtree of the tree rooted at 'self'? The siblings
+ ### of 'self' are ignored.
+ ###
+ def equalsTreePartial(self, t):
+ if not t:
+ return True
+ return self.equals(t) and cmptree(
+ self.getFirstChild(), t.getFirstChild(), partial=True)
+
+ ### Walk the tree looking for all exact subtree matches. Return
+ ### a list of the subtree roots found herein, or None when the
+ ### target tree is empty.
+ def findAll(self,target):
+ roots = []
+
+ ### the empty tree cannot result in an enumeration
+ if not target:
+ return None
+ # find all matches recursively
+ self.doWorkForFindAll(roots, target, False)
+ return roots
+
+ ### Walk the tree looking for all partial subtree matches. Return
+ ### a list of the subtree roots found herein, or None when the
+ ### subtree is empty.
+ def findAllPartial(self,sub):
+ roots = []
+
+ ### the empty tree cannot result in an enumeration
+ if not sub:
+ return None
+
+ self.doWorkForFindAll(roots, sub, True) ### find all matches recursively
+ return roots
+
+ ### Get the first child of this node None if not children
+ def getFirstChild(self):
+ return self.down
+
+ ### Get the next sibling in line after this one
+ def getNextSibling(self):
+ return self.right
+
+ ### Get the token text for this node
+ def getText(self):
+ return ""
+
+ ### Get the token type for this node
+ def getType(self):
+ return 0
+
+ def getLine(self):
+ return 0
+
+ def getColumn(self):
+ return 0
+
+ ### Remove all children */
+ def removeChildren(self):
+ self.down = None
+
+ def setFirstChild(self,c):
+ self.down = c
+
+ def setNextSibling(self, n):
+ self.right = n
+
+ ### Set the token text for this node
+ def setText(self, text):
+ pass
+
+ ### Set the token type for this node
+ def setType(self, ttype):
+ pass
+
+ ### static
+ def setVerboseStringConversion(verbose,names):
+     BaseAST.verboseStringConversion = verbose   # store on the class: plain
+     BaseAST.tokenNames = names                  # assignment only made dead locals
+ setVerboseStringConversion = staticmethod(setVerboseStringConversion)
+
+ ### Return an array of strings that maps token ID to its text (since 2.7.3).
+ def getTokenNames():
+     return BaseAST.tokenNames   # class attribute; the bare name was an undefined global
+ getTokenNames = staticmethod(getTokenNames)   # takes no self, so it must be static to be callable
+
+ def toString(self):
+ return self.getText()
+
+ ### return tree as lisp string - sibling included
+ def toStringList(self):
+ ts = self.toStringTree()
+ sib = self.getNextSibling()
+ if sib:
+ ts += sib.toStringList()
+ return ts
+
+ __str__ = toStringList
+
+ ### return tree as string - siblings ignored
+ def toStringTree(self):
+ ts = ""
+ kid = self.getFirstChild()
+ if kid:
+ ts += " ("
+ ts += " " + self.toString()
+ if kid:
+ ts += kid.toStringList()
+ ts += " )"
+ return ts
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### CommonAST ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+### Common AST node implementation
+class CommonAST(BaseAST):
+ def __init__(self,token=None):
+ super(CommonAST,self).__init__()
+ self.ttype = INVALID_TYPE
+ self.text = ""
+ self.line = 0
+ self.column= 0
+ self.initialize(token)
+ #assert self.text
+
+ ### Get the token text for this node
+ def getText(self):
+ return self.text
+
+ ### Get the token type for this node
+ def getType(self):
+ return self.ttype
+
+ ### Get the line for this node
+ def getLine(self):
+ return self.line
+
+ ### Get the column for this node
+ def getColumn(self):
+ return self.column
+
+ def initialize(self,*args):
+ if not args:
+ return
+
+ arg0 = args[0]
+
+ if isinstance(arg0,int):
+ arg1 = args[1]
+ self.setType(arg0)
+ self.setText(arg1)
+ return
+
+ if isinstance(arg0,AST) or isinstance(arg0,Token):
+ self.setText(arg0.getText())
+ self.setType(arg0.getType())
+ self.line = arg0.getLine()
+ self.column = arg0.getColumn()
+ return
+
+ ### Set the token text for this node
+ def setText(self,text_):
+ assert is_string_type(text_)
+ self.text = text_
+
+ ### Set the token type for this node
+ def setType(self,ttype_):
+ assert isinstance(ttype_,int)
+ self.ttype = ttype_
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### CommonASTWithHiddenTokens ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class CommonASTWithHiddenTokens(CommonAST):
+
+ def __init__(self,*args):
+     self.hiddenBefore = None   # defaults go first: CommonAST.__init__ calls
+     self.hiddenAfter = None    # self.initialize(), which may fill these from a token
+     CommonAST.__init__(self,*args)
+
+ def getHiddenAfter(self):
+ return self.hiddenAfter
+
+ def getHiddenBefore(self):
+ return self.hiddenBefore
+
+ def initialize(self,*args):
+ CommonAST.initialize(self,*args)
+ if args and isinstance(args[0],Token):
+ assert isinstance(args[0],CommonHiddenStreamToken)
+ self.hiddenBefore = args[0].getHiddenBefore()
+ self.hiddenAfter = args[0].getHiddenAfter()
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### ASTPair ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class ASTPair(object):
+ def __init__(self):
+ self.root = None ### current root of tree
+ self.child = None ### current child to which siblings are added
+
+ ### Make sure that child is the last sibling */
+ def advanceChildToEnd(self):
+ if self.child:
+ while self.child.getNextSibling():
+ self.child = self.child.getNextSibling()
+
+ ### Copy an ASTPair. Don't call it clone() because we want type-safety */
+ def copy(self):
+ tmp = ASTPair()
+ tmp.root = self.root
+ tmp.child = self.child
+ return tmp
+
+ def toString(self):   # render "[root,child]" by node text; a missing node prints as "null"
+     r = ifelse(not self.root,"null",self.root and self.root.getText())   # ifelse() evaluates both
+     c = ifelse(not self.child,"null",self.child and self.child.getText())   # branches: guard None
+     return "[%s,%s]" % (r,c)
+
+ __str__ = toString
+ __repr__ = toString
+
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### ASTFactory ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class ASTFactory(object):
+ def __init__(self,table=None):
+ self._class = None
+ self._classmap = ifelse(table,table,None)
+
+ def create(self,*args):
+ if not args:
+ return self.create(INVALID_TYPE)
+
+ arg0 = args[0]
+ arg1 = None
+ arg2 = None
+
+ try:
+ arg1 = args[1]
+ arg2 = args[2]
+ except:
+ pass
+
+ # ctor(int)
+ if isinstance(arg0,int) and not arg2:
+ ### get class for 'self' type
+ c = self.getASTNodeType(arg0)
+ t = self.create(c)
+ if t:
+ t.initialize(arg0, ifelse(arg1,arg1,""))
+ return t
+
+ # ctor(int,something)
+ if isinstance(arg0,int) and arg2:
+ t = self.create(arg2)
+ if t:
+ t.initialize(arg0,arg1)
+ return t
+
+ # ctor(AST)
+ if isinstance(arg0,AST):
+ t = self.create(arg0.getType())
+ if t:
+ t.initialize(arg0)
+ return t
+
+ # ctor(token)
+ if isinstance(arg0,Token) and not arg1:
+ ttype = arg0.getType()
+ assert isinstance(ttype,int)
+ t = self.create(ttype)
+ if t:
+ t.initialize(arg0)
+ return t
+
+ # ctor(token,class)
+ if isinstance(arg0,Token) and arg1:
+ assert isinstance(arg1,type)
+ assert issubclass(arg1,AST)
+ # this creates instance of 'arg1' using 'arg0' as
+ # argument. Wow, that's magic!
+ t = arg1(arg0)
+ assert t and isinstance(t,AST)
+ return t
+
+ # ctor(class)
+ if isinstance(arg0,type):
+ ### next statement creates instance of type (!)
+ t = arg0()
+ assert isinstance(t,AST)
+ return t
+
+
+ def setASTNodeClass(self,className=None):
+ if not className:
+ return
+ assert isinstance(className,type)
+ assert issubclass(className,AST)
+ self._class = className
+
+ ### kind of misnomer - use setASTNodeClass instead.
+ setASTNodeType = setASTNodeClass
+
+ def getASTNodeClass(self):
+ return self._class
+
+
+
+ def getTokenTypeToASTClassMap(self):
+ return self._classmap
+
+ def setTokenTypeToASTClassMap(self,amap):
+ self._classmap = amap
+
+ def error(self, e):
+ import sys
+ print >> sys.stderr, e
+
+ def setTokenTypeASTNodeType(self, tokenType, className):
+ """
+ Specify a mapping between a token type and a (AST) class.
+ """
+ if not self._classmap:
+ self._classmap = {}
+
+ if not className:
+ try:
+ del self._classmap[tokenType]
+ except:
+ pass
+ else:
+ ### here we should also perform actions to ensure that
+ ### a. class can be loaded
+ ### b. class is a subclass of AST
+ ###
+ assert isinstance(className,type)
+ assert issubclass(className,AST) ## a & b
+ ### enter the class
+ self._classmap[tokenType] = className
+
+ def getASTNodeType(self,tokenType):
+ """
+ For a given token type return the AST node type. First we
+ lookup a mapping table, second we try _class
+ and finally we resolve to "antlr.CommonAST".
+ """
+
+ # first
+ if self._classmap:
+ try:
+ c = self._classmap[tokenType]
+ if c:
+ return c
+ except:
+ pass
+ # second
+ if self._class:
+ return self._class
+
+ # default
+ return CommonAST
+
+ ### methods that have been moved to file scope - just listed
+ ### here to be somewhat consistent with original API
+ def dup(self,t):
+     return dup(t,self)   # module-level dup(), called directly rather than via the `antlr` name
+
+ def dupList(self,t):
+     return dupList(t,self)   # module-level dupList(), called directly rather than via the `antlr` name
+
+ def dupTree(self,t):
+     return dupTree(t,self)   # module-level dupTree(), called directly rather than via the `antlr` name
+
+ ### methods moved to other classes
+ ### 1. makeASTRoot -> Parser
+ ### 2. addASTChild -> Parser
+
+ ### non-standard: create alias for longish method name
+ maptype = setTokenTypeASTNodeType
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### ASTVisitor ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+class ASTVisitor(object):
+ def __init__(self,*args):
+ pass
+
+ def visit(self,ast):
+ pass
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+### static methods and variables ###
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx###
+
+ASTNULL = ASTNULLType()
+
+### wh: moved from ASTFactory as there's nothing ASTFactory-specific
+### in this method.
+def make(*nodes):
+ if not nodes:
+ return None
+
+ for i in xrange(0,len(nodes)):
+ node = nodes[i]
+ if node:
+ assert isinstance(node,AST)
+
+ root = nodes[0]
+ tail = None
+ if root:
+ root.setFirstChild(None)
+
+ for i in xrange(1,len(nodes)):
+ if not nodes[i]:
+ continue
+ if not root:
+ root = tail = nodes[i]
+ elif not tail:
+ root.setFirstChild(nodes[i])
+ tail = root.getFirstChild()
+ else:
+ tail.setNextSibling(nodes[i])
+ tail = tail.getNextSibling()
+
+ ### Chase tail to last sibling
+ while tail.getNextSibling():
+ tail = tail.getNextSibling()
+ return root
+
+def dup(t,factory):
+ if not t:
+ return None
+
+ if factory:
+ dup_t = factory.create(t.__class__)
+ else:
+ raise TypeError("dup function requires ASTFactory argument")
+ dup_t.initialize(t)
+ return dup_t
+
+def dupList(t,factory):
+ result = dupTree(t,factory)
+ nt = result
+ while t:
+ ## for each sibling of the root
+ t = t.getNextSibling()
+ nt.setNextSibling(dupTree(t,factory))
+ nt = nt.getNextSibling()
+ return result
+
+def dupTree(t,factory):
+ result = dup(t,factory)
+ if t:
+ result.setFirstChild(dupList(t.getFirstChild(),factory))
+ return result
+
+###xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+### $Id: antlr.py 3750 2009-02-13 00:13:04Z sjmachin $
+
+# Local Variables: ***
+# mode: python ***
+# py-indent-offset: 4 ***
+# End: ***
diff --git a/tablib/packages/xlwt/doc/xlwt.html b/tablib/packages/xlwt/doc/xlwt.html
new file mode 100644
index 0000000..6efb698
--- /dev/null
+++ b/tablib/packages/xlwt/doc/xlwt.html
@@ -0,0 +1,199 @@
+
+
+
+
+The xlwt Module
+
+
+
The xlwt Module
+
A Python package for generating Microsoft Excel ™ spreadsheet files.
+
+
+
General information
+
+
State of Documentation
+
+
+This documentation is currently incomplete. There may be methods and
+classes not included and any item marked with a [NC] is not complete and may have further
+parameters, methods, attributes and functionality that are not
+documented. In these cases, you'll have to refer to the source if the
+documentation provided is insufficient.
+
+
+
+
+
Module Contents [NC]
+
+
easyxf (function)
+
+
+ This function is used to create and configure XFStyle objects
+ for use with (for example) the Worksheet.write method.
+
+
+
strg_to_parse
+
+
+ A string to be parsed to obtain attribute values for Alignment, Borders, Font,
+ Pattern and Protection objects. Refer to the examples
+ in the file .../examples/xlwt_easyxf_simple_demo.py and to the xf_dict
+ dictionary in Style.py. Various synonyms including color/colour, center/centre and gray/grey
+ are allowed. Case is irrelevant (except maybe in font names). '-' may be used instead
+ of '_'.
+ Example: "font: bold on; align: wrap on, vert centre, horiz center"
+
+
+
num_format_str
+
+
+ To get the "number format string" of an existing cell whose format you want to reproduce,
+ select the cell and click on Format/Cells/Number/Custom. Otherwise, refer to Excel help.
+ Examples: "#,##0.00", "dd/mm/yyyy"
+
+ This is a class representing a workbook and all its contents.
+ When creating Excel files with xlwt, you will normally start by
+ instantiating an object of this class.
+
+ This method is used to write a cell to a Worksheet.
+
+
+
r
+
+ The zero-relative number of the row in the worksheet to which the cell should be written.
+
+
c
+
+ The zero-relative number of the column in the worksheet to which the cell should be written.
+
+
label
+
+ The data value to be written.
+ An int, long, or decimal.Decimal instance is converted to float.
+ A unicode instance is written as is.
+ A str instance is converted to unicode using the encoding (default: 'ascii') specified
+ when the Workbook instance was created.
+ A datetime.datetime, datetime.date, or datetime.time instance is converted into Excel date format
+ (a float representing the number of days since (typically) 1899-12-31T00:00:00,
+ under the pretence that 1900 was a leap year).
+ A bool instance will show up as TRUE or FALSE in Excel.
+ None causes the cell to be blank -- no data, only formatting.
+ An xlwt.Formula instance causes an Excel formula to be written.
+ [NC]
+
+
style
+
+ A style -- also known as an XF (extended format) -- is an XFStyle object, which encapsulates
+ the formatting applied to the cell and its contents. XFStyle objects are best set up using the
+ easyxf function. They may also be set up by setting attributes in
+ Alignment, Borders, Pattern, Font and Protection objects
+ then setting those objects and a format string as attributes of an XFStyle object.
+ [NC]
+
+
+
+
+
+
+
diff --git a/tablib/packages/xlwt/examples/big-16Mb.py b/tablib/packages/xlwt/examples/big-16Mb.py
new file mode 100644
index 0000000..91db123
--- /dev/null
+++ b/tablib/packages/xlwt/examples/big-16Mb.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# tries to stress SST, SAT and MSAT
+
+from time import *
+from xlwt.Workbook import *
+from xlwt.Style import *
+
+style = XFStyle()
+
+wb = Workbook()
+ws0 = wb.add_sheet('0')
+
+colcount = 200 + 1
+rowcount = 6000 + 1
+
+t0 = time()
+print "\nstart: %s" % ctime(t0)
+
+print "Filling..."
+for col in xrange(colcount):
+ print "[%d]" % col,
+ for row in xrange(rowcount):
+ #ws0.write(row, col, "BIG(%d, %d)" % (row, col))
+ ws0.write(row, col, "BIG")
+
+t1 = time() - t0
+print "\nsince starting elapsed %.2f s" % (t1)
+
+print "Storing..."
+wb.save('big-16Mb.xls')
+
+t2 = time() - t0
+print "since starting elapsed %.2f s" % (t2)
+
+
diff --git a/tablib/packages/xlwt/examples/big-35Mb.py b/tablib/packages/xlwt/examples/big-35Mb.py
new file mode 100644
index 0000000..74be5a7
--- /dev/null
+++ b/tablib/packages/xlwt/examples/big-35Mb.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# tries to stress SST, SAT and MSAT
+
+from time import *
+from xlwt import *
+
+style = XFStyle()
+
+wb = Workbook()
+ws0 = wb.add_sheet('0')
+
+colcount = 200 + 1
+rowcount = 6000 + 1
+
+t0 = time()
+print "\nstart: %s" % ctime(t0)
+
+print "Filling..."
+for col in xrange(colcount):
+ print "[%d]" % col,
+ for row in xrange(rowcount):
+ ws0.write(row, col, "BIG(%d, %d)" % (row, col))
+ #ws0.write(row, col, "BIG")
+
+t1 = time() - t0
+print "\nsince starting elapsed %.2f s" % (t1)
+
+print "Storing..."
+wb.save('big-35Mb.xls')
+
+t2 = time() - t0
+print "since starting elapsed %.2f s" % (t2)
+
+
diff --git a/tablib/packages/xlwt/examples/blanks.py b/tablib/packages/xlwt/examples/blanks.py
new file mode 100644
index 0000000..056a3ec
--- /dev/null
+++ b/tablib/packages/xlwt/examples/blanks.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+font0 = Font()
+font0.name = 'Times New Roman'
+font0.struck_out = True
+font0.bold = True
+
+style0 = XFStyle()
+style0.font = font0
+
+
+wb = Workbook()
+ws0 = wb.add_sheet('0')
+
+ws0.write(1, 1, 'Test', style0)
+
+for i in range(0, 0x53):
+ borders = Borders()
+ borders.left = i
+ borders.right = i
+ borders.top = i
+ borders.bottom = i
+
+ style = XFStyle()
+ style.borders = borders
+
+ ws0.write(i, 2, '', style)
+ ws0.write(i, 3, hex(i), style0)
+
+ws0.write_merge(5, 8, 6, 10, "")
+
+wb.save('blanks.xls')
diff --git a/tablib/packages/xlwt/examples/col_width.py b/tablib/packages/xlwt/examples/col_width.py
new file mode 100644
index 0000000..6e6cb33
--- /dev/null
+++ b/tablib/packages/xlwt/examples/col_width.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+__rev_id__ = """$Id: col_width.py 3315 2008-03-14 14:44:52Z chris $"""
+
+
+from xlwt import *
+
+w = Workbook()
+ws = w.add_sheet('Hey, Dude')
+
+for i in range(6, 80):
+ fnt = Font()
+ fnt.height = i*20
+ style = XFStyle()
+ style.font = fnt
+ ws.write(1, i, 'Test')
+ ws.col(i).width = 0x0d00 + i
+w.save('col_width.xls')
diff --git a/tablib/packages/xlwt/examples/country.py b/tablib/packages/xlwt/examples/country.py
new file mode 100644
index 0000000..cb26e29
--- /dev/null
+++ b/tablib/packages/xlwt/examples/country.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+# -*- coding: windows-1252 -*-
+# Copyright (C) 2007 John Machin
+
+from xlwt import *
+
+w = Workbook()
+w.country_code = 61
+ws = w.add_sheet('AU')
+w.save('country.xls')
diff --git a/tablib/packages/xlwt/examples/dates.py b/tablib/packages/xlwt/examples/dates.py
new file mode 100644
index 0000000..389b93b
--- /dev/null
+++ b/tablib/packages/xlwt/examples/dates.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+from datetime import datetime
+
+w = Workbook()
+ws = w.add_sheet('Hey, Dude')
+
+fmts = [
+ 'M/D/YY',
+ 'D-MMM-YY',
+ 'D-MMM',
+ 'MMM-YY',
+ 'h:mm AM/PM',
+ 'h:mm:ss AM/PM',
+ 'h:mm',
+ 'h:mm:ss',
+ 'M/D/YY h:mm',
+ 'mm:ss',
+ '[h]:mm:ss',
+ 'mm:ss.0',
+]
+
+i = 0
+for fmt in fmts:
+ ws.write(i, 0, fmt)
+
+ style = XFStyle()
+ style.num_format_str = fmt
+
+ ws.write(i, 4, datetime.now(), style)
+
+ i += 1
+
+w.save('dates.xls')
diff --git a/tablib/packages/xlwt/examples/format.py b/tablib/packages/xlwt/examples/format.py
new file mode 100644
index 0000000..fd49e0a
--- /dev/null
+++ b/tablib/packages/xlwt/examples/format.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+font0 = Font()
+font0.name = 'Times New Roman'
+font0.struck_out = True
+font0.bold = True
+
+style0 = XFStyle()
+style0.font = font0
+
+
+wb = Workbook()
+ws0 = wb.add_sheet('0')
+
+ws0.write(1, 1, 'Test', style0)
+
+for i in range(0, 0x53):
+ fnt = Font()
+ fnt.name = 'Arial'
+ fnt.colour_index = i
+ fnt.outline = True
+
+ borders = Borders()
+ borders.left = i
+
+ style = XFStyle()
+ style.font = fnt
+ style.borders = borders
+
+ ws0.write(i, 2, 'colour', style)
+ ws0.write(i, 3, hex(i), style0)
+
+
+wb.save('format.xls')
diff --git a/tablib/packages/xlwt/examples/formula_names.py b/tablib/packages/xlwt/examples/formula_names.py
new file mode 100644
index 0000000..f0354bc
--- /dev/null
+++ b/tablib/packages/xlwt/examples/formula_names.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+from xlwt.ExcelFormulaParser import FormulaParseException
+
+w = Workbook()
+ws = w.add_sheet('F')
+
+## This example is a little silly since the formula building is
+## so simplistic that it often fails because the generated text
+## has the wrong number of parameters for the function being
+## tested.
+
+i = 0
+succeed_count = 0
+fail_count = 0
+for n in sorted(ExcelMagic.std_func_by_name):
+ ws.write(i, 0, n)
+ text = n + "($A$1)"
+ try:
+ formula = Formula(text)
+ except FormulaParseException,e:
+ print "Could not parse %r: %s" % (text,e.args[0])
+ fail_count += 1
+ else:
+ ws.write(i, 3, formula)
+ succeed_count += 1
+ i += 1
+
+w.save('formula_names.xls')
+
+print "succeeded with %i functions, failed with %i" % (succeed_count,fail_count)
diff --git a/tablib/packages/xlwt/examples/formulas.py b/tablib/packages/xlwt/examples/formulas.py
new file mode 100644
index 0000000..b89f5f5
--- /dev/null
+++ b/tablib/packages/xlwt/examples/formulas.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws = w.add_sheet('F')
+
+ws.write(0, 0, Formula("-(1+1)"))
+ws.write(1, 0, Formula("-(1+1)/(-2-2)"))
+ws.write(2, 0, Formula("-(134.8780789+1)"))
+ws.write(3, 0, Formula("-(134.8780789e-10+1)"))
+ws.write(4, 0, Formula("-1/(1+1)+9344"))
+
+ws.write(0, 1, Formula("-(1+1)"))
+ws.write(1, 1, Formula("-(1+1)/(-2-2)"))
+ws.write(2, 1, Formula("-(134.8780789+1)"))
+ws.write(3, 1, Formula("-(134.8780789e-10+1)"))
+ws.write(4, 1, Formula("-1/(1+1)+9344"))
+
+ws.write(0, 2, Formula("A1*B1"))
+ws.write(1, 2, Formula("A2*B2"))
+ws.write(2, 2, Formula("A3*B3"))
+ws.write(3, 2, Formula("A4*B4*sin(pi()/4)"))
+ws.write(4, 2, Formula("A5%*B5*pi()/1000"))
+
+##############
+## NOTE: parameters are separated by semicolon!!!
+##############
+
+
+ws.write(5, 2, Formula("C1+C2+C3+C4+C5/(C1+C2+C3+C4/(C1+C2+C3+C4/(C1+C2+C3+C4)+C5)+C5)-20.3e-2"))
+ws.write(5, 3, Formula("C1^2"))
+ws.write(6, 2, Formula("SUM(C1;C2;;;;;C3;;;C4)"))
+ws.write(6, 3, Formula("SUM($A$1:$C$5)"))
+
+ws.write(7, 0, Formula('"lkjljllkllkl"'))
+ws.write(7, 1, Formula('"yuyiyiyiyi"'))
+ws.write(7, 2, Formula('A8 & B8 & A8'))
+ws.write(8, 2, Formula('now()'))
+
+ws.write(10, 2, Formula('TRUE'))
+ws.write(11, 2, Formula('FALSE'))
+ws.write(12, 3, Formula('IF(A1>A2;3;"hkjhjkhk")'))
+
+w.save('formulas.xls')
diff --git a/tablib/packages/xlwt/examples/hyperlinks.py b/tablib/packages/xlwt/examples/hyperlinks.py
new file mode 100644
index 0000000..8de611b
--- /dev/null
+++ b/tablib/packages/xlwt/examples/hyperlinks.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+f = Font()
+f.height = 20*72
+f.name = 'Verdana'
+f.bold = True
+f.underline = Font.UNDERLINE_DOUBLE
+f.colour_index = 4
+
+h_style = XFStyle()
+h_style.font = f
+
+w = Workbook()
+ws = w.add_sheet('F')
+
+##############
+## NOTE: parameters are separated by semicolon!!!
+##############
+
+n = "HYPERLINK"
+ws.write_merge(1, 1, 1, 10, Formula(n + '("http://www.irs.gov/pub/irs-pdf/f1000.pdf";"f1000.pdf")'), h_style)
+ws.write_merge(2, 2, 2, 25, Formula(n + '("mailto:roman.kiseliov@gmail.com?subject=pyExcelerator-feedback&Body=Hello,%20Roman!";"pyExcelerator-feedback")'), h_style)
+
+w.save("hyperlinks.xls")
diff --git a/tablib/packages/xlwt/examples/image.py b/tablib/packages/xlwt/examples/image.py
new file mode 100644
index 0000000..f926b8d
--- /dev/null
+++ b/tablib/packages/xlwt/examples/image.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws = w.add_sheet('Image')
+ws.insert_bitmap('python.bmp', 2, 2)
+ws.insert_bitmap('python.bmp', 10, 2)
+
+w.save('image.xls')
diff --git a/tablib/packages/xlwt/examples/merged.py b/tablib/packages/xlwt/examples/merged.py
new file mode 100644
index 0000000..f7f9c57
--- /dev/null
+++ b/tablib/packages/xlwt/examples/merged.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+fnt = Font()
+fnt.name = 'Arial'
+fnt.colour_index = 4
+fnt.bold = True
+
+borders = Borders()
+borders.left = 6
+borders.right = 6
+borders.top = 6
+borders.bottom = 6
+
+al = Alignment()
+al.horz = Alignment.HORZ_CENTER
+al.vert = Alignment.VERT_CENTER
+
+style = XFStyle()
+style.font = fnt
+style.borders = borders
+style.alignment = al
+
+
+wb = Workbook()
+ws0 = wb.add_sheet('sheet0')
+ws1 = wb.add_sheet('sheet1')
+ws2 = wb.add_sheet('sheet2')
+
+for i in range(0, 0x200, 2):
+ ws0.write_merge(i, i+1, 1, 5, 'test %d' % i, style)
+ ws1.write_merge(i, i, 1, 7, 'test %d' % i, style)
+ ws2.write_merge(i, i+1, 1, 7 + (i%10), 'test %d' % i, style)
+
+
+wb.save('merged.xls')
diff --git a/tablib/packages/xlwt/examples/merged0.py b/tablib/packages/xlwt/examples/merged0.py
new file mode 100644
index 0000000..93496c6
--- /dev/null
+++ b/tablib/packages/xlwt/examples/merged0.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+wb = Workbook()
+ws0 = wb.add_sheet('sheet0')
+
+
+fnt = Font()
+fnt.name = 'Arial'
+fnt.colour_index = 4
+fnt.bold = True
+
+borders = Borders()
+borders.left = 6
+borders.right = 6
+borders.top = 6
+borders.bottom = 6
+
+style = XFStyle()
+style.font = fnt
+style.borders = borders
+
+ws0.write_merge(3, 3, 1, 5, 'test1', style)
+ws0.write_merge(4, 10, 1, 5, 'test2', style)
+ws0.col(1).width = 0x0d00
+
+wb.save('merged0.xls')
diff --git a/tablib/packages/xlwt/examples/merged1.py b/tablib/packages/xlwt/examples/merged1.py
new file mode 100644
index 0000000..813530b
--- /dev/null
+++ b/tablib/packages/xlwt/examples/merged1.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+wb = Workbook()
+ws0 = wb.add_sheet('sheet0')
+
+fnt1 = Font()
+fnt1.name = 'Verdana'
+fnt1.bold = True
+fnt1.height = 18*0x14
+
+pat1 = Pattern()
+pat1.pattern = Pattern.SOLID_PATTERN
+pat1.pattern_fore_colour = 0x16
+
+brd1 = Borders()
+brd1.left = 0x06
+brd1.right = 0x06
+brd1.top = 0x06
+brd1.bottom = 0x06
+
+fnt2 = Font()
+fnt2.name = 'Verdana'
+fnt2.bold = True
+fnt2.height = 14*0x14
+
+brd2 = Borders()
+brd2.left = 0x01
+brd2.right = 0x01
+brd2.top = 0x01
+brd2.bottom = 0x01
+
+pat2 = Pattern()
+pat2.pattern = Pattern.SOLID_PATTERN
+pat2.pattern_fore_colour = 0x01F
+
+fnt3 = Font()
+fnt3.name = 'Verdana'
+fnt3.bold = True
+fnt3.italic = True
+fnt3.height = 12*0x14
+
+brd3 = Borders()
+brd3.left = 0x07
+brd3.right = 0x07
+brd3.top = 0x07
+brd3.bottom = 0x07
+
+fnt4 = Font()
+
+al1 = Alignment()
+al1.horz = Alignment.HORZ_CENTER
+al1.vert = Alignment.VERT_CENTER
+
+al2 = Alignment()
+al2.horz = Alignment.HORZ_RIGHT
+al2.vert = Alignment.VERT_CENTER
+
+al3 = Alignment()
+al3.horz = Alignment.HORZ_LEFT
+al3.vert = Alignment.VERT_CENTER
+
+style1 = XFStyle()
+style1.font = fnt1
+style1.alignment = al1
+style1.pattern = pat1
+style1.borders = brd1
+
+style2 = XFStyle()
+style2.font = fnt2
+style2.alignment = al1
+style2.pattern = pat2
+style2.borders = brd2
+
+style3 = XFStyle()
+style3.font = fnt3
+style3.alignment = al1
+style3.pattern = pat2
+style3.borders = brd3
+
+price_style = XFStyle()
+price_style.font = fnt4
+price_style.alignment = al2
+price_style.borders = brd3
+price_style.num_format_str = '_(#,##0.00_) "money"'
+
+ware_style = XFStyle()
+ware_style.font = fnt4
+ware_style.alignment = al3
+ware_style.borders = brd3
+
+
+ws0.merge(3, 3, 1, 5, style1)
+ws0.merge(4, 10, 1, 6, style2)
+ws0.merge(14, 16, 1, 7, style3)
+ws0.col(1).width = 0x0d00
+
+
+wb.save('merged1.xls')
diff --git a/tablib/packages/xlwt/examples/mini.py b/tablib/packages/xlwt/examples/mini.py
new file mode 100644
index 0000000..61bb30c
--- /dev/null
+++ b/tablib/packages/xlwt/examples/mini.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws = w.add_sheet('xlwt was here')
+w.save('mini.xls')
diff --git a/tablib/packages/xlwt/examples/num_formats.py b/tablib/packages/xlwt/examples/num_formats.py
new file mode 100644
index 0000000..3a56f6c
--- /dev/null
+++ b/tablib/packages/xlwt/examples/num_formats.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws = w.add_sheet('Hey, Dude')
+
+fmts = [
+ 'general',
+ '0',
+ '0.00',
+ '#,##0',
+ '#,##0.00',
+ '"$"#,##0_);("$"#,##0)',
+ '"$"#,##0_);[Red]("$"#,##0)',
+ '"$"#,##0.00_);("$"#,##0.00)',
+ '"$"#,##0.00_);[Red]("$"#,##0.00)',
+ '0%',
+ '0.00%',
+ '0.00E+00',
+ '# ?/?',
+ '# ??/??',
+ 'M/D/YY',
+ 'D-MMM-YY',
+ 'D-MMM',
+ 'MMM-YY',
+ 'h:mm AM/PM',
+ 'h:mm:ss AM/PM',
+ 'h:mm',
+ 'h:mm:ss',
+ 'M/D/YY h:mm',
+ '_(#,##0_);(#,##0)',
+ '_(#,##0_);[Red](#,##0)',
+ '_(#,##0.00_);(#,##0.00)',
+ '_(#,##0.00_);[Red](#,##0.00)',
+ '_("$"* #,##0_);_("$"* (#,##0);_("$"* "-"_);_(@_)',
+ '_(* #,##0_);_(* (#,##0);_(* "-"_);_(@_)',
+ '_("$"* #,##0.00_);_("$"* (#,##0.00);_("$"* "-"??_);_(@_)',
+ '_(* #,##0.00_);_(* (#,##0.00);_(* "-"??_);_(@_)',
+ 'mm:ss',
+ '[h]:mm:ss',
+ 'mm:ss.0',
+ '##0.0E+0',
+ '@'
+]
+
+i = 0
+for fmt in fmts:
+ ws.write(i, 0, fmt)
+
+ style = XFStyle()
+ style.num_format_str = fmt
+
+ ws.write(i, 4, -1278.9078, style)
+
+ i += 1
+
+w.save('num_formats.xls')
diff --git a/tablib/packages/xlwt/examples/numbers.py b/tablib/packages/xlwt/examples/numbers.py
new file mode 100644
index 0000000..524d9fc
--- /dev/null
+++ b/tablib/packages/xlwt/examples/numbers.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws = w.add_sheet('Hey, Dude')
+
+ws.write(0, 0, 1)
+ws.write(1, 0, 1.23)
+ws.write(2, 0, 12345678)
+ws.write(3, 0, 123456.78)
+
+ws.write(0, 1, -1)
+ws.write(1, 1, -1.23)
+ws.write(2, 1, -12345678)
+ws.write(3, 1, -123456.78)
+
+ws.write(0, 2, -17867868678687.0)
+ws.write(1, 2, -1.23e-5)
+ws.write(2, 2, -12345678.90780980)
+ws.write(3, 2, -123456.78)
+
+w.save('numbers.xls')
diff --git a/tablib/packages/xlwt/examples/outline.py b/tablib/packages/xlwt/examples/outline.py
new file mode 100644
index 0000000..45b8df9
--- /dev/null
+++ b/tablib/packages/xlwt/examples/outline.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+fnt = Font()
+fnt.name = 'Arial'
+fnt.colour_index = 4
+fnt.bold = True
+
+borders = Borders()
+borders.left = 6
+borders.right = 6
+borders.top = 6
+borders.bottom = 6
+
+style = XFStyle()
+style.font = fnt
+style.borders = borders
+
+wb = Workbook()
+
+ws0 = wb.add_sheet('Rows Outline')
+
+ws0.write_merge(1, 1, 1, 5, 'test 1', style)
+ws0.write_merge(2, 2, 1, 4, 'test 1', style)
+ws0.write_merge(3, 3, 1, 3, 'test 2', style)
+ws0.write_merge(4, 4, 1, 4, 'test 1', style)
+ws0.write_merge(5, 5, 1, 4, 'test 3', style)
+ws0.write_merge(6, 6, 1, 5, 'test 1', style)
+ws0.write_merge(7, 7, 1, 5, 'test 4', style)
+ws0.write_merge(8, 8, 1, 4, 'test 1', style)
+ws0.write_merge(9, 9, 1, 3, 'test 5', style)
+
+ws0.row(1).level = 1
+ws0.row(2).level = 1
+ws0.row(3).level = 2
+ws0.row(4).level = 2
+ws0.row(5).level = 2
+ws0.row(6).level = 2
+ws0.row(7).level = 2
+ws0.row(8).level = 1
+ws0.row(9).level = 1
+
+
+ws1 = wb.add_sheet('Columns Outline')
+
+ws1.write_merge(1, 1, 1, 5, 'test 1', style)
+ws1.write_merge(2, 2, 1, 4, 'test 1', style)
+ws1.write_merge(3, 3, 1, 3, 'test 2', style)
+ws1.write_merge(4, 4, 1, 4, 'test 1', style)
+ws1.write_merge(5, 5, 1, 4, 'test 3', style)
+ws1.write_merge(6, 6, 1, 5, 'test 1', style)
+ws1.write_merge(7, 7, 1, 5, 'test 4', style)
+ws1.write_merge(8, 8, 1, 4, 'test 1', style)
+ws1.write_merge(9, 9, 1, 3, 'test 5', style)
+
+ws1.col(1).level = 1
+ws1.col(2).level = 1
+ws1.col(3).level = 2
+ws1.col(4).level = 2
+ws1.col(5).level = 2
+ws1.col(6).level = 2
+ws1.col(7).level = 2
+ws1.col(8).level = 1
+ws1.col(9).level = 1
+
+
+ws2 = wb.add_sheet('Rows and Columns Outline')
+
+# Merged cells are written once per sheet; the row and column outline
+# levels below are then both applied to the same cells.
+ws2.write_merge(1, 1, 1, 5, 'test 1', style)
+ws2.write_merge(2, 2, 1, 4, 'test 1', style)
+ws2.write_merge(3, 3, 1, 3, 'test 2', style)
+ws2.write_merge(4, 4, 1, 4, 'test 1', style)
+ws2.write_merge(5, 5, 1, 4, 'test 3', style)
+ws2.write_merge(6, 6, 1, 5, 'test 1', style)
+ws2.write_merge(7, 7, 1, 5, 'test 4', style)
+ws2.write_merge(8, 8, 1, 4, 'test 1', style)
+ws2.write_merge(9, 9, 1, 3, 'test 5', style)
+
+ws2.row(1).level = 1
+ws2.row(2).level = 1
+ws2.row(3).level = 2
+ws2.row(4).level = 2
+ws2.row(5).level = 2
+ws2.row(6).level = 2
+ws2.row(7).level = 2
+ws2.row(8).level = 1
+ws2.row(9).level = 1
+
+ws2.col(1).level = 1
+ws2.col(2).level = 1
+ws2.col(3).level = 2
+ws2.col(4).level = 2
+ws2.col(5).level = 2
+ws2.col(6).level = 2
+ws2.col(7).level = 2
+ws2.col(8).level = 1
+ws2.col(9).level = 1
+
+
+wb.save('outline.xls')
diff --git a/tablib/packages/xlwt/examples/panes.py b/tablib/packages/xlwt/examples/panes.py
new file mode 100644
index 0000000..9fd83b0
--- /dev/null
+++ b/tablib/packages/xlwt/examples/panes.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws1 = w.add_sheet('sheet 1')
+ws2 = w.add_sheet('sheet 2')
+ws3 = w.add_sheet('sheet 3')
+ws4 = w.add_sheet('sheet 4')
+ws5 = w.add_sheet('sheet 5')
+ws6 = w.add_sheet('sheet 6')
+
+for i in range(0x100):
+ ws1.write(i/0x10, i%0x10, i)
+
+for i in range(0x100):
+ ws2.write(i/0x10, i%0x10, i)
+
+for i in range(0x100):
+ ws3.write(i/0x10, i%0x10, i)
+
+for i in range(0x100):
+ ws4.write(i/0x10, i%0x10, i)
+
+for i in range(0x100):
+ ws5.write(i/0x10, i%0x10, i)
+
+for i in range(0x100):
+ ws6.write(i/0x10, i%0x10, i)
+
+ws1.panes_frozen = True
+ws1.horz_split_pos = 2
+
+ws2.panes_frozen = True
+ws2.vert_split_pos = 2
+
+ws3.panes_frozen = True
+ws3.horz_split_pos = 1
+ws3.vert_split_pos = 1
+
+ws4.panes_frozen = False
+ws4.horz_split_pos = 12
+ws4.horz_split_first_visible = 2
+
+ws5.panes_frozen = False
+ws5.vert_split_pos = 40
+ws5.vert_split_first_visible = 2
+
+ws6.panes_frozen = False
+ws6.horz_split_pos = 12
+ws6.horz_split_first_visible = 2
+ws6.vert_split_pos = 40
+ws6.vert_split_first_visible = 2
+
+w.save('panes.xls')
+
diff --git a/tablib/packages/xlwt/examples/parse-fmla.py b/tablib/packages/xlwt/examples/parse-fmla.py
new file mode 100644
index 0000000..06f68eb
--- /dev/null
+++ b/tablib/packages/xlwt/examples/parse-fmla.py
@@ -0,0 +1,12 @@
+from xlwt import ExcelFormulaParser, ExcelFormula
+import sys
+
+f = ExcelFormula.Formula(
+""" -((1.80 + 2.898 * 1)/(1.80 + 2.898))*
+AVERAGE((1.80 + 2.898 * 1)/(1.80 + 2.898);
+ (1.80 + 2.898 * 1)/(1.80 + 2.898);
+ (1.80 + 2.898 * 1)/(1.80 + 2.898)) +
+SIN(PI()/4)""")
+
+#for t in f.rpn():
+# print "%15s %15s" % (ExcelFormulaParser.PtgNames[t[0]], t[1])
diff --git a/tablib/packages/xlwt/examples/protection.py b/tablib/packages/xlwt/examples/protection.py
new file mode 100644
index 0000000..db54cb0
--- /dev/null
+++ b/tablib/packages/xlwt/examples/protection.py
@@ -0,0 +1,122 @@
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+fnt = Font()
+fnt.name = 'Arial'
+fnt.colour_index = 4
+fnt.bold = True
+
+borders = Borders()
+borders.left = 6
+borders.right = 6
+borders.top = 6
+borders.bottom = 6
+
+style = XFStyle()
+style.font = fnt
+style.borders = borders
+
+wb = Workbook()
+
+ws0 = wb.add_sheet('Rows Outline')
+
+ws0.write_merge(1, 1, 1, 5, 'test 1', style)
+ws0.write_merge(2, 2, 1, 4, 'test 1', style)
+ws0.write_merge(3, 3, 1, 3, 'test 2', style)
+ws0.write_merge(4, 4, 1, 4, 'test 1', style)
+ws0.write_merge(5, 5, 1, 4, 'test 3', style)
+ws0.write_merge(6, 6, 1, 5, 'test 1', style)
+ws0.write_merge(7, 7, 1, 5, 'test 4', style)
+ws0.write_merge(8, 8, 1, 4, 'test 1', style)
+ws0.write_merge(9, 9, 1, 3, 'test 5', style)
+
+ws0.row(1).level = 1
+ws0.row(2).level = 1
+ws0.row(3).level = 2
+ws0.row(4).level = 2
+ws0.row(5).level = 2
+ws0.row(6).level = 2
+ws0.row(7).level = 2
+ws0.row(8).level = 1
+ws0.row(9).level = 1
+
+
+ws1 = wb.add_sheet('Columns Outline')
+
+ws1.write_merge(1, 1, 1, 5, 'test 1', style)
+ws1.write_merge(2, 2, 1, 4, 'test 1', style)
+ws1.write_merge(3, 3, 1, 3, 'test 2', style)
+ws1.write_merge(4, 4, 1, 4, 'test 1', style)
+ws1.write_merge(5, 5, 1, 4, 'test 3', style)
+ws1.write_merge(6, 6, 1, 5, 'test 1', style)
+ws1.write_merge(7, 7, 1, 5, 'test 4', style)
+ws1.write_merge(8, 8, 1, 4, 'test 1', style)
+ws1.write_merge(9, 9, 1, 3, 'test 5', style)
+
+ws1.col(1).level = 1
+ws1.col(2).level = 1
+ws1.col(3).level = 2
+ws1.col(4).level = 2
+ws1.col(5).level = 2
+ws1.col(6).level = 2
+ws1.col(7).level = 2
+ws1.col(8).level = 1
+ws1.col(9).level = 1
+
+
+ws2 = wb.add_sheet('Rows and Columns Outline')
+
+ws2.write_merge(1, 1, 1, 5, 'test 1', style)
+ws2.write_merge(2, 2, 1, 4, 'test 1', style)
+ws2.write_merge(3, 3, 1, 3, 'test 2', style)
+ws2.write_merge(4, 4, 1, 4, 'test 1', style)
+ws2.write_merge(5, 5, 1, 4, 'test 3', style)
+ws2.write_merge(6, 6, 1, 5, 'test 1', style)
+ws2.write_merge(7, 7, 1, 5, 'test 4', style)
+ws2.write_merge(8, 8, 1, 4, 'test 1', style)
+ws2.write_merge(9, 9, 1, 3, 'test 5', style)
+
+ws2.row(1).level = 1
+ws2.row(2).level = 1
+ws2.row(3).level = 2
+ws2.row(4).level = 2
+ws2.row(5).level = 2
+ws2.row(6).level = 2
+ws2.row(7).level = 2
+ws2.row(8).level = 1
+ws2.row(9).level = 1
+
+ws2.col(1).level = 1
+ws2.col(2).level = 1
+ws2.col(3).level = 2
+ws2.col(4).level = 2
+ws2.col(5).level = 2
+ws2.col(6).level = 2
+ws2.col(7).level = 2
+ws2.col(8).level = 1
+ws2.col(9).level = 1
+
+
+ws0.protect = True
+ws0.wnd_protect = True
+ws0.obj_protect = True
+ws0.scen_protect = True
+ws0.password = "123456"
+
+ws1.protect = True
+ws1.wnd_protect = True
+ws1.obj_protect = True
+ws1.scen_protect = True
+ws1.password = "abcdefghij"
+
+ws2.protect = True
+ws2.wnd_protect = True
+ws2.obj_protect = True
+ws2.scen_protect = True
+ws2.password = "ok"
+
+wb.protect = True
+wb.wnd_protect = True
+wb.obj_protect = True
+wb.save('protection.xls')
diff --git a/tablib/packages/xlwt/examples/python.bmp b/tablib/packages/xlwt/examples/python.bmp
new file mode 100644
index 0000000..bd1ba3f
Binary files /dev/null and b/tablib/packages/xlwt/examples/python.bmp differ
diff --git a/tablib/packages/xlwt/examples/row_styles.py b/tablib/packages/xlwt/examples/row_styles.py
new file mode 100644
index 0000000..dd6d494
--- /dev/null
+++ b/tablib/packages/xlwt/examples/row_styles.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws = w.add_sheet('Hey, Dude')
+
+for i in range(6, 80):
+ fnt = Font()
+ fnt.height = i*20
+ style = XFStyle()
+ style.font = fnt
+ ws.write(i, 1, 'Test')
+ ws.row(i).set_style(style)
+w.save('row_styles.xls')
diff --git a/tablib/packages/xlwt/examples/row_styles_empty.py b/tablib/packages/xlwt/examples/row_styles_empty.py
new file mode 100644
index 0000000..cf6a65c
--- /dev/null
+++ b/tablib/packages/xlwt/examples/row_styles_empty.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+__rev_id__ = """$Id: row_styles_empty.py 3309 2008-03-14 11:04:30Z chris $"""
+
+
+from xlwt import *
+
+w = Workbook()
+ws = w.add_sheet('Hey, Dude')
+
+for i in range(6, 80):
+ fnt = Font()
+ fnt.height = i*20
+ style = XFStyle()
+ style.font = fnt
+ ws.row(i).set_style(style)
+w.save('row_styles_empty.xls')
diff --git a/tablib/packages/xlwt/examples/simple.py b/tablib/packages/xlwt/examples/simple.py
new file mode 100644
index 0000000..44f7622
--- /dev/null
+++ b/tablib/packages/xlwt/examples/simple.py
@@ -0,0 +1,24 @@
+import xlwt
+from datetime import datetime
+
+font0 = xlwt.Font()
+font0.name = 'Times New Roman'
+font0.colour_index = 2
+font0.bold = True
+
+style0 = xlwt.XFStyle()
+style0.font = font0
+
+style1 = xlwt.XFStyle()
+style1.num_format_str = 'D-MMM-YY'
+
+wb = xlwt.Workbook()
+ws = wb.add_sheet('A Test Sheet')
+
+ws.write(0, 0, 'Test', style0)
+ws.write(1, 0, datetime.now(), style1)
+ws.write(2, 0, 1)
+ws.write(2, 1, 1)
+ws.write(2, 2, xlwt.Formula("A3+B3"))
+
+wb.save('example.xls')
diff --git a/tablib/packages/xlwt/examples/sst.py b/tablib/packages/xlwt/examples/sst.py
new file mode 100644
index 0000000..b91c2f5
--- /dev/null
+++ b/tablib/packages/xlwt/examples/sst.py
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+font0 = Formatting.Font()
+font0.name = 'Arial'
+font1 = Formatting.Font()
+font1.name = 'Arial Cyr'
+font2 = Formatting.Font()
+font2.name = 'Times New Roman'
+font3 = Formatting.Font()
+font3.name = 'Courier New Cyr'
+
+num_format0 = '0.00000'
+num_format1 = '0.000000'
+num_format2 = '0.0000000'
+num_format3 = '0.00000000'
+
+st0 = XFStyle()
+st1 = XFStyle()
+st2 = XFStyle()
+st3 = XFStyle()
+st4 = XFStyle()
+
+st0.font = font0
+st0.num_format_str = num_format0
+
+st1.font = font1
+st1.num_format_str = num_format1
+
+st2.font = font2
+st2.num_format_str = num_format2
+
+st3.font = font3
+st3.num_format_str = num_format3
+
+wb = Workbook()
+
+wb.add_style(st0)
+wb.add_style(st1)
+wb.add_style(st2)
+wb.add_style(st3)
+
+ws0 = wb.add_sheet('0')
+ws0.write(0, 0, 'Olya'*0x4000, st0)
+
+#for i in range(0, 0x10):
+# ws0.write(i, 2, ('%d'%i)*0x4000, st1)
+
+wb.save('sst.xls')
diff --git a/tablib/packages/xlwt/examples/unicode0.py b/tablib/packages/xlwt/examples/unicode0.py
new file mode 100644
index 0000000..3651ec9
--- /dev/null
+++ b/tablib/packages/xlwt/examples/unicode0.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+import xlwt
+
+# Strings passed to (for example) Worksheet.write can be unicode objects,
+# or str (8-bit) objects, which are then decoded into unicode.
+# The encoding to be used defaults to 'ascii'. This can be overridden
+# when the Workbook instance is created:
+
+book = xlwt.Workbook(encoding='cp1251')
+sheet = book.add_sheet('cp1251-demo')
+sheet.write(0, 0, '\xce\xeb\xff')
+book.save('unicode0.xls')
diff --git a/tablib/packages/xlwt/examples/unicode1.py b/tablib/packages/xlwt/examples/unicode1.py
new file mode 100644
index 0000000..90e99cc
--- /dev/null
+++ b/tablib/packages/xlwt/examples/unicode1.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws1 = w.add_sheet(u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK SMALL LETTER BETA}\N{GREEK SMALL LETTER GAMMA}')
+
+ws1.write(0, 0, u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK SMALL LETTER BETA}\N{GREEK SMALL LETTER GAMMA}')
+ws1.write(1, 1, u'\N{GREEK SMALL LETTER DELTA}x = 1 + \N{GREEK SMALL LETTER DELTA}')
+
+ws1.write(2,0, u'A\u2262\u0391.') # RFC2152 example
+ws1.write(3,0, u'Hi Mom -\u263a-!') # RFC2152 example
+ws1.write(4,0, u'\u65E5\u672C\u8A9E') # RFC2152 example
+ws1.write(5,0, u'Item 3 is \u00a31.') # RFC2152 example
+ws1.write(8,0, u'\N{INTEGRAL}') # RFC2152 example
+
+w.add_sheet(u'A\u2262\u0391.') # RFC2152 example
+w.add_sheet(u'Hi Mom -\u263a-!') # RFC2152 example
+one_more_ws = w.add_sheet(u'\u65E5\u672C\u8A9E') # RFC2152 example
+w.add_sheet(u'Item 3 is \u00a31.') # RFC2152 example
+
+one_more_ws.write(0, 0, u'\u2665\u2665')
+
+w.add_sheet(u'\N{GREEK SMALL LETTER ETA WITH TONOS}')
+w.save('unicode1.xls')
+
diff --git a/tablib/packages/xlwt/examples/unicode2.py b/tablib/packages/xlwt/examples/unicode2.py
new file mode 100644
index 0000000..18904b0
--- /dev/null
+++ b/tablib/packages/xlwt/examples/unicode2.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+# -*- coding: windows-1251 -*-
+# Copyright (C) 2005 Kiseliov Roman
+
+from xlwt import *
+
+w = Workbook()
+ws1 = w.add_sheet(u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK SMALL LETTER BETA}\N{GREEK SMALL LETTER GAMMA}\u2665\u041e\u041b\u042f\u2665')
+
+fnt = Font()
+fnt.height = 26*20
+style = XFStyle()
+style.font = fnt
+
+for i in range(0x10000):
+ ws1.write(i/0x10, i%0x10, unichr(i), style)
+
+w.save('unicode2.xls')
+
diff --git a/tablib/packages/xlwt/examples/wsprops.py b/tablib/packages/xlwt/examples/wsprops.py
new file mode 100644
index 0000000..adc5a04
--- /dev/null
+++ b/tablib/packages/xlwt/examples/wsprops.py
@@ -0,0 +1,155 @@
+props = \
+[
+ 'name',
+ 'parent',
+ 'rows',
+ 'cols',
+ 'merged_ranges',
+ 'bmp_rec',
+ 'show_formulas',
+ 'show_grid',
+ 'show_headers',
+ 'panes_frozen',
+ 'show_empty_as_zero',
+ 'auto_colour_grid',
+ 'cols_right_to_left',
+ 'show_outline',
+ 'remove_splits',
+ 'selected',
+ 'hidden',
+ 'page_preview',
+ 'first_visible_row',
+ 'first_visible_col',
+ 'grid_colour',
+ 'preview_magn',
+ 'normal_magn',
+ 'row_gut_width',
+ 'col_gut_height',
+ 'show_auto_page_breaks',
+ 'dialogue_sheet',
+ 'auto_style_outline',
+ 'outline_below',
+ 'outline_right',
+ 'fit_num_pages',
+ 'show_row_outline',
+ 'show_col_outline',
+ 'alt_expr_eval',
+ 'alt_formula_entries',
+ 'row_default_height',
+ 'col_default_width',
+ 'calc_mode',
+ 'calc_count',
+ 'RC_ref_mode',
+ 'iterations_on',
+ 'delta',
+ 'save_recalc',
+ 'print_headers',
+ 'print_grid',
+ 'grid_set',
+ 'vert_page_breaks',
+ 'horz_page_breaks',
+ 'header_str',
+ 'footer_str',
+ 'print_centered_vert',
+ 'print_centered_horz',
+ 'left_margin',
+ 'right_margin',
+ 'top_margin',
+ 'bottom_margin',
+ 'paper_size_code',
+ 'print_scaling',
+ 'start_page_number',
+ 'fit_width_to_pages',
+ 'fit_height_to_pages',
+ 'print_in_rows',
+ 'portrait',
+ 'print_not_colour',
+ 'print_draft',
+ 'print_notes',
+ 'print_notes_at_end',
+ 'print_omit_errors',
+ 'print_hres',
+ 'print_vres',
+ 'header_margin',
+ 'footer_margin',
+ 'copies_num',
+]
+
+from xlwt import *
+
+wb = Workbook()
+ws = wb.add_sheet('sheet')
+
+print ws.name
+print ws.parent
+print ws.rows
+print ws.cols
+print ws.merged_ranges
+print ws.bmp_rec
+print ws.show_formulas
+print ws.show_grid
+print ws.show_headers
+print ws.panes_frozen
+print ws.show_empty_as_zero
+print ws.auto_colour_grid
+print ws.cols_right_to_left
+print ws.show_outline
+print ws.remove_splits
+print ws.selected
+# print ws.hidden
+print ws.page_preview
+print ws.first_visible_row
+print ws.first_visible_col
+print ws.grid_colour
+print ws.preview_magn
+print ws.normal_magn
+#print ws.row_gut_width
+#print ws.col_gut_height
+print ws.show_auto_page_breaks
+print ws.dialogue_sheet
+print ws.auto_style_outline
+print ws.outline_below
+print ws.outline_right
+print ws.fit_num_pages
+print ws.show_row_outline
+print ws.show_col_outline
+print ws.alt_expr_eval
+print ws.alt_formula_entries
+print ws.row_default_height
+print ws.col_default_width
+print ws.calc_mode
+print ws.calc_count
+print ws.RC_ref_mode
+print ws.iterations_on
+print ws.delta
+print ws.save_recalc
+print ws.print_headers
+print ws.print_grid
+#print ws.grid_set
+print ws.vert_page_breaks
+print ws.horz_page_breaks
+print ws.header_str
+print ws.footer_str
+print ws.print_centered_vert
+print ws.print_centered_horz
+print ws.left_margin
+print ws.right_margin
+print ws.top_margin
+print ws.bottom_margin
+print ws.paper_size_code
+print ws.print_scaling
+print ws.start_page_number
+print ws.fit_width_to_pages
+print ws.fit_height_to_pages
+print ws.print_in_rows
+print ws.portrait
+print ws.print_colour
+print ws.print_draft
+print ws.print_notes
+print ws.print_notes_at_end
+print ws.print_omit_errors
+print ws.print_hres
+print ws.print_vres
+print ws.header_margin
+print ws.footer_margin
+print ws.copies_num
diff --git a/tablib/packages/xlwt/examples/xlwt_easyxf_simple_demo.py b/tablib/packages/xlwt/examples/xlwt_easyxf_simple_demo.py
new file mode 100644
index 0000000..2afa69f
--- /dev/null
+++ b/tablib/packages/xlwt/examples/xlwt_easyxf_simple_demo.py
@@ -0,0 +1,46 @@
+
+# Write an XLS file with a single worksheet, containing
+# a heading row and some rows of data.
+
+import xlwt
+import datetime
+ezxf = xlwt.easyxf
+
+def write_xls(file_name, sheet_name, headings, data, heading_xf, data_xfs):  # one-sheet workbook: heading row, then one row per item of data
+ book = xlwt.Workbook()
+ sheet = book.add_sheet(sheet_name)
+ rowx = 0  # the headings occupy row 0
+ for colx, value in enumerate(headings):
+ sheet.write(rowx, colx, value, heading_xf)
+ sheet.set_panes_frozen(True) # frozen headings instead of split panes
+ sheet.set_horz_split_pos(rowx+1) # in general, freeze after last heading row
+ sheet.set_remove_splits(True) # if user does unfreeze, don't leave a split there
+ for row in data:
+ rowx += 1  # data rows start directly below the heading row
+ for colx, value in enumerate(row):
+ sheet.write(rowx, colx, value, data_xfs[colx])  # style is looked up by column index
+ book.save(file_name)
+
+if __name__ == '__main__':
+ import sys  # NOTE(review): sys is not referenced anywhere in this demo
+ mkd = datetime.date
+ hdngs = ['Date', 'Stock Code', 'Quantity', 'Unit Price', 'Value', 'Message']
+ kinds = 'date text int price money text'.split()  # one kind per column, in the same order as hdngs
+ data = [
+ [mkd(2007, 7, 1), 'ABC', 1000, 1.234567, 1234.57, ''],
+ [mkd(2007, 12, 31), 'XYZ', -100, 4.654321, -465.43, 'Goods returned'],
+ ] + [
+ [mkd(2008, 6, 30), 'PQRCD', 100, 2.345678, 234.57, ''],
+ ] * 100  # only this last one-row list is repeated (* binds tighter than +)
+
+ heading_xf = ezxf('font: bold on; align: wrap on, vert centre, horiz center')
+ kind_to_xf_map = {
+ 'date': ezxf(num_format_str='yyyy-mm-dd'),
+ 'int': ezxf(num_format_str='#,##0'),
+ 'money': ezxf('font: italic on; pattern: pattern solid, fore-colour grey25',
+ num_format_str='$#,##0.00'),
+ 'price': ezxf(num_format_str='#0.000000'),
+ 'text': ezxf(),
+ }
+ data_xfs = [kind_to_xf_map[k] for k in kinds]  # per-column styles, indexed by column in write_xls
+ write_xls('xlwt_easyxf_simple_demo.xls', 'Demo', hdngs, data, heading_xf, data_xfs)
diff --git a/tablib/packages/xlwt/excel-formula.g b/tablib/packages/xlwt/excel-formula.g
new file mode 100644
index 0000000..d98d9b9
--- /dev/null
+++ b/tablib/packages/xlwt/excel-formula.g
@@ -0,0 +1,374 @@
+header {
+ import struct
+ import Utils
+ from UnicodeUtils import upack1
+ from ExcelMagic import *
+
+ _RVAdelta = {"R": 0, "V": 0x20, "A": 0x40}
+ _RVAdeltaRef = {"R": 0, "V": 0x20, "A": 0x40, "D": 0x20}
+ _RVAdeltaArea = {"R": 0, "V": 0x20, "A": 0x40, "D": 0}
+
+
+ class FormulaParseException(Exception):
+ """
+ An exception indicating that a Formula could not be successfully parsed.
+ """
+}
+
+header "ExcelFormulaParser.__init__" {
+ self.rpn = ""
+ self.sheet_references = []
+ self.xcall_references = []
+}
+
+options {
+ language = "Python";
+}
+
+class ExcelFormulaParser extends Parser;
+options {
+ k = 2;
+ defaultErrorHandler = false;
+ buildAST = false;
+}
+
+
+tokens {
+ TRUE_CONST;
+ FALSE_CONST;
+ STR_CONST;
+ NUM_CONST;
+ INT_CONST;
+
+ FUNC_IF;
+ FUNC_CHOOSE;
+ NAME;
+ QUOTENAME;
+
+ EQ;
+ NE;
+ GT;
+ LT;
+ GE;
+ LE;
+
+ ADD;
+ SUB;
+ MUL;
+ DIV;
+
+ POWER;
+ PERCENT;
+
+ LP;
+ RP;
+
+ LB;
+ RB;
+
+ COLON;
+ COMMA;
+ SEMICOLON;
+ REF2D;
+ REF2D_R1C1;
+ BANG;
+}
+
+formula
+ : expr["V"]
+ ;
+
+expr[arg_type]
+ : // {print "\n**expr %s" % arg_type}
+ prec0_expr[arg_type]
+ (
+ (
+ EQ { op = struct.pack('B', ptgEQ) }
+ | NE { op = struct.pack('B', ptgNE) }
+ | GT { op = struct.pack('B', ptgGT) }
+ | LT { op = struct.pack('B', ptgLT) }
+ | GE { op = struct.pack('B', ptgGE) }
+ | LE { op = struct.pack('B', ptgLE) }
+ )
+ prec0_expr[arg_type] { self.rpn += op }
+ )*
+ ;
+
+prec0_expr[arg_type]
+ : prec1_expr[arg_type]
+ (
+ (
+ CONCAT { op = struct.pack('B', ptgConcat) }
+ )
+ prec1_expr[arg_type] { self.rpn += op }
+ )*
+ ;
+
+prec1_expr[arg_type]
+ : // {print "**prec1_expr1 %s" % arg_type}
+ prec2_expr[arg_type]
+ // {print "**prec1_expr2 %s" % arg_type}
+ (
+ (
+ ADD { op = struct.pack('B', ptgAdd) }
+ | SUB { op = struct.pack('B', ptgSub) }
+ )
+ // {print "**prec1_expr3 %s" % arg_type}
+ prec2_expr[arg_type]
+ { self.rpn += op;
+ // print "**prec1_expr4 %s" % arg_type
+ }
+ )*
+ ;
+
+
+prec2_expr[arg_type]
+ : prec3_expr[arg_type]
+ (
+ (
+ MUL { op = struct.pack('B', ptgMul) }
+ | DIV { op = struct.pack('B', ptgDiv) }
+ )
+ prec3_expr[arg_type] { self.rpn += op }
+ )*
+ ;
+
+prec3_expr[arg_type]
+ : prec4_expr[arg_type]
+ (
+ (
+ POWER { op = struct.pack('B', ptgPower) }
+ )
+ prec4_expr[arg_type] { self.rpn += op }
+ )*
+ ;
+
+prec4_expr[arg_type]
+ : prec5_expr[arg_type]
+ (
+ PERCENT { self.rpn += struct.pack('B', ptgPercent) }
+ )?
+ ;
+
+prec5_expr[arg_type]
+ : primary[arg_type]
+ | SUB primary[arg_type] { self.rpn += struct.pack('B', ptgUminus) }
+ ;
+
+primary[arg_type]
+ : TRUE_CONST
+ {
+ self.rpn += struct.pack("2B", ptgBool, 1)
+ }
+ | FALSE_CONST
+ {
+ self.rpn += struct.pack("2B", ptgBool, 0)
+ }
+ | str_tok:STR_CONST
+ {
+ self.rpn += struct.pack("B", ptgStr) + upack1(str_tok.text[1:-1].replace("\"\"", "\""))
+ }
+ | int_tok:INT_CONST
+ {
+ // print "**int_const", int_tok.text
+ int_value = int(int_tok.text)
+ if int_value <= 65535:
+ self.rpn += struct.pack(" max_argc or arg_count < min_argc:
+ raise Exception, "%d parameters for function: %s" % (arg_count, func_tok.text)
+ if xcall:
+ func_ptg = ptgFuncVarR + _RVAdelta[func_type]
+ self.rpn += struct.pack("<2BH", func_ptg, arg_count + 1, 255) // 255 is magic XCALL function
+ elif min_argc == max_argc:
+ func_ptg = ptgFuncR + _RVAdelta[func_type]
+ self.rpn += struct.pack("[0-9][0-9][0-9][0-9])
+ -(?P<month>[0-9][0-9]?)
+ -(?P<day>[0-9][0-9]?)
+ (?:(?:[Tt]|[ \t]+)
+ (?P<hour>[0-9][0-9]?)
+ :(?P<minute>[0-9][0-9])
+ :(?P<second>[0-9][0-9])
+ (?:\.(?P<fraction>[0-9]*))?
+ (?:[ \t]*(?P<tz>Z|(?P<tz_sign>[-+])(?P<tz_hour>[0-9][0-9]?)
+ (?::(?P<tz_minute>[0-9][0-9]))?))?)?$''', re.X)
+
+ def construct_yaml_timestamp(self, node):  # returns datetime.date, or datetime.datetime when a time part matched
+ value = self.construct_scalar(node)  # NOTE(review): value is never read; the regexp runs on node.value
+ match = self.timestamp_regexp.match(node.value)  # NOTE(review): a non-matching scalar leaves match as None
+ values = match.groupdict()
+ year = int(values['year'])
+ month = int(values['month'])
+ day = int(values['day'])
+ if not values['hour']:
+ return datetime.date(year, month, day)  # date-only form
+ hour = int(values['hour'])
+ minute = int(values['minute'])
+ second = int(values['second'])
+ fraction = 0
+ if values['fraction']:
+ fraction = values['fraction'][:6]  # at most 6 fractional digits are kept
+ while len(fraction) < 6:
+ fraction += '0'  # right-pad so the digits count microseconds
+ fraction = int(fraction)
+ delta = None
+ if values['tz_sign']:
+ tz_hour = int(values['tz_hour'])
+ tz_minute = int(values['tz_minute'] or 0)
+ delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute)
+ if values['tz_sign'] == '-':
+ delta = -delta
+ data = datetime.datetime(year, month, day, hour, minute, second, fraction)
+ if delta:
+ data -= delta  # shift by the parsed offset; no tzinfo is attached to the result
+ return data
+
+ def construct_yaml_omap(self, node):
+ # Note: we do not check for duplicate keys, because it's too
+ # CPU-expensive.
+ omap = []
+ yield omap  # hand out the (still empty) list first; it is filled in place below
+ if not isinstance(node, SequenceNode):
+ raise ConstructorError("while constructing an ordered map", node.start_mark,
+ "expected a sequence, but found %s" % node.id, node.start_mark)
+ for subnode in node.value:  # every entry must be a mapping holding exactly one item
+ if not isinstance(subnode, MappingNode):
+ raise ConstructorError("while constructing an ordered map", node.start_mark,
+ "expected a mapping of length 1, but found %s" % subnode.id,
+ subnode.start_mark)
+ if len(subnode.value) != 1:
+ raise ConstructorError("while constructing an ordered map", node.start_mark,
+ "expected a single mapping item, but found %d items" % len(subnode.value),
+ subnode.start_mark)
+ key_node, value_node = subnode.value[0]
+ key = self.construct_object(key_node)
+ value = self.construct_object(value_node)
+ omap.append((key, value))  # result: list of (key, value) tuples in node.value order
+
+ def construct_yaml_pairs(self, node):
+ # Note: the same code as `construct_yaml_omap`.
+ pairs = []
+ yield pairs
+ if not isinstance(node, SequenceNode):
+ raise ConstructorError("while constructing pairs", node.start_mark,
+ "expected a sequence, but found %s" % node.id, node.start_mark)
+ for subnode in node.value:
+ if not isinstance(subnode, MappingNode):
+ raise ConstructorError("while constructing pairs", node.start_mark,
+ "expected a mapping of length 1, but found %s" % subnode.id,
+ subnode.start_mark)
+ if len(subnode.value) != 1:
+ raise ConstructorError("while constructing pairs", node.start_mark,
+ "expected a single mapping item, but found %d items" % len(subnode.value),
+ subnode.start_mark)
+ key_node, value_node = subnode.value[0]
+ key = self.construct_object(key_node)
+ value = self.construct_object(value_node)
+ pairs.append((key, value))
+
+ def construct_yaml_set(self, node):
+ data = set()
+ yield data
+ value = self.construct_mapping(node)
+ data.update(value)
+
+ def construct_yaml_str(self, node):
+ value = self.construct_scalar(node)
+ try:
+ return value.encode('ascii')
+ except UnicodeEncodeError:
+ return value
+
+ def construct_yaml_seq(self, node):
+ data = []
+ yield data
+ data.extend(self.construct_sequence(node))
+
+ def construct_yaml_map(self, node):
+ data = {}
+ yield data
+ value = self.construct_mapping(node)
+ data.update(value)
+
+ def construct_yaml_object(self, node, cls):
+ data = cls.__new__(cls)
+ yield data
+ if hasattr(data, '__setstate__'):
+ state = self.construct_mapping(node, deep=True)
+ data.__setstate__(state)
+ else:
+ state = self.construct_mapping(node)
+ data.__dict__.update(state)
+
+ def construct_undefined(self, node):
+ raise ConstructorError(None, None,
+ "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'),
+ node.start_mark)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:null',
+ SafeConstructor.construct_yaml_null)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:bool',
+ SafeConstructor.construct_yaml_bool)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:int',
+ SafeConstructor.construct_yaml_int)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:float',
+ SafeConstructor.construct_yaml_float)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:binary',
+ SafeConstructor.construct_yaml_binary)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:timestamp',
+ SafeConstructor.construct_yaml_timestamp)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:omap',
+ SafeConstructor.construct_yaml_omap)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:pairs',
+ SafeConstructor.construct_yaml_pairs)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:set',
+ SafeConstructor.construct_yaml_set)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:str',
+ SafeConstructor.construct_yaml_str)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:seq',
+ SafeConstructor.construct_yaml_seq)
+
+SafeConstructor.add_constructor(
+ u'tag:yaml.org,2002:map',
+ SafeConstructor.construct_yaml_map)
+
+SafeConstructor.add_constructor(None,
+ SafeConstructor.construct_undefined)
+
+class Constructor(SafeConstructor):
+
+ def construct_python_str(self, node):
+ return self.construct_scalar(node).encode('utf-8')
+
+ def construct_python_unicode(self, node):
+ return self.construct_scalar(node)
+
+ def construct_python_long(self, node):
+ return long(self.construct_yaml_int(node))
+
+ def construct_python_complex(self, node):
+ return complex(self.construct_scalar(node))
+
+ def construct_python_tuple(self, node):
+ return tuple(self.construct_sequence(node))
+
+ def find_python_module(self, name, mark):
+ if not name:
+ raise ConstructorError("while constructing a Python module", mark,
+ "expected non-empty name appended to the tag", mark)
+ try:
+ __import__(name)
+ except ImportError, exc:
+ raise ConstructorError("while constructing a Python module", mark,
+ "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark)
+ return sys.modules[name]
+
+ def find_python_name(self, name, mark):  # resolve "pkg.mod.attr" (or a bare builtin name) to the object itself
+ if not name:
+ raise ConstructorError("while constructing a Python object", mark,
+ "expected non-empty name appended to the tag", mark)
+ if u'.' in name:
+ # str.rsplit needs Python 2.4+, so the last dotted component is split
+ # off by hand; everything before it is treated as the module path.
+ items = name.split('.')
+ object_name = items.pop()
+ module_name = '.'.join(items)
+ else:
+ module_name = '__builtin__'
+ object_name = name
+ try:
+ __import__(module_name)  # NOTE(review): name comes from the YAML tag, so this imports whatever module the document names
+ except ImportError, exc:
+ raise ConstructorError("while constructing a Python object", mark,
+ "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark)
+ module = sys.modules[module_name]  # __import__ returns the top-level package, so the leaf module is looked up here
+ if not hasattr(module, object_name):
+ raise ConstructorError("while constructing a Python object", mark,
+ "cannot find %r in the module %r" % (object_name.encode('utf-8'),
+ module.__name__), mark)
+ return getattr(module, object_name)
+
+ def construct_python_name(self, suffix, node):
+ value = self.construct_scalar(node)
+ if value:
+ raise ConstructorError("while constructing a Python name", node.start_mark,
+ "expected the empty value, but found %r" % value.encode('utf-8'),
+ node.start_mark)
+ return self.find_python_name(suffix, node.start_mark)
+
+ def construct_python_module(self, suffix, node):
+ value = self.construct_scalar(node)
+ if value:
+ raise ConstructorError("while constructing a Python module", node.start_mark,
+ "expected the empty value, but found %r" % value.encode('utf-8'),
+ node.start_mark)
+ return self.find_python_module(suffix, node.start_mark)
+
+ class classobj: pass
+
+ def make_python_instance(self, suffix, node,
+ args=None, kwds=None, newobj=False):  # newobj=True: create the object without running __init__ where possible
+ if not args:
+ args = []
+ if not kwds:
+ kwds = {}
+ cls = self.find_python_name(suffix, node.start_mark)  # resolve the dotted name carried by the tag suffix
+ if newobj and isinstance(cls, type(self.classobj)) \
+ and not args and not kwds:
+ instance = self.classobj()  # cls shares its metatype with the bare `classobj` helper class:
+ instance.__class__ = cls  # build an empty instance and re-class it, so cls.__init__ is not called
+ return instance
+ elif newobj and isinstance(cls, type):
+ return cls.__new__(cls, *args, **kwds)  # allocate via __new__ only; __init__ is not called
+ else:
+ return cls(*args, **kwds)  # everything else: ordinary call
+
+ def set_python_instance_state(self, instance, state):  # apply a pickle-style state (dict, or (dict, slots-dict) pair) to instance
+     if hasattr(instance, '__setstate__'):
+         instance.__setstate__(state)  # the object knows how to restore itself
+     else:
+         slotstate = {}
+         if isinstance(state, tuple) and len(state) == 2:
+             state, slotstate = state  # two-part form: (__dict__ state, slot state)
+         if hasattr(instance, '__dict__'):
+             instance.__dict__.update(state)
+         elif state:
+             slotstate.update(state)  # no __dict__ available: route everything through setattr
+         for key, value in slotstate.items():
+             setattr(instance, key, value)  # fixed: used to target the builtin `object` type instead of instance
+
+ def construct_python_object(self, suffix, node):
+ # Format:
+ # !!python/object:module.name { ... state ... }
+ instance = self.make_python_instance(suffix, node, newobj=True)
+ yield instance
+ deep = hasattr(instance, '__setstate__')
+ state = self.construct_mapping(node, deep=deep)
+ self.set_python_instance_state(instance, state)
+
+ def construct_python_object_apply(self, suffix, node, newobj=False):
+ # Format:
+ # !!python/object/apply # (or !!python/object/new)
+ # args: [ ... arguments ... ]
+ # kwds: { ... keywords ... }
+ # state: ... state ...
+ # listitems: [ ... listitems ... ]
+ # dictitems: { ... dictitems ... }
+ # or short format:
+ # !!python/object/apply [ ... arguments ... ]
+ # The difference between !!python/object/apply and !!python/object/new
+ # is how an object is created, check make_python_instance for details.
+ if isinstance(node, SequenceNode):
+ args = self.construct_sequence(node, deep=True)
+ kwds = {}
+ state = {}
+ listitems = []
+ dictitems = {}
+ else:
+ value = self.construct_mapping(node, deep=True)
+ args = value.get('args', [])
+ kwds = value.get('kwds', {})
+ state = value.get('state', {})
+ listitems = value.get('listitems', [])
+ dictitems = value.get('dictitems', {})
+ instance = self.make_python_instance(suffix, node, args, kwds, newobj)
+ if state:
+ self.set_python_instance_state(instance, state)
+ if listitems:
+ instance.extend(listitems)
+ if dictitems:
+ for key in dictitems:
+ instance[key] = dictitems[key]
+ return instance
+
+ def construct_python_object_new(self, suffix, node):
+ return self.construct_python_object_apply(suffix, node, newobj=True)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/none',
+ Constructor.construct_yaml_null)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/bool',
+ Constructor.construct_yaml_bool)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/str',
+ Constructor.construct_python_str)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/unicode',
+ Constructor.construct_python_unicode)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/int',
+ Constructor.construct_yaml_int)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/long',
+ Constructor.construct_python_long)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/float',
+ Constructor.construct_yaml_float)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/complex',
+ Constructor.construct_python_complex)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/list',
+ Constructor.construct_yaml_seq)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/tuple',
+ Constructor.construct_python_tuple)
+
+Constructor.add_constructor(
+ u'tag:yaml.org,2002:python/dict',
+ Constructor.construct_yaml_map)
+
+Constructor.add_multi_constructor(
+ u'tag:yaml.org,2002:python/name:',
+ Constructor.construct_python_name)
+
+Constructor.add_multi_constructor(
+ u'tag:yaml.org,2002:python/module:',
+ Constructor.construct_python_module)
+
+Constructor.add_multi_constructor(
+ u'tag:yaml.org,2002:python/object:',
+ Constructor.construct_python_object)
+
+Constructor.add_multi_constructor(
+ u'tag:yaml.org,2002:python/object/apply:',
+ Constructor.construct_python_object_apply)
+
+Constructor.add_multi_constructor(
+ u'tag:yaml.org,2002:python/object/new:',
+ Constructor.construct_python_object_new)
+
diff --git a/tablib/packages/yaml/cyaml.py b/tablib/packages/yaml/cyaml.py
new file mode 100644
index 0000000..68dcd75
--- /dev/null
+++ b/tablib/packages/yaml/cyaml.py
@@ -0,0 +1,85 @@
+
+__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader',
+ 'CBaseDumper', 'CSafeDumper', 'CDumper']
+
+from _yaml import CParser, CEmitter
+
+from constructor import *
+
+from serializer import *
+from representer import *
+
+from resolver import *
+
+class CBaseLoader(CParser, BaseConstructor, BaseResolver):
+
+ def __init__(self, stream):
+ CParser.__init__(self, stream)
+ BaseConstructor.__init__(self)
+ BaseResolver.__init__(self)
+
+class CSafeLoader(CParser, SafeConstructor, Resolver):
+
+ def __init__(self, stream):
+ CParser.__init__(self, stream)
+ SafeConstructor.__init__(self)
+ Resolver.__init__(self)
+
+class CLoader(CParser, Constructor, Resolver):
+
+ def __init__(self, stream):
+ CParser.__init__(self, stream)
+ Constructor.__init__(self)
+ Resolver.__init__(self)
+
+class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver):  # dumper combining CEmitter with the Base* representer/resolver
+
+    def __init__(self, stream,  # keyword options are split between CEmitter and the representer below
+            default_style=None, default_flow_style=None,
+            canonical=None, indent=None, width=None,
+            allow_unicode=None, line_break=None,
+            encoding=None, explicit_start=None, explicit_end=None,
+            version=None, tags=None):
+        CEmitter.__init__(self, stream, canonical=canonical,
+                indent=indent, width=width, encoding=encoding,
+                allow_unicode=allow_unicode, line_break=line_break,
+                explicit_start=explicit_start, explicit_end=explicit_end,
+                version=version, tags=tags)
+        BaseRepresenter.__init__(self, default_style=default_style,  # fixed: was Representer.__init__, which is not a base of this class
+                default_flow_style=default_flow_style)
+        BaseResolver.__init__(self)  # fixed: was Resolver.__init__, which is not a base of this class
+
+class CSafeDumper(CEmitter, SafeRepresenter, Resolver):
+
+ def __init__(self, stream,
+ default_style=None, default_flow_style=None,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=None, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ CEmitter.__init__(self, stream, canonical=canonical,
+ indent=indent, width=width, encoding=encoding,
+ allow_unicode=allow_unicode, line_break=line_break,
+ explicit_start=explicit_start, explicit_end=explicit_end,
+ version=version, tags=tags)
+ SafeRepresenter.__init__(self, default_style=default_style,
+ default_flow_style=default_flow_style)
+ Resolver.__init__(self)
+
+class CDumper(CEmitter, Serializer, Representer, Resolver):
+
+ def __init__(self, stream,
+ default_style=None, default_flow_style=None,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=None, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ CEmitter.__init__(self, stream, canonical=canonical,
+ indent=indent, width=width, encoding=encoding,
+ allow_unicode=allow_unicode, line_break=line_break,
+ explicit_start=explicit_start, explicit_end=explicit_end,
+ version=version, tags=tags)
+ Representer.__init__(self, default_style=default_style,
+ default_flow_style=default_flow_style)
+ Resolver.__init__(self)
+
diff --git a/tablib/packages/yaml/dumper.py b/tablib/packages/yaml/dumper.py
new file mode 100644
index 0000000..f811d2c
--- /dev/null
+++ b/tablib/packages/yaml/dumper.py
@@ -0,0 +1,62 @@
+
+__all__ = ['BaseDumper', 'SafeDumper', 'Dumper']
+
+from emitter import *
+from serializer import *
+from representer import *
+from resolver import *
+
+class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver):  # dumper combining Emitter, Serializer and the Base* representer/resolver
+
+    def __init__(self, stream,  # keyword options are split between the mixins initialised below
+            default_style=None, default_flow_style=None,
+            canonical=None, indent=None, width=None,
+            allow_unicode=None, line_break=None,
+            encoding=None, explicit_start=None, explicit_end=None,
+            version=None, tags=None):
+        Emitter.__init__(self, stream, canonical=canonical,
+                indent=indent, width=width,
+                allow_unicode=allow_unicode, line_break=line_break)
+        Serializer.__init__(self, encoding=encoding,
+                explicit_start=explicit_start, explicit_end=explicit_end,
+                version=version, tags=tags)
+        BaseRepresenter.__init__(self, default_style=default_style,  # fixed: was Representer.__init__, which is not a base of this class
+                default_flow_style=default_flow_style)
+        BaseResolver.__init__(self)  # fixed: was Resolver.__init__, which is not a base of this class
+
+class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver):
+
+ def __init__(self, stream,
+ default_style=None, default_flow_style=None,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=None, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ Emitter.__init__(self, stream, canonical=canonical,
+ indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break)
+ Serializer.__init__(self, encoding=encoding,
+ explicit_start=explicit_start, explicit_end=explicit_end,
+ version=version, tags=tags)
+ SafeRepresenter.__init__(self, default_style=default_style,
+ default_flow_style=default_flow_style)
+ Resolver.__init__(self)
+
+class Dumper(Emitter, Serializer, Representer, Resolver):
+
+ def __init__(self, stream,
+ default_style=None, default_flow_style=None,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=None, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ Emitter.__init__(self, stream, canonical=canonical,
+ indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break)
+ Serializer.__init__(self, encoding=encoding,
+ explicit_start=explicit_start, explicit_end=explicit_end,
+ version=version, tags=tags)
+ Representer.__init__(self, default_style=default_style,
+ default_flow_style=default_flow_style)
+ Resolver.__init__(self)
+
diff --git a/tablib/packages/yaml/emitter.py b/tablib/packages/yaml/emitter.py
new file mode 100644
index 0000000..4cb2c8a
--- /dev/null
+++ b/tablib/packages/yaml/emitter.py
@@ -0,0 +1,1135 @@
+
+# Emitter expects events obeying the following grammar:
+# stream ::= STREAM-START document* STREAM-END
+# document ::= DOCUMENT-START node DOCUMENT-END
+# node ::= SCALAR | sequence | mapping
+# sequence ::= SEQUENCE-START node* SEQUENCE-END
+# mapping ::= MAPPING-START (node node)* MAPPING-END
+
+__all__ = ['Emitter', 'EmitterError']
+
+from error import YAMLError
+from events import *
+
+class EmitterError(YAMLError):
+ pass
+
+class ScalarAnalysis(object):
+ def __init__(self, scalar, empty, multiline,
+ allow_flow_plain, allow_block_plain,
+ allow_single_quoted, allow_double_quoted,
+ allow_block):
+ self.scalar = scalar
+ self.empty = empty
+ self.multiline = multiline
+ self.allow_flow_plain = allow_flow_plain
+ self.allow_block_plain = allow_block_plain
+ self.allow_single_quoted = allow_single_quoted
+ self.allow_double_quoted = allow_double_quoted
+ self.allow_block = allow_block
+
+class Emitter(object):
+
+ DEFAULT_TAG_PREFIXES = {
+ u'!' : u'!',
+ u'tag:yaml.org,2002:' : u'!!',
+ }
+
+ def __init__(self, stream, canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None):
+
+ # The stream should have the methods `write` and possibly `flush`.
+ self.stream = stream
+
+ # Encoding can be overridden by STREAM-START.
+ self.encoding = None
+
+ # Emitter is a state machine with a stack of states to handle nested
+ # structures.
+ self.states = []
+ self.state = self.expect_stream_start
+
+ # Current event and the event queue.
+ self.events = []
+ self.event = None
+
+ # The current indentation level and the stack of previous indents.
+ self.indents = []
+ self.indent = None
+
+ # Flow level.
+ self.flow_level = 0
+
+ # Contexts.
+ self.root_context = False
+ self.sequence_context = False
+ self.mapping_context = False
+ self.simple_key_context = False
+
+ # Characteristics of the last emitted character:
+ # - current position.
+ # - is it a whitespace?
+ # - is it an indention character
+ # (indentation space, '-', '?', or ':')?
+ self.line = 0
+ self.column = 0
+ self.whitespace = True
+ self.indention = True
+
+ # Whether the document requires an explicit document indicator
+ self.open_ended = False
+
+ # Formatting details.
+ self.canonical = canonical
+ self.allow_unicode = allow_unicode
+ self.best_indent = 2
+ if indent and 1 < indent < 10:
+ self.best_indent = indent
+ self.best_width = 80
+ if width and width > self.best_indent*2:
+ self.best_width = width
+ self.best_line_break = u'\n'
+ if line_break in [u'\r', u'\n', u'\r\n']:
+ self.best_line_break = line_break
+
+ # Tag prefixes.
+ self.tag_prefixes = None
+
+ # Prepared anchor and tag.
+ self.prepared_anchor = None
+ self.prepared_tag = None
+
+ # Scalar analysis and style.
+ self.analysis = None
+ self.style = None
+
+ def emit(self, event):
+ self.events.append(event)
+ while not self.need_more_events():
+ self.event = self.events.pop(0)
+ self.state()
+ self.event = None
+
+ # In some cases, we wait for a few next events before emitting.
+
+ def need_more_events(self):  # True while emit() should keep queueing instead of processing the head event
+ if not self.events:
+ return True
+ event = self.events[0]
+ if isinstance(event, DocumentStartEvent):
+ return self.need_events(1)  # look 1 event past DOCUMENT-START
+ elif isinstance(event, SequenceStartEvent):
+ return self.need_events(2)  # look 2 events past SEQUENCE-START
+ elif isinstance(event, MappingStartEvent):
+ return self.need_events(3)  # look 3 events past MAPPING-START
+ else:
+ return False  # every other event is processed immediately
+
+ def need_events(self, count):  # True if fewer than `count` events are queued behind the head event and its structure is still open
+ level = 0  # nesting depth relative to the head event
+ for event in self.events[1:]:
+ if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
+ level += 1
+ elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
+ level -= 1
+ elif isinstance(event, StreamEndEvent):
+ level = -1  # end of stream: force the "closed" result below
+ if level < 0:
+ return False  # the head event's structure is already closed; nothing more to wait for
+ return (len(self.events) < count+1)  # +1 accounts for the head event itself
+
+ def increase_indent(self, flow=False, indentless=False):  # push the current indent and compute the next one
+ self.indents.append(self.indent)  # saved so callers can restore it with self.indents.pop()
+ if self.indent is None:  # no indentation level has been set yet
+ if flow:
+ self.indent = self.best_indent
+ else:
+ self.indent = 0
+ elif not indentless:
+ self.indent += self.best_indent  # with indentless=True the current indent is kept unchanged
+
+ # States.
+
+ # Stream handlers.
+
+ def expect_stream_start(self):
+ if isinstance(self.event, StreamStartEvent):
+ if self.event.encoding and not getattr(self.stream, 'encoding', None):
+ self.encoding = self.event.encoding
+ self.write_stream_start()
+ self.state = self.expect_first_document_start
+ else:
+ raise EmitterError("expected StreamStartEvent, but got %s"
+ % self.event)
+
+ def expect_nothing(self):
+ raise EmitterError("expected nothing, but got %s" % self.event)
+
+ # Document handlers.
+
+ def expect_first_document_start(self):
+ return self.expect_document_start(first=True)
+
+ def expect_document_start(self, first=False):  # state handler: DOCUMENT-START opens a document, STREAM-END closes the stream
+ if isinstance(self.event, DocumentStartEvent):
+ if (self.event.version or self.event.tags) and self.open_ended:
+ self.write_indicator(u'...', True)  # output is flagged open-ended: write '...' before the directives
+ self.write_indent()
+ if self.event.version:
+ version_text = self.prepare_version(self.event.version)
+ self.write_version_directive(version_text)
+ self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()  # fresh copy for this document
+ if self.event.tags:
+ handles = self.event.tags.keys()
+ handles.sort()  # tag directives are written in sorted handle order
+ for handle in handles:
+ prefix = self.event.tags[handle]
+ self.tag_prefixes[prefix] = handle  # stored as prefix -> handle
+ handle_text = self.prepare_tag_handle(handle)
+ prefix_text = self.prepare_tag_prefix(prefix)
+ self.write_tag_directive(handle_text, prefix_text)
+ implicit = (first and not self.event.explicit and not self.canonical
+ and not self.event.version and not self.event.tags
+ and not self.check_empty_document())
+ if not implicit:  # '---' is skipped only when every condition above holds
+ self.write_indent()
+ self.write_indicator(u'---', True)
+ if self.canonical:
+ self.write_indent()
+ self.state = self.expect_document_root
+ elif isinstance(self.event, StreamEndEvent):
+ if self.open_ended:
+ self.write_indicator(u'...', True)
+ self.write_indent()
+ self.write_stream_end()
+ self.state = self.expect_nothing  # any later event raises EmitterError
+ else:
+ raise EmitterError("expected DocumentStartEvent, but got %s"
+ % self.event)
+
+ def expect_document_end(self):
+ if isinstance(self.event, DocumentEndEvent):
+ self.write_indent()
+ if self.event.explicit:
+ self.write_indicator(u'...', True)
+ self.write_indent()
+ self.flush_stream()
+ self.state = self.expect_document_start
+ else:
+ raise EmitterError("expected DocumentEndEvent, but got %s"
+ % self.event)
+
+ def expect_document_root(self):
+ self.states.append(self.expect_document_end)
+ self.expect_node(root=True)
+
+ # Node handlers.
+
+ def expect_node(self, root=False, sequence=False, mapping=False,
+ simple_key=False):
+ self.root_context = root
+ self.sequence_context = sequence
+ self.mapping_context = mapping
+ self.simple_key_context = simple_key
+ if isinstance(self.event, AliasEvent):
+ self.expect_alias()
+ elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
+ self.process_anchor(u'&')
+ self.process_tag()
+ if isinstance(self.event, ScalarEvent):
+ self.expect_scalar()
+ elif isinstance(self.event, SequenceStartEvent):
+ if self.flow_level or self.canonical or self.event.flow_style \
+ or self.check_empty_sequence():
+ self.expect_flow_sequence()
+ else:
+ self.expect_block_sequence()
+ elif isinstance(self.event, MappingStartEvent):
+ if self.flow_level or self.canonical or self.event.flow_style \
+ or self.check_empty_mapping():
+ self.expect_flow_mapping()
+ else:
+ self.expect_block_mapping()
+ else:
+ raise EmitterError("expected NodeEvent, but got %s" % self.event)
+
+ def expect_alias(self):
+ if self.event.anchor is None:
+ raise EmitterError("anchor is not specified for alias")
+ self.process_anchor(u'*')
+ self.state = self.states.pop()
+
+ def expect_scalar(self):
+ self.increase_indent(flow=True)
+ self.process_scalar()
+ self.indent = self.indents.pop()
+ self.state = self.states.pop()
+
+ # Flow sequence handlers.
+
+ def expect_flow_sequence(self):
+ self.write_indicator(u'[', True, whitespace=True)
+ self.flow_level += 1
+ self.increase_indent(flow=True)
+ self.state = self.expect_first_flow_sequence_item
+
+ def expect_first_flow_sequence_item(self):
+ if isinstance(self.event, SequenceEndEvent):
+ self.indent = self.indents.pop()
+ self.flow_level -= 1
+ self.write_indicator(u']', False)
+ self.state = self.states.pop()
+ else:
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ self.states.append(self.expect_flow_sequence_item)
+ self.expect_node(sequence=True)
+
+ def expect_flow_sequence_item(self):
+ if isinstance(self.event, SequenceEndEvent):
+ self.indent = self.indents.pop()
+ self.flow_level -= 1
+ if self.canonical:
+ self.write_indicator(u',', False)
+ self.write_indent()
+ self.write_indicator(u']', False)
+ self.state = self.states.pop()
+ else:
+ self.write_indicator(u',', False)
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ self.states.append(self.expect_flow_sequence_item)
+ self.expect_node(sequence=True)
+
+ # Flow mapping handlers.
+
+ def expect_flow_mapping(self):
+ self.write_indicator(u'{', True, whitespace=True)
+ self.flow_level += 1
+ self.increase_indent(flow=True)
+ self.state = self.expect_first_flow_mapping_key
+
+ def expect_first_flow_mapping_key(self):  # Handle the event right after `{`: close the mapping or emit the first key.
+ if isinstance(self.event, MappingEndEvent):  # the mapping has no entries
+ self.indent = self.indents.pop()
+ self.flow_level -= 1
+ self.write_indicator(u'}', False)
+ self.state = self.states.pop()
+ else:
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ if not self.canonical and self.check_simple_key():  # simple keys are never used in canonical mode
+ self.states.append(self.expect_flow_mapping_simple_value)
+ self.expect_node(mapping=True, simple_key=True)
+ else:
+ self.write_indicator(u'?', True)  # explicit key indicator
+ self.states.append(self.expect_flow_mapping_value)
+ self.expect_node(mapping=True)
+
+ def expect_flow_mapping_key(self):  # Handle a later flow-mapping event: close with `}`, or write `,` and the next key.
+ if isinstance(self.event, MappingEndEvent):
+ self.indent = self.indents.pop()
+ self.flow_level -= 1
+ if self.canonical:  # canonical output writes a trailing `,` before the closing brace
+ self.write_indicator(u',', False)
+ self.write_indent()
+ self.write_indicator(u'}', False)
+ self.state = self.states.pop()
+ else:
+ self.write_indicator(u',', False)  # separator between the previous entry and this one
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ if not self.canonical and self.check_simple_key():  # same simple/explicit key choice as for the first key
+ self.states.append(self.expect_flow_mapping_simple_value)
+ self.expect_node(mapping=True, simple_key=True)
+ else:
+ self.write_indicator(u'?', True)
+ self.states.append(self.expect_flow_mapping_value)
+ self.expect_node(mapping=True)
+
+ def expect_flow_mapping_simple_value(self):  # Write `:` after a simple key, then emit the value node.
+ self.write_indicator(u':', False)  # need_whitespace=False, so write_indicator adds no space before the colon
+ self.states.append(self.expect_flow_mapping_key)  # after the value comes the next key or the mapping end
+ self.expect_node(mapping=True)
+
+ def expect_flow_mapping_value(self):  # Write `:` after an explicit (`?`) key, then emit the value node.
+ if self.canonical or self.column > self.best_width:
+ self.write_indent()
+ self.write_indicator(u':', True)  # need_whitespace=True, unlike the simple-value handler
+ self.states.append(self.expect_flow_mapping_key)
+ self.expect_node(mapping=True)
+
+ # Block sequence handlers.
+
+ def expect_block_sequence(self):  # Begin a block sequence and switch to the first-item handler.
+ indentless = (self.mapping_context and not self.indention)  # forwarded to increase_indent as its `indentless` argument
+ self.increase_indent(flow=False, indentless=indentless)
+ self.state = self.expect_first_block_sequence_item
+
+ def expect_first_block_sequence_item(self):  # First item: delegate with first=True, which bypasses the end-of-sequence check.
+ return self.expect_block_sequence_item(first=True)
+
+ def expect_block_sequence_item(self, first=False):  # Write one `-` entry, or finish the sequence on SequenceEndEvent.
+ if not first and isinstance(self.event, SequenceEndEvent):  # the end event is only honoured when first is false
+ self.indent = self.indents.pop()
+ self.state = self.states.pop()
+ else:
+ self.write_indent()
+ self.write_indicator(u'-', True, indention=True)
+ self.states.append(self.expect_block_sequence_item)  # come back here for the following item
+ self.expect_node(sequence=True)
+
+ # Block mapping handlers.
+
+ def expect_block_mapping(self):  # Begin a block mapping and switch to the first-key handler.
+ self.increase_indent(flow=False)
+ self.state = self.expect_first_block_mapping_key
+
+ def expect_first_block_mapping_key(self):  # First key: delegate with first=True, which bypasses the end-of-mapping check.
+ return self.expect_block_mapping_key(first=True)
+
+ def expect_block_mapping_key(self, first=False):  # Write the next block-mapping key, or finish the mapping on MappingEndEvent.
+ if not first and isinstance(self.event, MappingEndEvent):  # the end event is only honoured when first is false
+ self.indent = self.indents.pop()
+ self.state = self.states.pop()
+ else:
+ self.write_indent()
+ if self.check_simple_key():  # simple key: no `?` indicator is written
+ self.states.append(self.expect_block_mapping_simple_value)
+ self.expect_node(mapping=True, simple_key=True)
+ else:
+ self.write_indicator(u'?', True, indention=True)  # explicit key indicator
+ self.states.append(self.expect_block_mapping_value)
+ self.expect_node(mapping=True)
+
+ def expect_block_mapping_simple_value(self):  # Write `:` after a simple key, then emit the value node.
+ self.write_indicator(u':', False)  # need_whitespace=False, so write_indicator adds no space before the colon
+ self.states.append(self.expect_block_mapping_key)  # after the value comes the next key or the mapping end
+ self.expect_node(mapping=True)
+
+ def expect_block_mapping_value(self):  # Write `:` after an explicit (`?`) key, then emit the value node.
+ self.write_indent()  # called before the `:` indicator, unlike the simple-value handler
+ self.write_indicator(u':', True, indention=True)
+ self.states.append(self.expect_block_mapping_key)
+ self.expect_node(mapping=True)
+
+ # Checkers.
+
+ def check_empty_sequence(self):  # Truthy when the current event starts a sequence and the next queued event ends it.
+ return (isinstance(self.event, SequenceStartEvent) and self.events  # self.events must be non-empty before indexing
+ and isinstance(self.events[0], SequenceEndEvent))
+
+ def check_empty_mapping(self):  # Truthy when the current event starts a mapping and the next queued event ends it.
+ return (isinstance(self.event, MappingStartEvent) and self.events  # self.events must be non-empty before indexing
+ and isinstance(self.events[0], MappingEndEvent))
+
+ def check_empty_document(self):  # Truthy when a document start is followed by an implicit, untagged, unanchored empty scalar.
+ if not isinstance(self.event, DocumentStartEvent) or not self.events:
+ return False
+ event = self.events[0]  # the event queued after the current one
+ return (isinstance(event, ScalarEvent) and event.anchor is None
+ and event.tag is None and event.implicit and event.value == u'')
+
+ def check_simple_key(self):  # Decide whether the current event may be written as a simple mapping key.
+ length = 0  # combined length of the prepared anchor, prepared tag and scalar text
+ if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
+ if self.prepared_anchor is None:  # cached on self; process_anchor reuses it when already set
+ self.prepared_anchor = self.prepare_anchor(self.event.anchor)
+ length += len(self.prepared_anchor)
+ if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \
+ and self.event.tag is not None:
+ if self.prepared_tag is None:  # cached on self; process_tag reuses it when already set
+ self.prepared_tag = self.prepare_tag(self.event.tag)
+ length += len(self.prepared_tag)
+ if isinstance(self.event, ScalarEvent):
+ if self.analysis is None:  # cached on self; process_scalar reuses it when already set
+ self.analysis = self.analyze_scalar(self.event.value)
+ length += len(self.analysis.scalar)
+ return (length < 128 and (isinstance(self.event, AliasEvent)  # accepted: aliases, non-empty single-line scalars, empty collections
+ or (isinstance(self.event, ScalarEvent)
+ and not self.analysis.empty and not self.analysis.multiline)
+ or self.check_empty_sequence() or self.check_empty_mapping()))
+
+ # Anchor, Tag, and Scalar processors.
+
+ def process_anchor(self, indicator):  # Write `indicator` followed by the event's anchor, if the event has one.
+ if self.event.anchor is None:
+ self.prepared_anchor = None
+ return
+ if self.prepared_anchor is None:  # check_simple_key may already have prepared it
+ self.prepared_anchor = self.prepare_anchor(self.event.anchor)
+ if self.prepared_anchor:
+ self.write_indicator(indicator+self.prepared_anchor, True)
+ self.prepared_anchor = None  # reset the cached value once it has been used
+
+ def process_tag(self):  # Write the current event's tag unless it may be left out as implicit.
+ tag = self.event.tag
+ if isinstance(self.event, ScalarEvent):
+ if self.style is None:
+ self.style = self.choose_scalar_style()
+ if ((not self.canonical or tag is None) and  # implicit[0] is consulted for plain style (''), implicit[1] for any other style
+ ((self.style == '' and self.event.implicit[0])
+ or (self.style != '' and self.event.implicit[1]))):
+ self.prepared_tag = None
+ return
+ if self.event.implicit[0] and tag is None:
+ tag = u'!'  # no tag given: fall back to the bare `!` tag
+ self.prepared_tag = None
+ else:
+ if (not self.canonical or tag is None) and self.event.implicit:  # for non-scalar events implicit is tested as one truth value
+ self.prepared_tag = None
+ return
+ if tag is None:
+ raise EmitterError("tag is not specified")
+ if self.prepared_tag is None:  # check_simple_key may already have prepared it
+ self.prepared_tag = self.prepare_tag(tag)
+ if self.prepared_tag:
+ self.write_indicator(self.prepared_tag, True)
+ self.prepared_tag = None  # reset the cached value once it has been used
+
+ def choose_scalar_style(self):  # Return the style for the current scalar: '' (plain), '\'', '"', or the requested block style.
+ if self.analysis is None:
+ self.analysis = self.analyze_scalar(self.event.value)
+ if self.event.style == '"' or self.canonical:  # double quotes when requested, and always in canonical mode
+ return '"'
+ if not self.event.style and self.event.implicit[0]:  # plain is only considered when no style was requested
+ if (not (self.simple_key_context and
+ (self.analysis.empty or self.analysis.multiline))
+ and (self.flow_level and self.analysis.allow_flow_plain
+ or (not self.flow_level and self.analysis.allow_block_plain))):
+ return ''
+ if self.event.style and self.event.style in '|>':  # block styles need block context and a non-simple-key position
+ if (not self.flow_level and not self.simple_key_context
+ and self.analysis.allow_block):
+ return self.event.style
+ if not self.event.style or self.event.style == '\'':
+ if (self.analysis.allow_single_quoted and
+ not (self.simple_key_context and self.analysis.multiline)):
+ return '\''
+ return '"'  # fallback when none of the branches above returned
+
+ def process_scalar(self):  # Write the current scalar in its chosen style, then clear the cached analysis and style.
+ if self.analysis is None:
+ self.analysis = self.analyze_scalar(self.event.value)
+ if self.style is None:
+ self.style = self.choose_scalar_style()
+ split = (not self.simple_key_context)  # passed as `split` to the quoted and plain writers
+ #if self.analysis.multiline and split \
+ # and (not self.style or self.style in '\'\"'):
+ # self.write_indent()
+ if self.style == '"':  # dispatch on the style character
+ self.write_double_quoted(self.analysis.scalar, split)
+ elif self.style == '\'':
+ self.write_single_quoted(self.analysis.scalar, split)
+ elif self.style == '>':
+ self.write_folded(self.analysis.scalar)
+ elif self.style == '|':
+ self.write_literal(self.analysis.scalar)
+ else:
+ self.write_plain(self.analysis.scalar, split)  # any other style value is written plain
+ self.analysis = None
+ self.style = None
+
+ # Analyzers.
+
+ def prepare_version(self, version):  # Return the (major, minor) pair formatted as u'major.minor'.
+ major, minor = version
+ if major != 1:  # only major version 1 is accepted
+ raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
+ return u'%d.%d' % (major, minor)
+
+ def prepare_tag_handle(self, handle):  # Validate a tag handle and return it unchanged; raises EmitterError when invalid.
+ if not handle:
+ raise EmitterError("tag handle must not be empty")
+ if handle[0] != u'!' or handle[-1] != u'!':
+ raise EmitterError("tag handle must start and end with '!': %r"
+ % (handle.encode('utf-8')))
+ for ch in handle[1:-1]:  # characters between the two `!` marks
+ if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
+ or ch in u'-_'):
+ raise EmitterError("invalid character %r in the tag handle: %r"
+ % (ch.encode('utf-8'), handle.encode('utf-8')))
+ return handle
+
+ def prepare_tag_prefix(self, prefix):  # Return `prefix` with characters outside the allowed set replaced by %XX escapes.
+ if not prefix:
+ raise EmitterError("tag prefix must not be empty")
+ chunks = []  # output pieces, joined at the end
+ start = end = 0  # prefix[start:end] is the pending run that will be copied verbatim
+ if prefix[0] == u'!':
+ end = 1  # step over a leading `!` without testing it
+ while end < len(prefix):
+ ch = prefix[end]
+ if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
+ or ch in u'-;/?!:@&=+$,_.~*\'()[]':
+ end += 1
+ else:
+ if start < end:  # flush the verbatim run collected so far
+ chunks.append(prefix[start:end])
+ start = end = end+1
+ data = ch.encode('utf-8')
+ for ch in data:
+ chunks.append(u'%%%02X' % ord(ch))  # one %XX escape per encoded byte
+ if start < end:
+ chunks.append(prefix[start:end])
+ return u''.join(chunks)
+
+ def prepare_tag(self, tag):  # Return the text to write for `tag`: `handle` + escaped suffix, or the `!<...>` form.
+ if not tag:
+ raise EmitterError("tag must not be empty")
+ if tag == u'!':  # the bare `!` tag is returned as-is
+ return tag
+ handle = None
+ suffix = tag
+ prefixes = self.tag_prefixes.keys()
+ prefixes.sort()  # in-place sort of the key list (Python 2: keys() returns a list)
+ for prefix in prefixes:  # no break: the last matching prefix in sorted order wins
+ if tag.startswith(prefix) \
+ and (prefix == u'!' or len(prefix) < len(tag)):
+ handle = self.tag_prefixes[prefix]
+ suffix = tag[len(prefix):]
+ chunks = []  # output pieces of the escaped suffix
+ start = end = 0  # suffix[start:end] is the pending run that will be copied verbatim
+ while end < len(suffix):
+ ch = suffix[end]
+ if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
+ or ch in u'-;/?:@&=+$,_.~*\'()[]' \
+ or (ch == u'!' and handle != u'!'):
+ end += 1
+ else:
+ if start < end:  # flush the verbatim run collected so far
+ chunks.append(suffix[start:end])
+ start = end = end+1
+ data = ch.encode('utf-8')
+ for ch in data:
+ chunks.append(u'%%%02X' % ord(ch))  # one %XX escape per encoded byte
+ if start < end:
+ chunks.append(suffix[start:end])
+ suffix_text = u''.join(chunks)
+ if handle:
+ return u'%s%s' % (handle, suffix_text)
+ else:
+ return u'!<%s>' % suffix_text  # no registered prefix matched
+
+ def prepare_anchor(self, anchor):  # Validate an anchor name and return it unchanged; raises EmitterError when invalid.
+ if not anchor:
+ raise EmitterError("anchor must not be empty")
+ for ch in anchor:  # only ASCII letters, digits, `-` and `_` are accepted
+ if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
+ or ch in u'-_'):
+ raise EmitterError("invalid character %r in the anchor: %r"
+ % (ch.encode('utf-8'), anchor.encode('utf-8')))
+ return anchor
+
+ def analyze_scalar(self, scalar):  # Scan `scalar` once and return a ScalarAnalysis saying which styles may represent it.
+
+ # Empty scalar is a special case.
+ if not scalar:
+ return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
+ allow_flow_plain=False, allow_block_plain=True,
+ allow_single_quoted=True, allow_double_quoted=True,
+ allow_block=False)
+
+ # Indicators and special characters.
+ block_indicators = False
+ flow_indicators = False
+ line_breaks = False
+ special_characters = False
+
+ # Important whitespace combinations.
+ leading_space = False
+ leading_break = False
+ trailing_space = False
+ trailing_break = False
+ break_space = False
+ space_break = False
+
+ # Check document indicators.
+ if scalar.startswith(u'---') or scalar.startswith(u'...'):
+ block_indicators = True
+ flow_indicators = True
+
+ # First character or preceded by a whitespace.
+ preceeded_by_whitespace = True
+
+ # Last character or followed by a whitespace.
+ followed_by_whitespace = (len(scalar) == 1 or
+ scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
+
+ # The previous character is a space.
+ previous_space = False
+
+ # The previous character is a break.
+ previous_break = False
+
+ index = 0
+ while index < len(scalar):
+ ch = scalar[index]
+
+ # Check for indicators.
+ if index == 0:
+ # Leading indicators are special characters.
+ if ch in u'#,[]{}&*!|>\'\"%@`':
+ flow_indicators = True
+ block_indicators = True
+ if ch in u'?:':  # always a flow indicator; a block indicator only before whitespace
+ flow_indicators = True
+ if followed_by_whitespace:
+ block_indicators = True
+ if ch == u'-' and followed_by_whitespace:
+ flow_indicators = True
+ block_indicators = True
+ else:
+ # Some indicators cannot appear within a scalar as well.
+ if ch in u',?[]{}':
+ flow_indicators = True
+ if ch == u':':
+ flow_indicators = True
+ if followed_by_whitespace:
+ block_indicators = True
+ if ch == u'#' and preceeded_by_whitespace:
+ flow_indicators = True
+ block_indicators = True
+
+ # Check for line breaks, special, and unicode characters.
+ if ch in u'\n\x85\u2028\u2029':
+ line_breaks = True
+ if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):  # anything other than \n and printable ASCII
+ if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
+ or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
+ unicode_characters = True  # assigned here but not read anywhere else in this method
+ if not self.allow_unicode:
+ special_characters = True
+ else:
+ special_characters = True
+
+ # Detect important whitespace combinations.
+ if ch == u' ':
+ if index == 0:
+ leading_space = True
+ if index == len(scalar)-1:
+ trailing_space = True
+ if previous_break:
+ break_space = True
+ previous_space = True
+ previous_break = False
+ elif ch in u'\n\x85\u2028\u2029':
+ if index == 0:
+ leading_break = True
+ if index == len(scalar)-1:
+ trailing_break = True
+ if previous_space:
+ space_break = True
+ previous_space = False
+ previous_break = True
+ else:
+ previous_space = False
+ previous_break = False
+
+ # Prepare for the next character.
+ index += 1
+ preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029')
+ followed_by_whitespace = (index+1 >= len(scalar) or  # index was already advanced: this inspects the character after the next one
+ scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
+
+ # Let's decide what styles are allowed.
+ allow_flow_plain = True
+ allow_block_plain = True
+ allow_single_quoted = True
+ allow_double_quoted = True  # never cleared below
+ allow_block = True
+
+ # Leading and trailing whitespaces are bad for plain scalars.
+ if (leading_space or leading_break
+ or trailing_space or trailing_break):
+ allow_flow_plain = allow_block_plain = False
+
+ # We do not permit trailing spaces for block scalars.
+ if trailing_space:
+ allow_block = False
+
+ # Spaces at the beginning of a new line are only acceptable for block
+ # scalars.
+ if break_space:
+ allow_flow_plain = allow_block_plain = allow_single_quoted = False
+
+ # Spaces followed by breaks, as well as special characters, are only
+ # allowed for double quoted scalars.
+ if space_break or special_characters:
+ allow_flow_plain = allow_block_plain = \
+ allow_single_quoted = allow_block = False
+
+ # Although the plain scalar writer supports breaks, we never emit
+ # multiline plain scalars.
+ if line_breaks:
+ allow_flow_plain = allow_block_plain = False
+
+ # Flow indicators are forbidden for flow plain scalars.
+ if flow_indicators:
+ allow_flow_plain = False
+
+ # Block indicators are forbidden for block plain scalars.
+ if block_indicators:
+ allow_block_plain = False
+
+ return ScalarAnalysis(scalar=scalar,
+ empty=False, multiline=line_breaks,
+ allow_flow_plain=allow_flow_plain,
+ allow_block_plain=allow_block_plain,
+ allow_single_quoted=allow_single_quoted,
+ allow_double_quoted=allow_double_quoted,
+ allow_block=allow_block)
+
+ # Writers.
+
+ def flush_stream(self):  # Flush the output stream when it offers a flush() method.
+ if hasattr(self.stream, 'flush'):  # streams without flush() are left alone
+ self.stream.flush()
+
+ def write_stream_start(self):  # Write the byte-order mark when the encoding name starts with 'utf-16'.
+ # Write BOM if needed.
+ if self.encoding and self.encoding.startswith('utf-16'):
+ self.stream.write(u'\uFEFF'.encode(self.encoding))  # U+FEFF encoded with the output encoding
+
+ def write_stream_end(self):  # End of stream: only flushes the output.
+ self.flush_stream()
+
+ def write_indicator(self, indicator, need_whitespace,  # Write `indicator`, preceded by one space when that is required.
+ whitespace=False, indention=False):
+ if self.whitespace or not need_whitespace:
+ data = indicator
+ else:
+ data = u' '+indicator  # whitespace is needed and self.whitespace is false: prepend a space
+ self.whitespace = whitespace
+ self.indention = self.indention and indention  # stays true only if it was true and the caller asks to keep it
+ self.column += len(data)
+ self.open_ended = False
+ if self.encoding:  # encode only when an output encoding is set
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+
+ def write_indent(self):  # Move output to column `self.indent` (0 when unset), breaking the line first when required.
+ indent = self.indent or 0
+ if not self.indention or self.column > indent \
+ or (self.column == indent and not self.whitespace):
+ self.write_line_break()
+ if self.column < indent:  # pad with spaces up to the indent column
+ self.whitespace = True
+ data = u' '*(indent-self.column)
+ self.column = indent
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+
+ def write_line_break(self, data=None):  # Write a line break (`data`, or self.best_line_break) and reset the position state.
+ if data is None:
+ data = self.best_line_break
+ self.whitespace = True
+ self.indention = True
+ self.line += 1
+ self.column = 0  # the next character lands at the start of a line
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+
+ def write_version_directive(self, version_text):  # Write a `%YAML <version_text>` line.
+ data = u'%%YAML %s' % version_text  # `%%` is a literal percent sign in the format string
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ self.write_line_break()
+
+ def write_tag_directive(self, handle_text, prefix_text):  # Write a `%TAG <handle_text> <prefix_text>` line.
+ data = u'%%TAG %s %s' % (handle_text, prefix_text)  # `%%` is a literal percent sign in the format string
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ self.write_line_break()
+
+ # Scalar streams.
+
+ def write_single_quoted(self, text, split=True):  # Write `text` between single quotes, doubling every embedded quote.
+ self.write_indicator(u'\'', True)
+ spaces = False  # the previous character was a space
+ breaks = False  # the previous character was a line break
+ start = end = 0  # text[start:end] is the run not yet written
+ while end <= len(text):  # one extra pass with ch = None flushes the final run
+ ch = None
+ if end < len(text):
+ ch = text[end]
+ if spaces:
+ if ch is None or ch != u' ':
+ if start+1 == end and self.column > self.best_width and split \
+ and start != 0 and end != len(text):
+ self.write_indent()  # a lone inner space past best_width is replaced by a line break
+ else:
+ data = text[start:end]
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ start = end
+ elif breaks:
+ if ch is None or ch not in u'\n\x85\u2028\u2029':
+ if text[start] == u'\n':  # a run starting with \n gets one additional line break
+ self.write_line_break()
+ for br in text[start:end]:
+ if br == u'\n':
+ self.write_line_break()
+ else:
+ self.write_line_break(br)
+ self.write_indent()
+ start = end
+ else:
+ if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
+ if start < end:
+ data = text[start:end]
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ start = end
+ if ch == u'\'':
+ data = u'\'\''  # a quote inside the text is written as two quotes
+ self.column += 2
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ start = end + 1
+ if ch is not None:
+ spaces = (ch == u' ')
+ breaks = (ch in u'\n\x85\u2028\u2029')
+ end += 1
+ self.write_indicator(u'\'', False)
+
+ ESCAPE_REPLACEMENTS = {  # maps a character to the text write_double_quoted puts after a backslash
+ u'\0': u'0',
+ u'\x07': u'a',
+ u'\x08': u'b',
+ u'\x09': u't',
+ u'\x0A': u'n',
+ u'\x0B': u'v',
+ u'\x0C': u'f',
+ u'\x0D': u'r',
+ u'\x1B': u'e',
+ u'\"': u'\"',
+ u'\\': u'\\',
+ u'\x85': u'N',
+ u'\xA0': u'_',
+ u'\u2028': u'L',
+ u'\u2029': u'P',
+ }
+
+ def write_double_quoted(self, text, split=True):  # Write `text` between double quotes, escaping characters that cannot appear verbatim.
+ self.write_indicator(u'"', True)
+ start = end = 0  # text[start:end] is the run not yet written
+ while end <= len(text):  # one extra pass with ch = None flushes the final run
+ ch = None
+ if end < len(text):
+ ch = text[end]
+ if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
+ or not (u'\x20' <= ch <= u'\x7E'
+ or (self.allow_unicode
+ and (u'\xA0' <= ch <= u'\uD7FF'
+ or u'\uE000' <= ch <= u'\uFFFD'))):
+ if start < end:  # flush the verbatim run before the escaped character
+ data = text[start:end]
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ start = end
+ if ch is not None:
+ if ch in self.ESCAPE_REPLACEMENTS:  # short escape from the table
+ data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
+ elif ch <= u'\xFF':
+ data = u'\\x%02X' % ord(ch)
+ elif ch <= u'\uFFFF':
+ data = u'\\u%04X' % ord(ch)
+ else:
+ data = u'\\U%08X' % ord(ch)
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ start = end+1
+ if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \
+ and self.column+(end-start) > self.best_width and split:
+ data = text[start:end]+u'\\'  # a trailing backslash marks the line as continued
+ if start < end:
+ start = end
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ self.write_indent()
+ self.whitespace = False
+ self.indention = False
+ if text[start] == u' ':  # a backslash is written before a space that begins the continuation line
+ data = u'\\'
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ end += 1
+ self.write_indicator(u'"', False)
+
+ def determine_block_hints(self, text):  # Build the hint string written after `|` or `>`: optional indent number, then `-` or `+`.
+ hints = u''
+ if text:
+ if text[0] in u' \n\x85\u2028\u2029':  # text begins with a space or a break: add self.best_indent
+ hints += unicode(self.best_indent)
+ if text[-1] not in u'\n\x85\u2028\u2029':  # text does not end with a line break
+ hints += u'-'
+ elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029':  # text is a single break, or ends with two breaks
+ hints += u'+'
+ return hints
+
+ def write_folded(self, text):  # Write `text` as a block scalar introduced by `>` plus block hints.
+ hints = self.determine_block_hints(text)
+ self.write_indicator(u'>'+hints, True)
+ if hints[-1:] == u'+':  # slicing keeps this safe when hints is empty
+ self.open_ended = True
+ self.write_line_break()
+ leading_space = True
+ spaces = False  # the previous character was a space
+ breaks = True  # start as if a break was just seen
+ start = end = 0  # text[start:end] is the run not yet written
+ while end <= len(text):  # one extra pass with ch = None flushes the final run
+ ch = None
+ if end < len(text):
+ ch = text[end]
+ if breaks:
+ if ch is None or ch not in u'\n\x85\u2028\u2029':
+ if not leading_space and ch is not None and ch != u' ' \
+ and text[start] == u'\n':
+ self.write_line_break()
+ leading_space = (ch == u' ')
+ for br in text[start:end]:
+ if br == u'\n':
+ self.write_line_break()
+ else:
+ self.write_line_break(br)
+ if ch is not None:
+ self.write_indent()
+ start = end
+ elif spaces:
+ if ch != u' ':
+ if start+1 == end and self.column > self.best_width:
+ self.write_indent()  # a lone space past best_width is replaced by a line break
+ else:
+ data = text[start:end]
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ start = end
+ else:
+ if ch is None or ch in u' \n\x85\u2028\u2029':
+ data = text[start:end]
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ if ch is None:  # end of text reached: finish the line
+ self.write_line_break()
+ start = end
+ if ch is not None:
+ breaks = (ch in u'\n\x85\u2028\u2029')
+ spaces = (ch == u' ')
+ end += 1
+
+ def write_literal(self, text):  # Write `text` as a block scalar introduced by `|` plus block hints.
+ hints = self.determine_block_hints(text)
+ self.write_indicator(u'|'+hints, True)
+ if hints[-1:] == u'+':  # slicing keeps this safe when hints is empty
+ self.open_ended = True
+ self.write_line_break()
+ breaks = True  # start as if a break was just seen
+ start = end = 0  # text[start:end] is the run not yet written
+ while end <= len(text):  # one extra pass with ch = None flushes the final run
+ ch = None
+ if end < len(text):
+ ch = text[end]
+ if breaks:
+ if ch is None or ch not in u'\n\x85\u2028\u2029':
+ for br in text[start:end]:
+ if br == u'\n':
+ self.write_line_break()
+ else:
+ self.write_line_break(br)
+ if ch is not None:
+ self.write_indent()
+ start = end
+ else:
+ if ch is None or ch in u'\n\x85\u2028\u2029':
+ data = text[start:end]  # NOTE(review): self.column is not advanced here, unlike in write_folded
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ if ch is None:  # end of text reached: finish the line
+ self.write_line_break()
+ start = end
+ if ch is not None:
+ breaks = (ch in u'\n\x85\u2028\u2029')
+ end += 1
+
+ def write_plain(self, text, split=True):  # Write `text` as an unquoted scalar; empty text writes nothing.
+ if self.root_context:
+ self.open_ended = True
+ if not text:
+ return
+ if not self.whitespace:  # insert a separating space first
+ data = u' '
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ self.whitespace = False
+ self.indention = False
+ spaces = False  # the previous character was a space
+ breaks = False  # the previous character was a line break
+ start = end = 0  # text[start:end] is the run not yet written
+ while end <= len(text):  # one extra pass with ch = None flushes the final run
+ ch = None
+ if end < len(text):
+ ch = text[end]
+ if spaces:
+ if ch != u' ':
+ if start+1 == end and self.column > self.best_width and split:
+ self.write_indent()  # a lone space past best_width is replaced by a line break
+ self.whitespace = False
+ self.indention = False
+ else:
+ data = text[start:end]
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ start = end
+ elif breaks:
+ if ch not in u'\n\x85\u2028\u2029':  # NOTE(review): ch is None on the final pass and there is no `ch is None` guard as in write_single_quoted
+ if text[start] == u'\n':
+ self.write_line_break()
+ for br in text[start:end]:
+ if br == u'\n':
+ self.write_line_break()
+ else:
+ self.write_line_break(br)
+ self.write_indent()
+ self.whitespace = False
+ self.indention = False
+ start = end
+ else:
+ if ch is None or ch in u' \n\x85\u2028\u2029':
+ data = text[start:end]
+ self.column += len(data)
+ if self.encoding:
+ data = data.encode(self.encoding)
+ self.stream.write(data)
+ start = end
+ if ch is not None:
+ spaces = (ch == u' ')
+ breaks = (ch in u'\n\x85\u2028\u2029')
+ end += 1
+
diff --git a/tablib/packages/yaml/error.py b/tablib/packages/yaml/error.py
new file mode 100644
index 0000000..577686d
--- /dev/null
+++ b/tablib/packages/yaml/error.py
@@ -0,0 +1,75 @@
+
+__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError']
+
+class Mark(object):  # A position (name, index, line, column) plus an optional buffer/pointer pair used to print a snippet.
+
+ def __init__(self, name, index, line, column, buffer, pointer):  # Store all six arguments unchanged.
+ self.name = name
+ self.index = index
+ self.line = line
+ self.column = column
+ self.buffer = buffer
+ self.pointer = pointer
+
+ def get_snippet(self, indent=4, max_length=75):  # Return the buffer line around `pointer` with a `^` line beneath it, or None without a buffer.
+ if self.buffer is None:
+ return None
+ head = ''
+ start = self.pointer
+ while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029':  # walk back towards the start of the line
+ start -= 1
+ if self.pointer-start > max_length/2-1:  # too far left of the pointer: cut and mark with ' ... '
+ head = ' ... '
+ start += 5
+ break
+ tail = ''
+ end = self.pointer
+ while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029':  # walk forward towards the end of the line
+ end += 1
+ if end-self.pointer > max_length/2-1:  # too far right of the pointer: cut and mark with ' ... '
+ tail = ' ... '
+ end -= 5
+ break
+ snippet = self.buffer[start:end].encode('utf-8')
+ return ' '*indent + head + snippet + tail + '\n' \
+ + ' '*(indent+self.pointer-start+len(head)) + '^'  # the caret is offset to sit under the pointer position
+
+ def __str__(self):  # Format as ` in "<name>", line N, column M`, followed by the snippet when there is one.
+ snippet = self.get_snippet()
+ where = " in \"%s\", line %d, column %d" \
+ % (self.name, self.line+1, self.column+1)  # line and column are reported with 1 added
+ if snippet is not None:
+ where += ":\n"+snippet
+ return where
+
+class YAMLError(Exception):  # Exception base class; MarkedYAMLError in this module derives from it.
+ pass
+
+class MarkedYAMLError(YAMLError):  # Error carrying optional context/problem texts, their marks, and a note.
+
+ def __init__(self, context=None, context_mark=None,  # Store the five optional parts; Exception.__init__ is not called here.
+ problem=None, problem_mark=None, note=None):
+ self.context = context
+ self.context_mark = context_mark
+ self.problem = problem
+ self.problem_mark = problem_mark
+ self.note = note
+
+ def __str__(self):  # Join the parts that are not None, one per line.
+ lines = []
+ if self.context is not None:
+ lines.append(self.context)
+ if self.context_mark is not None \
+ and (self.problem is None or self.problem_mark is None
+ or self.context_mark.name != self.problem_mark.name
+ or self.context_mark.line != self.problem_mark.line
+ or self.context_mark.column != self.problem_mark.column):
+ lines.append(str(self.context_mark))  # left out when a problem mark with the same name, line and column will be shown
+ if self.problem is not None:
+ lines.append(self.problem)
+ if self.problem_mark is not None:
+ lines.append(str(self.problem_mark))
+ if self.note is not None:
+ lines.append(self.note)
+ return '\n'.join(lines)
+
diff --git a/tablib/packages/yaml/events.py b/tablib/packages/yaml/events.py
new file mode 100644
index 0000000..f79ad38
--- /dev/null
+++ b/tablib/packages/yaml/events.py
@@ -0,0 +1,86 @@
+
+# Abstract classes.
+
+class Event(object):  # Base event type holding optional start and end marks.
+ def __init__(self, start_mark=None, end_mark=None):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ def __repr__(self):  # Show the class name with whichever of anchor/tag/implicit/value the instance has.
+ attributes = [key for key in ['anchor', 'tag', 'implicit', 'value']
+ if hasattr(self, key)]  # subclasses define different subsets of these attributes
+ arguments = ', '.join(['%s=%r' % (key, getattr(self, key))
+ for key in attributes])
+ return '%s(%s)' % (self.__class__.__name__, arguments)
+
+class NodeEvent(Event):  # Event that additionally carries an anchor.
+ def __init__(self, anchor, start_mark=None, end_mark=None):  # Sets the fields directly; Event.__init__ is not called.
+ self.anchor = anchor
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+
+class CollectionStartEvent(NodeEvent):  # Base for the sequence-start and mapping-start events.
+ def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None,  # Sets the fields directly; no parent __init__ is called.
+ flow_style=None):
+ self.anchor = anchor
+ self.tag = tag
+ self.implicit = implicit
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.flow_style = flow_style
+
+class CollectionEndEvent(Event):  # Base for the sequence-end and mapping-end events; adds nothing to Event.
+ pass
+
+# Implementations.
+
+class StreamStartEvent(Event):  # Event that additionally carries an optional encoding.
+ def __init__(self, start_mark=None, end_mark=None, encoding=None):  # Sets the fields directly; Event.__init__ is not called.
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.encoding = encoding
+
+class StreamEndEvent(Event):  # Marker subclass; adds nothing to Event.
+ pass
+
+class DocumentStartEvent(Event):  # Event that additionally carries explicit, version and tags.
+ def __init__(self, start_mark=None, end_mark=None,  # Sets the fields directly; Event.__init__ is not called.
+ explicit=None, version=None, tags=None):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.explicit = explicit
+ self.version = version
+ self.tags = tags
+
+class DocumentEndEvent(Event):  # Event that additionally carries the explicit flag.
+ def __init__(self, start_mark=None, end_mark=None,  # Sets the fields directly; Event.__init__ is not called.
+ explicit=None):
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.explicit = explicit
+
+class AliasEvent(NodeEvent):  # Marker subclass; inherits NodeEvent's anchor and marks unchanged.
+ pass
+
+class ScalarEvent(NodeEvent):  # Event carrying a scalar's anchor, tag, implicit flags, value and style.
+ def __init__(self, anchor, tag, implicit, value,  # Sets the fields directly; no parent __init__ is called.
+ start_mark=None, end_mark=None, style=None):
+ self.anchor = anchor
+ self.tag = tag
+ self.implicit = implicit
+ self.value = value
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.style = style
+
+class SequenceStartEvent(CollectionStartEvent):  # Marker subclass; adds nothing to CollectionStartEvent.
+ pass
+
+class SequenceEndEvent(CollectionEndEvent):  # Marker subclass; adds nothing to CollectionEndEvent.
+ pass
+
+class MappingStartEvent(CollectionStartEvent):  # Marker subclass; adds nothing to CollectionStartEvent.
+ pass
+
+class MappingEndEvent(CollectionEndEvent):  # Marker subclass; adds nothing to CollectionEndEvent.
+ pass
+
diff --git a/tablib/packages/yaml/loader.py b/tablib/packages/yaml/loader.py
new file mode 100644
index 0000000..293ff46
--- /dev/null
+++ b/tablib/packages/yaml/loader.py
@@ -0,0 +1,40 @@
+
+__all__ = ['BaseLoader', 'SafeLoader', 'Loader']
+
+from reader import *
+from scanner import *
+from parser import *
+from composer import *
+from constructor import *
+from resolver import *
+
+class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver):  # Loader built from the Base* constructor and resolver.
+
+ def __init__(self, stream):  # Initialise every base class explicitly; only Reader receives `stream`.
+ Reader.__init__(self, stream)
+ Scanner.__init__(self)
+ Parser.__init__(self)
+ Composer.__init__(self)
+ BaseConstructor.__init__(self)
+ BaseResolver.__init__(self)
+
+class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver):  # Loader built from SafeConstructor and Resolver.
+
+ def __init__(self, stream):  # Initialise every base class explicitly; only Reader receives `stream`.
+ Reader.__init__(self, stream)
+ Scanner.__init__(self)
+ Parser.__init__(self)
+ Composer.__init__(self)
+ SafeConstructor.__init__(self)
+ Resolver.__init__(self)
+
+class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver):  # Loader built from Constructor and Resolver.
+
+ def __init__(self, stream):  # Initialise every base class explicitly; only Reader receives `stream`.
+ Reader.__init__(self, stream)
+ Scanner.__init__(self)
+ Parser.__init__(self)
+ Composer.__init__(self)
+ Constructor.__init__(self)
+ Resolver.__init__(self)
+
diff --git a/tablib/packages/yaml/nodes.py b/tablib/packages/yaml/nodes.py
new file mode 100644
index 0000000..c4f070c
--- /dev/null
+++ b/tablib/packages/yaml/nodes.py
@@ -0,0 +1,49 @@
+
+class Node(object):  # Base node holding a tag, a value and start/end marks.
+ def __init__(self, tag, value, start_mark, end_mark):  # All four arguments are required here.
+ self.tag = tag
+ self.value = value
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ def __repr__(self):  # Show the class name, the tag and repr() of the value.
+ value = self.value
+ #if isinstance(value, list):
+ # if len(value) == 0:
+ # value = ''
+ # elif len(value) == 1:
+ # value = '<1 item>'
+ # else:
+ # value = '<%d items>' % len(value)
+ #else:
+ # if len(value) > 75:
+ # value = repr(value[:70]+u' ... ')
+ # else:
+ # value = repr(value)
+ value = repr(value)  # the abbreviating variant above is commented out; the full repr is used
+ return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value)
+
+class ScalarNode(Node):  # Node with id 'scalar' and an additional style attribute.
+ id = 'scalar'
+ def __init__(self, tag, value,  # Marks and style default to None; Node.__init__ is not called.
+ start_mark=None, end_mark=None, style=None):
+ self.tag = tag
+ self.value = value
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.style = style
+
+class CollectionNode(Node):  # Base for SequenceNode and MappingNode; adds a flow_style attribute.
+ def __init__(self, tag, value,  # Marks and flow_style default to None; Node.__init__ is not called.
+ start_mark=None, end_mark=None, flow_style=None):
+ self.tag = tag
+ self.value = value
+ self.start_mark = start_mark
+ self.end_mark = end_mark
+ self.flow_style = flow_style
+
+class SequenceNode(CollectionNode):  # Collection node whose id is 'sequence'.
+ id = 'sequence'
+
+class MappingNode(CollectionNode):  # Collection node whose id is 'mapping'.
+ id = 'mapping'
+
diff --git a/tablib/packages/yaml/parser.py b/tablib/packages/yaml/parser.py
new file mode 100644
index 0000000..b6a7416
--- /dev/null
+++ b/tablib/packages/yaml/parser.py
@@ -0,0 +1,584 @@
+
+# The following YAML grammar is LL(1) and is parsed by a recursive descent
+# parser.
+#
+# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
+# implicit_document ::= block_node DOCUMENT-END*
+# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
+# block_node_or_indentless_sequence ::=
+# ALIAS
+# | properties (block_content | indentless_sequence)?
+# | block_content
+# | indentless_sequence
+# block_node ::= ALIAS
+# | properties block_content?
+# | block_content
+# flow_node ::= ALIAS
+# | properties flow_content?
+# | flow_content
+# properties ::= TAG ANCHOR? | ANCHOR TAG?
+# block_content ::= block_collection | flow_collection | SCALAR
+# flow_content ::= flow_collection | SCALAR
+# block_collection ::= block_sequence | block_mapping
+# flow_collection ::= flow_sequence | flow_mapping
+# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+# indentless_sequence ::= (BLOCK-ENTRY block_node?)+
+# block_mapping ::= BLOCK-MAPPING-START
+# ((KEY block_node_or_indentless_sequence?)?
+# (VALUE block_node_or_indentless_sequence?)?)*
+# BLOCK-END
+# flow_sequence ::= FLOW-SEQUENCE-START
+# (flow_sequence_entry FLOW-ENTRY)*
+# flow_sequence_entry?
+# FLOW-SEQUENCE-END
+# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+# flow_mapping ::= FLOW-MAPPING-START
+# (flow_mapping_entry FLOW-ENTRY)*
+# flow_mapping_entry?
+# FLOW-MAPPING-END
+# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+#
+# FIRST sets:
+#
+# stream: { STREAM-START }
+# explicit_document: { DIRECTIVE DOCUMENT-START }
+# implicit_document: FIRST(block_node)
+# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
+# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
+# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
+# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# block_sequence: { BLOCK-SEQUENCE-START }
+# block_mapping: { BLOCK-MAPPING-START }
+# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
+# indentless_sequence: { BLOCK-ENTRY }
+# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
+# flow_sequence: { FLOW-SEQUENCE-START }
+# flow_mapping: { FLOW-MAPPING-START }
+# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
+# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
+
+__all__ = ['Parser', 'ParserError']
+
+from error import MarkedYAMLError
+from tokens import *
+from events import *
+from scanner import *
+
+class ParserError(MarkedYAMLError):
+ # Error type raised by Parser; adds nothing to MarkedYAMLError.
+ pass
+
+class Parser(object):
+ # Since writing a recursive-descent parser is a straightforward task, we
+ # do not give many comments here.
+
+ # Tag handles that are always available: process_directives() adds each
+ # of them to self.tag_handles unless a directive declared the same handle.
+ DEFAULT_TAGS = {
+ u'!': u'!',
+ u'!!': u'tag:yaml.org,2002:',
+ }
+
+ def __init__(self):
+ # current_event caches the event produced by the last state call until
+ # get_event() hands it out.
+ self.current_event = None
+ self.yaml_version = None
+ self.tag_handles = {}
+ # Stack of state functions to return to once a nested construct ends.
+ self.states = []
+ # Stack of start marks of open collections, used in error messages.
+ self.marks = []
+ # The state function that produces the next event.
+ self.state = self.parse_stream_start
+
+ def check_event(self, *choices):
+ # Check the type of the next event.
+ if self.current_event is None:
+ if self.state:
+ self.current_event = self.state()
+ if self.current_event is not None:
+ if not choices:
+ return True
+ for choice in choices:
+ if isinstance(self.current_event, choice):
+ return True
+ return False
+
+ def peek_event(self):
+ # Get the next event.
+ if self.current_event is None:
+ if self.state:
+ self.current_event = self.state()
+ return self.current_event
+
+ def get_event(self):
+ # Get the next event and proceed further.
+ if self.current_event is None:
+ if self.state:
+ self.current_event = self.state()
+ value = self.current_event
+ self.current_event = None
+ return value
+
+ # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
+ # implicit_document ::= block_node DOCUMENT-END*
+ # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
+
+ def parse_stream_start(self):
+ # Initial state: consume one token and turn it into a StreamStartEvent
+ # carrying the token's marks and encoding.
+
+ # Parse the stream start.
+ token = self.get_token()
+ event = StreamStartEvent(token.start_mark, token.end_mark,
+ encoding=token.encoding)
+
+ # Prepare the next state.
+ self.state = self.parse_implicit_document_start
+
+ return event
+
+ def parse_implicit_document_start(self):
+ # If the next token is not a directive, a document start or the stream
+ # end, open an implicit document; otherwise defer to
+ # parse_document_start().
+
+ # Parse an implicit document.
+ if not self.check_token(DirectiveToken, DocumentStartToken,
+ StreamEndToken):
+ # NOTE: this binds the class-level DEFAULT_TAGS dict itself, not a copy.
+ self.tag_handles = self.DEFAULT_TAGS
+ token = self.peek_token()
+ # The implicit start has zero width at the first content token.
+ start_mark = end_mark = token.start_mark
+ event = DocumentStartEvent(start_mark, end_mark,
+ explicit=False)
+
+ # Prepare the next state.
+ self.states.append(self.parse_document_end)
+ self.state = self.parse_block_node
+
+ return event
+
+ else:
+ return self.parse_document_start()
+
+ def parse_document_start(self):
+
+ # Parse any extra document end indicators.
+ while self.check_token(DocumentEndToken):
+ self.get_token()
+
+ # Parse an explicit document.
+ if not self.check_token(StreamEndToken):
+ token = self.peek_token()
+ start_mark = token.start_mark
+ version, tags = self.process_directives()
+ if not self.check_token(DocumentStartToken):
+ raise ParserError(None, None,
+ "expected '', but found %r"
+ % self.peek_token().id,
+ self.peek_token().start_mark)
+ token = self.get_token()
+ end_mark = token.end_mark
+ event = DocumentStartEvent(start_mark, end_mark,
+ explicit=True, version=version, tags=tags)
+ self.states.append(self.parse_document_end)
+ self.state = self.parse_document_content
+ else:
+ # Parse the end of the stream.
+ token = self.get_token()
+ event = StreamEndEvent(token.start_mark, token.end_mark)
+ assert not self.states
+ assert not self.marks
+ self.state = None
+ return event
+
+ def parse_document_end(self):
+ # Emit a DocumentEndEvent; it is explicit only when a document end
+ # token is actually present (and is then consumed).
+
+ # Parse the document end.
+ token = self.peek_token()
+ start_mark = end_mark = token.start_mark
+ explicit = False
+ if self.check_token(DocumentEndToken):
+ token = self.get_token()
+ end_mark = token.end_mark
+ explicit = True
+ event = DocumentEndEvent(start_mark, end_mark,
+ explicit=explicit)
+
+ # Prepare the next state.
+ self.state = self.parse_document_start
+
+ return event
+
+ def parse_document_content(self):
+ # An explicit document with no content yields an empty scalar;
+ # otherwise the content is parsed as a block node.
+ if self.check_token(DirectiveToken,
+ DocumentStartToken, DocumentEndToken, StreamEndToken):
+ event = self.process_empty_scalar(self.peek_token().start_mark)
+ self.state = self.states.pop()
+ return event
+ else:
+ return self.parse_block_node()
+
+ def process_directives(self):
+ # Consume all directive tokens and return (version, tags): `version` is
+ # the value of the YAML directive or None, `tags` is a copy of the
+ # handles declared by TAG directives or None when there were none.
+ # Raises ParserError on a duplicate YAML directive, a major version
+ # other than 1, or a duplicate tag handle.
+ self.yaml_version = None
+ self.tag_handles = {}
+ while self.check_token(DirectiveToken):
+ token = self.get_token()
+ if token.name == u'YAML':
+ if self.yaml_version is not None:
+ raise ParserError(None, None,
+ "found duplicate YAML directive", token.start_mark)
+ major, minor = token.value
+ if major != 1:
+ raise ParserError(None, None,
+ "found incompatible YAML document (version 1.* is required)",
+ token.start_mark)
+ self.yaml_version = token.value
+ elif token.name == u'TAG':
+ handle, prefix = token.value
+ if handle in self.tag_handles:
+ raise ParserError(None, None,
+ "duplicate tag handle %r" % handle.encode('utf-8'),
+ token.start_mark)
+ self.tag_handles[handle] = prefix
+ # Snapshot the declared handles before the defaults are merged in, so
+ # the returned tags contain only what the document declared.
+ if self.tag_handles:
+ value = self.yaml_version, self.tag_handles.copy()
+ else:
+ value = self.yaml_version, None
+ for key in self.DEFAULT_TAGS:
+ if key not in self.tag_handles:
+ self.tag_handles[key] = self.DEFAULT_TAGS[key]
+ return value
+
+ # block_node_or_indentless_sequence ::= ALIAS
+ # | properties (block_content | indentless_sequence)?
+ # | block_content
+ # | indentless_sequence
+ # block_node ::= ALIAS
+ # | properties block_content?
+ # | block_content
+ # flow_node ::= ALIAS
+ # | properties flow_content?
+ # | flow_content
+ # properties ::= TAG ANCHOR? | ANCHOR TAG?
+ # block_content ::= block_collection | flow_collection | SCALAR
+ # flow_content ::= flow_collection | SCALAR
+ # block_collection ::= block_sequence | block_mapping
+ # flow_collection ::= flow_sequence | flow_mapping
+
+ def parse_block_node(self):
+ # Parse a node with block collections allowed.
+ return self.parse_node(block=True)
+
+ def parse_flow_node(self):
+ # Parse a node with parse_node()'s defaults (block=False).
+ return self.parse_node()
+
+ def parse_block_node_or_indentless_sequence(self):
+ # Parse a block node, additionally accepting an indentless sequence.
+ return self.parse_node(block=True, indentless_sequence=True)
+
+ def parse_node(self, block=False, indentless_sequence=False):
+ # Parse one node and return its first event: an AliasEvent, a
+ # ScalarEvent, or the start event of a sequence or mapping.
+ # `block` allows block collections; `indentless_sequence` additionally
+ # allows a sequence that starts directly with a block entry token.
+ # Raises ParserError for an undefined tag handle or missing content.
+ if self.check_token(AliasToken):
+ token = self.get_token()
+ event = AliasEvent(token.value, token.start_mark, token.end_mark)
+ self.state = self.states.pop()
+ else:
+ anchor = None
+ tag = None
+ start_mark = end_mark = tag_mark = None
+ # Properties: an anchor and a tag may appear in either order.
+ if self.check_token(AnchorToken):
+ token = self.get_token()
+ start_mark = token.start_mark
+ end_mark = token.end_mark
+ anchor = token.value
+ if self.check_token(TagToken):
+ token = self.get_token()
+ tag_mark = token.start_mark
+ end_mark = token.end_mark
+ tag = token.value
+ elif self.check_token(TagToken):
+ token = self.get_token()
+ start_mark = tag_mark = token.start_mark
+ end_mark = token.end_mark
+ tag = token.value
+ if self.check_token(AnchorToken):
+ token = self.get_token()
+ end_mark = token.end_mark
+ anchor = token.value
+ # Resolve a (handle, suffix) tag against the known tag handles.
+ if tag is not None:
+ handle, suffix = tag
+ if handle is not None:
+ if handle not in self.tag_handles:
+ raise ParserError("while parsing a node", start_mark,
+ "found undefined tag handle %r" % handle.encode('utf-8'),
+ tag_mark)
+ tag = self.tag_handles[handle]+suffix
+ else:
+ tag = suffix
+ #if tag == u'!':
+ # raise ParserError("while parsing a node", start_mark,
+ # "found non-specific tag '!'", tag_mark,
+ # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
+ # Without properties the node starts at the next token.
+ if start_mark is None:
+ start_mark = end_mark = self.peek_token().start_mark
+ event = None
+ implicit = (tag is None or tag == u'!')
+ if indentless_sequence and self.check_token(BlockEntryToken):
+ end_mark = self.peek_token().end_mark
+ event = SequenceStartEvent(anchor, tag, implicit,
+ start_mark, end_mark)
+ self.state = self.parse_indentless_sequence_entry
+ else:
+ if self.check_token(ScalarToken):
+ token = self.get_token()
+ end_mark = token.end_mark
+ # For scalars `implicit` is a pair of flags rather than a single
+ # boolean.
+ if (token.plain and tag is None) or tag == u'!':
+ implicit = (True, False)
+ elif tag is None:
+ implicit = (False, True)
+ else:
+ implicit = (False, False)
+ event = ScalarEvent(anchor, tag, implicit, token.value,
+ start_mark, end_mark, style=token.style)
+ self.state = self.states.pop()
+ elif self.check_token(FlowSequenceStartToken):
+ end_mark = self.peek_token().end_mark
+ event = SequenceStartEvent(anchor, tag, implicit,
+ start_mark, end_mark, flow_style=True)
+ self.state = self.parse_flow_sequence_first_entry
+ elif self.check_token(FlowMappingStartToken):
+ end_mark = self.peek_token().end_mark
+ event = MappingStartEvent(anchor, tag, implicit,
+ start_mark, end_mark, flow_style=True)
+ self.state = self.parse_flow_mapping_first_key
+ elif block and self.check_token(BlockSequenceStartToken):
+ end_mark = self.peek_token().start_mark
+ event = SequenceStartEvent(anchor, tag, implicit,
+ start_mark, end_mark, flow_style=False)
+ self.state = self.parse_block_sequence_first_entry
+ elif block and self.check_token(BlockMappingStartToken):
+ end_mark = self.peek_token().start_mark
+ event = MappingStartEvent(anchor, tag, implicit,
+ start_mark, end_mark, flow_style=False)
+ self.state = self.parse_block_mapping_first_key
+ elif anchor is not None or tag is not None:
+ # Empty scalars are allowed even if a tag or an anchor is
+ # specified.
+ event = ScalarEvent(anchor, tag, (implicit, False), u'',
+ start_mark, end_mark)
+ self.state = self.states.pop()
+ else:
+ if block:
+ node = 'block'
+ else:
+ node = 'flow'
+ token = self.peek_token()
+ raise ParserError("while parsing a %s node" % node, start_mark,
+ "expected the node content, but found %r" % token.id,
+ token.start_mark)
+ return event
+
+ # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
+
+ def parse_block_sequence_first_entry(self):
+ # Consume the token that opens the sequence, remember where it started
+ # for error messages, then parse the first entry.
+ token = self.get_token()
+ self.marks.append(token.start_mark)
+ return self.parse_block_sequence_entry()
+
+ def parse_block_sequence_entry(self):
+ if self.check_token(BlockEntryToken):
+ token = self.get_token()
+ if not self.check_token(BlockEntryToken, BlockEndToken):
+ self.states.append(self.parse_block_sequence_entry)
+ return self.parse_block_node()
+ else:
+ self.state = self.parse_block_sequence_entry
+ return self.process_empty_scalar(token.end_mark)
+ if not self.check_token(BlockEndToken):
+ token = self.peek_token()
+ raise ParserError("while parsing a block collection", self.marks[-1],
+ "expected , but found %r" % token.id, token.start_mark)
+ token = self.get_token()
+ event = SequenceEndEvent(token.start_mark, token.end_mark)
+ self.state = self.states.pop()
+ self.marks.pop()
+ return event
+
+ # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
+
+ def parse_indentless_sequence_entry(self):
+ # Parse one entry of an indentless sequence.  No closing token is
+ # consumed: the sequence ends at the first token that is not a block
+ # entry, with a zero-width SequenceEndEvent.
+ if self.check_token(BlockEntryToken):
+ token = self.get_token()
+ if not self.check_token(BlockEntryToken,
+ KeyToken, ValueToken, BlockEndToken):
+ self.states.append(self.parse_indentless_sequence_entry)
+ return self.parse_block_node()
+ else:
+ self.state = self.parse_indentless_sequence_entry
+ return self.process_empty_scalar(token.end_mark)
+ token = self.peek_token()
+ event = SequenceEndEvent(token.start_mark, token.start_mark)
+ self.state = self.states.pop()
+ return event
+
+ # block_mapping ::= BLOCK-MAPPING-START
+ # ((KEY block_node_or_indentless_sequence?)?
+ # (VALUE block_node_or_indentless_sequence?)?)*
+ # BLOCK-END
+
+ def parse_block_mapping_first_key(self):
+ # Consume the token that opens the mapping, remember where it started
+ # for error messages, then parse the first key.
+ token = self.get_token()
+ self.marks.append(token.start_mark)
+ return self.parse_block_mapping_key()
+
+ def parse_block_mapping_key(self):
+ if self.check_token(KeyToken):
+ token = self.get_token()
+ if not self.check_token(KeyToken, ValueToken, BlockEndToken):
+ self.states.append(self.parse_block_mapping_value)
+ return self.parse_block_node_or_indentless_sequence()
+ else:
+ self.state = self.parse_block_mapping_value
+ return self.process_empty_scalar(token.end_mark)
+ if not self.check_token(BlockEndToken):
+ token = self.peek_token()
+ raise ParserError("while parsing a block mapping", self.marks[-1],
+ "expected , but found %r" % token.id, token.start_mark)
+ token = self.get_token()
+ event = MappingEndEvent(token.start_mark, token.end_mark)
+ self.state = self.states.pop()
+ self.marks.pop()
+ return event
+
+ def parse_block_mapping_value(self):
+ # Parse the value that belongs to the key just produced.  A missing
+ # value token, or a value token without content, yields an empty scalar.
+ if self.check_token(ValueToken):
+ token = self.get_token()
+ if not self.check_token(KeyToken, ValueToken, BlockEndToken):
+ self.states.append(self.parse_block_mapping_key)
+ return self.parse_block_node_or_indentless_sequence()
+ else:
+ self.state = self.parse_block_mapping_key
+ return self.process_empty_scalar(token.end_mark)
+ else:
+ self.state = self.parse_block_mapping_key
+ token = self.peek_token()
+ return self.process_empty_scalar(token.start_mark)
+
+ # flow_sequence ::= FLOW-SEQUENCE-START
+ # (flow_sequence_entry FLOW-ENTRY)*
+ # flow_sequence_entry?
+ # FLOW-SEQUENCE-END
+ # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+ #
+ # Note that while production rules for both flow_sequence_entry and
+ # flow_mapping_entry are equal, their interpretations are different.
+ # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
+ # generate an inline mapping (set syntax).
+
+ def parse_flow_sequence_first_entry(self):
+ # Consume the token that opens the flow sequence, remember where it
+ # started, then parse the first entry (no separator is expected yet).
+ token = self.get_token()
+ self.marks.append(token.start_mark)
+ return self.parse_flow_sequence_entry(first=True)
+
+ def parse_flow_sequence_entry(self, first=False):
+ # Parse the next entry of a flow sequence, or close the sequence.
+ # Every entry but the first must be preceded by a flow entry token;
+ # otherwise ParserError is raised.  An entry that starts with a key
+ # token opens an inline single-pair mapping.
+ if not self.check_token(FlowSequenceEndToken):
+ if not first:
+ if self.check_token(FlowEntryToken):
+ self.get_token()
+ else:
+ token = self.peek_token()
+ raise ParserError("while parsing a flow sequence", self.marks[-1],
+ "expected ',' or ']', but got %r" % token.id, token.start_mark)
+
+ if self.check_token(KeyToken):
+ # The key token is only peeked here; it is consumed by
+ # parse_flow_sequence_entry_mapping_key().
+ token = self.peek_token()
+ event = MappingStartEvent(None, None, True,
+ token.start_mark, token.end_mark,
+ flow_style=True)
+ self.state = self.parse_flow_sequence_entry_mapping_key
+ return event
+ elif not self.check_token(FlowSequenceEndToken):
+ self.states.append(self.parse_flow_sequence_entry)
+ return self.parse_flow_node()
+ token = self.get_token()
+ event = SequenceEndEvent(token.start_mark, token.end_mark)
+ self.state = self.states.pop()
+ self.marks.pop()
+ return event
+
+ def parse_flow_sequence_entry_mapping_key(self):
+ # Consume the key token of an inline mapping and parse its key node;
+ # a key without content yields an empty scalar.
+ token = self.get_token()
+ if not self.check_token(ValueToken,
+ FlowEntryToken, FlowSequenceEndToken):
+ self.states.append(self.parse_flow_sequence_entry_mapping_value)
+ return self.parse_flow_node()
+ else:
+ self.state = self.parse_flow_sequence_entry_mapping_value
+ return self.process_empty_scalar(token.end_mark)
+
+ def parse_flow_sequence_entry_mapping_value(self):
+ # Parse the value of an inline mapping; a missing value token, or one
+ # without content, yields an empty scalar.
+ if self.check_token(ValueToken):
+ token = self.get_token()
+ if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
+ self.states.append(self.parse_flow_sequence_entry_mapping_end)
+ return self.parse_flow_node()
+ else:
+ self.state = self.parse_flow_sequence_entry_mapping_end
+ return self.process_empty_scalar(token.end_mark)
+ else:
+ self.state = self.parse_flow_sequence_entry_mapping_end
+ token = self.peek_token()
+ return self.process_empty_scalar(token.start_mark)
+
+ def parse_flow_sequence_entry_mapping_end(self):
+ # Close the inline mapping with a zero-width MappingEndEvent (no token
+ # is consumed) and continue with the enclosing flow sequence.
+ self.state = self.parse_flow_sequence_entry
+ token = self.peek_token()
+ return MappingEndEvent(token.start_mark, token.start_mark)
+
+ # flow_mapping ::= FLOW-MAPPING-START
+ # (flow_mapping_entry FLOW-ENTRY)*
+ # flow_mapping_entry?
+ # FLOW-MAPPING-END
+ # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
+
+ def parse_flow_mapping_first_key(self):
+ # Consume the token that opens the flow mapping, remember where it
+ # started, then parse the first key (no separator is expected yet).
+ token = self.get_token()
+ self.marks.append(token.start_mark)
+ return self.parse_flow_mapping_key(first=True)
+
+ def parse_flow_mapping_key(self, first=False):
+ # Parse the next key of a flow mapping, or close the mapping.
+ # Every entry but the first must be preceded by a flow entry token;
+ # otherwise ParserError is raised.
+ if not self.check_token(FlowMappingEndToken):
+ if not first:
+ if self.check_token(FlowEntryToken):
+ self.get_token()
+ else:
+ token = self.peek_token()
+ raise ParserError("while parsing a flow mapping", self.marks[-1],
+ "expected ',' or '}', but got %r" % token.id, token.start_mark)
+ if self.check_token(KeyToken):
+ token = self.get_token()
+ if not self.check_token(ValueToken,
+ FlowEntryToken, FlowMappingEndToken):
+ self.states.append(self.parse_flow_mapping_value)
+ return self.parse_flow_node()
+ else:
+ self.state = self.parse_flow_mapping_value
+ return self.process_empty_scalar(token.end_mark)
+ elif not self.check_token(FlowMappingEndToken):
+ # A node without a key token is taken as a key whose value is
+ # empty.
+ self.states.append(self.parse_flow_mapping_empty_value)
+ return self.parse_flow_node()
+ token = self.get_token()
+ event = MappingEndEvent(token.start_mark, token.end_mark)
+ self.state = self.states.pop()
+ self.marks.pop()
+ return event
+
+ def parse_flow_mapping_value(self):
+ # Parse the value that belongs to the key just produced; a missing
+ # value token, or one without content, yields an empty scalar.
+ if self.check_token(ValueToken):
+ token = self.get_token()
+ if not self.check_token(FlowEntryToken, FlowMappingEndToken):
+ self.states.append(self.parse_flow_mapping_key)
+ return self.parse_flow_node()
+ else:
+ self.state = self.parse_flow_mapping_key
+ return self.process_empty_scalar(token.end_mark)
+ else:
+ self.state = self.parse_flow_mapping_key
+ token = self.peek_token()
+ return self.process_empty_scalar(token.start_mark)
+
+ def parse_flow_mapping_empty_value(self):
+ # Supply the empty value for a flow mapping entry that had no key
+ # token, then go back to parsing keys.
+ self.state = self.parse_flow_mapping_key
+ return self.process_empty_scalar(self.peek_token().start_mark)
+
+ def process_empty_scalar(self, mark):
+ # Build a zero-width ScalarEvent at `mark` with no anchor, no tag and
+ # an empty value.
+ return ScalarEvent(None, None, (True, False), u'', mark, mark)
+
diff --git a/tablib/packages/yaml/reader.py b/tablib/packages/yaml/reader.py
new file mode 100644
index 0000000..1e7a4db
--- /dev/null
+++ b/tablib/packages/yaml/reader.py
@@ -0,0 +1,225 @@
+# This module contains abstractions for the input stream. You don't have to
+# look any further; there is no pretty code here.
+#
+# We define two classes here.
+#
+# Mark(source, line, column)
+# It's just a record and its only use is producing nice error messages.
+# Parser does not use it for any other purposes.
+#
+# Reader(stream)
+# Reader determines the encoding of the input and converts it to unicode.
+# Reader provides the following methods and attributes:
+# reader.peek(index=0) - return the character `index` positions ahead.
+# reader.forward(length=1) - move the current position forward by `length` characters.
+# reader.index - the number of the current character.
+# reader.line, reader.column - the line and the column of the current character.
+
+__all__ = ['Reader', 'ReaderError']
+
+from error import YAMLError, Mark
+
+import codecs, re
+
+# Unfortunately, the codec functions in Python 2.3 do not support the `finish`
+# argument, so we have to write our own wrappers.
+
+try:
+ codecs.utf_8_decode('', 'strict', False)
+ from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode
+
+except TypeError:
+
+ def utf_16_le_decode(data, errors, finish=False):
+ if not finish and len(data) % 2 == 1:
+ data = data[:-1]
+ return codecs.utf_16_le_decode(data, errors)
+
+ def utf_16_be_decode(data, errors, finish=False):
+ if not finish and len(data) % 2 == 1:
+ data = data[:-1]
+ return codecs.utf_16_be_decode(data, errors)
+
+ def utf_8_decode(data, errors, finish=False):
+ if not finish:
+ # We are trying to remove a possible incomplete multibyte character
+ # from the suffix of the data.
+ # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd.
+ # All further bytes are in the range 0x80 to 0xbf.
+ # UTF-8 encoded UCS characters may be up to six bytes long.
+ count = 0
+ while count < 5 and count < len(data) \
+ and '\x80' <= data[-count-1] <= '\xBF':
+ count -= 1
+ if count < 5 and count < len(data) \
+ and '\xC0' <= data[-count-1] <= '\xFD':
+ data = data[:-count-1]
+ return codecs.utf_8_decode(data, errors)
+
+class ReaderError(YAMLError):
+ # Raised by Reader for undecodable input and for characters rejected by
+ # check_printable().
+
+ def __init__(self, name, position, character, encoding, reason):
+ self.name = name
+ self.character = character
+ self.position = position
+ self.encoding = encoding
+ self.reason = reason
+
+ def __str__(self):
+ # `character` is a str when it is the offending byte of a failed
+ # decode; otherwise it is formatted as a numeric code point.
+ if isinstance(self.character, str):
+ return "'%s' codec can't decode byte #x%02x: %s\n" \
+ " in \"%s\", position %d" \
+ % (self.encoding, ord(self.character), self.reason,
+ self.name, self.position)
+ else:
+ return "unacceptable character #x%04x: %s\n" \
+ " in \"%s\", position %d" \
+ % (self.character, self.reason,
+ self.name, self.position)
+
+class Reader(object):
+ # Reader:
+ # - determines the data encoding and converts it to unicode,
+ # - checks if characters are in allowed range,
+ # - adds '\0' to the end.
+
+ # Reader accepts
+ # - a `str` object,
+ # - a `unicode` object,
+ # - a file-like object with its `read` method returning `str`,
+ # - a file-like object with its `read` method returning `unicode`.
+
+ # Yeah, it's ugly and slow.
+
+ def __init__(self, stream):
+ self.name = None
+ self.stream = None
+ self.stream_pointer = 0
+ self.eof = True
+ self.buffer = u''
+ self.pointer = 0
+ self.raw_buffer = None
+ self.raw_decode = None
+ self.encoding = None
+ self.index = 0
+ self.line = 0
+ self.column = 0
+ if isinstance(stream, unicode):
+ self.name = ""
+ self.check_printable(stream)
+ self.buffer = stream+u'\0'
+ elif isinstance(stream, str):
+ self.name = ""
+ self.raw_buffer = stream
+ self.determine_encoding()
+ else:
+ self.stream = stream
+ self.name = getattr(stream, 'name', "")
+ self.eof = False
+ self.raw_buffer = ''
+ self.determine_encoding()
+
+ def peek(self, index=0):
+ # Return the character `index` positions past the pointer, refilling
+ # the buffer once if it is not available yet.
+ try:
+ return self.buffer[self.pointer+index]
+ except IndexError:
+ self.update(index+1)
+ return self.buffer[self.pointer+index]
+
+ def prefix(self, length=1):
+ if self.pointer+length >= len(self.buffer):
+ self.update(length)
+ return self.buffer[self.pointer:self.pointer+length]
+
+ def forward(self, length=1):
+ # Advance by `length` characters, keeping index, line and column
+ # up to date.
+ # One extra character is requested so the '\r' check below can look at
+ # the character after the one being consumed.
+ if self.pointer+length+1 >= len(self.buffer):
+ self.update(length+1)
+ while length:
+ ch = self.buffer[self.pointer]
+ self.pointer += 1
+ self.index += 1
+ # A '\r' followed by '\n' is not counted here; the '\n' is counted
+ # when it is consumed.
+ if ch in u'\n\x85\u2028\u2029' \
+ or (ch == u'\r' and self.buffer[self.pointer] != u'\n'):
+ self.line += 1
+ self.column = 0
+ # u'\uFEFF' does not advance the column.
+ elif ch != u'\uFEFF':
+ self.column += 1
+ length -= 1
+
+ def get_mark(self):
+ if self.stream is None:
+ return Mark(self.name, self.index, self.line, self.column,
+ self.buffer, self.pointer)
+ else:
+ return Mark(self.name, self.index, self.line, self.column,
+ None, None)
+
+ def determine_encoding(self):
+ # Pick the decoder from a UTF-16 byte order mark, defaulting to UTF-8,
+ # then load the first character.
+ # At least two raw units are needed to recognise a byte order mark.
+ while not self.eof and len(self.raw_buffer) < 2:
+ self.update_raw()
+ # If the raw data is already unicode, raw_decode stays None and
+ # update() uses the data as is.
+ if not isinstance(self.raw_buffer, unicode):
+ if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):
+ self.raw_decode = utf_16_le_decode
+ self.encoding = 'utf-16-le'
+ elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE):
+ self.raw_decode = utf_16_be_decode
+ self.encoding = 'utf-16-be'
+ else:
+ self.raw_decode = utf_8_decode
+ self.encoding = 'utf-8'
+ self.update(1)
+
+ # Matches any single character outside the ranges the reader accepts.
+ NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
+ def check_printable(self, data):
+ # Raise ReaderError for the first character of `data` that matches
+ # NON_PRINTABLE; return None otherwise.
+ match = self.NON_PRINTABLE.search(data)
+ if match:
+ character = match.group()
+ # `data` has not been appended yet: its position is the current index
+ # plus the unread part of the buffer plus the offset within `data`.
+ position = self.index+(len(self.buffer)-self.pointer)+match.start()
+ raise ReaderError(self.name, position, ord(character),
+ 'unicode', "special characters are not allowed")
+
+ def update(self, length):
+ # Make at least `length` characters available from the pointer on,
+ # decoding more raw data as needed.  At end of input a '\0' terminator
+ # is appended and raw_buffer is set to None, which turns later calls
+ # into no-ops.
+ if self.raw_buffer is None:
+ return
+ # Drop the characters that were already consumed.
+ self.buffer = self.buffer[self.pointer:]
+ self.pointer = 0
+ while len(self.buffer) < length:
+ if not self.eof:
+ self.update_raw()
+ if self.raw_decode is not None:
+ try:
+ data, converted = self.raw_decode(self.raw_buffer,
+ 'strict', self.eof)
+ except UnicodeDecodeError, exc:
+ character = exc.object[exc.start]
+ # For streams, report the position relative to everything read so
+ # far, not just to the current raw buffer.
+ if self.stream is not None:
+ position = self.stream_pointer-len(self.raw_buffer)+exc.start
+ else:
+ position = exc.start
+ raise ReaderError(self.name, position, character,
+ exc.encoding, exc.reason)
+ else:
+ data = self.raw_buffer
+ converted = len(data)
+ self.check_printable(data)
+ self.buffer += data
+ # Keep whatever the decoder did not consume for the next round.
+ self.raw_buffer = self.raw_buffer[converted:]
+ if self.eof:
+ self.buffer += u'\0'
+ self.raw_buffer = None
+ break
+
+ def update_raw(self, size=1024):
+ data = self.stream.read(size)
+ if data:
+ self.raw_buffer += data
+ self.stream_pointer += len(data)
+ else:
+ self.eof = True
+
+#try:
+# import psyco
+# psyco.bind(Reader)
+#except ImportError:
+# pass
+
diff --git a/tablib/packages/yaml/representer.py b/tablib/packages/yaml/representer.py
new file mode 100644
index 0000000..f5606ec
--- /dev/null
+++ b/tablib/packages/yaml/representer.py
@@ -0,0 +1,489 @@
+
+__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer',
+ 'RepresenterError']
+
+from error import *
+from nodes import *
+
+import datetime
+
+try:
+ set
+except NameError:
+ from sets import Set as set
+
+import sys, copy_reg, types
+
+class RepresenterError(YAMLError):
+    """Raised by the representers in this module when an object cannot be represented."""
+    pass
+
+class BaseRepresenter(object):
+
+ yaml_representers = {}
+ yaml_multi_representers = {}
+
+ def __init__(self, default_style=None, default_flow_style=None):
+ self.default_style = default_style
+ self.default_flow_style = default_flow_style
+ self.represented_objects = {}
+ self.object_keeper = []
+ self.alias_key = None
+
+ def represent(self, data):
+ node = self.represent_data(data)
+ self.serialize(node)
+ self.represented_objects = {}
+ self.object_keeper = []
+ self.alias_key = None
+
+    def get_classobj_bases(self, cls):
+        """Return `cls` followed by all of its ancestors, depth-first.
+
+        Bases are visited in `__bases__` order. A class reachable through
+        several inheritance paths appears once per path.
+        """
+        collected = [cls]
+        for parent in cls.__bases__:
+            collected += self.get_classobj_bases(parent)
+        return collected
+
+    def represent_data(self, data):
+        """Return the node that represents `data`.
+
+        Unless ignore_aliases() says otherwise, `id(data)` is used as the
+        alias key; if a node was already stored under that key it is
+        returned as is. Otherwise a representer is looked up in this order:
+
+        1. `yaml_representers`, keyed by the first entry of the type list
+           (the exact type of `data`, or its class for classic instances);
+        2. `yaml_multi_representers`, first match along the type list;
+        3. the `None` entry of `yaml_multi_representers`;
+        4. the `None` entry of `yaml_representers`;
+        5. a plain ScalarNode built from `unicode(data)`.
+        """
+        if self.ignore_aliases(data):
+            self.alias_key = None
+        else:
+            self.alias_key = id(data)
+        if self.alias_key is not None:
+            if self.alias_key in self.represented_objects:
+                node = self.represented_objects[self.alias_key]
+                #if node is None:
+                #    raise RepresenterError("recursive objects are not allowed: %r" % data)
+                return node
+            #self.represented_objects[alias_key] = None
+            # Hold a reference to the object; presumably so that its id()
+            # cannot be reused by another object while it serves as a key.
+            self.object_keeper.append(data)
+        data_types = type(data).__mro__
+        if type(data) is types.InstanceType:
+            # Classic-class instance: put its class hierarchy first.
+            data_types = self.get_classobj_bases(data.__class__)+list(data_types)
+        if data_types[0] in self.yaml_representers:
+            node = self.yaml_representers[data_types[0]](self, data)
+        else:
+            for data_type in data_types:
+                if data_type in self.yaml_multi_representers:
+                    node = self.yaml_multi_representers[data_type](self, data)
+                    break
+            else:
+                # No multi-representer matched any type in the list.
+                if None in self.yaml_multi_representers:
+                    node = self.yaml_multi_representers[None](self, data)
+                elif None in self.yaml_representers:
+                    node = self.yaml_representers[None](self, data)
+                else:
+                    node = ScalarNode(None, unicode(data))
+        #if alias_key is not None:
+        #    self.represented_objects[alias_key] = node
+        return node
+
+    def add_representer(cls, data_type, representer):
+        """Register `representer` under `data_type` in `cls.yaml_representers`.
+
+        If `cls` has no `yaml_representers` of its own yet, the inherited
+        dictionary is copied first, so the registration does not modify the
+        dictionary the class inherited.
+        """
+        if not 'yaml_representers' in cls.__dict__:
+            cls.yaml_representers = cls.yaml_representers.copy()
+        cls.yaml_representers[data_type] = representer
+    add_representer = classmethod(add_representer)
+
+    def add_multi_representer(cls, data_type, representer):
+        """Register `representer` under `data_type` in `cls.yaml_multi_representers`.
+
+        If `cls` has no `yaml_multi_representers` of its own yet, the
+        inherited dictionary is copied first, so the registration does not
+        modify the dictionary the class inherited.
+        """
+        if not 'yaml_multi_representers' in cls.__dict__:
+            cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
+        cls.yaml_multi_representers[data_type] = representer
+    add_multi_representer = classmethod(add_multi_representer)
+
+ def represent_scalar(self, tag, value, style=None):
+ if style is None:
+ style = self.default_style
+ node = ScalarNode(tag, value, style=style)
+ if self.alias_key is not None:
+ self.represented_objects[self.alias_key] = node
+ return node
+
+    def represent_sequence(self, tag, sequence, flow_style=None):
+        """Build a SequenceNode tagged `tag` from the items of `sequence`.
+
+        When `flow_style` is None the node's flow style is taken from
+        `self.default_flow_style` if that is set; otherwise flow style is
+        used only when every item is a ScalarNode without an explicit style.
+        """
+        value = []
+        node = SequenceNode(tag, value, flow_style=flow_style)
+        # Register the node before descending into the items, so the alias
+        # key captured here is not lost to the nested represent_data() calls.
+        if self.alias_key is not None:
+            self.represented_objects[self.alias_key] = node
+        best_style = True
+        for item in sequence:
+            node_item = self.represent_data(item)
+            if not (isinstance(node_item, ScalarNode) and not node_item.style):
+                best_style = False
+            value.append(node_item)
+        if flow_style is None:
+            if self.default_flow_style is not None:
+                node.flow_style = self.default_flow_style
+            else:
+                node.flow_style = best_style
+        return node
+
+    def represent_mapping(self, tag, mapping, flow_style=None):
+        """Build a MappingNode tagged `tag` from `mapping`.
+
+        `mapping` may be an object with an `items()` method, in which case
+        its items are sorted before being represented, or an iterable of
+        `(key, value)` pairs, which is used in the given order.
+
+        When `flow_style` is None the node's flow style is taken from
+        `self.default_flow_style` if that is set; otherwise flow style is
+        used only when every key and value is a ScalarNode without an
+        explicit style.
+        """
+        value = []
+        node = MappingNode(tag, value, flow_style=flow_style)
+        # Register the node before descending into keys and values, so the
+        # alias key captured here is not lost to nested represent_data() calls.
+        if self.alias_key is not None:
+            self.represented_objects[self.alias_key] = node
+        best_style = True
+        if hasattr(mapping, 'items'):
+            mapping = mapping.items()
+            mapping.sort()
+        for item_key, item_value in mapping:
+            node_key = self.represent_data(item_key)
+            node_value = self.represent_data(item_value)
+            if not (isinstance(node_key, ScalarNode) and not node_key.style):
+                best_style = False
+            if not (isinstance(node_value, ScalarNode) and not node_value.style):
+                best_style = False
+            value.append((node_key, node_value))
+        if flow_style is None:
+            if self.default_flow_style is not None:
+                node.flow_style = self.default_flow_style
+            else:
+                node.flow_style = best_style
+        return node
+
+ def ignore_aliases(self, data):
+ return False
+
+class SafeRepresenter(BaseRepresenter):
+
+    def ignore_aliases(self, data):
+        """Return True if `data` must never be emitted through an anchor/alias.
+
+        That is the case for None, the empty tuple and instances of str,
+        unicode, bool, int and float. Every other object returns False.
+
+        None is tested by identity and the empty tuple by type first, so no
+        `==` comparison is ever run against an arbitrary object. A membership
+        test such as `data in [None, ()]` would call the object's `__eq__`,
+        which may raise or return a non-boolean for some types.
+        """
+        if data is None:
+            return True
+        if isinstance(data, tuple) and data == ():
+            return True
+        if isinstance(data, (str, unicode, bool, int, float)):
+            return True
+        return False
+
+ def represent_none(self, data):
+ return self.represent_scalar(u'tag:yaml.org,2002:null',
+ u'null')
+
+    def represent_str(self, data):
+        """Represent a byte string as a string scalar or a binary scalar.
+
+        The data is decoded as ASCII, then as UTF-8; if either succeeds the
+        result is tagged `tag:yaml.org,2002:str`. Otherwise the bytes are
+        base64-encoded, tagged `tag:yaml.org,2002:binary` and given the
+        '|' scalar style.
+        """
+        tag = None
+        style = None
+        try:
+            data = unicode(data, 'ascii')
+            tag = u'tag:yaml.org,2002:str'
+        except UnicodeDecodeError:
+            try:
+                data = unicode(data, 'utf-8')
+                tag = u'tag:yaml.org,2002:str'
+            except UnicodeDecodeError:
+                data = data.encode('base64')
+                tag = u'tag:yaml.org,2002:binary'
+                style = '|'
+        return self.represent_scalar(tag, data, style=style)
+
+ def represent_unicode(self, data):
+ return self.represent_scalar(u'tag:yaml.org,2002:str', data)
+
+ def represent_bool(self, data):
+ if data:
+ value = u'true'
+ else:
+ value = u'false'
+ return self.represent_scalar(u'tag:yaml.org,2002:bool', value)
+
+ def represent_int(self, data):
+ return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
+
+ def represent_long(self, data):
+ return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))
+
+ inf_value = 1e300
+ while repr(inf_value) != repr(inf_value*inf_value):
+ inf_value *= inf_value
+
+    def represent_float(self, data):
+        """Represent a float as a `tag:yaml.org,2002:float` scalar.
+
+        Produces `.nan`, `.inf` or `-.inf` for the special values and the
+        lower-cased `repr()` of the number otherwise.
+        """
+        # NaN is detected as a value that differs from itself. The second
+        # test catches a value that equals both 0.0 and 1.0; presumably a
+        # guard for platforms where NaN compares equal to everything.
+        if data != data or (data == 0.0 and data == 1.0):
+            value = u'.nan'
+        elif data == self.inf_value:
+            value = u'.inf'
+        elif data == -self.inf_value:
+            value = u'-.inf'
+        else:
+            value = unicode(repr(data)).lower()
+            # Note that in some cases `repr(data)` represents a float number
+            # without the decimal parts. For instance:
+            #   >>> repr(1e17)
+            #   '1e17'
+            # Unfortunately, this is not a valid float representation according
+            # to the definition of the `!!float` tag. We fix this by adding
+            # '.0' before the 'e' symbol.
+            if u'.' not in value and u'e' in value:
+                value = value.replace(u'e', u'.0e', 1)
+        return self.represent_scalar(u'tag:yaml.org,2002:float', value)
+
+ def represent_list(self, data):
+ #pairs = (len(data) > 0 and isinstance(data, list))
+ #if pairs:
+ # for item in data:
+ # if not isinstance(item, tuple) or len(item) != 2:
+ # pairs = False
+ # break
+ #if not pairs:
+ return self.represent_sequence(u'tag:yaml.org,2002:seq', data)
+ #value = []
+ #for item_key, item_value in data:
+ # value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
+ # [(item_key, item_value)]))
+ #return SequenceNode(u'tag:yaml.org,2002:pairs', value)
+
+ def represent_dict(self, data):
+ return self.represent_mapping(u'tag:yaml.org,2002:map', data)
+
+ def represent_set(self, data):
+ value = {}
+ for key in data:
+ value[key] = None
+ return self.represent_mapping(u'tag:yaml.org,2002:set', value)
+
+ def represent_date(self, data):
+ value = unicode(data.isoformat())
+ return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
+
+ def represent_datetime(self, data):
+ value = unicode(data.isoformat(' '))
+ return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)
+
+ def represent_yaml_object(self, tag, data, cls, flow_style=None):
+ if hasattr(data, '__getstate__'):
+ state = data.__getstate__()
+ else:
+ state = data.__dict__.copy()
+ return self.represent_mapping(tag, state, flow_style=flow_style)
+
+ def represent_undefined(self, data):
+ raise RepresenterError("cannot represent an object: %s" % data)
+
+SafeRepresenter.add_representer(type(None),
+ SafeRepresenter.represent_none)
+
+SafeRepresenter.add_representer(str,
+ SafeRepresenter.represent_str)
+
+SafeRepresenter.add_representer(unicode,
+ SafeRepresenter.represent_unicode)
+
+SafeRepresenter.add_representer(bool,
+ SafeRepresenter.represent_bool)
+
+SafeRepresenter.add_representer(int,
+ SafeRepresenter.represent_int)
+
+SafeRepresenter.add_representer(long,
+ SafeRepresenter.represent_long)
+
+SafeRepresenter.add_representer(float,
+ SafeRepresenter.represent_float)
+
+SafeRepresenter.add_representer(list,
+ SafeRepresenter.represent_list)
+
+SafeRepresenter.add_representer(tuple,
+ SafeRepresenter.represent_list)
+
+SafeRepresenter.add_representer(dict,
+ SafeRepresenter.represent_dict)
+
+SafeRepresenter.add_representer(set,
+ SafeRepresenter.represent_set)
+
+SafeRepresenter.add_representer(datetime.date,
+ SafeRepresenter.represent_date)
+
+SafeRepresenter.add_representer(datetime.datetime,
+ SafeRepresenter.represent_datetime)
+
+SafeRepresenter.add_representer(None,
+ SafeRepresenter.represent_undefined)
+
+class Representer(SafeRepresenter):
+
+ def represent_str(self, data):
+ tag = None
+ style = None
+ try:
+ data = unicode(data, 'ascii')
+ tag = u'tag:yaml.org,2002:str'
+ except UnicodeDecodeError:
+ try:
+ data = unicode(data, 'utf-8')
+ tag = u'tag:yaml.org,2002:python/str'
+ except UnicodeDecodeError:
+ data = data.encode('base64')
+ tag = u'tag:yaml.org,2002:binary'
+ style = '|'
+ return self.represent_scalar(tag, data, style=style)
+
+ def represent_unicode(self, data):
+ tag = None
+ try:
+ data.encode('ascii')
+ tag = u'tag:yaml.org,2002:python/unicode'
+ except UnicodeEncodeError:
+ tag = u'tag:yaml.org,2002:str'
+ return self.represent_scalar(tag, data)
+
+ def represent_long(self, data):
+ tag = u'tag:yaml.org,2002:int'
+ if int(data) is not data:
+ tag = u'tag:yaml.org,2002:python/long'
+ return self.represent_scalar(tag, unicode(data))
+
+    def represent_complex(self, data):
+        """Represent a complex number as a `python/complex` scalar.
+
+        A zero imaginary part yields just the real part, a zero real part
+        yields just the imaginary part with a `j` suffix, and otherwise both
+        parts are written with an explicit sign between them.
+        """
+        real, imag = data.real, data.imag
+        if imag == 0.0:
+            text = u'%r' % real
+        elif real == 0.0:
+            text = u'%rj' % imag
+        elif imag > 0:
+            text = u'%r+%rj' % (real, imag)
+        else:
+            # A negative imaginary part already carries its own '-' sign.
+            text = u'%r%rj' % (real, imag)
+        return self.represent_scalar(u'tag:yaml.org,2002:python/complex', text)
+
+ def represent_tuple(self, data):
+ return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data)
+
+ def represent_name(self, data):
+ name = u'%s.%s' % (data.__module__, data.__name__)
+ return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'')
+
+ def represent_module(self, data):
+ return self.represent_scalar(
+ u'tag:yaml.org,2002:python/module:'+data.__name__, u'')
+
+    def represent_instance(self, data):
+        """Represent an instance of a classic class.
+
+        Returns a `python/object` mapping of the state when there are no
+        init args and the state is a dictionary, a `python/object/new`
+        sequence of the init args when the state is an empty dictionary, and
+        otherwise a `python/object/new` mapping with `args` and `state` keys.
+        """
+        # For instances of classic classes, we use __getinitargs__ and
+        # __getstate__ to serialize the data.
+
+        # If data.__getinitargs__ exists, the object must be reconstructed by
+        # calling cls(*args), where args is a tuple returned by
+        # __getinitargs__. Otherwise, the cls.__init__ method should never be
+        # called and the class instance is created by instantiating a trivial
+        # class and assigning to the instance's __class__ variable.
+
+        # If data.__getstate__ exists, it returns the state of the object.
+        # Otherwise, the state of the object is data.__dict__.
+
+        # We produce either a !!python/object or !!python/object/new node.
+        # If data.__getinitargs__ does not exist and state is a dictionary, we
+        # produce a !!python/object node. Otherwise we produce a
+        # !!python/object/new node.
+
+        cls = data.__class__
+        class_name = u'%s.%s' % (cls.__module__, cls.__name__)
+        args = None
+        state = None
+        if hasattr(data, '__getinitargs__'):
+            args = list(data.__getinitargs__())
+        if hasattr(data, '__getstate__'):
+            state = data.__getstate__()
+        else:
+            state = data.__dict__
+        if args is None and isinstance(state, dict):
+            return self.represent_mapping(
+                    u'tag:yaml.org,2002:python/object:'+class_name, state)
+        if isinstance(state, dict) and not state:
+            return self.represent_sequence(
+                    u'tag:yaml.org,2002:python/object/new:'+class_name, args)
+        value = {}
+        if args:
+            value['args'] = args
+        value['state'] = state
+        return self.represent_mapping(
+                u'tag:yaml.org,2002:python/object/new:'+class_name, value)
+
+    def represent_object(self, data):
+        """Represent an arbitrary object through the `__reduce__` protocol.
+
+        The reduction is taken from `copy_reg.dispatch_table` when the type
+        is registered there, else from `__reduce_ex__(2)`, else from
+        `__reduce__()`.
+
+        Raises RepresenterError if none of the three is available.
+        """
+        # We use __reduce__ API to save the data. data.__reduce__ returns
+        # a tuple of length 2-5:
+        #   (function, args, state, listitems, dictitems)
+
+        # For reconstructing, we call function(*args), then set its state,
+        # listitems, and dictitems if they are not None.
+
+        # A special case is when function.__name__ == '__newobj__'. In this
+        # case we create the object with args[0].__new__(*args).
+
+        # Another special case is when __reduce__ returns a string - we don't
+        # support it.
+
+        # We produce a !!python/object, !!python/object/new or
+        # !!python/object/apply node.
+
+        cls = type(data)
+        if cls in copy_reg.dispatch_table:
+            reduce = copy_reg.dispatch_table[cls](data)
+        elif hasattr(data, '__reduce_ex__'):
+            reduce = data.__reduce_ex__(2)
+        elif hasattr(data, '__reduce__'):
+            reduce = data.__reduce__()
+        else:
+            raise RepresenterError("cannot represent object: %r" % data)
+        # Pad the reduction with None so it always unpacks into five fields.
+        reduce = (list(reduce)+[None]*5)[:5]
+        function, args, state, listitems, dictitems = reduce
+        args = list(args)
+        if state is None:
+            state = {}
+        if listitems is not None:
+            listitems = list(listitems)
+        if dictitems is not None:
+            dictitems = dict(dictitems)
+        if function.__name__ == '__newobj__':
+            # The class to instantiate is the first argument.
+            function = args[0]
+            args = args[1:]
+            tag = u'tag:yaml.org,2002:python/object/new:'
+            newobj = True
+        else:
+            tag = u'tag:yaml.org,2002:python/object/apply:'
+            newobj = False
+        function_name = u'%s.%s' % (function.__module__, function.__name__)
+        # Shortest form: only a dictionary state, created through __newobj__.
+        if not args and not listitems and not dictitems \
+                and isinstance(state, dict) and newobj:
+            return self.represent_mapping(
+                    u'tag:yaml.org,2002:python/object:'+function_name, state)
+        # Only positional arguments: emit them as a sequence.
+        if not listitems and not dictitems \
+                and isinstance(state, dict) and not state:
+            return self.represent_sequence(tag+function_name, args)
+        # General form: a mapping holding only the parts that are present.
+        value = {}
+        if args:
+            value['args'] = args
+        if state or not isinstance(state, dict):
+            value['state'] = state
+        if listitems:
+            value['listitems'] = listitems
+        if dictitems:
+            value['dictitems'] = dictitems
+        return self.represent_mapping(tag+function_name, value)
+
+Representer.add_representer(str,
+ Representer.represent_str)
+
+Representer.add_representer(unicode,
+ Representer.represent_unicode)
+
+Representer.add_representer(long,
+ Representer.represent_long)
+
+Representer.add_representer(complex,
+ Representer.represent_complex)
+
+Representer.add_representer(tuple,
+ Representer.represent_tuple)
+
+Representer.add_representer(type,
+ Representer.represent_name)
+
+Representer.add_representer(types.ClassType,
+ Representer.represent_name)
+
+Representer.add_representer(types.FunctionType,
+ Representer.represent_name)
+
+Representer.add_representer(types.BuiltinFunctionType,
+ Representer.represent_name)
+
+Representer.add_representer(types.ModuleType,
+ Representer.represent_module)
+
+Representer.add_multi_representer(types.InstanceType,
+ Representer.represent_instance)
+
+Representer.add_multi_representer(object,
+ Representer.represent_object)
+
diff --git a/tablib/packages/yaml/resolver.py b/tablib/packages/yaml/resolver.py
new file mode 100644
index 0000000..6b5ab87
--- /dev/null
+++ b/tablib/packages/yaml/resolver.py
@@ -0,0 +1,224 @@
+
+__all__ = ['BaseResolver', 'Resolver']
+
+from error import *
+from nodes import *
+
+import re
+
+class ResolverError(YAMLError):
+    """Raised by add_path_resolver() when it is given an invalid path or node kind."""
+    pass
+
+class BaseResolver(object):
+
+ DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
+ DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
+ DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'
+
+ yaml_implicit_resolvers = {}
+ yaml_path_resolvers = {}
+
+ def __init__(self):
+ self.resolver_exact_paths = []
+ self.resolver_prefix_paths = []
+
+    def add_implicit_resolver(cls, tag, regexp, first):
+        """Register `(tag, regexp)` as an implicit resolver on `cls`.
+
+        `first` is an iterable of keys under which the pair is filed; resolve()
+        looks resolvers up by the first character of a scalar value. Passing
+        None files the pair under the `None` key, which resolve() consults
+        for every non-empty value.
+
+        If `cls` has no `yaml_implicit_resolvers` of its own yet, the
+        inherited dictionary is copied first. The copy is shallow, so the
+        per-key lists are still shared with the class it was copied from.
+        """
+        if not 'yaml_implicit_resolvers' in cls.__dict__:
+            cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy()
+        if first is None:
+            first = [None]
+        for ch in first:
+            cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
+    add_implicit_resolver = classmethod(add_implicit_resolver)
+
+    def add_path_resolver(cls, tag, path, kind=None):
+        """Register `tag` for nodes of `kind` found at `path`.
+
+        `path` is a sequence whose elements are either a bare index check or
+        a 1- or 2-element list/tuple `(node_check[, index_check])`. `kind`
+        and `node_check` accept `str`, `list` and `dict` as shorthands for
+        ScalarNode, SequenceNode and MappingNode.
+
+        Raises ResolverError for a path element of the wrong length, or for
+        an invalid node check, index check or node kind.
+        """
+        # Note: `add_path_resolver` is experimental.  The API could be changed.
+        # `new_path` is a pattern that is matched against the path from the
+        # root to the node that is being considered.  Its elements are
+        # tuples `(node_check, index_check)`.  `node_check` is a node class:
+        # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`.  `None`
+        # matches any kind of a node.  `index_check` could be `None`, a boolean
+        # value, a string value, or a number.  `None` and `False` match against
+        # any _value_ of sequence and mapping nodes.  `True` matches against
+        # any _key_ of a mapping node.  A string `index_check` matches against
+        # a mapping value that corresponds to a scalar key whose content is
+        # equal to the `index_check` value.  An integer `index_check` matches
+        # against a sequence value with the index equal to `index_check`.
+        if not 'yaml_path_resolvers' in cls.__dict__:
+            cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
+        new_path = []
+        for element in path:
+            if isinstance(element, (list, tuple)):
+                if len(element) == 2:
+                    node_check, index_check = element
+                elif len(element) == 1:
+                    # A lone node check is paired with index_check True.
+                    node_check = element[0]
+                    index_check = True
+                else:
+                    raise ResolverError("Invalid path element: %s" % element)
+            else:
+                # A bare element is an index check that accepts any node.
+                node_check = None
+                index_check = element
+            if node_check is str:
+                node_check = ScalarNode
+            elif node_check is list:
+                node_check = SequenceNode
+            elif node_check is dict:
+                node_check = MappingNode
+            elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
+                    and not isinstance(node_check, basestring) \
+                    and node_check is not None:
+                raise ResolverError("Invalid node checker: %s" % node_check)
+            # Booleans pass this test because bool is a subclass of int.
+            if not isinstance(index_check, (basestring, int)) \
+                    and index_check is not None:
+                raise ResolverError("Invalid index checker: %s" % index_check)
+            new_path.append((node_check, index_check))
+        if kind is str:
+            kind = ScalarNode
+        elif kind is list:
+            kind = SequenceNode
+        elif kind is dict:
+            kind = MappingNode
+        elif kind not in [ScalarNode, SequenceNode, MappingNode] \
+                and kind is not None:
+            raise ResolverError("Invalid node kind: %s" % kind)
+        cls.yaml_path_resolvers[tuple(new_path), kind] = tag
+    add_path_resolver = classmethod(add_path_resolver)
+
+    def descend_resolver(self, current_node, current_index):
+        """Push the path-resolver state for one level deeper into the tree.
+
+        Does nothing when no path resolvers are registered. Otherwise one
+        entry is appended to `resolver_exact_paths` (kind -> tag for paths
+        that end at this depth) and one to `resolver_prefix_paths` (paths
+        that may still match deeper). ascend_resolver() pops them again.
+        """
+        if not self.yaml_path_resolvers:
+            return
+        exact_paths = {}
+        prefix_paths = []
+        if current_node:
+            # Below the root: keep only the parent's candidates that still
+            # match at this depth.
+            depth = len(self.resolver_prefix_paths)
+            for path, kind in self.resolver_prefix_paths[-1]:
+                if self.check_resolver_prefix(depth, path, kind,
+                        current_node, current_index):
+                    if len(path) > depth:
+                        prefix_paths.append((path, kind))
+                    else:
+                        exact_paths[kind] = self.yaml_path_resolvers[path, kind]
+        else:
+            # No current node: start from every registered path. An empty
+            # path matches immediately.
+            for path, kind in self.yaml_path_resolvers:
+                if not path:
+                    exact_paths[kind] = self.yaml_path_resolvers[path, kind]
+                else:
+                    prefix_paths.append((path, kind))
+        self.resolver_exact_paths.append(exact_paths)
+        self.resolver_prefix_paths.append(prefix_paths)
+
+ def ascend_resolver(self):
+ if not self.yaml_path_resolvers:
+ return
+ self.resolver_exact_paths.pop()
+ self.resolver_prefix_paths.pop()
+
+    def check_resolver_prefix(self, depth, path, kind,
+            current_node, current_index):
+        """Check element `depth-1` of `path` against the current node and index.
+
+        Returns True when both the node check and the index check pass, and
+        None (falsy) otherwise. `kind` is accepted but not used here.
+        """
+        node_check, index_check = path[depth-1]
+        # A string node check is compared with the node's tag; a class node
+        # check is an isinstance test; None accepts any node.
+        if isinstance(node_check, basestring):
+            if current_node.tag != node_check:
+                return
+        elif node_check is not None:
+            if not isinstance(current_node, node_check):
+                return
+        # True requires that there is no current index.
+        if index_check is True and current_index is not None:
+            return
+        # False and None require that there is a current index.
+        if (index_check is False or index_check is None) \
+                and current_index is None:
+            return
+        if isinstance(index_check, basestring):
+            # The index must be a scalar node with exactly this value.
+            if not (isinstance(current_index, ScalarNode)
+                    and index_check == current_index.value):
+                return
+        elif isinstance(index_check, int) and not isinstance(index_check, bool):
+            # bool is excluded explicitly because it is a subclass of int.
+            if index_check != current_index:
+                return
+        return True
+
+    def resolve(self, kind, value, implicit):
+        """Return the tag for a node of class `kind` that has no explicit tag.
+
+        For a ScalarNode with `implicit[0]` set, the implicit resolvers
+        registered for the first character of `value` (or for u'' when the
+        value is empty), followed by those registered under None, are tried
+        in order; the first regexp that matches decides the tag. Next, a
+        path resolver matching the current position is used if there is
+        one. Finally the default tag for `kind` is returned; for any other
+        `kind` the result is None.
+        """
+        if kind is ScalarNode and implicit[0]:
+            if value == u'':
+                resolvers = self.yaml_implicit_resolvers.get(u'', [])
+            else:
+                resolvers = self.yaml_implicit_resolvers.get(value[0], [])
+            wildcard_resolvers = self.yaml_implicit_resolvers.get(None, [])
+            # Concatenate into a new list. Extending `resolvers` in place
+            # would grow the list stored in the class-level registry on
+            # every call.
+            for tag, regexp in resolvers + wildcard_resolvers:
+                if regexp.match(value):
+                    return tag
+            implicit = implicit[1]
+        if self.yaml_path_resolvers:
+            exact_paths = self.resolver_exact_paths[-1]
+            if kind in exact_paths:
+                return exact_paths[kind]
+            if None in exact_paths:
+                return exact_paths[None]
+        if kind is ScalarNode:
+            return self.DEFAULT_SCALAR_TAG
+        elif kind is SequenceNode:
+            return self.DEFAULT_SEQUENCE_TAG
+        elif kind is MappingNode:
+            return self.DEFAULT_MAPPING_TAG
+
+class Resolver(BaseResolver):
+ pass
+
+Resolver.add_implicit_resolver(
+ u'tag:yaml.org,2002:bool',
+ re.compile(ur'''^(?:yes|Yes|YES|no|No|NO
+ |true|True|TRUE|false|False|FALSE
+ |on|On|ON|off|Off|OFF)$''', re.X),
+ list(u'yYnNtTfFoO'))
+
+Resolver.add_implicit_resolver(
+ u'tag:yaml.org,2002:float',
+ re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?
+ |\.[0-9_]+(?:[eE][-+][0-9]+)?
+ |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*
+ |[-+]?\.(?:inf|Inf|INF)
+ |\.(?:nan|NaN|NAN))$''', re.X),
+ list(u'-+0123456789.'))
+
+Resolver.add_implicit_resolver(
+ u'tag:yaml.org,2002:int',
+ re.compile(ur'''^(?:[-+]?0b[0-1_]+
+ |[-+]?0[0-7_]+
+ |[-+]?(?:0|[1-9][0-9_]*)
+ |[-+]?0x[0-9a-fA-F_]+
+ |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X),
+ list(u'-+0123456789'))
+
+Resolver.add_implicit_resolver(
+ u'tag:yaml.org,2002:merge',
+ re.compile(ur'^(?:<<)$'),
+ [u'<'])
+
+Resolver.add_implicit_resolver(
+ u'tag:yaml.org,2002:null',
+ re.compile(ur'''^(?: ~
+ |null|Null|NULL
+ | )$''', re.X),
+ [u'~', u'n', u'N', u''])
+
+Resolver.add_implicit_resolver(
+ u'tag:yaml.org,2002:timestamp',
+ re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]
+ |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]?
+ (?:[Tt]|[ \t]+)[0-9][0-9]?
+ :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)?
+ (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X),
+ list(u'0123456789'))
+
+Resolver.add_implicit_resolver(
+ u'tag:yaml.org,2002:value',
+ re.compile(ur'^(?:=)$'),
+ [u'='])
+
+# The following resolver is only for documentation purposes. It cannot work
+# because plain scalars cannot start with '!', '&', or '*'.
+Resolver.add_implicit_resolver(
+ u'tag:yaml.org,2002:yaml',
+ re.compile(ur'^(?:!|&|\*)$'),
+ list(u'!&*'))
+
diff --git a/tablib/packages/yaml/scanner.py b/tablib/packages/yaml/scanner.py
new file mode 100644
index 0000000..5228fad
--- /dev/null
+++ b/tablib/packages/yaml/scanner.py
@@ -0,0 +1,1457 @@
+
+# Scanner produces tokens of the following types:
+# STREAM-START
+# STREAM-END
+# DIRECTIVE(name, value)
+# DOCUMENT-START
+# DOCUMENT-END
+# BLOCK-SEQUENCE-START
+# BLOCK-MAPPING-START
+# BLOCK-END
+# FLOW-SEQUENCE-START
+# FLOW-MAPPING-START
+# FLOW-SEQUENCE-END
+# FLOW-MAPPING-END
+# BLOCK-ENTRY
+# FLOW-ENTRY
+# KEY
+# VALUE
+# ALIAS(value)
+# ANCHOR(value)
+# TAG(value)
+# SCALAR(value, plain, style)
+#
+# Read comments in the Scanner code for more details.
+#
+
+__all__ = ['Scanner', 'ScannerError']
+
+from error import MarkedYAMLError
+from tokens import *
+
+class ScannerError(MarkedYAMLError):
+    """Raised by Scanner when the input cannot be split into tokens."""
+    pass
+
+class SimpleKey(object):
+    """Record of a position where a simple key may start."""
+    # See below simple keys treatment.
+
+    def __init__(self, token_number, required, index, line, column, mark):
+        # Number of the token that would become the key.
+        self.token_number = token_number
+        # Whether a key is mandatory here; the scanner raises ScannerError
+        # when it has to drop a required candidate.
+        self.required = required
+        # Scanner position (index, line, column) and mark at the candidate.
+        self.index = index
+        self.line = line
+        self.column = column
+        self.mark = mark
+
+class Scanner(object):
+
+    def __init__(self):
+        """Initialize the scanner."""
+        # It is assumed that Scanner and Reader will have a common descendant.
+        # Reader does the dirty work of checking for BOM and converting the
+        # input data to Unicode. It also adds NUL to the end.
+        #
+        # Reader supports the following methods
+        #   self.peek(i=0)       # peek the next i-th character
+        #   self.prefix(l=1)     # peek the next l characters
+        #   self.forward(l=1)    # read the next l characters and move the pointer.
+
+        # Had we reached the end of the stream?
+        self.done = False
+
+        # The number of unclosed '{' and '['. `flow_level == 0` means block
+        # context.
+        self.flow_level = 0
+
+        # List of processed tokens that are not yet emitted.
+        self.tokens = []
+
+        # Add the STREAM-START token.
+        self.fetch_stream_start()
+
+        # Number of tokens that were emitted through the `get_token` method.
+        self.tokens_taken = 0
+
+        # The current indentation level.
+        self.indent = -1
+
+        # Past indentation levels.
+        self.indents = []
+
+        # Variables related to simple keys treatment.
+
+        # A simple key is a key that is not denoted by the '?' indicator.
+        # Example of simple keys:
+        #   ---
+        #   block simple key: value
+        #   ? not a simple key:
+        #   : { flow simple key: value }
+        # We emit the KEY token before all keys, so when we find a potential
+        # simple key, we try to locate the corresponding ':' indicator.
+        # Simple keys should be limited to a single line and 1024 characters.
+
+        # Can a simple key start at the current position? A simple key may
+        # start:
+        # - at the beginning of the line, not counting indentation spaces
+        #       (in block context),
+        # - after '{', '[', ',' (in the flow context),
+        # - after '?', ':', '-' (in the block context).
+        # In the block context, this flag also signifies if a block collection
+        # may start at the current position.
+        self.allow_simple_key = True
+
+        # Keep track of possible simple keys. This is a dictionary. The key
+        # is `flow_level`; there can be no more than one possible simple key
+        # for each level. The value is a SimpleKey record:
+        #   (token_number, required, index, line, column, mark)
+        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
+        # '[', or '{' tokens.
+        self.possible_simple_keys = {}
+
+ # Public methods.
+
+    def check_token(self, *choices):
+        """Tell whether the next token is an instance of one of `choices`.
+
+        With no `choices`, tell whether there is a next token at all.
+        """
+        while self.need_more_tokens():
+            self.fetch_more_tokens()
+        if not self.tokens:
+            return False
+        if not choices:
+            return True
+        upcoming = self.tokens[0]
+        for token_class in choices:
+            if isinstance(upcoming, token_class):
+                return True
+        return False
+
+    def peek_token(self):
+        """Return the next token without removing it, or None if there is none."""
+        # Return the next token, but do not delete it from the queue.
+        while self.need_more_tokens():
+            self.fetch_more_tokens()
+        if self.tokens:
+            return self.tokens[0]
+
+    def get_token(self):
+        """Remove and return the next token, or return None if there is none.
+
+        Each token handed out increments `self.tokens_taken`.
+        """
+        # Return the next token.
+        while self.need_more_tokens():
+            self.fetch_more_tokens()
+        if self.tokens:
+            self.tokens_taken += 1
+            return self.tokens.pop(0)
+
+ # Private methods.
+
+    def need_more_tokens(self):
+        """Tell whether more input must be scanned before a token can be handed out.
+
+        Returns False once the stream is done, True when the queue is empty
+        or when the token at the head of the queue is still a possible simple
+        key, and None (falsy) otherwise.
+        """
+        if self.done:
+            return False
+        if not self.tokens:
+            return True
+        # The current token may be a potential simple key, so we
+        # need to look further.
+        self.stale_possible_simple_keys()
+        if self.next_possible_simple_key() == self.tokens_taken:
+            return True
+
+    def fetch_more_tokens(self):
+        """Scan the next token(s) from the input and append them to the queue.
+
+        Skips to the next token, drops stale simple-key candidates, unwinds
+        the indentation to the current column and then dispatches on the
+        next character to the matching `fetch_*` method.
+
+        Raises ScannerError if the character cannot start any token.
+        """
+
+        # Eat whitespaces and comments until we reach the next token.
+        self.scan_to_next_token()
+
+        # Remove obsolete possible simple keys.
+        self.stale_possible_simple_keys()
+
+        # Compare the current indentation and column. It may add some tokens
+        # and decrease the current indentation level.
+        self.unwind_indent(self.column)
+
+        # Peek the next character.
+        ch = self.peek()
+
+        # Is it the end of stream?
+        if ch == u'\0':
+            return self.fetch_stream_end()
+
+        # Is it a directive?
+        if ch == u'%' and self.check_directive():
+            return self.fetch_directive()
+
+        # Is it the document start?
+        if ch == u'-' and self.check_document_start():
+            return self.fetch_document_start()
+
+        # Is it the document end?
+        if ch == u'.' and self.check_document_end():
+            return self.fetch_document_end()
+
+        # TODO: support for BOM within a stream.
+        #if ch == u'\uFEFF':
+        #    return self.fetch_bom()    <-- issue BOMToken
+
+        # Note: the order of the following checks is NOT significant.
+
+        # Is it the flow sequence start indicator?
+        if ch == u'[':
+            return self.fetch_flow_sequence_start()
+
+        # Is it the flow mapping start indicator?
+        if ch == u'{':
+            return self.fetch_flow_mapping_start()
+
+        # Is it the flow sequence end indicator?
+        if ch == u']':
+            return self.fetch_flow_sequence_end()
+
+        # Is it the flow mapping end indicator?
+        if ch == u'}':
+            return self.fetch_flow_mapping_end()
+
+        # Is it the flow entry indicator?
+        if ch == u',':
+            return self.fetch_flow_entry()
+
+        # Is it the block entry indicator?
+        if ch == u'-' and self.check_block_entry():
+            return self.fetch_block_entry()
+
+        # Is it the key indicator?
+        if ch == u'?' and self.check_key():
+            return self.fetch_key()
+
+        # Is it the value indicator?
+        if ch == u':' and self.check_value():
+            return self.fetch_value()
+
+        # Is it an alias?
+        if ch == u'*':
+            return self.fetch_alias()
+
+        # Is it an anchor?
+        if ch == u'&':
+            return self.fetch_anchor()
+
+        # Is it a tag?
+        if ch == u'!':
+            return self.fetch_tag()
+
+        # Is it a literal scalar?
+        if ch == u'|' and not self.flow_level:
+            return self.fetch_literal()
+
+        # Is it a folded scalar?
+        if ch == u'>' and not self.flow_level:
+            return self.fetch_folded()
+
+        # Is it a single quoted scalar?
+        if ch == u'\'':
+            return self.fetch_single()
+
+        # Is it a double quoted scalar?
+        if ch == u'\"':
+            return self.fetch_double()
+
+        # It must be a plain scalar then.
+        if self.check_plain():
+            return self.fetch_plain()
+
+        # No? It's an error. Let's produce a nice error message.
+        raise ScannerError("while scanning for the next token", None,
+                "found character %r that cannot start any token"
+                % ch.encode('utf-8'), self.get_mark())
+
+ # Simple keys treatment.
+
+    def next_possible_simple_key(self):
+        """Return the smallest token number among the possible simple keys.
+
+        Returns None when no possible simple key is being tracked.
+        """
+        numbers = [key.token_number
+                for key in self.possible_simple_keys.values()]
+        if not numbers:
+            return None
+        return min(numbers)
+
+    def stale_possible_simple_keys(self):
+        """Drop simple-key candidates that can no longer become keys.
+
+        Raises ScannerError if a dropped candidate was a required key.
+        """
+        # Remove entries that are no longer possible simple keys. According to
+        # the YAML specification, simple keys
+        # - should be limited to a single line,
+        # - should be no longer than 1024 characters.
+        # Disabling this procedure will allow simple keys of any length and
+        # height (may cause problems if indentation is broken though).
+        # Iterate over keys() rather than the dictionary itself, because
+        # entries are deleted inside the loop.
+        for level in self.possible_simple_keys.keys():
+            key = self.possible_simple_keys[level]
+            if key.line != self.line \
+                    or self.index-key.index > 1024:
+                if key.required:
+                    raise ScannerError("while scanning a simple key", key.mark,
+                            "could not found expected ':'", self.get_mark())
+                del self.possible_simple_keys[level]
+
+    def save_possible_simple_key(self):
+        """Record the current position as a simple-key candidate, if allowed.
+
+        The candidate replaces any existing one at the current flow level.
+        """
+        # The next token may start a simple key. We check if it's possible
+        # and save its position. This function is called for
+        #   ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
+
+        # Check if a simple key is required at the current position.
+        required = not self.flow_level and self.indent == self.column
+
+        # A simple key is required only if it is the first token in the current
+        # line. Therefore it is always allowed.
+        assert self.allow_simple_key or not required
+
+        # The next token might be a simple key. Let's save its number and
+        # position.
+        if self.allow_simple_key:
+            self.remove_possible_simple_key()
+            # The number the next token will get once it has been queued.
+            token_number = self.tokens_taken+len(self.tokens)
+            key = SimpleKey(token_number, required,
+                    self.index, self.line, self.column, self.get_mark())
+            self.possible_simple_keys[self.flow_level] = key
+
+    def remove_possible_simple_key(self):
+        """Forget the simple-key candidate saved at the current flow level, if any.
+
+        Raises ScannerError if that candidate was a required key.
+        """
+        # Remove the saved possible key position at the current flow level.
+        if self.flow_level in self.possible_simple_keys:
+            key = self.possible_simple_keys[self.flow_level]
+
+            if key.required:
+                raise ScannerError("while scanning a simple key", key.mark,
+                        "could not found expected ':'", self.get_mark())
+
+            del self.possible_simple_keys[self.flow_level]
+
+ # Indentation functions.
+
+ def unwind_indent(self, column):
+     """Emit BLOCK-END for every open block indented deeper than `column`.
+
+     Nothing happens in the flow context: indentation is ignored there,
+     which makes the scanner less restrictive than the specification
+     requires.
+     """
+     ## A stricter check that is deliberately left disabled.  According to
+     ## the spec the condition should even be `self.indent >= column`, but
+     ## that would prohibit intuitively correct constructions such as
+     ##     key : {
+     ##     }
+     #if self.flow_level and self.indent > column:
+     #    raise ScannerError(None, None,
+     #            "invalid intendation or unclosed '[' or '{'",
+     #            self.get_mark())
+
+     if not self.flow_level:
+         # Block context: pop indentation levels until `column` fits,
+         # issuing one zero-width BLOCK-END token per popped level.
+         while self.indent > column:
+             mark = self.get_mark()
+             self.indent = self.indents.pop()
+             self.tokens.append(BlockEndToken(mark, mark))
+
+ def add_indent(self, column):
+     """Push a deeper indentation level if `column` exceeds the current one.
+
+     Returns True when a new level was opened (the previous indentation is
+     saved on `self.indents`), False when `column` is not deeper.
+     """
+     if self.indent >= column:
+         return False
+     self.indents.append(self.indent)
+     self.indent = column
+     return True
+
+ # Fetchers.
+
+ def fetch_stream_start(self):
+     """Queue the STREAM-START token.
+
+     STREAM-START is always the first token and STREAM-END the last one.
+     The token is zero-width (start and end share one mark) and carries
+     the stream encoding.
+     """
+     here = self.get_mark()
+     token = StreamStartToken(here, here, encoding=self.encoding)
+     self.tokens.append(token)
+
+
+ def fetch_stream_end(self):
+     """Close all open blocks, queue STREAM-END and mark scanning as done."""
+     # Unwind the indentation down to -1.
+     self.unwind_indent(-1)
+
+     # No simple key may be pending or started from here on.
+     self.remove_possible_simple_key()
+     self.allow_simple_key = False
+     self.possible_simple_keys = {}
+
+     # STREAM-END is a zero-width token at the current position.
+     position = self.get_mark()
+     self.tokens.append(StreamEndToken(position, position))
+
+     # The stream is finished.
+     self.done = True
+
+ def fetch_directive(self):
+     """Queue a DIRECTIVE token for a '%' line.
+
+     The indentation is first unwound to -1 and the simple-key state is
+     reset; the directive text itself is read by `scan_directive`.
+     """
+     self.unwind_indent(-1)
+
+     self.remove_possible_simple_key()
+     self.allow_simple_key = False
+
+     directive = self.scan_directive()
+     self.tokens.append(directive)
+
+ def fetch_document_start(self):
+     """Queue DOCUMENT-START through the shared document-indicator fetcher."""
+     token_class = DocumentStartToken
+     self.fetch_document_indicator(token_class)
+
+ def fetch_document_end(self):
+     """Queue DOCUMENT-END through the shared document-indicator fetcher."""
+     token_class = DocumentEndToken
+     self.fetch_document_indicator(token_class)
+
+ def fetch_document_indicator(self, TokenClass):
+     """Queue a DOCUMENT-START or DOCUMENT-END token.
+
+     `TokenClass` is called as `TokenClass(start_mark, end_mark)`; the
+     marks enclose the three characters that are consumed here.
+     """
+     # Unwind the indentation down to -1.
+     self.unwind_indent(-1)
+
+     # Reset simple keys.  Note that there could not be a block collection
+     # after '---'.
+     self.remove_possible_simple_key()
+     self.allow_simple_key = False
+
+     # Consume the indicator and wrap it in the requested token.
+     begin = self.get_mark()
+     self.forward(3)
+     finish = self.get_mark()
+     self.tokens.append(TokenClass(begin, finish))
+
+ def fetch_flow_sequence_start(self):
+     """Queue FLOW-SEQUENCE-START through the shared collection-start fetcher."""
+     token_class = FlowSequenceStartToken
+     self.fetch_flow_collection_start(token_class)
+
+ def fetch_flow_mapping_start(self):
+     """Queue FLOW-MAPPING-START through the shared collection-start fetcher."""
+     token_class = FlowMappingStartToken
+     self.fetch_flow_collection_start(token_class)
+
+ def fetch_flow_collection_start(self, TokenClass):
+     """Queue FLOW-SEQUENCE-START or FLOW-MAPPING-START and enter a flow level.
+
+     `TokenClass` is called as `TokenClass(start_mark, end_mark)` around
+     the single indicator character that is consumed.
+     """
+     # '[' and '{' may themselves start a simple key.
+     self.save_possible_simple_key()
+
+     # One level deeper into the flow context, where a simple key may
+     # directly follow the opening bracket.
+     self.flow_level += 1
+     self.allow_simple_key = True
+
+     begin = self.get_mark()
+     self.forward()
+     finish = self.get_mark()
+     self.tokens.append(TokenClass(begin, finish))
+
+ def fetch_flow_sequence_end(self):
+     """Queue FLOW-SEQUENCE-END through the shared collection-end fetcher."""
+     token_class = FlowSequenceEndToken
+     self.fetch_flow_collection_end(token_class)
+
+ def fetch_flow_mapping_end(self):
+     """Queue FLOW-MAPPING-END through the shared collection-end fetcher."""
+     token_class = FlowMappingEndToken
+     self.fetch_flow_collection_end(token_class)
+
+ def fetch_flow_collection_end(self, TokenClass):
+     """Queue FLOW-SEQUENCE-END or FLOW-MAPPING-END and leave a flow level.
+
+     `TokenClass` is called as `TokenClass(start_mark, end_mark)` around
+     the single indicator character that is consumed.
+     """
+     # Drop the candidate key of the level being closed.
+     self.remove_possible_simple_key()
+
+     # Back out one flow level; no simple key may follow ']' or '}'.
+     self.flow_level -= 1
+     self.allow_simple_key = False
+
+     begin = self.get_mark()
+     self.forward()
+     finish = self.get_mark()
+     self.tokens.append(TokenClass(begin, finish))
+
+ def fetch_flow_entry(self):
+     """Queue a FLOW-ENTRY token for ','."""
+     # A simple key may follow ','; whatever candidate was pending on this
+     # level is dropped.
+     self.allow_simple_key = True
+     self.remove_possible_simple_key()
+
+     begin = self.get_mark()
+     self.forward()
+     finish = self.get_mark()
+     self.tokens.append(FlowEntryToken(begin, finish))
+
+ def fetch_block_entry(self):
+     """Queue a BLOCK-ENTRY token for '-', opening a block sequence if needed.
+
+     Raises:
+         ScannerError: in the block context, when a new entry is not
+             allowed at this position.
+     """
+     if not self.flow_level:
+         # Block context: an entry may start only where a simple key may.
+         if not self.allow_simple_key:
+             raise ScannerError(None, None,
+                     "sequence entries are not allowed here",
+                     self.get_mark())
+
+         # A deeper column opens a new sequence: BLOCK-SEQUENCE-START.
+         if self.add_indent(self.column):
+             mark = self.get_mark()
+             self.tokens.append(BlockSequenceStartToken(mark, mark))
+
+     # A block entry inside the flow context is an error, but it is left
+     # for the parser to detect.
+
+     # A simple key may follow '-'; drop the candidate of this level.
+     self.allow_simple_key = True
+     self.remove_possible_simple_key()
+
+     begin = self.get_mark()
+     self.forward()
+     finish = self.get_mark()
+     self.tokens.append(BlockEntryToken(begin, finish))
+
+ def fetch_key(self):
+     """Queue a KEY token for '?', opening a block mapping if needed.
+
+     Raises:
+         ScannerError: in the block context, when a key (not necessarily
+             a simple one) is not allowed at this position.
+     """
+     in_block = not self.flow_level
+     if in_block:
+         if not self.allow_simple_key:
+             raise ScannerError(None, None,
+                     "mapping keys are not allowed here",
+                     self.get_mark())
+
+         # A deeper column opens a new mapping: BLOCK-MAPPING-START.
+         if self.add_indent(self.column):
+             mark = self.get_mark()
+             self.tokens.append(BlockMappingStartToken(mark, mark))
+
+     # A simple key may follow '?' only in the block context.
+     self.allow_simple_key = in_block
+
+     # Drop the candidate key of the current level.
+     self.remove_possible_simple_key()
+
+     begin = self.get_mark()
+     self.forward()
+     finish = self.get_mark()
+     self.tokens.append(KeyToken(begin, finish))
+
+ def fetch_value(self):
+     """Queue a VALUE token for ':', promoting a pending simple key if any.
+
+     When a simple-key candidate is saved for the current flow level, a KEY
+     token (and, in the block context, possibly BLOCK-MAPPING-START) is
+     inserted retroactively at the candidate's position in the queue.
+
+     Raises:
+         ScannerError: in the block context, when there is no pending
+             simple key and a value is not allowed here.
+     """
+     level = self.flow_level
+     if level in self.possible_simple_keys:
+         # The candidate really was a simple key: insert KEY before it.
+         key = self.possible_simple_keys.pop(level)
+         position = key.token_number - self.tokens_taken
+         self.tokens.insert(position, KeyToken(key.mark, key.mark))
+
+         # If this key starts a new block mapping, BLOCK-MAPPING-START
+         # goes in front of the KEY token.
+         if not level and self.add_indent(key.column):
+             self.tokens.insert(position,
+                     BlockMappingStartToken(key.mark, key.mark))
+
+         # There cannot be two simple keys one after another.
+         self.allow_simple_key = False
+     else:
+         # No candidate: the ':' must belong to a complex key.
+         if not level:
+             # Block context checks.  (The parser would catch these
+             # anyway.)  A complex value may start if and only if a
+             # simple key may start.
+             if not self.allow_simple_key:
+                 raise ScannerError(None, None,
+                         "mapping values are not allowed here",
+                         self.get_mark())
+
+             # If this value starts a new block mapping, emit
+             # BLOCK-MAPPING-START.  The parser reports it as an error
+             # later.
+             if self.add_indent(self.column):
+                 mark = self.get_mark()
+                 self.tokens.append(BlockMappingStartToken(mark, mark))
+
+         # A simple key may follow ':' only in the block context.
+         self.allow_simple_key = not level
+
+         # Drop the candidate key of the current level.
+         self.remove_possible_simple_key()
+
+     begin = self.get_mark()
+     self.forward()
+     finish = self.get_mark()
+     self.tokens.append(ValueToken(begin, finish))
+
+ def fetch_alias(self):
+     """Queue an ALIAS token; the alias may itself be a simple key."""
+     self.save_possible_simple_key()
+
+     # No simple key can directly follow an ALIAS.
+     self.allow_simple_key = False
+
+     alias = self.scan_anchor(AliasToken)
+     self.tokens.append(alias)
+
+ def fetch_anchor(self):
+     """Queue an ANCHOR token; the anchor may start a simple key."""
+     self.save_possible_simple_key()
+
+     # No simple key can directly follow an ANCHOR.
+     self.allow_simple_key = False
+
+     anchor = self.scan_anchor(AnchorToken)
+     self.tokens.append(anchor)
+
+ def fetch_tag(self):
+     """Queue a TAG token; the tag may start a simple key."""
+     self.save_possible_simple_key()
+
+     # No simple key can directly follow a TAG.
+     self.allow_simple_key = False
+
+     tag = self.scan_tag()
+     self.tokens.append(tag)
+
+ def fetch_literal(self):
+     """Fetch a block scalar in the literal ('|') style."""
+     indicator = '|'
+     self.fetch_block_scalar(style=indicator)
+
+ def fetch_folded(self):
+     """Fetch a block scalar in the folded ('>') style."""
+     indicator = '>'
+     self.fetch_block_scalar(style=indicator)
+
+ def fetch_block_scalar(self, style):
+     """Queue the SCALAR token of a block scalar with the given `style`."""
+     # A simple key may follow a block scalar; the candidate pending on
+     # the current level is dropped.
+     self.allow_simple_key = True
+     self.remove_possible_simple_key()
+
+     scalar = self.scan_block_scalar(style)
+     self.tokens.append(scalar)
+
+ def fetch_single(self):
+     """Fetch a single-quoted flow scalar."""
+     quote = '\''
+     self.fetch_flow_scalar(style=quote)
+
+ def fetch_double(self):
+     """Fetch a double-quoted flow scalar."""
+     quote = '"'
+     self.fetch_flow_scalar(style=quote)
+
+ def fetch_flow_scalar(self, style):
+     """Queue the SCALAR token of a quoted scalar with the given `style`."""
+     # A flow scalar could be a simple key ...
+     self.save_possible_simple_key()
+
+     # ... but no simple key can directly follow it.
+     self.allow_simple_key = False
+
+     scalar = self.scan_flow_scalar(style)
+     self.tokens.append(scalar)
+
+ def fetch_plain(self):
+     """Queue the SCALAR token of a plain (unquoted) scalar."""
+     # A plain scalar could be a simple key.
+     self.save_possible_simple_key()
+
+     # No simple key directly after a plain scalar.  `scan_plain` turns
+     # the flag back on if the scan finishes at the beginning of a line.
+     self.allow_simple_key = False
+
+     scalar = self.scan_plain()
+     self.tokens.append(scalar)
+
+ # Checkers.
+
+ def check_directive(self):
+     """Return True if the already-seen '%' starts a directive.
+
+     DIRECTIVE: ^ '%' ...  Only the column needs checking here; the result
+     is None when the '%' is not at the start of a line.
+     """
+     at_line_start = (self.column == 0)
+     if at_line_start:
+         return True
+     return None
+
+ def check_document_start(self):
+     """Return True at a document start marker, None otherwise.
+
+     DOCUMENT-START: ^ '---' (' '|'\n')
+     """
+     at_line_start = (self.column == 0)
+     if at_line_start and self.prefix(3) == u'---' \
+             and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+         return True
+     return None
+
+ def check_document_end(self):
+     """Return True at a document end marker, None otherwise.
+
+     DOCUMENT-END: ^ '...' (' '|'\n')
+     """
+     at_line_start = (self.column == 0)
+     if at_line_start and self.prefix(3) == u'...' \
+             and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+         return True
+     return None
+
+ def check_block_entry(self):
+     """Tell whether the '-' at the current position is a block entry.
+
+     BLOCK-ENTRY: '-' (' '|'\n')
+     """
+     following = self.peek(1)
+     return following in u'\0 \t\r\n\x85\u2028\u2029'
+
+ def check_key(self):
+     """Tell whether the '?' at the current position is a KEY indicator.
+
+     KEY(flow context):  '?'
+     KEY(block context): '?' (' '|'\n')
+     """
+     return True if self.flow_level \
+             else self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+
+ def check_value(self):
+     """Tell whether the ':' at the current position is a VALUE indicator.
+
+     VALUE(flow context):  ':'
+     VALUE(block context): ':' (' '|'\n')
+     """
+     return True if self.flow_level \
+             else self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
+
+ def check_plain(self):
+     """Tell whether a plain scalar may start at the current position.
+
+     A plain scalar may start with any non-space character except:
+         '-', '?', ':', ',', '[', ']', '{', '}',
+         '#', '&', '*', '!', '|', '>', '\'', '\"',
+         '%', '@', '`'.
+
+     It may also start with '-', '?' or ':' when the next character is a
+     non-space one.  For '?' and ':' that rule is limited to the block
+     context, so that the flow context stays space independent.
+     """
+     ch = self.peek()
+     if ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`':
+         return True
+
+     # An indicator (or blank): acceptable only when glued to more text.
+     if self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029':
+         return False
+     return ch == u'-' or (not self.flow_level and ch in u'?:')
+
+ # Scanners.
+
+ def scan_to_next_token(self):
+     """Skip spaces, comments and line breaks up to the next token.
+
+     Each line break found in the block context turns `allow_simple_key`
+     on.  A byte order mark is stripped only when it is the very first
+     character of the stream; a BOM inside the stream is not yet supported
+     as the specification requires and is treated as part of the document.
+     """
+     # TODO: We need to make tab handling rules more sane.  A good rule is
+     #   Tabs cannot precede tokens
+     #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
+     #   KEY(block), VALUE(block), BLOCK-ENTRY
+     # So the checking code is
+     #   if <TAB>:
+     #       self.allow_simple_keys = False
+     # We also need to add the check for `allow_simple_keys == True` to
+     # `unwind_indent` before issuing BLOCK-END.
+     # Scanners for block, flow, and plain scalars need to be modified.
+
+     if self.index == 0 and self.peek() == u'\uFEFF':
+         self.forward()
+
+     while True:
+         # Leading spaces of the line.
+         while self.peek() == u' ':
+             self.forward()
+
+         # A comment runs to the end of the line.
+         if self.peek() == u'#':
+             while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+                 self.forward()
+
+         # Anything other than a line break here is the next token.
+         if not self.scan_line_break():
+             break
+         if not self.flow_level:
+             self.allow_simple_key = True
+
+ def scan_directive(self):
+     """Scan a whole directive line and return its DirectiveToken.
+
+     %YAML and %TAG directives get their value parsed; for any other name
+     the value stays None and the rest of the line is skipped.  See the
+     specification for details.
+     """
+     start_mark = self.get_mark()
+     self.forward()
+     name = self.scan_directive_name(start_mark)
+
+     value_scanners = {
+         u'YAML': self.scan_yaml_directive_value,
+         u'TAG': self.scan_tag_directive_value,
+     }
+     scan_value = value_scanners.get(name)
+     if scan_value is not None:
+         value = scan_value(start_mark)
+         end_mark = self.get_mark()
+     else:
+         # Unknown directive: the token ends right after the name.
+         value = None
+         end_mark = self.get_mark()
+         while self.peek() not in u'\0\r\n\x85\u2028\u2029':
+             self.forward()
+
+     self.scan_directive_ignored_line(start_mark)
+     return DirectiveToken(name, value, start_mark, end_mark)
+
+ def scan_directive_name(self, start_mark):
+     """Consume and return the directive name at the current position.
+
+     The name is a non-empty run of ASCII letters, digits, '-' and '_' and
+     must be followed by a space, a line break or the end of the stream.
+     See the specification for details.
+
+     Raises:
+         ScannerError: if the name is empty or badly terminated.
+     """
+     def is_name_char(ch):
+         return (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z'
+                 or u'a' <= ch <= u'z' or ch in u'-_')
+
+     length = 0
+     while is_name_char(self.peek(length)):
+         length += 1
+     if not length:
+         raise ScannerError("while scanning a directive", start_mark,
+                 "expected alphabetic or numeric character, but found %r"
+                 % self.peek().encode('utf-8'), self.get_mark())
+
+     name = self.prefix(length)
+     self.forward(length)
+
+     terminator = self.peek()
+     if terminator not in u'\0 \r\n\x85\u2028\u2029':
+         raise ScannerError("while scanning a directive", start_mark,
+                 "expected alphabetic or numeric character, but found %r"
+                 % terminator.encode('utf-8'), self.get_mark())
+     return name
+
+ def scan_yaml_directive_value(self, start_mark):
+     """Consume the 'MAJOR.MINOR' value of a %YAML directive.
+
+     Returns the version as a `(major, minor)` tuple of ints.  See the
+     specification for details.
+
+     Raises:
+         ScannerError: if the '.' separator is missing or the version is
+             not followed by a space, a line break or the end of stream.
+     """
+     while self.peek() == u' ':
+         self.forward()
+
+     major = self.scan_yaml_directive_number(start_mark)
+     separator = self.peek()
+     if separator != '.':
+         raise ScannerError("while scanning a directive", start_mark,
+                 "expected a digit or '.', but found %r"
+                 % separator.encode('utf-8'),
+                 self.get_mark())
+     self.forward()
+
+     minor = self.scan_yaml_directive_number(start_mark)
+     terminator = self.peek()
+     if terminator not in u'\0 \r\n\x85\u2028\u2029':
+         raise ScannerError("while scanning a directive", start_mark,
+                 "expected a digit or ' ', but found %r"
+                 % terminator.encode('utf-8'),
+                 self.get_mark())
+     return (major, minor)
+
+ def scan_yaml_directive_number(self, start_mark):
+     """Consume a run of decimal digits and return it as an int.
+
+     See the specification for details.
+
+     Raises:
+         ScannerError: if the current character is not a digit.
+     """
+     first = self.peek()
+     if not (u'0' <= first <= u'9'):
+         raise ScannerError("while scanning a directive", start_mark,
+                 "expected a digit, but found %r" % first.encode('utf-8'),
+                 self.get_mark())
+
+     length = 0
+     while u'0' <= self.peek(length) <= u'9':
+         length += 1
+     digits = self.prefix(length)
+     self.forward(length)
+     return int(digits)
+
+ def scan_tag_directive_value(self, start_mark):
+     """Consume the value of a %TAG directive.
+
+     Returns a `(handle, prefix)` tuple; spaces before each of the two
+     parts are skipped.  See the specification for details.
+     """
+     parts = []
+     for scan_part in (self.scan_tag_directive_handle,
+             self.scan_tag_directive_prefix):
+         while self.peek() == u' ':
+             self.forward()
+         parts.append(scan_part(start_mark))
+     handle, prefix = parts
+     return (handle, prefix)
+
+ def scan_tag_directive_handle(self, start_mark):
+     """Consume the handle of a %TAG directive and return it.
+
+     See the specification for details.
+
+     Raises:
+         ScannerError: if the handle is not followed by a space.
+     """
+     handle = self.scan_tag_handle('directive', start_mark)
+     following = self.peek()
+     if following == u' ':
+         return handle
+     raise ScannerError("while scanning a directive", start_mark,
+             "expected ' ', but found %r" % following.encode('utf-8'),
+             self.get_mark())
+
+ def scan_tag_directive_prefix(self, start_mark):
+     """Consume the prefix URI of a %TAG directive and return it.
+
+     See the specification for details.
+
+     Raises:
+         ScannerError: if the prefix is not followed by a space, a line
+             break or the end of the stream.
+     """
+     prefix = self.scan_tag_uri('directive', start_mark)
+     following = self.peek()
+     if following in u'\0 \r\n\x85\u2028\u2029':
+         return prefix
+     raise ScannerError("while scanning a directive", start_mark,
+             "expected ' ', but found %r" % following.encode('utf-8'),
+             self.get_mark())
+
+ def scan_directive_ignored_line(self, start_mark):
+     """Consume the rest of a directive line: spaces, comment, line break.
+
+     See the specification for details.
+
+     Raises:
+         ScannerError: if anything but a comment or a line break follows
+             the directive.
+     """
+     line_end = u'\0\r\n\x85\u2028\u2029'
+
+     while self.peek() == u' ':
+         self.forward()
+     if self.peek() == u'#':
+         while self.peek() not in line_end:
+             self.forward()
+
+     found = self.peek()
+     if found not in line_end:
+         raise ScannerError("while scanning a directive", start_mark,
+                 "expected a comment or a line break, but found %r"
+                 % found.encode('utf-8'), self.get_mark())
+     self.scan_line_break()
+
+ def scan_anchor(self, TokenClass):
+     """Scan an anchor ('&name') or an alias ('*name') into a token.
+
+     The specification does not restrict characters for anchors and
+     aliases.  This may lead to problems; for instance, the document:
+         [ *alias, value ]
+     can be interpreted in two ways, as
+         [ "value" ]
+     and
+         [ *alias , "value" ]
+     Therefore names are restricted to ASCII letters, digits, '-' and '_'.
+
+     Returns `TokenClass(name, start_mark, end_mark)`.
+
+     Raises:
+         ScannerError: if the name is empty or followed by a character
+             that may not terminate it.
+     """
+     def is_name_char(ch):
+         return (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z'
+                 or u'a' <= ch <= u'z' or ch in u'-_')
+
+     start_mark = self.get_mark()
+     # The indicator only decides the wording of error messages.
+     name = 'alias' if self.peek() == u'*' else 'anchor'
+     self.forward()
+
+     length = 0
+     while is_name_char(self.peek(length)):
+         length += 1
+     if not length:
+         raise ScannerError("while scanning an %s" % name, start_mark,
+                 "expected alphabetic or numeric character, but found %r"
+                 % self.peek().encode('utf-8'), self.get_mark())
+     value = self.prefix(length)
+     self.forward(length)
+
+     terminator = self.peek()
+     if terminator not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
+         raise ScannerError("while scanning an %s" % name, start_mark,
+                 "expected alphabetic or numeric character, but found %r"
+                 % terminator.encode('utf-8'), self.get_mark())
+     end_mark = self.get_mark()
+     return TokenClass(value, start_mark, end_mark)
+
+ def scan_tag(self):
+     """Scan a tag and return a TagToken whose value is `(handle, suffix)`.
+
+     Three shapes are recognised (see the specification for details):
+       * '!<uri>'  -> handle None, suffix the URI between the brackets;
+       * '!' alone -> handle None, suffix u'!';
+       * otherwise -> the handle is read by `scan_tag_handle` when a second
+         '!' occurs before the next space or line break, else it is u'!';
+         the suffix is the URI that follows.
+
+     Raises:
+         ScannerError: on a missing '>' or when the tag is not followed by
+             a space, a line break or the end of the stream.
+     """
+     start_mark = self.get_mark()
+     ch = self.peek(1)
+     if ch == u'<':
+         handle = None
+         self.forward(2)
+         suffix = self.scan_tag_uri('tag', start_mark)
+         if self.peek() != u'>':
+             raise ScannerError("while parsing a tag", start_mark,
+                     "expected '>', but found %r" % self.peek().encode('utf-8'),
+                     self.get_mark())
+         self.forward()
+     elif ch in u'\0 \t\r\n\x85\u2028\u2029':
+         handle = None
+         suffix = u'!'
+         self.forward()
+     else:
+         # Look ahead for a second '!' before the tag ends.
+         offset = 1
+         while ch not in u'\0 \r\n\x85\u2028\u2029' and ch != u'!':
+             offset += 1
+             ch = self.peek(offset)
+         if ch == u'!':
+             handle = self.scan_tag_handle('tag', start_mark)
+         else:
+             handle = u'!'
+             self.forward()
+         suffix = self.scan_tag_uri('tag', start_mark)
+
+     terminator = self.peek()
+     if terminator not in u'\0 \r\n\x85\u2028\u2029':
+         raise ScannerError("while scanning a tag", start_mark,
+                 "expected ' ', but found %r" % terminator.encode('utf-8'),
+                 self.get_mark())
+     end_mark = self.get_mark()
+     return TagToken((handle, suffix), start_mark, end_mark)
+
+ def scan_block_scalar(self, style):
+ """Scan a literal ('|') or folded ('>') block scalar into a ScalarToken.
+
+ `style` is the indicator character; '>' enables line folding.  The
+ header indicators, the rest of the header line and the indented body
+ are consumed.  The token is created with `False` as its second
+ argument and with `style` as its last one.
+ """
+ # See the specification for details.
+
+ if style == '>':
+ folded = True
+ else:
+ folded = False
+
+ chunks = []
+ start_mark = self.get_mark()
+
+ # Scan the header.
+ self.forward()
+ chomping, increment = self.scan_block_scalar_indicators(start_mark)
+ self.scan_block_scalar_ignored_line(start_mark)
+
+ # Determine the indentation level and go to the first non-empty line.
+ # The content must be indented at least one column deeper than the
+ # current block, and never less than column 1.
+ min_indent = self.indent+1
+ if min_indent < 1:
+ min_indent = 1
+ if increment is None:
+ # No explicit indentation indicator: detect it from the leading lines.
+ breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
+ indent = max(min_indent, max_indent)
+ else:
+ # Explicit indicator: the increment is relative to `min_indent`.
+ indent = min_indent+increment-1
+ breaks, end_mark = self.scan_block_scalar_breaks(indent)
+ line_break = u''
+
+ # Scan the inner part of the block scalar.
+ while self.column == indent and self.peek() != u'\0':
+ chunks.extend(breaks)
+ leading_non_space = self.peek() not in u' \t'
+ length = 0
+ while self.peek(length) not in u'\0\r\n\x85\u2028\u2029':
+ length += 1
+ chunks.append(self.prefix(length))
+ self.forward(length)
+ line_break = self.scan_line_break()
+ breaks, end_mark = self.scan_block_scalar_breaks(indent)
+ if self.column == indent and self.peek() != u'\0':
+
+ # Unfortunately, folding rules are ambiguous.
+ #
+ # This is the folding according to the specification:
+
+ # A '\n' between two lines that both start with a non-space character
+ # is folded: it becomes a single space unless empty lines follow, in
+ # which case it is dropped.  Any other line break is kept as is.
+ if folded and line_break == u'\n' \
+ and leading_non_space and self.peek() not in u' \t':
+ if not breaks:
+ chunks.append(u' ')
+ else:
+ chunks.append(line_break)
+
+ # This is Clark Evans's interpretation (also in the spec
+ # examples):
+ #
+ #if folded and line_break == u'\n':
+ # if not breaks:
+ # if self.peek() not in ' \t':
+ # chunks.append(u' ')
+ # else:
+ # chunks.append(line_break)
+ #else:
+ # chunks.append(line_break)
+ else:
+ break
+
+ # Chomp the tail.
+ # `chomping` is False for '-' (drop the final line break), None when no
+ # indicator was given (keep the final line break only) and True for '+'
+ # (keep the final line break and all trailing breaks).
+ if chomping is not False:
+ chunks.append(line_break)
+ if chomping is True:
+ chunks.extend(breaks)
+
+ # We are done.
+ return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
+ style)
+
+ def scan_block_scalar_indicators(self, start_mark):
+     """Parse the optional indicators of a block scalar header.
+
+     A chomping indicator ('+' or '-') and an indentation indicator (a
+     digit 1-9) may appear in either order.  Returns a pair
+     `(chomping, increment)`: `chomping` is True for '+', False for '-'
+     and None when absent; `increment` is an int or None when absent.
+     See the specification for details.
+
+     Raises:
+         ScannerError: if the indentation indicator is 0 or the indicators
+             are followed by anything but a space, a line break or the end
+             of the stream.
+     """
+     def read_increment(digit):
+         # Convert the indentation digit, reject 0, then consume it.
+         amount = int(digit)
+         if amount == 0:
+             raise ScannerError("while scanning a block scalar", start_mark,
+                     "expected indentation indicator in the range 1-9, but found 0",
+                     self.get_mark())
+         self.forward()
+         return amount
+
+     chomping = None
+     increment = None
+     ch = self.peek()
+     if ch in u'+-':
+         # Chomping first, optionally followed by indentation.
+         chomping = (ch == '+')
+         self.forward()
+         ch = self.peek()
+         if ch in u'0123456789':
+             increment = read_increment(ch)
+     elif ch in u'0123456789':
+         # Indentation first, optionally followed by chomping.
+         increment = read_increment(ch)
+         ch = self.peek()
+         if ch in u'+-':
+             chomping = (ch == '+')
+             self.forward()
+
+     terminator = self.peek()
+     if terminator not in u'\0 \r\n\x85\u2028\u2029':
+         raise ScannerError("while scanning a block scalar", start_mark,
+                 "expected chomping or indentation indicators, but found %r"
+                 % terminator.encode('utf-8'), self.get_mark())
+     return chomping, increment
+
+ def scan_block_scalar_ignored_line(self, start_mark):
+     """Consume the rest of a block scalar header line.
+
+     Spaces, an optional comment and the line break are skipped.  See the
+     specification for details.
+
+     Raises:
+         ScannerError: if anything but a comment or a line break follows
+             the header.
+     """
+     line_end = u'\0\r\n\x85\u2028\u2029'
+
+     while self.peek() == u' ':
+         self.forward()
+     if self.peek() == u'#':
+         while self.peek() not in line_end:
+             self.forward()
+
+     found = self.peek()
+     if found not in line_end:
+         raise ScannerError("while scanning a block scalar", start_mark,
+                 "expected a comment or a line break, but found %r"
+                 % found.encode('utf-8'), self.get_mark())
+     self.scan_line_break()
+
+ def scan_block_scalar_indentation(self):
+     """Skip leading blank space of a block scalar, measuring indentation.
+
+     Spaces and line breaks are consumed up to the first other character.
+     Returns `(breaks, max_indent, end_mark)`: the normalized line breaks
+     passed, the largest column reached while skipping spaces, and the
+     mark taken after the last line break (or at the start if none).
+     See the specification for details.
+     """
+     breaks = []
+     max_indent = 0
+     end_mark = self.get_mark()
+     while self.peek() in u' \r\n\x85\u2028\u2029':
+         if self.peek() == u' ':
+             self.forward()
+             max_indent = max(max_indent, self.column)
+         else:
+             breaks.append(self.scan_line_break())
+             end_mark = self.get_mark()
+     return breaks, max_indent, end_mark
+
+ def scan_block_scalar_breaks(self, indent):
+     """Consume empty lines inside a block scalar.
+
+     On every line, spaces are skipped up to column `indent`.  Returns
+     `(breaks, end_mark)`: the normalized line breaks consumed and the
+     mark taken after the last of them (or at the start if none).
+     See the specification for details.
+     """
+     def skip_indentation():
+         while self.column < indent and self.peek() == u' ':
+             self.forward()
+
+     breaks = []
+     end_mark = self.get_mark()
+     skip_indentation()
+     while self.peek() in u'\r\n\x85\u2028\u2029':
+         breaks.append(self.scan_line_break())
+         end_mark = self.get_mark()
+         skip_indentation()
+     return breaks, end_mark
+
+ def scan_flow_scalar(self, style):
+     """Scan a single- or double-quoted scalar into a ScalarToken.
+
+     `style` is the quote character; '"' selects double-quoted rules.
+
+     Indentation rules are loosened for quoted scalars: they don't need to
+     adhere to indentation because " and ' clearly mark their beginning
+     and end, so the scanner is less restrictive than the specification
+     requires.  Only document separators are checked for.  See the
+     specification for details.
+     """
+     double = (style == '"')
+     pieces = []
+     start_mark = self.get_mark()
+     quote = self.peek()
+     self.forward()
+
+     # Alternate runs of non-space content and blanks up to the closing
+     # quote.
+     while True:
+         pieces.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
+         if self.peek() == quote:
+             break
+         pieces.extend(self.scan_flow_scalar_spaces(double, start_mark))
+
+     self.forward()
+     end_mark = self.get_mark()
+     return ScalarToken(u''.join(pieces), False, start_mark, end_mark,
+             style)
+
+ # Single-character escapes of double-quoted scalars: the character that
+ # follows the backslash mapped to the text it stands for.
+ ESCAPE_REPLACEMENTS = {
+     u'0':   u'\0',
+     u'a':   u'\x07',
+     u'b':   u'\x08',
+     u't':   u'\x09',
+     u'\t':  u'\x09',
+     u'n':   u'\x0A',
+     u'v':   u'\x0B',
+     u'f':   u'\x0C',
+     u'r':   u'\x0D',
+     u'e':   u'\x1B',
+     u' ':   u'\x20',
+     u'\"':  u'\"',
+     # "\/" is defined by YAML 1.2 for JSON compatibility; without it a
+     # JSON-style escaped slash is rejected as an unknown escape.
+     u'/':   u'/',
+     u'\\':  u'\\',
+     u'N':   u'\x85',
+     u'_':   u'\xA0',
+     u'L':   u'\u2028',
+     u'P':   u'\u2029',
+ }
+
+ # Numeric escapes: the escape letter mapped to the number of hexadecimal
+ # digits that must follow it.
+ ESCAPE_CODES = {
+     u'x':   2,
+     u'u':   4,
+     u'U':   8,
+ }
+
+ def scan_flow_scalar_non_spaces(self, double, start_mark):
+     """Scan a run of non-blank content inside a quoted scalar.
+
+     Returns the list of text chunks read.  Scanning stops, without
+     consuming it, at the first blank, line break, end of stream or quote
+     that is not part of the content.  `double` selects double-quoted
+     rules (backslash escapes) over single-quoted ones ('' for a quote).
+     See the specification for details.
+
+     Raises:
+         ScannerError: on a malformed or unknown escape sequence, or on a
+             numeric escape whose value is not a valid character code.
+     """
+     chunks = []
+     while True:
+         # Plain text up to the next special character.
+         length = 0
+         while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
+             length += 1
+         if length:
+             chunks.append(self.prefix(length))
+             self.forward(length)
+         ch = self.peek()
+         if not double and ch == u'\'' and self.peek(1) == u'\'':
+             # '' inside a single-quoted scalar is an escaped quote.
+             chunks.append(u'\'')
+             self.forward(2)
+         elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
+             # A character that is special only in the other quoting style.
+             chunks.append(ch)
+             self.forward()
+         elif double and ch == u'\\':
+             self.forward()
+             ch = self.peek()
+             if ch in self.ESCAPE_REPLACEMENTS:
+                 chunks.append(self.ESCAPE_REPLACEMENTS[ch])
+                 self.forward()
+             elif ch in self.ESCAPE_CODES:
+                 length = self.ESCAPE_CODES[ch]
+                 self.forward()
+                 for k in range(length):
+                     if self.peek(k) not in u'0123456789ABCDEFabcdef':
+                         raise ScannerError("while scanning a double-quoted scalar", start_mark,
+                                 "expected escape sequence of %d hexdecimal numbers, but found %r" %
+                                     (length, self.peek(k).encode('utf-8')), self.get_mark())
+                 code = int(self.prefix(length), 16)
+                 # unichr() rejects codes it cannot represent with a bare
+                 # ValueError; report those as a scanning error like every
+                 # other malformed escape.
+                 try:
+                     char = unichr(code)
+                 except ValueError:
+                     raise ScannerError("while scanning a double-quoted scalar", start_mark,
+                             "found escape sequence with invalid character code 0x%X" % code,
+                             self.get_mark())
+                 chunks.append(char)
+                 self.forward(length)
+             elif ch in u'\r\n\x85\u2028\u2029':
+                 # An escaped line break: the break itself is dropped.
+                 self.scan_line_break()
+                 chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
+             else:
+                 raise ScannerError("while scanning a double-quoted scalar", start_mark,
+                         "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark())
+         else:
+             return chunks
+
+ def scan_flow_scalar_spaces(self, double, start_mark):
+     """Scan blanks, and a possible line fold, inside a quoted scalar.
+
+     Returns the list of text chunks the blank run contributes:
+       * blanks followed by more text on the same line are kept verbatim;
+       * blanks before a line break are dropped, and the break is folded:
+         a lone '\n' becomes a single space, a '\n' followed by empty
+         lines yields just those extra breaks, and any other line break
+         character is kept in front of the extra breaks.
+     See the specification for details.
+
+     Raises:
+         ScannerError: if the stream ends inside the scalar.
+     """
+     length = 0
+     while self.peek(length) in u' \t':
+         length += 1
+     blanks = self.prefix(length)
+     self.forward(length)
+
+     ch = self.peek()
+     if ch == u'\0':
+         raise ScannerError("while scanning a quoted scalar", start_mark,
+                 "found unexpected end of stream", self.get_mark())
+     if ch not in u'\r\n\x85\u2028\u2029':
+         return [blanks]
+
+     line_break = self.scan_line_break()
+     breaks = self.scan_flow_scalar_breaks(double, start_mark)
+     chunks = []
+     if line_break != u'\n':
+         chunks.append(line_break)
+     elif not breaks:
+         chunks.append(u' ')
+     chunks.extend(breaks)
+     return chunks
+
+ def scan_flow_scalar_breaks(self, double, start_mark):
+     """Consume blank lines inside a quoted scalar.
+
+     Returns the list of normalized line breaks consumed; leading blanks
+     of each line are skipped.  See the specification for details.
+
+     Raises:
+         ScannerError: if a document separator ('---' or '...') is found
+             at the point where a line is about to be examined.
+     """
+     breaks = []
+     while True:
+         # Instead of checking indentation, we check for document
+         # separators.
+         head = self.prefix(3)
+         if head in (u'---', u'...') \
+                 and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+             raise ScannerError("while scanning a quoted scalar", start_mark,
+                     "found unexpected document separator", self.get_mark())
+
+         while self.peek() in u' \t':
+             self.forward()
+         if self.peek() not in u'\r\n\x85\u2028\u2029':
+             return breaks
+         breaks.append(self.scan_line_break())
+
+ def scan_plain(self):
+ """Scan a plain (unquoted) scalar and return its ScalarToken.
+
+ The scalar is gathered word by word; the blanks and folded line
+ breaks between words come from `scan_plain_spaces`.  The token is
+ created with `True` as its second argument and ends at the last
+ non-blank character consumed.
+
+ Raises ScannerError for a ':' in the flow context that is directly
+ followed by other text.
+ """
+ # See the specification for details.
+ # We add an additional restriction for the flow context:
+ # plain scalars in the flow context cannot contain ',', ':' and '?'.
+ # We also keep track of the `allow_simple_key` flag here.
+ # Indentation rules are loosened for the flow context.
+ chunks = []
+ start_mark = self.get_mark()
+ end_mark = start_mark
+ indent = self.indent+1
+ # We allow zero indentation for scalars, but then we need to check for
+ # document separators at the beginning of the line.
+ #if indent == 0:
+ # indent = 1
+ spaces = []
+ while True:
+ length = 0
+ # A '#' at the start of a word begins a comment and ends the scalar.
+ if self.peek() == u'#':
+ break
+ # Measure the next word: it ends at a blank or line break, at a ':'
+ # followed by a blank (block context), or at any flow indicator (flow
+ # context).
+ while True:
+ ch = self.peek(length)
+ if ch in u'\0 \t\r\n\x85\u2028\u2029' \
+ or (not self.flow_level and ch == u':' and
+ self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \
+ or (self.flow_level and ch in u',:?[]{}'):
+ break
+ length += 1
+ # It's not clear what we should do with ':' in the flow context.
+ if (self.flow_level and ch == u':'
+ and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'):
+ self.forward(length)
+ raise ScannerError("while scanning a plain scalar", start_mark,
+ "found unexpected ':'", self.get_mark(),
+ "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
+ # An empty word means the scalar is over.
+ if length == 0:
+ break
+ self.allow_simple_key = False
+ # The blanks scanned after the previous word are kept only now that
+ # another word follows them.
+ chunks.extend(spaces)
+ chunks.append(self.prefix(length))
+ self.forward(length)
+ end_mark = self.get_mark()
+ spaces = self.scan_plain_spaces(indent, start_mark)
+ # Stop when no blanks followed (this also covers the None returned at a
+ # document separator), when a comment starts, or when the next line is
+ # indented less than required in the block context.
+ if not spaces or self.peek() == u'#' \
+ or (not self.flow_level and self.column < indent):
+ break
+ return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
+
+ def scan_plain_spaces(self, indent, start_mark):
+ """Scan the blanks and line breaks that follow a word of a plain scalar.
+
+ Returns the list of text chunks they contribute: the spaces verbatim
+ when more text follows on the same line, or the folded line break(s)
+ when the line ends.  Returns None (bare `return`) when a document
+ separator ('---' or '...') follows a line break.  Sets
+ `allow_simple_key` to True once a line break has been consumed.
+ `indent` and `start_mark` are not used in the body.
+ """
+ # See the specification for details.
+ # The specification is really confusing about tabs in plain scalars.
+ # We just forbid them completely. Do not use tabs in YAML!
+ chunks = []
+ length = 0
+ while self.peek(length) in u' ':
+ length += 1
+ whitespaces = self.prefix(length)
+ self.forward(length)
+ ch = self.peek()
+ if ch in u'\r\n\x85\u2028\u2029':
+ line_break = self.scan_line_break()
+ self.allow_simple_key = True
+ # A document separator right after the break ends the scalar.
+ prefix = self.prefix(3)
+ if (prefix == u'---' or prefix == u'...') \
+ and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ return
+ # Collect further empty lines, skipping the spaces in front of them.
+ breaks = []
+ while self.peek() in u' \r\n\x85\u2028\u2029':
+ if self.peek() == ' ':
+ self.forward()
+ else:
+ breaks.append(self.scan_line_break())
+ prefix = self.prefix(3)
+ if (prefix == u'---' or prefix == u'...') \
+ and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
+ return
+ # Fold: a lone '\n' becomes a space; a '\n' followed by empty lines
+ # yields only those extra breaks; any other line break is kept.
+ if line_break != u'\n':
+ chunks.append(line_break)
+ elif not breaks:
+ chunks.append(u' ')
+ chunks.extend(breaks)
+ elif whitespaces:
+ chunks.append(whitespaces)
+ return chunks
+
+ def scan_tag_handle(self, name, start_mark):
+     """Consume and return a tag handle: '!', '!!' or '!word!'.
+
+     `name` only words the error messages.  For some strange reasons, the
+     specification does not allow '_' in tag handles; it is allowed here
+     anyway.  See the specification for details.
+
+     Raises:
+         ScannerError: if the handle does not start with '!' or a named
+             handle is not closed by '!'.
+     """
+     def is_word_char(ch):
+         return (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z'
+                 or u'a' <= ch <= u'z' or ch in u'-_')
+
+     first = self.peek()
+     if first != u'!':
+         raise ScannerError("while scanning a %s" % name, start_mark,
+                 "expected '!', but found %r" % first.encode('utf-8'),
+                 self.get_mark())
+
+     length = 1
+     ch = self.peek(length)
+     if ch != u' ':
+         # Not the bare '!' handle: read the word and its closing '!'.
+         while is_word_char(ch):
+             length += 1
+             ch = self.peek(length)
+         if ch != u'!':
+             self.forward(length)
+             raise ScannerError("while scanning a %s" % name, start_mark,
+                     "expected '!', but found %r" % ch.encode('utf-8'),
+                     self.get_mark())
+         length += 1
+
+     handle = self.prefix(length)
+     self.forward(length)
+     return handle
+
+ def scan_tag_uri(self, name, start_mark):
+     """Consume a tag URI and return it with '%XX' escapes decoded.
+
+     `name` only words the error messages.  The URI is not checked for
+     being well-formed.  See the specification for details.
+
+     Raises:
+         ScannerError: if no URI character is found at all.
+     """
+     def is_uri_char(ch):
+         return (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z'
+                 or u'a' <= ch <= u'z'
+                 or ch in u'-;/?:@&=+$,_.!~*\'()[]%')
+
+     chunks = []
+     length = 0
+     ch = self.peek(length)
+     while is_uri_char(ch):
+         if ch != u'%':
+             length += 1
+         else:
+             # Flush the literal text gathered so far, then decode the
+             # escape run that starts here.
+             chunks.append(self.prefix(length))
+             self.forward(length)
+             length = 0
+             chunks.append(self.scan_uri_escapes(name, start_mark))
+         ch = self.peek(length)
+     if length:
+         chunks.append(self.prefix(length))
+         self.forward(length)
+     if not chunks:
+         raise ScannerError("while parsing a %s" % name, start_mark,
+                 "expected URI, but found %r" % ch.encode('utf-8'),
+                 self.get_mark())
+     return u''.join(chunks)
+
+ def scan_uri_escapes(self, name, start_mark):
+     """Decode a run of consecutive '%XX' escapes as UTF-8 text.
+
+     `name` only words the error messages.  Returns the decoded text.
+     See the specification for details.
+
+     Raises:
+         ScannerError: if a '%' is not followed by two hexadecimal digits
+             or the escaped bytes are not valid UTF-8.
+     """
+     # A bytearray collects the raw byte values and decodes the same way
+     # on Python 2 and Python 3; it also avoids shadowing builtin `bytes`.
+     codes = bytearray()
+     mark = self.get_mark()
+     while self.peek() == u'%':
+         self.forward()
+         for k in range(2):
+             if self.peek(k) not in u'0123456789ABCDEFabcdef':
+                 raise ScannerError("while scanning a %s" % name, start_mark,
+                         "expected URI escape sequence of 2 hexdecimal numbers, but found %r" %
+                             (self.peek(k).encode('utf-8')), self.get_mark())
+         codes.append(int(self.prefix(2), 16))
+         self.forward(2)
+     try:
+         value = codes.decode('utf-8')
+     except UnicodeDecodeError as exc:
+         # Report the error at the start of the escape run.
+         raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
+     return value
+
+ def scan_line_break(self):
+     """Consume one line break, if present, and return it normalized.
+
+     Transforms:
+         '\r\n'      :   '\n'
+         '\r'        :   '\n'
+         '\n'        :   '\n'
+         '\x85'      :   '\n'
+         '\u2028'    :   '\u2028'
+         '\u2029'    :   '\u2029'
+         default     :   ''
+     Nothing is consumed in the default case.
+     """
+     ch = self.peek()
+     if ch in u'\u2028\u2029':
+         # Unicode line/paragraph separators are passed through as is.
+         self.forward()
+         return ch
+     if ch not in u'\r\n\x85':
+         return u''
+     # A CR LF pair counts as a single break.
+     if self.prefix(2) == u'\r\n':
+         self.forward(2)
+     else:
+         self.forward()
+     return u'\n'
+
+#try:
+# import psyco
+# psyco.bind(Scanner)
+#except ImportError:
+# pass
+
diff --git a/tablib/packages/yaml/serializer.py b/tablib/packages/yaml/serializer.py
new file mode 100644
index 0000000..0bf1e96
--- /dev/null
+++ b/tablib/packages/yaml/serializer.py
@@ -0,0 +1,111 @@
+
+__all__ = ['Serializer', 'SerializerError']
+
+from error import YAMLError
+from events import *
+from nodes import *
+
+class SerializerError(YAMLError):
+    """Raised by Serializer when it is used unopened, reopened, or after close."""
+
+class Serializer(object):
+    """Turns node graphs into events passed to ``self.emit`` (not defined here)."""
+
+    ANCHOR_TEMPLATE = u'id%03d'
+
+    def __init__(self, encoding=None,
+            explicit_start=None, explicit_end=None, version=None, tags=None):
+        """Record the stream/document options; the stream starts out unopened."""
+        self.use_encoding = encoding
+        self.use_explicit_start = explicit_start
+        self.use_explicit_end = explicit_end
+        self.use_version = version
+        self.use_tags = tags
+        self.serialized_nodes = {}
+        self.anchors = {}
+        self.last_anchor_id = 0
+        self.closed = None
+
+    def open(self):
+        """Emit StreamStartEvent; raise SerializerError if already opened or closed."""
+        if self.closed:
+            raise SerializerError("serializer is closed")
+        if self.closed is not None:
+            raise SerializerError("serializer is already opened")
+        self.emit(StreamStartEvent(encoding=self.use_encoding))
+        self.closed = False
+
+    def close(self):
+        """Emit StreamEndEvent on the first call; raise SerializerError if never opened."""
+        if self.closed is None:
+            raise SerializerError("serializer is not opened")
+        if not self.closed:
+            self.emit(StreamEndEvent())
+            self.closed = True
+
+    def serialize(self, node):
+        """Emit one document for ``node``, then reset the per-document anchor state."""
+        if self.closed is None:
+            raise SerializerError("serializer is not opened")
+        if self.closed:
+            raise SerializerError("serializer is closed")
+        self.emit(DocumentStartEvent(explicit=self.use_explicit_start,
+            version=self.use_version, tags=self.use_tags))
+        self.anchor_node(node)
+        self.serialize_node(node, None, None)
+        self.emit(DocumentEndEvent(explicit=self.use_explicit_end))
+        self.serialized_nodes = {}
+        self.anchors = {}
+        self.last_anchor_id = 0
+
+    def anchor_node(self, node):
+        """Walk ``node``; any node reached a second time gets a generated anchor."""
+        if node in self.anchors:
+            if self.anchors[node] is None:
+                self.anchors[node] = self.generate_anchor(node)
+            return
+        self.anchors[node] = None
+        if isinstance(node, SequenceNode):
+            for item in node.value:
+                self.anchor_node(item)
+        elif isinstance(node, MappingNode):
+            for key, value in node.value:
+                self.anchor_node(key)
+                self.anchor_node(value)
+
+    def generate_anchor(self, node):
+        """Return the next anchor name from ANCHOR_TEMPLATE (``node`` is unused)."""
+        self.last_anchor_id += 1
+        return self.ANCHOR_TEMPLATE % self.last_anchor_id
+
+    def serialize_node(self, node, parent, index):
+        """Emit the events for ``node``; an already-emitted node becomes an AliasEvent."""
+        alias = self.anchors[node]
+        if node in self.serialized_nodes:
+            self.emit(AliasEvent(alias))
+            return
+        self.serialized_nodes[node] = True
+        self.descend_resolver(parent, index)
+        if isinstance(node, ScalarNode):
+            detected_tag = self.resolve(ScalarNode, node.value, (True, False))
+            default_tag = self.resolve(ScalarNode, node.value, (False, True))
+            implicit = (node.tag == detected_tag), (node.tag == default_tag)
+            self.emit(ScalarEvent(alias, node.tag, implicit, node.value,
+                style=node.style))
+        elif isinstance(node, SequenceNode):
+            implicit = node.tag == self.resolve(SequenceNode, node.value, True)
+            self.emit(SequenceStartEvent(alias, node.tag, implicit,
+                flow_style=node.flow_style))
+            for position, item in enumerate(node.value):
+                self.serialize_node(item, node, position)
+            self.emit(SequenceEndEvent())
+        elif isinstance(node, MappingNode):
+            implicit = node.tag == self.resolve(MappingNode, node.value, True)
+            self.emit(MappingStartEvent(alias, node.tag, implicit,
+                flow_style=node.flow_style))
+            for key, value in node.value:
+                self.serialize_node(key, node, None)
+                self.serialize_node(value, node, key)
+            self.emit(MappingEndEvent())
+        self.ascend_resolver()
+
diff --git a/tablib/packages/yaml/tokens.py b/tablib/packages/yaml/tokens.py
new file mode 100644
index 0000000..4d0b48a
--- /dev/null
+++ b/tablib/packages/yaml/tokens.py
@@ -0,0 +1,104 @@
+
+class Token(object):
+    """Base token; remembers the marks where its source span starts and ends."""
+    def __init__(self, start_mark, end_mark):
+        self.start_mark, self.end_mark = start_mark, end_mark
+    def __repr__(self):
+        # ClassName(key=value, ...) over the instance attributes in sorted
+        # order, leaving out the *_mark positions.
+        shown = sorted([key for key in self.__dict__
+                        if not key.endswith('_mark')])
+        pairs = ['%s=%r' % (key, getattr(self, key)) for key in shown]
+        return '%s(%s)' % (type(self).__name__, ', '.join(pairs))
+
+#class BOMToken(Token):
+#    id = '<byte order mark>'
+
+class DirectiveToken(Token):
+    """Directive token holding the directive's ``name`` and ``value``."""
+    id = '<directive>'  # printable name of this token kind
+    def __init__(self, name, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)  # stores both marks
+        self.name = name
+        self.value = value
+
+class DocumentStartToken(Token):  # id is the printable name of this token kind
+    id = '<document start>'
+
+class DocumentEndToken(Token):  # id is the printable name of this token kind
+    id = '<document end>'
+
+class StreamStartToken(Token):
+    """Stream-start token; also carries the stream ``encoding`` (may be None)."""
+    id = '<stream start>'  # printable name of this token kind
+    def __init__(self, start_mark=None, end_mark=None,
+            encoding=None):
+        Token.__init__(self, start_mark, end_mark)  # stores both marks
+        self.encoding = encoding
+
+class StreamEndToken(Token):  # id is the printable name of this token kind
+    id = '<stream end>'
+
+class BlockSequenceStartToken(Token):  # id is the printable name of this token kind
+    id = '<block sequence start>'
+
+class BlockMappingStartToken(Token):  # id is the printable name of this token kind
+    id = '<block mapping start>'
+
+class BlockEndToken(Token):  # id is the printable name of this token kind
+    id = '<block end>'
+
+class FlowSequenceStartToken(Token):  # id is the '[' that opens a flow sequence
+ id = '['
+
+class FlowMappingStartToken(Token):  # id is the '{' that opens a flow mapping
+ id = '{'
+
+class FlowSequenceEndToken(Token):  # id is the ']' that closes a flow sequence
+ id = ']'
+
+class FlowMappingEndToken(Token):  # id is the '}' that closes a flow mapping
+ id = '}'
+
+class KeyToken(Token):  # id is the '?' mapping-key indicator
+ id = '?'
+
+class ValueToken(Token):  # id is the ':' mapping-value indicator
+ id = ':'
+
+class BlockEntryToken(Token):  # id is the '-' block sequence entry indicator
+ id = '-'
+
+class FlowEntryToken(Token):  # id is the ',' that separates flow collection entries
+ id = ','
+
+class AliasToken(Token):
+    """Alias token; ``value`` is presumably the alias name (set by the caller)."""
+    id = '<alias>'  # printable name of this token kind
+    def __init__(self, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)  # stores both marks
+        self.value = value
+
+class AnchorToken(Token):
+    """Anchor token; ``value`` is presumably the anchor name (set by the caller)."""
+    id = '<anchor>'  # printable name of this token kind
+    def __init__(self, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)  # stores both marks
+        self.value = value
+
+class TagToken(Token):
+    """Tag token; ``value`` holds whatever the scanner produced for the tag."""
+    id = '<tag>'  # printable name of this token kind
+    def __init__(self, value, start_mark, end_mark):
+        Token.__init__(self, start_mark, end_mark)  # stores both marks
+        self.value = value
+
+class ScalarToken(Token):
+    """Scalar token: ``value`` plus the ``plain`` flag and an optional ``style``."""
+    id = '<scalar>'  # printable name of this token kind
+    def __init__(self, value, plain, start_mark, end_mark, style=None):
+        Token.__init__(self, start_mark, end_mark)  # stores both marks
+        self.value = value
+        self.plain = plain
+        self.style = style
+