diff --git a/tablib/packages/__init__.py b/tablib/packages/__init__.py deleted file mode 100644 index a38d74d..0000000 --- a/tablib/packages/__init__.py +++ /dev/null @@ -1 +0,0 @@ -all = ['simplejson', 'typecheck', 'xlwt', 'opster'] \ No newline at end of file diff --git a/tablib/packages/opster.py b/tablib/packages/opster.py deleted file mode 100644 index 5fe64be..0000000 --- a/tablib/packages/opster.py +++ /dev/null @@ -1,612 +0,0 @@ -# (c) Alexander Solovyov, 2009, under terms of the new BSD License -'''Command line arguments parser -''' - -import sys, traceback, getopt, types, textwrap, inspect, os -from itertools import imap - -__all__ = ['command', 'dispatch'] -__version__ = '0.9.10' -__author__ = 'Alexander Solovyov' -__email__ = 'piranha@piranha.org.ua' - -write = sys.stdout.write -err = sys.stderr.write - -CMDTABLE = {} - -# -------- -# Public interface -# -------- - -def command(options=None, usage=None, name=None, shortlist=False, hide=False): - '''Decorator to mark function to be used for command line processing. - - All arguments are optional: - - - ``options``: options in format described in docs. If not supplied, - will be determined from function. - - ``usage``: usage string for function, replaces ``%name`` with name - of program or subcommand. In case if it's subcommand and ``%name`` - is not present, usage is prepended by ``name`` - - ``name``: used for multiple subcommands. Defaults to wrapped - function name - - ``shortlist``: if command should be included in shortlist. Used - only with multiple subcommands - - ``hide``: if command should be hidden from help listing. Used only - with multiple subcommands, overrides ``shortlist`` - ''' - def wrapper(func): - try: - options_ = list(guess_options(func)) - except TypeError: - options_ = [] - try: - options_ = options_ + list(options) - except TypeError: - pass - - name_ = name or func.__name__.replace('_', '-') - if usage is None: - usage_ = guess_usage(func, options_) - else: - usage_ = usage - prefix = hide and '~' or (shortlist and '^' or '') - CMDTABLE[prefix + name_] = (func, options_, usage_) - - def help_func(name=None): - return help_cmd(func, replace_name(usage_, sysname()), options_) - - @wraps(func) - def inner(*args, **opts): - # look if we need to add 'help' option - try: - (True for option in reversed(options_) - if option[1] == 'help').next() - except StopIteration: - options_.append(('h', 'help', False, 'show help')) - - argv = opts.pop('argv', sys.argv[1:]) - if opts.pop('help', False): - return help_func() - - if args or opts: - # no catcher here because this is call from Python - return call_cmd_regular(func, options_)(*args, **opts) - - try: - opts, args = catcher(lambda: parse(argv, options_), help_func) - except Abort: - return -1 - - try: - if opts.pop('help', False): - return help_func() - return catcher(lambda: call_cmd(name_, func)(*args, **opts), - help_func) - except Abort: - return -1 - - return inner - return wrapper - - -def dispatch(args=None, cmdtable=None, globaloptions=None, - middleware=lambda x: x): - '''Dispatch command arguments based on subcommands. - - - ``args``: list of arguments, default: ``sys.argv[1:]`` - - ``cmdtable``: dict of commands in format described below. - If not supplied, will use functions decorated with ``@command``. - - ``globaloptions``: list of options which are applied to all - commands, will contain ``--help`` option at least. - - ``middleware``: global decorator for all commands. - - cmdtable format description:: - - {'name': (function, options, usage)} - - - ``name`` is the name used on command-line. Can contain aliases - (separate them with ``|``), pointer to a fact that this command - should be displayed in short help (start name with ``^``), or to - a fact that this command should be hidden (start name with ``~``) - - ``function`` is the actual callable - - ``options`` is options list in format described in docs - - ``usage`` is the short string of usage - ''' - args = args or sys.argv[1:] - cmdtable = cmdtable or CMDTABLE - - globaloptions = globaloptions or [] - globaloptions.append(('h', 'help', False, 'display help')) - - cmdtable['help'] = (help_(cmdtable, globaloptions), [], '[TOPIC]') - help_func = cmdtable['help'][0] - - autocomplete(cmdtable, args) - - try: - name, func, args, kwargs = catcher( - lambda: _dispatch(args, cmdtable, globaloptions), - help_func) - return catcher( - lambda: call_cmd(name, middleware(func))(*args, **kwargs), - help_func) - except Abort: - return -1 - -# -------- -# Help -# -------- - -def help_(cmdtable, globalopts): - def help_inner(name=None): - '''Show help for a given help topic or a help overview - - With no arguments, print a list of commands with short help messages. - - Given a command name, print help for that command. - ''' - def helplist(): - hlp = {} - # determine if any command is marked for shortlist - shortlist = (name == 'shortlist' and - any(imap(lambda x: x.startswith('^'), cmdtable))) - - for cmd, info in cmdtable.items(): - if cmd.startswith('~'): - continue # do not display hidden commands - if shortlist and not cmd.startswith('^'): - continue # short help contains only marked commands - cmd = cmd.lstrip('^~') - doc = info[0].__doc__ or '(no help text available)' - hlp[cmd] = doc.splitlines()[0].rstrip() - - hlplist = sorted(hlp) - maxlen = max(map(len, hlplist)) - - write('usage: %s [options]\n' % sysname()) - write('\ncommands:\n\n') - for cmd in hlplist: - doc = hlp[cmd] - if False: # verbose? - write(' %s:\n %s\n' % (cmd.replace('|', ', '), doc)) - else: - write(' %-*s %s\n' % (maxlen, cmd.split('|', 1)[0], - doc)) - - if not cmdtable: - return err('No commands specified!\n') - - if not name or name == 'shortlist': - return helplist() - - aliases, (cmd, options, usage) = findcmd(name, cmdtable) - return help_cmd(cmd, - replace_name(usage, sysname() + ' ' + aliases[0]), - options + globalopts) - return help_inner - -def help_cmd(func, usage, options): - '''show help for given command - - - ``func``: function to generate help for (``func.__doc__`` is taken) - - ``usage``: usage string - - ``options``: options in usual format - - >>> def test(*args, **opts): - ... """that's a test command - ... - ... you can do nothing with this command""" - ... pass - >>> opts = [('l', 'listen', 'localhost', - ... 'ip to listen on'), - ... ('p', 'port', 8000, - ... 'port to listen on'), - ... ('d', 'daemonize', False, - ... 'daemonize process'), - ... ('', 'pid-file', '', - ... 'name of file to write process ID to')] - >>> help_cmd(test, 'test [-l HOST] [NAME]', opts) - test [-l HOST] [NAME] - - that's a test command - - you can do nothing with this command - - options: - - -l --listen ip to listen on (default: localhost) - -p --port port to listen on (default: 8000) - -d --daemonize daemonize process - --pid-file name of file to write process ID to - - ''' - write(usage + '\n') - doc = func.__doc__ - if not doc: - doc = '(no help text available)' - write('\n' + doc.strip() + '\n\n') - if options: - write(''.join(help_options(options))) - -def help_options(options): - yield 'options:\n\n' - output = [] - for short, name, default, desc in options: - if hasattr(default, '__call__'): - default = default(None) - default = default and ' (default: %s)' % default or '' - output.append(('%2s%s' % (short and '-%s' % short, - name and ' --%s' % name), - '%s%s' % (desc, default))) - - opts_len = max([len(first) for first, second in output if second] or [0]) - for first, second in output: - if second: - # wrap description at 78 chars - second = textwrap.wrap(second, width=(78 - opts_len - 3)) - pad = '\n' + ' ' * (opts_len + 3) - yield ' %-*s %s\n' % (opts_len, first, pad.join(second)) - else: - yield '%s\n' % first - - -# -------- -# Options parsing -# -------- - -def parse(args, options): - ''' - >>> opts = [('l', 'listen', 'localhost', - ... 'ip to listen on'), - ... ('p', 'port', 8000, - ... 'port to listen on'), - ... ('d', 'daemonize', False, - ... 'daemonize process'), - ... ('', 'pid-file', '', - ... 'name of file to write process ID to')] - >>> print parse(['-l', '0.0.0.0', '--pi', 'test', 'all'], opts) - ({'pid_file': 'test', 'daemonize': False, 'port': 8000, 'listen': '0.0.0.0'}, ['all']) - - ''' - argmap, defmap, state = {}, {}, {} - shortlist, namelist, funlist = '', [], [] - - for short, name, default, comment in options: - if short and len(short) != 1: - raise FOError('Short option should be only a single' - ' character: %s' % short) - if not name: - raise FOError( - 'Long name should be defined for every option') - # change name to match Python styling - pyname = name.replace('-', '_') - argmap['-' + short] = argmap['--' + name] = pyname - defmap[pyname] = default - - # copy defaults to state - if isinstance(default, list): - state[pyname] = default[:] - elif hasattr(default, '__call__'): - funlist.append(pyname) - state[pyname] = None - else: - state[pyname] = default - - # getopt wants indication that it takes a parameter - if not (default is None or default is True or default is False): - if short: short += ':' - if name: name += '=' - if short: - shortlist += short - if name: - namelist.append(name) - - opts, args = getopt.gnu_getopt(args, shortlist, namelist) - - # transfer result to state - for opt, val in opts: - name = argmap[opt] - t = type(defmap[name]) - if t is types.FunctionType: - del funlist[funlist.index(name)] - state[name] = defmap[name](val) - elif t is types.ListType: - state[name].append(val) - elif t in (types.NoneType, types.BooleanType): - state[name] = not defmap[name] - else: - state[name] = t(val) - - for name in funlist: - state[name] = defmap[name](None) - - return state, args -# -------- -# Subcommand system -# -------- - -def _dispatch(args, cmdtable, globalopts): - cmd, func, args, options = cmdparse(args, cmdtable, globalopts) - - if options.pop('help', False): - return 'help', cmdtable['help'][0], [cmd], {} - elif not cmd: - return 'help', cmdtable['help'][0], ['shortlist'], {} - - return cmd, func, args, options - -def cmdparse(args, cmdtable, globalopts): - # command is the first non-option - cmd = None - for arg in args: - if not arg.startswith('-'): - cmd = arg - break - - if cmd: - args.pop(args.index(cmd)) - - aliases, info = findcmd(cmd, cmdtable) - cmd = aliases[0] - possibleopts = list(info[1]) - else: - possibleopts = [] - - possibleopts.extend(globalopts) - - try: - options, args = parse(args, possibleopts) - except getopt.GetoptError, e: - raise ParseError(cmd, e) - - return (cmd, cmd and info[0] or None, args, options) - -def aliases_(cmdtable_key): - return cmdtable_key.lstrip("^~").split("|") - -def findpossible(cmd, table): - """ - Return cmd -> (aliases, command table entry) - for each matching command. - """ - choice = {} - for e in table.keys(): - aliases = aliases_(e) - found = None - if cmd in aliases: - found = cmd - else: - for a in aliases: - if a.startswith(cmd): - found = a - break - if found is not None: - choice[found] = (aliases, table[e]) - - return choice - -def findcmd(cmd, table): - """Return (aliases, command table entry) for command string.""" - choice = findpossible(cmd, table) - - if cmd in choice: - return choice[cmd] - - if len(choice) > 1: - clist = choice.keys() - clist.sort() - raise AmbiguousCommand(cmd, clist) - - if choice: - return choice.values()[0] - - raise UnknownCommand(cmd) - -# -------- -# Helpers -# -------- - -def guess_options(func): - args, varargs, varkw, defaults = inspect.getargspec(func) - for name, option in zip(args[-len(defaults):], defaults): - try: - sname, default, hlp = option - yield (sname, name.replace('_', '-'), default, hlp) - except TypeError: - pass - -def guess_usage(func, options): - usage = '%name ' - if options: - usage += '[OPTIONS] ' - args, varargs = inspect.getargspec(func)[:2] - argnum = len(args) - len(options) - if argnum > 0: - usage += args[0].upper() - if argnum > 1: - usage += 'S' - elif varargs: - usage += '[%s]' % varargs.upper() - return usage - -def catcher(target, help_func): - '''Catches all exceptions and prints human-readable information on them - ''' - try: - return target() - except UnknownCommand, e: - err("unknown command: '%s'\n" % e) - except AmbiguousCommand, e: - err("command '%s' is ambiguous:\n %s\n" % - (e.args[0], ' '.join(e.args[1]))) - except ParseError, e: - err('%s: %s\n' % (e.args[0], e.args[1])) - help_func(e.args[0]) - except getopt.GetoptError, e: - err('error: %s\n' % e) - help_func() - except FOError, e: - err('%s\n' % e) - except KeyboardInterrupt: - err('interrupted!\n') - except SystemExit: - raise - except: - err('unknown exception encountered') - raise - - raise Abort - -def call_cmd(name, func): - def inner(*args, **kwargs): - try: - return func(*args, **kwargs) - except TypeError: - if len(traceback.extract_tb(sys.exc_info()[2])) == 1: - raise ParseError(name, "invalid arguments") - raise - return inner - -def call_cmd_regular(func, opts): - def inner(*args, **kwargs): - funcargs, _, varkw, defaults = inspect.getargspec(func) - if len(args) > len(funcargs): - raise TypeError('You have supplied more positional arguments' - ' than applicable') - - funckwargs = dict((lname.replace('-', '_'), default) - for _, lname, default, _ in opts) - if 'help' not in (defaults or ()) and not varkw: - funckwargs.pop('help', None) - funckwargs.update(kwargs) - return func(*args, **funckwargs) - return inner - -def replace_name(usage, name): - if '%name' in usage: - return usage.replace('%name', name, 1) - return name + ' ' + usage - -def sysname(): - name = sys.argv[0] - if name.startswith('./'): - return name[2:] - return name - -try: - from functools import wraps -except ImportError: - def wraps(wrapped, assigned=('__module__', '__name__', '__doc__'), - updated=('__dict__',)): - def inner(wrapper): - for attr in assigned: - setattr(wrapper, attr, getattr(wrapped, attr)) - for attr in updated: - getattr(wrapper, attr).update(getattr(wrapped, attr, {})) - return wrapper - return inner - -# -------- -# Autocomplete system -# -------- - -# Borrowed from PIP -def autocomplete(cmdtable, args): - """Command and option completion. - - Enable by sourcing one of the completion shell scripts (bash or zsh). - """ - - # Don't complete if user hasn't sourced bash_completion file. - if not os.environ.has_key('OPSTER_AUTO_COMPLETE'): - return - cwords = os.environ['COMP_WORDS'].split()[1:] - cword = int(os.environ['COMP_CWORD']) - - try: - current = cwords[cword-1] - except IndexError: - current = '' - - commands = [] - for k in cmdtable.keys(): - commands += aliases_(k) - - # command - if cword == 1: - print ' '.join(filter(lambda x: x.startswith(current), commands)) - - # command options - elif cwords[0] in commands: - options = [] - aliases, (cmd, opts, usage) = findcmd(cwords[0], cmdtable) - for (short, long, default, help) in opts: - options.append('-%s' % short) - options.append('--%s' % long) - - options = [o for o in options if o.startswith(current)] - print ' '.join(filter(lambda x: x.startswith(current), options)) - - sys.exit(1) - - -COMPLETIONS = { - 'bash': - """ -# opster bash completion start -_opster_completion() -{ - COMPREPLY=( $( COMP_WORDS="${COMP_WORDS[*]}" \\ - COMP_CWORD=$COMP_CWORD \\ - OPSTER_AUTO_COMPLETE=1 $1 ) ) -} -complete -o default -F _opster_completion %s -# opster bash completion end -""", - 'zsh': - """ -# opster zsh completion start -function _opster_completion { - local words cword - read -Ac words - read -cn cword - reply=( $( COMP_WORDS="$words[*]" \\ - COMP_CWORD=$(( cword-1 )) \\ - OPSTER_AUTO_COMPLETE=1 $words[1] ) ) -} -compctl -K _opster_completion %s -# opster zsh completion end -""" - } - -@command(name='_completion', hide=True) -def completion(type=('t', 'bash', 'Completion type (bash or zsh)')): - """Outputs completion script for bash or zsh.""" - - prog_name = os.path.split(sys.argv[0])[1] - print COMPLETIONS[type] % prog_name - -# -------- -# Exceptions -# -------- - -# Command exceptions -class CommandException(Exception): - 'Base class for command exceptions' - -class AmbiguousCommand(CommandException): - 'Raised if command is ambiguous' - -class UnknownCommand(CommandException): - 'Raised if command is unknown' - -class ParseError(CommandException): - 'Raised on error in command line parsing' - -class Abort(CommandException): - 'Abort execution' - -class FOError(CommandException): - 'Raised on trouble with opster configuration' diff --git a/tablib/packages/simplejson/__init__.py b/tablib/packages/simplejson/__init__.py deleted file mode 100644 index dcfd541..0000000 --- a/tablib/packages/simplejson/__init__.py +++ /dev/null @@ -1,437 +0,0 @@ -r"""JSON (JavaScript Object Notation) is a subset of -JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data -interchange format. - -:mod:`simplejson` exposes an API familiar to users of the standard library -:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained -version of the :mod:`json` library contained in Python 2.6, but maintains -compatibility with Python 2.4 and Python 2.5 and (currently) has -significant performance advantages, even without using the optional C -extension for speedups. - -Encoding basic Python object hierarchies:: - - >>> import simplejson as json - >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) - '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") - "\"foo\bar" - >>> print json.dumps(u'\u1234') - "\u1234" - >>> print json.dumps('\\') - "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) - {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO - >>> io = StringIO() - >>> json.dump(['streaming API'], io) - >>> io.getvalue() - '["streaming API"]' - -Compact encoding:: - - >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) - '[1,2,3,{"4":5,"6":7}]' - -Pretty printing:: - - >>> import simplejson as json - >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) - { - "4": 5, - "6": 7 - } - -Decoding JSON:: - - >>> import simplejson as json - >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] - >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj - True - >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' - True - >>> from StringIO import StringIO - >>> io = StringIO('["streaming API"]') - >>> json.load(io)[0] == 'streaming API' - True - -Specializing JSON object decoding:: - - >>> import simplejson as json - >>> def as_complex(dct): - ... if '__complex__' in dct: - ... return complex(dct['real'], dct['imag']) - ... return dct - ... - >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', - ... object_hook=as_complex) - (1+2j) - >>> from decimal import Decimal - >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') - True - -Specializing JSON object encoding:: - - >>> import simplejson as json - >>> def encode_complex(obj): - ... if isinstance(obj, complex): - ... return [obj.real, obj.imag] - ... raise TypeError(repr(o) + " is not JSON serializable") - ... - >>> json.dumps(2 + 1j, default=encode_complex) - '[2.0, 1.0]' - >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) - '[2.0, 1.0]' - >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) - '[2.0, 1.0]' - - -Using simplejson.tool from the shell to validate and pretty-print:: - - $ echo '{"json":"obj"}' | python -m simplejson.tool - { - "json": "obj" - } - $ echo '{ 1.2:3.4}' | python -m simplejson.tool - Expecting property name: line 1 column 2 (char 2) -""" -__version__ = '2.1.1' -__all__ = [ - 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', - 'OrderedDict', -] - -__author__ = 'Bob Ippolito ' - -from decimal import Decimal - -from decoder import JSONDecoder, JSONDecodeError -from encoder import JSONEncoder -def _import_OrderedDict(): - import collections - try: - return collections.OrderedDict - except AttributeError: - import ordered_dict - return ordered_dict.OrderedDict -OrderedDict = _import_OrderedDict() - -def _import_c_make_encoder(): - try: - from simplejson._speedups import make_encoder - return make_encoder - except ImportError: - return None - -_default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, - use_decimal=False, -) - -def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, use_decimal=False, **kw): - """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a - ``.write()``-supporting file-like object). - - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is false, then the some chunks written to ``fp`` - may be ``unicode`` instances, subject to normal Python ``str`` to - ``unicode`` coercion rules. Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter()``) this is likely - to cause an error. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If *indent* is a string, then JSON array elements and object members - will be pretty-printed with a newline followed by that string repeated - for each level of nesting. ``None`` (the default) selects the most compact - representation without any newlines. For backwards compatibility with - versions of simplejson earlier than 2.1.0, an integer is also accepted - and is converted to a string with that many spaces. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - If *use_decimal* is true (default: ``False``) then decimal.Decimal - will be natively serialized to JSON with full precision. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. - - """ - # cached encoder - if (not skipkeys and ensure_ascii and - check_circular and allow_nan and - cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): - iterable = _default_encoder.iterencode(obj) - else: - if cls is None: - cls = JSONEncoder - iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, - check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, encoding=encoding, - default=default, use_decimal=use_decimal, **kw).iterencode(obj) - # could accelerate with writelines in some versions of Python, at - # a debuggability cost - for chunk in iterable: - fp.write(chunk) - - -def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, use_decimal=False, **kw): - """Serialize ``obj`` to a JSON formatted ``str``. - - If ``skipkeys`` is false then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) - will be skipped instead of raising a ``TypeError``. - - If ``ensure_ascii`` is false, then the return value will be a - ``unicode`` instance subject to normal Python ``str`` to ``unicode`` - coercion rules instead of being escaped to an ASCII ``str``. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). - - If ``allow_nan`` is false, then it will be a ``ValueError`` to - serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in - strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - - If ``indent`` is a string, then JSON array elements and object members - will be pretty-printed with a newline followed by that string repeated - for each level of nesting. ``None`` (the default) selects the most compact - representation without any newlines. For backwards compatibility with - versions of simplejson earlier than 2.1.0, an integer is also accepted - and is converted to a string with that many spaces. - - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. - - ``encoding`` is the character encoding for str instances, default is UTF-8. - - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. The default simply raises TypeError. - - If *use_decimal* is true (default: ``False``) then decimal.Decimal - will be natively serialized to JSON with full precision. - - To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the - ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. - - """ - # cached encoder - if (not skipkeys and ensure_ascii and - check_circular and allow_nan and - cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not use_decimal - and not kw): - return _default_encoder.encode(obj) - if cls is None: - cls = JSONEncoder - return cls( - skipkeys=skipkeys, ensure_ascii=ensure_ascii, - check_circular=check_circular, allow_nan=allow_nan, indent=indent, - separators=separators, encoding=encoding, default=default, - use_decimal=use_decimal, **kw).encode(obj) - - -_default_decoder = JSONDecoder(encoding=None, object_hook=None, - object_pairs_hook=None) - - -def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, object_pairs_hook=None, - use_decimal=False, **kw): - """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing - a JSON document) to a Python object. - - *encoding* determines the encoding used to interpret any - :class:`str` objects decoded by this instance (``'utf-8'`` by - default). It has no effect when decoding :class:`unicode` objects. - - Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as :class:`unicode`. - - *object_hook*, if specified, will be called with the result of every - JSON object decoded and its return value will be used in place of the - given :class:`dict`. This can be used to provide custom - deserializations (e.g. to support JSON-RPC class hinting). - - *object_pairs_hook* is an optional function that will be called with - the result of any object literal decode with an ordered list of pairs. - The return value of *object_pairs_hook* will be used instead of the - :class:`dict`. This feature can be used to implement custom decoders - that rely on the order that the key and value pairs are decoded (for - example, :func:`collections.OrderedDict` will remember the order of - insertion). If *object_hook* is also defined, the *object_pairs_hook* - takes priority. - - *parse_float*, if specified, will be called with the string of every - JSON float to be decoded. By default, this is equivalent to - ``float(num_str)``. This can be used to use another datatype or parser - for JSON floats (e.g. :class:`decimal.Decimal`). - - *parse_int*, if specified, will be called with the string of every - JSON int to be decoded. By default, this is equivalent to - ``int(num_str)``. This can be used to use another datatype or parser - for JSON integers (e.g. :class:`float`). - - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. - - If *use_decimal* is true (default: ``False``) then it implies - parse_float=decimal.Decimal for parity with ``dump``. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. - - """ - return loads(fp.read(), - encoding=encoding, cls=cls, object_hook=object_hook, - parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, - use_decimal=use_decimal, **kw) - - -def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, object_pairs_hook=None, - use_decimal=False, **kw): - """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON - document) to a Python object. - - *encoding* determines the encoding used to interpret any - :class:`str` objects decoded by this instance (``'utf-8'`` by - default). It has no effect when decoding :class:`unicode` objects. - - Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as :class:`unicode`. - - *object_hook*, if specified, will be called with the result of every - JSON object decoded and its return value will be used in place of the - given :class:`dict`. This can be used to provide custom - deserializations (e.g. to support JSON-RPC class hinting). - - *object_pairs_hook* is an optional function that will be called with - the result of any object literal decode with an ordered list of pairs. - The return value of *object_pairs_hook* will be used instead of the - :class:`dict`. This feature can be used to implement custom decoders - that rely on the order that the key and value pairs are decoded (for - example, :func:`collections.OrderedDict` will remember the order of - insertion). If *object_hook* is also defined, the *object_pairs_hook* - takes priority. - - *parse_float*, if specified, will be called with the string of every - JSON float to be decoded. By default, this is equivalent to - ``float(num_str)``. This can be used to use another datatype or parser - for JSON floats (e.g. :class:`decimal.Decimal`). - - *parse_int*, if specified, will be called with the string of every - JSON int to be decoded. By default, this is equivalent to - ``int(num_str)``. This can be used to use another datatype or parser - for JSON integers (e.g. :class:`float`). - - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. - - If *use_decimal* is true (default: ``False``) then it implies - parse_float=decimal.Decimal for parity with ``dump``. - - To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. - - """ - if (cls is None and encoding is None and object_hook is None and - parse_int is None and parse_float is None and - parse_constant is None and object_pairs_hook is None - and not use_decimal and not kw): - return _default_decoder.decode(s) - if cls is None: - cls = JSONDecoder - if object_hook is not None: - kw['object_hook'] = object_hook - if object_pairs_hook is not None: - kw['object_pairs_hook'] = object_pairs_hook - if parse_float is not None: - kw['parse_float'] = parse_float - if parse_int is not None: - kw['parse_int'] = parse_int - if parse_constant is not None: - kw['parse_constant'] = parse_constant - if use_decimal: - if parse_float is not None: - raise TypeError("use_decimal=True implies parse_float=Decimal") - kw['parse_float'] = Decimal - return cls(encoding=encoding, **kw).decode(s) - - -def _toggle_speedups(enabled): - import simplejson.decoder as dec - import simplejson.encoder as enc - import simplejson.scanner as scan - c_make_encoder = _import_c_make_encoder() - if enabled: - dec.scanstring = dec.c_scanstring or dec.py_scanstring - enc.c_make_encoder = c_make_encoder - enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or - enc.py_encode_basestring_ascii) - scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner - else: - dec.scanstring = dec.py_scanstring - enc.c_make_encoder = None - enc.encode_basestring_ascii = enc.py_encode_basestring_ascii - scan.make_scanner = scan.py_make_scanner - dec.make_scanner = scan.make_scanner - global _default_decoder - _default_decoder = JSONDecoder( - encoding=None, - object_hook=None, - object_pairs_hook=None, - ) - global _default_encoder - _default_encoder = JSONEncoder( - skipkeys=False, - ensure_ascii=True, - check_circular=True, - allow_nan=True, - indent=None, - separators=None, - encoding='utf-8', - default=None, - ) diff --git a/tablib/packages/simplejson/_speedups.c b/tablib/packages/simplejson/_speedups.c deleted file mode 100644 index b06ba50..0000000 --- a/tablib/packages/simplejson/_speedups.c +++ /dev/null @@ -1,2561 +0,0 @@ -#include "Python.h" -#include "structmember.h" -#if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double) -#define PyOS_string_to_double json_PyOS_string_to_double -static double -json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); -static double -json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) { - double x; - assert(endptr == NULL); - assert(overflow_exception == NULL); - PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;) - x = PyOS_ascii_atof(s); - PyFPE_END_PROTECT(x) - return x; -} -#endif -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#endif -#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) -typedef int Py_ssize_t; -#define PY_SSIZE_T_MAX INT_MAX -#define PY_SSIZE_T_MIN INT_MIN -#define PyInt_FromSsize_t PyInt_FromLong -#define PyInt_AsSsize_t PyInt_AsLong -#endif -#ifndef Py_IS_FINITE -#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) -#endif - -#ifdef __GNUC__ -#define UNUSED __attribute__((__unused__)) -#else -#define UNUSED -#endif - -#define DEFAULT_ENCODING "utf-8" - -#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) -#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) -#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) -#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) -#define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr)) - -static PyTypeObject PyScannerType; -static PyTypeObject PyEncoderType; -static PyTypeObject *DecimalTypePtr; - -typedef struct _PyScannerObject { - PyObject_HEAD - PyObject *encoding; - PyObject *strict; - PyObject *object_hook; - PyObject *pairs_hook; - PyObject *parse_float; - PyObject *parse_int; - PyObject *parse_constant; - PyObject *memo; -} PyScannerObject; - -static PyMemberDef scanner_members[] = { - {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, - {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, - {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, - {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, - {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, - {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, - {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, - {NULL} -}; - -typedef struct _PyEncoderObject { - PyObject_HEAD - PyObject *markers; - PyObject *defaultfn; - PyObject *encoder; - PyObject *indent; - PyObject *key_separator; - PyObject *item_separator; - PyObject *sort_keys; - PyObject *skipkeys; - PyObject *key_memo; - int fast_encode; - int allow_nan; - int use_decimal; -} PyEncoderObject; - -static PyMemberDef encoder_members[] = { - {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, - {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, - {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, - {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, - {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, - {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, - {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, - {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, - {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"}, - {NULL} -}; - -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); -static PyObject * -ascii_escape_unicode(PyObject *pystr); -static PyObject * -ascii_escape_str(PyObject *pystr); -static PyObject * -py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); -void init_speedups(void); -static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); -static PyObject * -scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -static int -scanner_init(PyObject *self, PyObject *args, PyObject *kwds); -static void -scanner_dealloc(PyObject *self); -static int -scanner_clear(PyObject *self); -static PyObject * -encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -static int -encoder_init(PyObject *self, PyObject *args, PyObject *kwds); -static void -encoder_dealloc(PyObject *self); -static int -encoder_clear(PyObject *self); -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); -static PyObject * -_encoded_const(PyObject *obj); -static void -raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj); -static int -_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); -static PyObject * -_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); -static PyObject * -encoder_encode_float(PyEncoderObject *s, PyObject *obj); - -#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') -#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) - -#define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) -#else -#define MAX_EXPANSION MIN_EXPANSION -#endif - -static int -_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) -{ - /* PyObject to Py_ssize_t converter */ - *size_ptr = PyInt_AsSsize_t(o); - if (*size_ptr == -1 && PyErr_Occurred()) - return 0; - return 1; -} - -static PyObject * -_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) -{ - /* Py_ssize_t to PyObject converter */ - return PyInt_FromSsize_t(*size_ptr); -} - -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) -{ - /* Escape unicode code point c to ASCII escape sequences - in char *output. output must have at least 12 bytes unused to - accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ - output[chars++] = '\\'; - switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; - case '\b': output[chars++] = 'b'; break; - case '\f': output[chars++] = 'f'; break; - case '\n': output[chars++] = 'n'; break; - case '\r': output[chars++] = 'r'; break; - case '\t': output[chars++] = 't'; break; - default: -#ifdef Py_UNICODE_WIDE - if (c >= 0x10000) { - /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; - c = 0xd800 | ((v >> 10) & 0x3ff); - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - c = 0xdc00 | (v & 0x3ff); - output[chars++] = '\\'; - } -#endif - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - } - return chars; -} - -static PyObject * -ascii_escape_unicode(PyObject *pystr) -{ - /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t max_output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - Py_UNICODE *input_unicode; - - input_chars = PyUnicode_GET_SIZE(pystr); - input_unicode = PyUnicode_AS_UNICODE(pystr); - - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - chars = 0; - output[chars++] = '"'; - for (i = 0; i < input_chars; i++) { - Py_UNICODE c = input_unicode[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static PyObject * -ascii_escape_str(PyObject *pystr) -{ - /* Take a PyString pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - char *input_str; - - input_chars = PyString_GET_SIZE(pystr); - input_str = PyString_AS_STRING(pystr); - - /* Fast path for a string that's already ASCII */ - for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } - } - break; - } - } - - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - } - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - output[0] = '"'; - - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); - - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - /* An ASCII char can't possibly expand to a surrogate! */ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); - } - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static void -raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) -{ - /* Use the Python function simplejson.decoder.errmsg to raise a nice - looking ValueError exception */ - static PyObject *JSONDecodeError = NULL; - PyObject *exc; - if (JSONDecodeError == NULL) { - PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); - if (decoder == NULL) - return; - JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError"); - Py_DECREF(decoder); - if (JSONDecodeError == NULL) - return; - } - exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); - if (exc) { - PyErr_SetObject(JSONDecodeError, exc); - Py_DECREF(exc); - } -} - -static PyObject * -join_list_unicode(PyObject *lst) -{ - /* return u''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); -} - -static PyObject * -join_list_string(PyObject *lst) -{ - /* return ''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyString_FromStringAndSize(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); -} - -static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { - /* return (rval, idx) tuple, stealing reference to rval */ - PyObject *tpl; - PyObject *pyidx; - /* - steal a reference to rval, returns (rval, idx) - */ - if (rval == NULL) { - return NULL; - } - pyidx = PyInt_FromSsize_t(idx); - if (pyidx == NULL) { - Py_DECREF(rval); - return NULL; - } - tpl = PyTuple_New(2); - if (tpl == NULL) { - Py_DECREF(pyidx); - Py_DECREF(rval); - return NULL; - } - PyTuple_SET_ITEM(tpl, 0, rval); - PyTuple_SET_ITEM(tpl, 1, pyidx); - return tpl; -} - -#define APPEND_OLD_CHUNK \ - if (chunk != NULL) { \ - if (chunks == NULL) { \ - chunks = PyList_New(0); \ - if (chunks == NULL) { \ - goto bail; \ - } \ - } \ - if (PyList_Append(chunks, chunk)) { \ - goto bail; \ - } \ - Py_CLEAR(chunk); \ - } - -static PyObject * -scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyString pystr. - end is the index of the first character after the quote. - encoding is the encoding of pystr (must be an ASCII superset) - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyString (if ASCII-only) or PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyString_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - int has_unicode = 0; - char *buf = PyString_AS_STRING(pystr); - PyObject *chunks = NULL; - PyObject *chunk = NULL; - - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - for (next = end; next < len; next++) { - c = (unsigned char)buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - else if (c > 0x7f) { - has_unicode = 1; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - PyObject *strchunk; - APPEND_OLD_CHUNK - strchunk = PyString_FromStringAndSize(&buf[end], next - end); - if (strchunk == NULL) { - goto bail; - } - if (has_unicode) { - chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); - Py_DECREF(strchunk); - if (chunk == NULL) { - goto bail; - } - } - else { - chunk = strchunk; - } - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - if (c > 0x7f) { - has_unicode = 1; - } - APPEND_OLD_CHUNK - if (has_unicode) { - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - } - else { - char c_char = Py_CHARMASK(c); - chunk = PyString_FromStringAndSize(&c_char, 1); - if (chunk == NULL) { - goto bail; - } - } - } - - if (chunks == NULL) { - if (chunk != NULL) - rval = chunk; - else - rval = PyString_FromStringAndSize("", 0); - } - else { - APPEND_OLD_CHUNK - rval = join_list_string(chunks); - if (rval == NULL) { - goto bail; - } - Py_CLEAR(chunks); - } - - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunk); - Py_XDECREF(chunks); - return NULL; -} - - -static PyObject * -scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyUnicode pystr. - end is the index of the first character after the quote. - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyUnicode_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); - PyObject *chunks = NULL; - PyObject *chunk = NULL; - - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - for (next = end; next < len; next++) { - c = buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - APPEND_OLD_CHUNK - chunk = PyUnicode_FromUnicode(&buf[end], next - end); - if (chunk == NULL) { - goto bail; - } - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - APPEND_OLD_CHUNK - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - } - - if (chunks == NULL) { - if (chunk != NULL) - rval = chunk; - else - rval = PyUnicode_FromUnicode(NULL, 0); - } - else { - APPEND_OLD_CHUNK - rval = join_list_unicode(chunks); - if (rval == NULL) { - goto bail; - } - Py_CLEAR(chunks); - } - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunk); - Py_XDECREF(chunks); - return NULL; -} - -PyDoc_STRVAR(pydoc_scanstring, - "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" - "\n" - "Scan the string s for a JSON string. End is the index of the\n" - "character in s after the quote that started the JSON string.\n" - "Unescapes all valid JSON string escape sequences and raises ValueError\n" - "on attempt to decode an invalid string. If strict is False then literal\n" - "control characters are allowed in the string.\n" - "\n" - "Returns a tuple of the decoded string and the index of the character in s\n" - "after the end quote." -); - -static PyObject * -py_scanstring(PyObject* self UNUSED, PyObject *args) -{ - PyObject *pystr; - PyObject *rval; - Py_ssize_t end; - Py_ssize_t next_end = -1; - char *encoding = NULL; - int strict = 1; - if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { - return NULL; - } - if (encoding == NULL) { - encoding = DEFAULT_ENCODING; - } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { - rval = scanstring_unicode(pystr, end, strict, &next_end); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } - return _build_rval_index_tuple(rval, next_end); -} - -PyDoc_STRVAR(pydoc_encode_basestring_ascii, - "encode_basestring_ascii(basestring) -> str\n" - "\n" - "Return an ASCII-only JSON representation of a Python string" -); - -static PyObject * -py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) -{ - /* Return an ASCII-only JSON representation of a Python string */ - /* METH_O */ - if (PyString_Check(pystr)) { - return ascii_escape_str(pystr); - } - else if (PyUnicode_Check(pystr)) { - return ascii_escape_unicode(pystr); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } -} - -static void -scanner_dealloc(PyObject *self) -{ - /* Deallocate scanner object */ - scanner_clear(self); - Py_TYPE(self)->tp_free(self); -} - -static int -scanner_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - Py_VISIT(s->encoding); - Py_VISIT(s->strict); - Py_VISIT(s->object_hook); - Py_VISIT(s->pairs_hook); - Py_VISIT(s->parse_float); - Py_VISIT(s->parse_int); - Py_VISIT(s->parse_constant); - Py_VISIT(s->memo); - return 0; -} - -static int -scanner_clear(PyObject *self) -{ - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - Py_CLEAR(s->encoding); - Py_CLEAR(s->strict); - Py_CLEAR(s->object_hook); - Py_CLEAR(s->pairs_hook); - Py_CLEAR(s->parse_float); - Py_CLEAR(s->parse_int); - Py_CLEAR(s->parse_constant); - Py_CLEAR(s->memo); - return 0; -} - -static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyString pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. - - Returns a new PyObject (usually a dict, but object_hook or - object_pairs_hook can change that) - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *rval = NULL; - PyObject *pairs = NULL; - PyObject *item; - PyObject *key = NULL; - PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); - int strict = PyObject_IsTrue(s->strict); - int has_pairs_hook = (s->pairs_hook != Py_None); - Py_ssize_t next_idx; - if (has_pairs_hook) { - pairs = PyList_New(0); - if (pairs == NULL) - return NULL; - } - else { - rval = PyDict_New(); - if (rval == NULL) - return NULL; - } - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - PyObject *memokey; - - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); - if (key == NULL) - goto bail; - memokey = PyDict_GetItem(s->memo, key); - if (memokey != NULL) { - Py_INCREF(memokey); - Py_DECREF(key); - key = memokey; - } - else { - if (PyDict_SetItem(s->memo, key, key) < 0) - goto bail; - } - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON data type */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (has_pairs_hook) { - item = PyTuple_Pack(2, key, val); - if (item == NULL) - goto bail; - Py_CLEAR(key); - Py_CLEAR(val); - if (PyList_Append(pairs, item) == -1) { - Py_DECREF(item); - goto bail; - } - Py_DECREF(item); - } - else { - if (PyDict_SetItem(rval, key, val) < 0) - goto bail; - Py_CLEAR(key); - Py_CLEAR(val); - } - idx = next_idx; - - /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - - /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ - if (s->pairs_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); - if (val == NULL) - goto bail; - Py_DECREF(pairs); - *next_idx_ptr = idx + 1; - return val; - } - - /* if object_hook is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(rval); - Py_XDECREF(key); - Py_XDECREF(val); - Py_XDECREF(pairs); - return NULL; -} - -static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyUnicode pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. - - Returns a new PyObject (usually a dict, but object_hook can change that) - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *rval = NULL; - PyObject *pairs = NULL; - PyObject *item; - PyObject *key = NULL; - PyObject *val = NULL; - int strict = PyObject_IsTrue(s->strict); - int has_pairs_hook = (s->pairs_hook != Py_None); - Py_ssize_t next_idx; - - if (has_pairs_hook) { - pairs = PyList_New(0); - if (pairs == NULL) - return NULL; - } - else { - rval = PyDict_New(); - if (rval == NULL) - return NULL; - } - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - PyObject *memokey; - - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); - if (key == NULL) - goto bail; - memokey = PyDict_GetItem(s->memo, key); - if (memokey != NULL) { - Py_INCREF(memokey); - Py_DECREF(key); - key = memokey; - } - else { - if (PyDict_SetItem(s->memo, key, key) < 0) - goto bail; - } - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (has_pairs_hook) { - item = PyTuple_Pack(2, key, val); - if (item == NULL) - goto bail; - Py_CLEAR(key); - Py_CLEAR(val); - if (PyList_Append(pairs, item) == -1) { - Py_DECREF(item); - goto bail; - } - Py_DECREF(item); - } - else { - if (PyDict_SetItem(rval, key, val) < 0) - goto bail; - Py_CLEAR(key); - Py_CLEAR(val); - } - idx = next_idx; - - /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - - /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ - if (s->pairs_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); - if (val == NULL) - goto bail; - Py_DECREF(pairs); - *next_idx_ptr = idx + 1; - return val; - } - - /* if object_hook is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(rval); - Py_XDECREF(key); - Py_XDECREF(val); - Py_XDECREF(pairs); - return NULL; -} - -static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term and de-tuplefy the (rval, idx) */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) { - PyErr_Clear(); - raise_errmsg("Expecting object", pystr, idx); - } - goto bail; - } - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) { - if (PyErr_ExceptionMatches(PyExc_StopIteration)) { - PyErr_Clear(); - raise_errmsg("Expecting object", pystr, idx); - } - goto bail; - } - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON constant from PyString pystr. - constant is the constant string that was found - ("NaN", "Infinity", "-Infinity"). - idx is the index of the first character of the constant - *next_idx_ptr is a return-by-reference index to the first character after - the constant. - - Returns the result of parse_constant - */ - PyObject *cstr; - PyObject *rval; - /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); - if (cstr == NULL) - return NULL; - - /* rval = parse_constant(constant) */ - rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); - Py_DECREF(cstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyString pystr. - idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - - /* save the index of the 'e' or 'E' just in case we need to backtrack */ - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyString_FromStringAndSize(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */ - double d = PyOS_string_to_double(PyString_AS_STRING(numstr), - NULL, NULL); - if (d == -1.0 && PyErr_Occurred()) - return NULL; - rval = PyFloat_FromDouble(d); - } - } - else { - /* parse as an int using a fast path if available, otherwise call user defined method */ - if (s->parse_int != (PyObject *)&PyInt_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - else { - rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); - } - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyUnicode pystr. - idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyUnicode_FromUnicode(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - rval = PyFloat_FromString(numstr, NULL); - } - } - else { - /* no fast path for unicode -> int, just call */ - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyString pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t length = PyString_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); - case '[': - /* array */ - return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); -} - -static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyUnicode pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t length = PyUnicode_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_unicode(pystr, idx + 1, - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); - case '[': - /* array */ - return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_unicode(s, pystr, idx, next_idx_ptr); -} - -static PyObject * -scanner_call(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Python callable interface to scan_once_{str,unicode} */ - PyObject *pystr; - PyObject *rval; - Py_ssize_t idx; - Py_ssize_t next_idx = -1; - static char *kwlist[] = {"string", "idx", NULL}; - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) - return NULL; - - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { - rval = scan_once_unicode(s, pystr, idx, &next_idx); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } - PyDict_Clear(s->memo); - return _build_rval_index_tuple(rval, next_idx); -} - -static PyObject * -scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyScannerObject *s; - s = (PyScannerObject *)type->tp_alloc(type, 0); - if (s != NULL) { - s->encoding = NULL; - s->strict = NULL; - s->object_hook = NULL; - s->pairs_hook = NULL; - s->parse_float = NULL; - s->parse_int = NULL; - s->parse_constant = NULL; - } - return (PyObject *)s; -} - -static int -scanner_init(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Initialize Scanner object */ - PyObject *ctx; - static char *kwlist[] = {"context", NULL}; - PyScannerObject *s; - - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) - return -1; - - if (s->memo == NULL) { - s->memo = PyDict_New(); - if (s->memo == NULL) - goto bail; - } - - /* PyString_AS_STRING is used on encoding */ - s->encoding = PyObject_GetAttrString(ctx, "encoding"); - if (s->encoding == NULL) - goto bail; - if (s->encoding == Py_None) { - Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); - } - else if (PyUnicode_Check(s->encoding)) { - PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); - Py_DECREF(s->encoding); - s->encoding = tmp; - } - if (s->encoding == NULL || !PyString_Check(s->encoding)) - goto bail; - - /* All of these will fail "gracefully" so we don't need to verify them */ - s->strict = PyObject_GetAttrString(ctx, "strict"); - if (s->strict == NULL) - goto bail; - s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); - if (s->object_hook == NULL) - goto bail; - s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); - if (s->pairs_hook == NULL) - goto bail; - s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); - if (s->parse_float == NULL) - goto bail; - s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); - if (s->parse_int == NULL) - goto bail; - s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); - if (s->parse_constant == NULL) - goto bail; - - return 0; - -bail: - Py_CLEAR(s->encoding); - Py_CLEAR(s->strict); - Py_CLEAR(s->object_hook); - Py_CLEAR(s->pairs_hook); - Py_CLEAR(s->parse_float); - Py_CLEAR(s->parse_int); - Py_CLEAR(s->parse_constant); - return -1; -} - -PyDoc_STRVAR(scanner_doc, "JSON scanner object"); - -static -PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ - "simplejson._speedups.Scanner", /* tp_name */ - sizeof(PyScannerObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - scanner_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - scanner_call, /* tp_call */ - 0, /* tp_str */ - 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ - 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - scanner_doc, /* tp_doc */ - scanner_traverse, /* tp_traverse */ - scanner_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - scanner_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - scanner_init, /* tp_init */ - 0,/* PyType_GenericAlloc, */ /* tp_alloc */ - scanner_new, /* tp_new */ - 0,/* PyObject_GC_Del, */ /* tp_free */ -}; - -static PyObject * -encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyEncoderObject *s; - s = (PyEncoderObject *)type->tp_alloc(type, 0); - if (s != NULL) { - s->markers = NULL; - s->defaultfn = NULL; - s->encoder = NULL; - s->indent = NULL; - s->key_separator = NULL; - s->item_separator = NULL; - s->sort_keys = NULL; - s->skipkeys = NULL; - s->key_memo = NULL; - } - return (PyObject *)s; -} - -static int -encoder_init(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", NULL}; - - PyEncoderObject *s; - PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; - PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal; - - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOO:make_encoder", kwlist, - &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, - &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal)) - return -1; - - s->markers = markers; - s->defaultfn = defaultfn; - s->encoder = encoder; - s->indent = indent; - s->key_separator = key_separator; - s->item_separator = item_separator; - s->sort_keys = sort_keys; - s->skipkeys = skipkeys; - s->key_memo = key_memo; - s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); - s->allow_nan = PyObject_IsTrue(allow_nan); - s->use_decimal = PyObject_IsTrue(use_decimal); - - Py_INCREF(s->markers); - Py_INCREF(s->defaultfn); - Py_INCREF(s->encoder); - Py_INCREF(s->indent); - Py_INCREF(s->key_separator); - Py_INCREF(s->item_separator); - Py_INCREF(s->sort_keys); - Py_INCREF(s->skipkeys); - Py_INCREF(s->key_memo); - return 0; -} - -static PyObject * -encoder_call(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Python callable interface to encode_listencode_obj */ - static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj; - PyObject *rval; - Py_ssize_t indent_level; - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, - &obj, _convertPyInt_AsSsize_t, &indent_level)) - return NULL; - rval = PyList_New(0); - if (rval == NULL) - return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); - return NULL; - } - return rval; -} - -static PyObject * -_encoded_const(PyObject *obj) -{ - /* Return the JSON string representation of None, True, False */ - if (obj == Py_None) { - static PyObject *s_null = NULL; - if (s_null == NULL) { - s_null = PyString_InternFromString("null"); - } - Py_INCREF(s_null); - return s_null; - } - else if (obj == Py_True) { - static PyObject *s_true = NULL; - if (s_true == NULL) { - s_true = PyString_InternFromString("true"); - } - Py_INCREF(s_true); - return s_true; - } - else if (obj == Py_False) { - static PyObject *s_false = NULL; - if (s_false == NULL) { - s_false = PyString_InternFromString("false"); - } - Py_INCREF(s_false); - return s_false; - } - else { - PyErr_SetString(PyExc_ValueError, "not a const"); - return NULL; - } -} - -static PyObject * -encoder_encode_float(PyEncoderObject *s, PyObject *obj) -{ - /* Return the JSON representation of a PyFloat */ - double i = PyFloat_AS_DOUBLE(obj); - if (!Py_IS_FINITE(i)) { - if (!s->allow_nan) { - PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); - return NULL; - } - if (i > 0) { - return PyString_FromString("Infinity"); - } - else if (i < 0) { - return PyString_FromString("-Infinity"); - } - else { - return PyString_FromString("NaN"); - } - } - /* Use a better float format here? */ - return PyObject_Repr(obj); -} - -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj) -{ - /* Return the JSON representation of a string */ - if (s->fast_encode) - return py_encode_basestring_ascii(NULL, obj); - else - return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); -} - -static int -_steal_list_append(PyObject *lst, PyObject *stolen) -{ - /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); - Py_DECREF(stolen); - return rval; -} - -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) -{ - /* Encode Python object obj to a JSON term, rval is a PyList */ - PyObject *newobj; - int rv; - - if (obj == Py_None || obj == Py_True || obj == Py_False) { - PyObject *cstr = _encoded_const(obj); - if (cstr == NULL) - return -1; - return _steal_list_append(rval, cstr); - } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) - { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyFloat_Check(obj)) { - PyObject *encoded = encoder_encode_float(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyList_Check(obj) || PyTuple_Check(obj)) { - return encoder_listencode_list(s, rval, obj, indent_level); - } - else if (PyDict_Check(obj)) { - return encoder_listencode_dict(s, rval, obj, indent_level); - } - else if (s->use_decimal && Decimal_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else { - PyObject *ident = NULL; - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(obj); - if (ident == NULL) - return -1; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - Py_DECREF(ident); - return -1; - } - if (PyDict_SetItem(s->markers, ident, obj)) { - Py_DECREF(ident); - return -1; - } - } - newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); - if (newobj == NULL) { - Py_XDECREF(ident); - return -1; - } - rv = encoder_listencode_obj(s, rval, newobj, indent_level); - Py_DECREF(newobj); - if (rv) { - Py_XDECREF(ident); - return -1; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) { - Py_XDECREF(ident); - return -1; - } - Py_XDECREF(ident); - } - return rv; - } -} - -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) -{ - /* Encode Python dict dct a JSON term, rval is a PyList */ - static PyObject *open_dict = NULL; - static PyObject *close_dict = NULL; - static PyObject *empty_dict = NULL; - static PyObject *iteritems = NULL; - PyObject *kstr = NULL; - PyObject *ident = NULL; - PyObject *key, *value; - PyObject *iter = NULL; - PyObject *item = NULL; - PyObject *encoded = NULL; - int skipkeys; - Py_ssize_t idx; - - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); - iteritems = PyString_InternFromString("iteritems"); - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) - return -1; - } - if (PyDict_Size(dct) == 0) - return PyList_Append(rval, empty_dict); - - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(dct); - if (ident == NULL) - goto bail; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - goto bail; - } - if (PyDict_SetItem(s->markers, ident, dct)) { - goto bail; - } - } - - if (PyList_Append(rval, open_dict)) - goto bail; - - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level += 1; - /* - newline_indent = '\n' + (_indent * _current_indent_level) - separator = _item_separator + newline_indent - buf += newline_indent - */ - } - - /* TODO: C speedup not implemented for sort_keys */ - - skipkeys = PyObject_IsTrue(s->skipkeys); - idx = 0; - iter = PyObject_CallMethodObjArgs(dct, iteritems, NULL); - if (iter == NULL) - goto bail; - while ((item = PyIter_Next(iter))) { - - key = PyTuple_GetItem(item, 0); - if (key == NULL) - goto bail; - value = PyTuple_GetItem(item, 1); - if (value == NULL) - goto bail; - - encoded = PyDict_GetItem(s->key_memo, key); - if (encoded != NULL) { - Py_INCREF(encoded); - } - else if (PyString_Check(key) || PyUnicode_Check(key)) { - Py_INCREF(key); - kstr = key; - } - else if (PyFloat_Check(key)) { - kstr = encoder_encode_float(s, key); - if (kstr == NULL) - goto bail; - } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); - if (kstr == NULL) - goto bail; - } - else if (key == Py_True || key == Py_False || key == Py_None) { - kstr = _encoded_const(key); - if (kstr == NULL) - goto bail; - } - else if (skipkeys) { - Py_DECREF(item); - continue; - } - else { - /* TODO: include repr of key */ - PyErr_SetString(PyExc_ValueError, "keys must be a string"); - goto bail; - } - - if (idx) { - if (PyList_Append(rval, s->item_separator)) - goto bail; - } - - if (encoded == NULL) { - encoded = encoder_encode_string(s, kstr); - Py_CLEAR(kstr); - if (encoded == NULL) - goto bail; - if (PyDict_SetItem(s->key_memo, key, encoded)) - goto bail; - } - if (PyList_Append(rval, encoded)) { - goto bail; - } - Py_CLEAR(encoded); - if (PyList_Append(rval, s->key_separator)) - goto bail; - if (encoder_listencode_obj(s, rval, value, indent_level)) - goto bail; - Py_CLEAR(item); - idx += 1; - } - Py_CLEAR(iter); - if (PyErr_Occurred()) - goto bail; - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) - goto bail; - Py_CLEAR(ident); - } - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level -= 1; - /* - yield '\n' + (_indent * _current_indent_level) - */ - } - if (PyList_Append(rval, close_dict)) - goto bail; - return 0; - -bail: - Py_XDECREF(encoded); - Py_XDECREF(item); - Py_XDECREF(iter); - Py_XDECREF(kstr); - Py_XDECREF(ident); - return -1; -} - - -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) -{ - /* Encode Python list seq to a JSON term, rval is a PyList */ - static PyObject *open_array = NULL; - static PyObject *close_array = NULL; - static PyObject *empty_array = NULL; - PyObject *ident = NULL; - PyObject *iter = NULL; - PyObject *obj = NULL; - int is_true; - int i = 0; - - if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); - if (open_array == NULL || close_array == NULL || empty_array == NULL) - return -1; - } - ident = NULL; - is_true = PyObject_IsTrue(seq); - if (is_true == -1) - return -1; - else if (is_true == 0) - return PyList_Append(rval, empty_array); - - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(seq); - if (ident == NULL) - goto bail; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - goto bail; - } - if (PyDict_SetItem(s->markers, ident, seq)) { - goto bail; - } - } - - iter = PyObject_GetIter(seq); - if (iter == NULL) - goto bail; - - if (PyList_Append(rval, open_array)) - goto bail; - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level += 1; - /* - newline_indent = '\n' + (_indent * _current_indent_level) - separator = _item_separator + newline_indent - buf += newline_indent - */ - } - while ((obj = PyIter_Next(iter))) { - if (i) { - if (PyList_Append(rval, s->item_separator)) - goto bail; - } - if (encoder_listencode_obj(s, rval, obj, indent_level)) - goto bail; - i++; - Py_CLEAR(obj); - } - Py_CLEAR(iter); - if (PyErr_Occurred()) - goto bail; - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) - goto bail; - Py_CLEAR(ident); - } - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level -= 1; - /* - yield '\n' + (_indent * _current_indent_level) - */ - } - if (PyList_Append(rval, close_array)) - goto bail; - return 0; - -bail: - Py_XDECREF(obj); - Py_XDECREF(iter); - Py_XDECREF(ident); - return -1; -} - -static void -encoder_dealloc(PyObject *self) -{ - /* Deallocate Encoder */ - encoder_clear(self); - Py_TYPE(self)->tp_free(self); -} - -static int -encoder_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - Py_VISIT(s->markers); - Py_VISIT(s->defaultfn); - Py_VISIT(s->encoder); - Py_VISIT(s->indent); - Py_VISIT(s->key_separator); - Py_VISIT(s->item_separator); - Py_VISIT(s->sort_keys); - Py_VISIT(s->skipkeys); - Py_VISIT(s->key_memo); - return 0; -} - -static int -encoder_clear(PyObject *self) -{ - /* Deallocate Encoder */ - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - Py_CLEAR(s->markers); - Py_CLEAR(s->defaultfn); - Py_CLEAR(s->encoder); - Py_CLEAR(s->indent); - Py_CLEAR(s->key_separator); - Py_CLEAR(s->item_separator); - Py_CLEAR(s->sort_keys); - Py_CLEAR(s->skipkeys); - Py_CLEAR(s->key_memo); - return 0; -} - -PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); - -static -PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ - "simplejson._speedups.Encoder", /* tp_name */ - sizeof(PyEncoderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - encoder_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - encoder_call, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - encoder_doc, /* tp_doc */ - encoder_traverse, /* tp_traverse */ - encoder_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - encoder_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - encoder_init, /* tp_init */ - 0, /* tp_alloc */ - encoder_new, /* tp_new */ - 0, /* tp_free */ -}; - -static PyMethodDef speedups_methods[] = { - {"encode_basestring_ascii", - (PyCFunction)py_encode_basestring_ascii, - METH_O, - pydoc_encode_basestring_ascii}, - {"scanstring", - (PyCFunction)py_scanstring, - METH_VARARGS, - pydoc_scanstring}, - {NULL, NULL, 0, NULL} -}; - -PyDoc_STRVAR(module_doc, -"simplejson speedups\n"); - -void -init_speedups(void) -{ - PyObject *m, *decimal; - PyScannerType.tp_new = PyType_GenericNew; - if (PyType_Ready(&PyScannerType) < 0) - return; - PyEncoderType.tp_new = PyType_GenericNew; - if (PyType_Ready(&PyEncoderType) < 0) - return; - - decimal = PyImport_ImportModule("decimal"); - if (decimal == NULL) - return; - DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal"); - Py_DECREF(decimal); - if (DecimalTypePtr == NULL) - return; - - m = Py_InitModule3("_speedups", speedups_methods, module_doc); - Py_INCREF((PyObject*)&PyScannerType); - PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); - Py_INCREF((PyObject*)&PyEncoderType); - PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); -} diff --git a/tablib/packages/simplejson/decoder.py b/tablib/packages/simplejson/decoder.py deleted file mode 100644 index 4cf4015..0000000 --- a/tablib/packages/simplejson/decoder.py +++ /dev/null @@ -1,421 +0,0 @@ -"""Implementation of JSONDecoder -""" -import re -import sys -import struct - -from simplejson.scanner import make_scanner -def _import_c_scanstring(): - try: - from simplejson._speedups import scanstring - return scanstring - except ImportError: - return None -c_scanstring = _import_c_scanstring() - -__all__ = ['JSONDecoder'] - -FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL - -def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') - # The struct module in Python 2.4 would get frexp() out of range here - # when an endian is specified in the format string. Fixed in Python 2.5+ - if sys.byteorder != 'big': - _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] - nan, inf = struct.unpack('dd', _BYTES) - return nan, inf, -inf - -NaN, PosInf, NegInf = _floatconstants() - - -class JSONDecodeError(ValueError): - """Subclass of ValueError with the following additional properties: - - msg: The unformatted error message - doc: The JSON document being parsed - pos: The start index of doc where parsing failed - end: The end index of doc where parsing failed (may be None) - lineno: The line corresponding to pos - colno: The column corresponding to pos - endlineno: The line corresponding to end (may be None) - endcolno: The column corresponding to end (may be None) - - """ - def __init__(self, msg, doc, pos, end=None): - ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) - self.msg = msg - self.doc = doc - self.pos = pos - self.end = end - self.lineno, self.colno = linecol(doc, pos) - if end is not None: - self.endlineno, self.endcolno = linecol(doc, pos) - else: - self.endlineno, self.endcolno = None, None - - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - #fmt = '{0}: line {1} column {2} (char {3})' - #return fmt.format(msg, lineno, colno, pos) - fmt = '%s: line %d column %d (char %d)' - return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' - return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - -_CONSTANTS = { - '-Infinity': NegInf, - 'Infinity': PosInf, - 'NaN': NaN, -} - -STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) -BACKSLASH = { - '"': u'"', '\\': u'\\', '/': u'/', - 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', -} - -DEFAULT_ENCODING = "utf-8" - -def py_scanstring(s, end, encoding=None, strict=True, - _b=BACKSLASH, _m=STRINGCHUNK.match): - """Scan the string s for a JSON string. End is the index of the - character in s after the quote that started the JSON string. - Unescapes all valid JSON string escape sequences and raises ValueError - on attempt to decode an invalid string. If strict is False then literal - control characters are allowed in the string. - - Returns a tuple of the decoded string and the index of the character in s - after the end quote.""" - if encoding is None: - encoding = DEFAULT_ENCODING - chunks = [] - _append = chunks.append - begin = end - 1 - while 1: - chunk = _m(s, end) - if chunk is None: - raise JSONDecodeError( - "Unterminated string starting at", s, begin) - end = chunk.end() - content, terminator = chunk.groups() - # Content is contains zero or more unescaped string characters - if content: - if not isinstance(content, unicode): - content = unicode(content, encoding) - _append(content) - # Terminator is the end of string, a literal control character, - # or a backslash denoting that an escape sequence follows - if terminator == '"': - break - elif terminator != '\\': - if strict: - msg = "Invalid control character %r at" % (terminator,) - #msg = "Invalid control character {0!r} at".format(terminator) - raise JSONDecodeError(msg, s, end) - else: - _append(terminator) - continue - try: - esc = s[end] - except IndexError: - raise JSONDecodeError( - "Unterminated string starting at", s, begin) - # If not a unicode escape sequence, must be in the lookup table - if esc != 'u': - try: - char = _b[esc] - except KeyError: - msg = "Invalid \\escape: " + repr(esc) - raise JSONDecodeError(msg, s, end) - end += 1 - else: - # Unicode escape sequence - esc = s[end + 1:end + 5] - next_end = end + 5 - if len(esc) != 4: - msg = "Invalid \\uXXXX escape" - raise JSONDecodeError(msg, s, end) - uni = int(esc, 16) - # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise JSONDecodeError(msg, s, end) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise JSONDecodeError(msg, s, end) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 - char = unichr(uni) - end = next_end - # Append the unescaped character - _append(char) - return u''.join(chunks), end - - -# Use speedup if available -scanstring = c_scanstring or py_scanstring - -WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) -WHITESPACE_STR = ' \t\n\r' - -def JSONObject((s, end), encoding, strict, scan_once, object_hook, - object_pairs_hook, memo=None, - _w=WHITESPACE.match, _ws=WHITESPACE_STR): - # Backwards compatibility - if memo is None: - memo = {} - memo_get = memo.setdefault - pairs = [] - # Use a slice to prevent IndexError from being raised, the following - # check will raise a more specific ValueError if the string is empty - nextchar = s[end:end + 1] - # Normally we expect nextchar == '"' - if nextchar != '"': - if nextchar in _ws: - end = _w(s, end).end() - nextchar = s[end:end + 1] - # Trivial empty object - if nextchar == '}': - if object_pairs_hook is not None: - result = object_pairs_hook(pairs) - return result, end - pairs = {} - if object_hook is not None: - pairs = object_hook(pairs) - return pairs, end + 1 - elif nextchar != '"': - raise JSONDecodeError("Expecting property name", s, end) - end += 1 - while True: - key, end = scanstring(s, end, encoding, strict) - key = memo_get(key, key) - - # To skip some function call overhead we optimize the fast paths where - # the JSON key separator is ": " or just ":". - if s[end:end + 1] != ':': - end = _w(s, end).end() - if s[end:end + 1] != ':': - raise JSONDecodeError("Expecting : delimiter", s, end) - - end += 1 - - try: - if s[end] in _ws: - end += 1 - if s[end] in _ws: - end = _w(s, end + 1).end() - except IndexError: - pass - - try: - value, end = scan_once(s, end) - except StopIteration: - raise JSONDecodeError("Expecting object", s, end) - pairs.append((key, value)) - - try: - nextchar = s[end] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end] - except IndexError: - nextchar = '' - end += 1 - - if nextchar == '}': - break - elif nextchar != ',': - raise JSONDecodeError("Expecting , delimiter", s, end - 1) - - try: - nextchar = s[end] - if nextchar in _ws: - end += 1 - nextchar = s[end] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end] - except IndexError: - nextchar = '' - - end += 1 - if nextchar != '"': - raise JSONDecodeError("Expecting property name", s, end - 1) - - if object_pairs_hook is not None: - result = object_pairs_hook(pairs) - return result, end - pairs = dict(pairs) - if object_hook is not None: - pairs = object_hook(pairs) - return pairs, end - -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - values = [] - nextchar = s[end:end + 1] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end:end + 1] - # Look-ahead for trivial empty array - if nextchar == ']': - return values, end + 1 - _append = values.append - while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise JSONDecodeError("Expecting object", s, end) - _append(value) - nextchar = s[end:end + 1] - if nextchar in _ws: - end = _w(s, end + 1).end() - nextchar = s[end:end + 1] - end += 1 - if nextchar == ']': - break - elif nextchar != ',': - raise JSONDecodeError("Expecting , delimiter", s, end) - - try: - if s[end] in _ws: - end += 1 - if s[end] in _ws: - end = _w(s, end + 1).end() - except IndexError: - pass - - return values, end - -class JSONDecoder(object): - """Simple JSON decoder - - Performs the following translations in decoding by default: - - +---------------+-------------------+ - | JSON | Python | - +===============+===================+ - | object | dict | - +---------------+-------------------+ - | array | list | - +---------------+-------------------+ - | string | unicode | - +---------------+-------------------+ - | number (int) | int, long | - +---------------+-------------------+ - | number (real) | float | - +---------------+-------------------+ - | true | True | - +---------------+-------------------+ - | false | False | - +---------------+-------------------+ - | null | None | - +---------------+-------------------+ - - It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as - their corresponding ``float`` values, which is outside the JSON spec. - - """ - - def __init__(self, encoding=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, strict=True, - object_pairs_hook=None): - """ - *encoding* determines the encoding used to interpret any - :class:`str` objects decoded by this instance (``'utf-8'`` by - default). It has no effect when decoding :class:`unicode` objects. - - Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as :class:`unicode`. - - *object_hook*, if specified, will be called with the result of every - JSON object decoded and its return value will be used in place of the - given :class:`dict`. This can be used to provide custom - deserializations (e.g. to support JSON-RPC class hinting). - - *object_pairs_hook* is an optional function that will be called with - the result of any object literal decode with an ordered list of pairs. - The return value of *object_pairs_hook* will be used instead of the - :class:`dict`. This feature can be used to implement custom decoders - that rely on the order that the key and value pairs are decoded (for - example, :func:`collections.OrderedDict` will remember the order of - insertion). If *object_hook* is also defined, the *object_pairs_hook* - takes priority. - - *parse_float*, if specified, will be called with the string of every - JSON float to be decoded. By default, this is equivalent to - ``float(num_str)``. This can be used to use another datatype or parser - for JSON floats (e.g. :class:`decimal.Decimal`). - - *parse_int*, if specified, will be called with the string of every - JSON int to be decoded. By default, this is equivalent to - ``int(num_str)``. This can be used to use another datatype or parser - for JSON integers (e.g. :class:`float`). - - *parse_constant*, if specified, will be called with one of the - following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This - can be used to raise an exception if invalid JSON numbers are - encountered. - - *strict* controls the parser's behavior when it encounters an - invalid control character in a string. The default setting of - ``True`` means that unescaped control characters are parse errors, if - ``False`` then control characters will be allowed in strings. - - """ - self.encoding = encoding - self.object_hook = object_hook - self.object_pairs_hook = object_pairs_hook - self.parse_float = parse_float or float - self.parse_int = parse_int or int - self.parse_constant = parse_constant or _CONSTANTS.__getitem__ - self.strict = strict - self.parse_object = JSONObject - self.parse_array = JSONArray - self.parse_string = scanstring - self.memo = {} - self.scan_once = make_scanner(self) - - def decode(self, s, _w=WHITESPACE.match): - """Return the Python representation of ``s`` (a ``str`` or ``unicode`` - instance containing a JSON document) - - """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) - end = _w(s, end).end() - if end != len(s): - raise JSONDecodeError("Extra data", s, end, len(s)) - return obj - - def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` - beginning with a JSON document) and return a 2-tuple of the Python - representation and the index in ``s`` where the document ended. - - This can be used to decode a JSON document from a string that may - have extraneous data at the end. - - """ - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise JSONDecodeError("No JSON object could be decoded", s, idx) - return obj, end diff --git a/tablib/packages/simplejson/encoder.py b/tablib/packages/simplejson/encoder.py deleted file mode 100644 index cab8456..0000000 --- a/tablib/packages/simplejson/encoder.py +++ /dev/null @@ -1,501 +0,0 @@ -"""Implementation of JSONEncoder -""" -import re -from decimal import Decimal - -def _import_speedups(): - try: - from simplejson import _speedups - return _speedups.encode_basestring_ascii, _speedups.make_encoder - except ImportError: - return None, None -c_encode_basestring_ascii, c_make_encoder = _import_speedups() - -from simplejson.decoder import PosInf - -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') -ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') -HAS_UTF8 = re.compile(r'[\x80-\xff]') -ESCAPE_DCT = { - '\\': '\\\\', - '"': '\\"', - '\b': '\\b', - '\f': '\\f', - '\n': '\\n', - '\r': '\\r', - '\t': '\\t', -} -for i in range(0x20): - #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) - ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) - -FLOAT_REPR = repr - -def encode_basestring(s): - """Return a JSON representation of a Python string - - """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') - def replace(match): - return ESCAPE_DCT[match.group(0)] - return u'"' + ESCAPE.sub(replace, s) + u'"' - - -def py_encode_basestring_ascii(s): - """Return an ASCII-only JSON representation of a Python string - - """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') - def replace(match): - s = match.group(0) - try: - return ESCAPE_DCT[s] - except KeyError: - n = ord(s) - if n < 0x10000: - #return '\\u{0:04x}'.format(n) - return '\\u%04x' % (n,) - else: - # surrogate pair - n -= 0x10000 - s1 = 0xd800 | ((n >> 10) & 0x3ff) - s2 = 0xdc00 | (n & 0x3ff) - #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) - return '\\u%04x\\u%04x' % (s1, s2) - return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' - - -encode_basestring_ascii = ( - c_encode_basestring_ascii or py_encode_basestring_ascii) - -class JSONEncoder(object): - """Extensible JSON encoder for Python data structures. - - Supports the following objects and types by default: - - +-------------------+---------------+ - | Python | JSON | - +===================+===============+ - | dict | object | - +-------------------+---------------+ - | list, tuple | array | - +-------------------+---------------+ - | str, unicode | string | - +-------------------+---------------+ - | int, long, float | number | - +-------------------+---------------+ - | True | true | - +-------------------+---------------+ - | False | false | - +-------------------+---------------+ - | None | null | - +-------------------+---------------+ - - To extend this to recognize other objects, subclass and implement a - ``.default()`` method with another method that returns a serializable - object for ``o`` if possible, otherwise it should call the superclass - implementation (to raise ``TypeError``). - - """ - item_separator = ', ' - key_separator = ': ' - def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None, - use_decimal=False): - """Constructor for JSONEncoder, with sensible defaults. - - If skipkeys is false, then it is a TypeError to attempt - encoding of keys that are not str, int, long, float or None. If - skipkeys is True, such items are simply skipped. - - If ensure_ascii is true, the output is guaranteed to be str - objects with all incoming unicode characters escaped. If - ensure_ascii is false, the output will be unicode object. - - If check_circular is true, then lists, dicts, and custom encoded - objects will be checked for circular references during encoding to - prevent an infinite recursion (which would cause an OverflowError). - Otherwise, no such check takes place. - - If allow_nan is true, then NaN, Infinity, and -Infinity will be - encoded as such. This behavior is not JSON specification compliant, - but is consistent with most JavaScript based encoders and decoders. - Otherwise, it will be a ValueError to encode such floats. - - If sort_keys is true, then the output of dictionaries will be - sorted by key; this is useful for regression tests to ensure - that JSON serializations can be compared on a day-to-day basis. - - If indent is a string, then JSON array elements and object members - will be pretty-printed with a newline followed by that string repeated - for each level of nesting. ``None`` (the default) selects the most compact - representation without any newlines. For backwards compatibility with - versions of simplejson earlier than 2.1.0, an integer is also accepted - and is converted to a string with that many spaces. - - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. - - If specified, default is a function that gets called for objects - that can't otherwise be serialized. It should return a JSON encodable - version of the object or raise a ``TypeError``. - - If encoding is not None, then all input strings will be - transformed into unicode using that encoding prior to JSON-encoding. - The default is UTF-8. - - If use_decimal is true (not the default), ``decimal.Decimal`` will - be supported directly by the encoder. For the inverse, decode JSON - with ``parse_float=decimal.Decimal``. - - """ - - self.skipkeys = skipkeys - self.ensure_ascii = ensure_ascii - self.check_circular = check_circular - self.allow_nan = allow_nan - self.sort_keys = sort_keys - self.use_decimal = use_decimal - if isinstance(indent, (int, long)): - indent = ' ' * indent - self.indent = indent - if separators is not None: - self.item_separator, self.key_separator = separators - if default is not None: - self.default = default - self.encoding = encoding - - def default(self, o): - """Implement this method in a subclass such that it returns - a serializable object for ``o``, or calls the base implementation - (to raise a ``TypeError``). - - For example, to support arbitrary iterators, you could - implement default like this:: - - def default(self, o): - try: - iterable = iter(o) - except TypeError: - pass - else: - return list(iterable) - return JSONEncoder.default(self, o) - - """ - raise TypeError(repr(o) + " is not JSON serializable") - - def encode(self, o): - """Return a JSON string representation of a Python data structure. - - >>> from simplejson import JSONEncoder - >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) - '{"foo": ["bar", "baz"]}' - - """ - # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) - if self.ensure_ascii: - return encode_basestring_ascii(o) - else: - return encode_basestring(o) - # This doesn't pass the iterator directly to ''.join() because the - # exceptions aren't as detailed. The list call should be roughly - # equivalent to the PySequence_Fast that ''.join() would do. - chunks = self.iterencode(o, _one_shot=True) - if not isinstance(chunks, (list, tuple)): - chunks = list(chunks) - if self.ensure_ascii: - return ''.join(chunks) - else: - return u''.join(chunks) - - def iterencode(self, o, _one_shot=False): - """Encode the given object and yield each string - representation as available. - - For example:: - - for chunk in JSONEncoder().iterencode(bigobject): - mysocket.write(chunk) - - """ - if self.check_circular: - markers = {} - else: - markers = None - if self.ensure_ascii: - _encoder = encode_basestring_ascii - else: - _encoder = encode_basestring - if self.encoding != 'utf-8': - def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): - o = o.decode(_encoding) - return _orig_encoder(o) - - def floatstr(o, allow_nan=self.allow_nan, - _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): - # Check for specials. Note that this type of test is processor - # and/or platform-specific, so do tests which don't depend on - # the internals. - - if o != o: - text = 'NaN' - elif o == _inf: - text = 'Infinity' - elif o == _neginf: - text = '-Infinity' - else: - return _repr(o) - - if not allow_nan: - raise ValueError( - "Out of range float values are not JSON compliant: " + - repr(o)) - - return text - - - key_memo = {} - if (_one_shot and c_make_encoder is not None - and not self.indent and not self.sort_keys): - _iterencode = c_make_encoder( - markers, self.default, _encoder, self.indent, - self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan, key_memo, self.use_decimal) - else: - _iterencode = _make_iterencode( - markers, self.default, _encoder, self.indent, floatstr, - self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot, self.use_decimal) - try: - return _iterencode(o, 0) - finally: - key_memo.clear() - - -class JSONEncoderForHTML(JSONEncoder): - """An encoder that produces JSON safe to embed in HTML. - - To embed JSON content in, say, a script tag on a web page, the - characters &, < and > should be escaped. They cannot be escaped - with the usual entities (e.g. &) because they are not expanded - within ' - self.assertEqual( - r'"\u003c/script\u003e\u003cscript\u003e' - r'alert(\"gotcha\")\u003c/script\u003e"', - self.encoder.encode(bad_string)) - self.assertEqual( - bad_string, self.decoder.decode( - self.encoder.encode(bad_string))) diff --git a/tablib/packages/simplejson/tests/test_fail.py b/tablib/packages/simplejson/tests/test_fail.py deleted file mode 100644 index 646c0f4..0000000 --- a/tablib/packages/simplejson/tests/test_fail.py +++ /dev/null @@ -1,91 +0,0 @@ -from unittest import TestCase - -import simplejson as json - -# Fri Dec 30 18:57:26 2005 -JSONDOCS = [ - # http://json.org/JSON_checker/test/fail1.json - '"A JSON payload should be an object or array, not a string."', - # http://json.org/JSON_checker/test/fail2.json - '["Unclosed array"', - # http://json.org/JSON_checker/test/fail3.json - '{unquoted_key: "keys must be quoted}', - # http://json.org/JSON_checker/test/fail4.json - '["extra comma",]', - # http://json.org/JSON_checker/test/fail5.json - '["double extra comma",,]', - # http://json.org/JSON_checker/test/fail6.json - '[ , "<-- missing value"]', - # http://json.org/JSON_checker/test/fail7.json - '["Comma after the close"],', - # http://json.org/JSON_checker/test/fail8.json - '["Extra close"]]', - # http://json.org/JSON_checker/test/fail9.json - '{"Extra comma": true,}', - # http://json.org/JSON_checker/test/fail10.json - '{"Extra value after close": true} "misplaced quoted value"', - # http://json.org/JSON_checker/test/fail11.json - '{"Illegal expression": 1 + 2}', - # http://json.org/JSON_checker/test/fail12.json - '{"Illegal invocation": alert()}', - # http://json.org/JSON_checker/test/fail13.json - '{"Numbers cannot have leading zeroes": 013}', - # http://json.org/JSON_checker/test/fail14.json - '{"Numbers cannot be hex": 0x14}', - # http://json.org/JSON_checker/test/fail15.json - '["Illegal backslash escape: \\x15"]', - # http://json.org/JSON_checker/test/fail16.json - '["Illegal backslash escape: \\\'"]', - # http://json.org/JSON_checker/test/fail17.json - '["Illegal backslash escape: \\017"]', - # http://json.org/JSON_checker/test/fail18.json - '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', - # http://json.org/JSON_checker/test/fail19.json - '{"Missing colon" null}', - # http://json.org/JSON_checker/test/fail20.json - '{"Double colon":: null}', - # http://json.org/JSON_checker/test/fail21.json - '{"Comma instead of colon", null}', - # http://json.org/JSON_checker/test/fail22.json - '["Colon instead of comma": false]', - # http://json.org/JSON_checker/test/fail23.json - '["Bad value", truth]', - # http://json.org/JSON_checker/test/fail24.json - "['single quote']", - # http://code.google.com/p/simplejson/issues/detail?id=3 - u'["A\u001FZ control characters in string"]', -] - -SKIPS = { - 1: "why not have a string payload?", - 18: "spec doesn't specify any nesting limitations", -} - -class TestFail(TestCase): - def test_failures(self): - for idx, doc in enumerate(JSONDOCS): - idx = idx + 1 - if idx in SKIPS: - json.loads(doc) - continue - try: - json.loads(doc) - except json.JSONDecodeError: - pass - else: - #self.fail("Expected failure for fail{0}.json: {1!r}".format(idx, doc)) - self.fail("Expected failure for fail%d.json: %r" % (idx, doc)) - - def test_array_decoder_issue46(self): - # http://code.google.com/p/simplejson/issues/detail?id=46 - for doc in [u'[,]', '[,]']: - try: - json.loads(doc) - except json.JSONDecodeError, e: - self.assertEquals(e.pos, 1) - self.assertEquals(e.lineno, 1) - self.assertEquals(e.colno, 1) - except Exception, e: - self.fail("Unexpected exception raised %r %s" % (e, e)) - else: - self.fail("Unexpected success parsing '[,]'") \ No newline at end of file diff --git a/tablib/packages/simplejson/tests/test_float.py b/tablib/packages/simplejson/tests/test_float.py deleted file mode 100644 index 94502c6..0000000 --- a/tablib/packages/simplejson/tests/test_float.py +++ /dev/null @@ -1,19 +0,0 @@ -import math -from unittest import TestCase - -import simplejson as json - -class TestFloat(TestCase): - def test_floats(self): - for num in [1617161771.7650001, math.pi, math.pi**100, - math.pi**-100, 3.1]: - self.assertEquals(float(json.dumps(num)), num) - self.assertEquals(json.loads(json.dumps(num)), num) - self.assertEquals(json.loads(unicode(json.dumps(num))), num) - - def test_ints(self): - for num in [1, 1L, 1<<32, 1<<64]: - self.assertEquals(json.dumps(num), str(num)) - self.assertEquals(int(json.dumps(num)), num) - self.assertEquals(json.loads(json.dumps(num)), num) - self.assertEquals(json.loads(unicode(json.dumps(num))), num) diff --git a/tablib/packages/simplejson/tests/test_indent.py b/tablib/packages/simplejson/tests/test_indent.py deleted file mode 100644 index 985831b..0000000 --- a/tablib/packages/simplejson/tests/test_indent.py +++ /dev/null @@ -1,53 +0,0 @@ -from unittest import TestCase - -import simplejson as json -import textwrap - -class TestIndent(TestCase): - def test_indent(self): - h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', - 'i-vhbjkhnth', - {'nifty': 87}, {'field': 'yes', 'morefield': False} ] - - expect = textwrap.dedent("""\ - [ - \t[ - \t\t"blorpie" - \t], - \t[ - \t\t"whoops" - \t], - \t[], - \t"d-shtaeou", - \t"d-nthiouh", - \t"i-vhbjkhnth", - \t{ - \t\t"nifty": 87 - \t}, - \t{ - \t\t"field": "yes", - \t\t"morefield": false - \t} - ]""") - - - d1 = json.dumps(h) - d2 = json.dumps(h, indent='\t', sort_keys=True, separators=(',', ': ')) - d3 = json.dumps(h, indent=' ', sort_keys=True, separators=(',', ': ')) - d4 = json.dumps(h, indent=2, sort_keys=True, separators=(',', ': ')) - - h1 = json.loads(d1) - h2 = json.loads(d2) - h3 = json.loads(d3) - h4 = json.loads(d4) - - self.assertEquals(h1, h) - self.assertEquals(h2, h) - self.assertEquals(h3, h) - self.assertEquals(h4, h) - self.assertEquals(d3, expect.replace('\t', ' ')) - self.assertEquals(d4, expect.replace('\t', ' ')) - # NOTE: Python 2.4 textwrap.dedent converts tabs to spaces, - # so the following is expected to fail. Python 2.4 is not a - # supported platform in simplejson 2.1.0+. - self.assertEquals(d2, expect) diff --git a/tablib/packages/simplejson/tests/test_pass1.py b/tablib/packages/simplejson/tests/test_pass1.py deleted file mode 100644 index c3d6302..0000000 --- a/tablib/packages/simplejson/tests/test_pass1.py +++ /dev/null @@ -1,76 +0,0 @@ -from unittest import TestCase - -import simplejson as json - -# from http://json.org/JSON_checker/test/pass1.json -JSON = r''' -[ - "JSON Test Pattern pass1", - {"object with 1 member":["array with 1 element"]}, - {}, - [], - -42, - true, - false, - null, - { - "integer": 1234567890, - "real": -9876.543210, - "e": 0.123456789e-12, - "E": 1.234567890E+34, - "": 23456789012E666, - "zero": 0, - "one": 1, - "space": " ", - "quote": "\"", - "backslash": "\\", - "controls": "\b\f\n\r\t", - "slash": "/ & \/", - "alpha": "abcdefghijklmnopqrstuvwyz", - "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", - "digit": "0123456789", - "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", - "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", - "true": true, - "false": false, - "null": null, - "array":[ ], - "object":{ }, - "address": "50 St. James Street", - "url": "http://www.JSON.org/", - "comment": "// /* */": " ", - " s p a c e d " :[1,2 , 3 - -, - -4 , 5 , 6 ,7 ], - "compact": [1,2,3,4,5,6,7], - "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", - "quotes": "" \u0022 %22 0x22 034 "", - "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" -: "A key can be any string" - }, - 0.5 ,98.6 -, -99.44 -, - -1066 - - -,"rosebud"] -''' - -class TestPass1(TestCase): - def test_parse(self): - # test in/out equivalence and parsing - res = json.loads(JSON) - out = json.dumps(res) - self.assertEquals(res, json.loads(out)) - try: - json.dumps(res, allow_nan=False) - except ValueError: - pass - else: - self.fail("23456789012E666 should be out of range") diff --git a/tablib/packages/simplejson/tests/test_pass2.py b/tablib/packages/simplejson/tests/test_pass2.py deleted file mode 100644 index de4ee00..0000000 --- a/tablib/packages/simplejson/tests/test_pass2.py +++ /dev/null @@ -1,14 +0,0 @@ -from unittest import TestCase -import simplejson as json - -# from http://json.org/JSON_checker/test/pass2.json -JSON = r''' -[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] -''' - -class TestPass2(TestCase): - def test_parse(self): - # test in/out equivalence and parsing - res = json.loads(JSON) - out = json.dumps(res) - self.assertEquals(res, json.loads(out)) diff --git a/tablib/packages/simplejson/tests/test_pass3.py b/tablib/packages/simplejson/tests/test_pass3.py deleted file mode 100644 index f591aba..0000000 --- a/tablib/packages/simplejson/tests/test_pass3.py +++ /dev/null @@ -1,20 +0,0 @@ -from unittest import TestCase - -import simplejson as json - -# from http://json.org/JSON_checker/test/pass3.json -JSON = r''' -{ - "JSON Test Pattern pass3": { - "The outermost value": "must be an object or array.", - "In this test": "It is an object." - } -} -''' - -class TestPass3(TestCase): - def test_parse(self): - # test in/out equivalence and parsing - res = json.loads(JSON) - out = json.dumps(res) - self.assertEquals(res, json.loads(out)) diff --git a/tablib/packages/simplejson/tests/test_recursion.py b/tablib/packages/simplejson/tests/test_recursion.py deleted file mode 100644 index 97422a6..0000000 --- a/tablib/packages/simplejson/tests/test_recursion.py +++ /dev/null @@ -1,67 +0,0 @@ -from unittest import TestCase - -import simplejson as json - -class JSONTestObject: - pass - - -class RecursiveJSONEncoder(json.JSONEncoder): - recurse = False - def default(self, o): - if o is JSONTestObject: - if self.recurse: - return [JSONTestObject] - else: - return 'JSONTestObject' - return json.JSONEncoder.default(o) - - -class TestRecursion(TestCase): - def test_listrecursion(self): - x = [] - x.append(x) - try: - json.dumps(x) - except ValueError: - pass - else: - self.fail("didn't raise ValueError on list recursion") - x = [] - y = [x] - x.append(y) - try: - json.dumps(x) - except ValueError: - pass - else: - self.fail("didn't raise ValueError on alternating list recursion") - y = [] - x = [y, y] - # ensure that the marker is cleared - json.dumps(x) - - def test_dictrecursion(self): - x = {} - x["test"] = x - try: - json.dumps(x) - except ValueError: - pass - else: - self.fail("didn't raise ValueError on dict recursion") - x = {} - y = {"a": x, "b": x} - # ensure that the marker is cleared - json.dumps(x) - - def test_defaultrecursion(self): - enc = RecursiveJSONEncoder() - self.assertEquals(enc.encode(JSONTestObject), '"JSONTestObject"') - enc.recurse = True - try: - enc.encode(JSONTestObject) - except ValueError: - pass - else: - self.fail("didn't raise ValueError on default recursion") diff --git a/tablib/packages/simplejson/tests/test_scanstring.py b/tablib/packages/simplejson/tests/test_scanstring.py deleted file mode 100644 index a7fcd46..0000000 --- a/tablib/packages/simplejson/tests/test_scanstring.py +++ /dev/null @@ -1,117 +0,0 @@ -import sys -from unittest import TestCase - -import simplejson as json -import simplejson.decoder - -class TestScanString(TestCase): - def test_py_scanstring(self): - self._test_scanstring(simplejson.decoder.py_scanstring) - - def test_c_scanstring(self): - if not simplejson.decoder.c_scanstring: - return - self._test_scanstring(simplejson.decoder.c_scanstring) - - def _test_scanstring(self, scanstring): - self.assertEquals( - scanstring('"z\\ud834\\udd20x"', 1, None, True), - (u'z\U0001d120x', 16)) - - if sys.maxunicode == 65535: - self.assertEquals( - scanstring(u'"z\U0001d120x"', 1, None, True), - (u'z\U0001d120x', 6)) - else: - self.assertEquals( - scanstring(u'"z\U0001d120x"', 1, None, True), - (u'z\U0001d120x', 5)) - - self.assertEquals( - scanstring('"\\u007b"', 1, None, True), - (u'{', 8)) - - self.assertEquals( - scanstring('"A JSON payload should be an object or array, not a string."', 1, None, True), - (u'A JSON payload should be an object or array, not a string.', 60)) - - self.assertEquals( - scanstring('["Unclosed array"', 2, None, True), - (u'Unclosed array', 17)) - - self.assertEquals( - scanstring('["extra comma",]', 2, None, True), - (u'extra comma', 14)) - - self.assertEquals( - scanstring('["double extra comma",,]', 2, None, True), - (u'double extra comma', 21)) - - self.assertEquals( - scanstring('["Comma after the close"],', 2, None, True), - (u'Comma after the close', 24)) - - self.assertEquals( - scanstring('["Extra close"]]', 2, None, True), - (u'Extra close', 14)) - - self.assertEquals( - scanstring('{"Extra comma": true,}', 2, None, True), - (u'Extra comma', 14)) - - self.assertEquals( - scanstring('{"Extra value after close": true} "misplaced quoted value"', 2, None, True), - (u'Extra value after close', 26)) - - self.assertEquals( - scanstring('{"Illegal expression": 1 + 2}', 2, None, True), - (u'Illegal expression', 21)) - - self.assertEquals( - scanstring('{"Illegal invocation": alert()}', 2, None, True), - (u'Illegal invocation', 21)) - - self.assertEquals( - scanstring('{"Numbers cannot have leading zeroes": 013}', 2, None, True), - (u'Numbers cannot have leading zeroes', 37)) - - self.assertEquals( - scanstring('{"Numbers cannot be hex": 0x14}', 2, None, True), - (u'Numbers cannot be hex', 24)) - - self.assertEquals( - scanstring('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21, None, True), - (u'Too deep', 30)) - - self.assertEquals( - scanstring('{"Missing colon" null}', 2, None, True), - (u'Missing colon', 16)) - - self.assertEquals( - scanstring('{"Double colon":: null}', 2, None, True), - (u'Double colon', 15)) - - self.assertEquals( - scanstring('{"Comma instead of colon", null}', 2, None, True), - (u'Comma instead of colon', 25)) - - self.assertEquals( - scanstring('["Colon instead of comma": false]', 2, None, True), - (u'Colon instead of comma', 25)) - - self.assertEquals( - scanstring('["Bad value", truth]', 2, None, True), - (u'Bad value', 12)) - - def test_issue3623(self): - self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1, - "xxx") - self.assertRaises(UnicodeDecodeError, - json.encoder.encode_basestring_ascii, "xx\xff") - - def test_overflow(self): - # Python 2.5 does not have maxsize - maxsize = getattr(sys, 'maxsize', sys.maxint) - self.assertRaises(OverflowError, json.decoder.scanstring, "xxx", - maxsize + 1) - diff --git a/tablib/packages/simplejson/tests/test_separators.py b/tablib/packages/simplejson/tests/test_separators.py deleted file mode 100644 index cbda93c..0000000 --- a/tablib/packages/simplejson/tests/test_separators.py +++ /dev/null @@ -1,42 +0,0 @@ -import textwrap -from unittest import TestCase - -import simplejson as json - - -class TestSeparators(TestCase): - def test_separators(self): - h = [['blorpie'], ['whoops'], [], 'd-shtaeou', 'd-nthiouh', 'i-vhbjkhnth', - {'nifty': 87}, {'field': 'yes', 'morefield': False} ] - - expect = textwrap.dedent("""\ - [ - [ - "blorpie" - ] , - [ - "whoops" - ] , - [] , - "d-shtaeou" , - "d-nthiouh" , - "i-vhbjkhnth" , - { - "nifty" : 87 - } , - { - "field" : "yes" , - "morefield" : false - } - ]""") - - - d1 = json.dumps(h) - d2 = json.dumps(h, indent=' ', sort_keys=True, separators=(' ,', ' : ')) - - h1 = json.loads(d1) - h2 = json.loads(d2) - - self.assertEquals(h1, h) - self.assertEquals(h2, h) - self.assertEquals(d2, expect) diff --git a/tablib/packages/simplejson/tests/test_speedups.py b/tablib/packages/simplejson/tests/test_speedups.py deleted file mode 100644 index 4bf0875..0000000 --- a/tablib/packages/simplejson/tests/test_speedups.py +++ /dev/null @@ -1,21 +0,0 @@ -import decimal -from unittest import TestCase - -from simplejson import decoder, encoder, scanner - -def has_speedups(): - return encoder.c_make_encoder is not None - -class TestDecode(TestCase): - def test_make_scanner(self): - if not has_speedups(): - return - self.assertRaises(AttributeError, scanner.c_make_scanner, 1) - - def test_make_encoder(self): - if not has_speedups(): - return - self.assertRaises(TypeError, encoder.c_make_encoder, - None, - "\xCD\x7D\x3D\x4E\x12\x4C\xF9\x79\xD7\x52\xBA\x82\xF2\x27\x4A\x7D\xA0\xCA\x75", - None) diff --git a/tablib/packages/simplejson/tests/test_unicode.py b/tablib/packages/simplejson/tests/test_unicode.py deleted file mode 100644 index f73e5bf..0000000 --- a/tablib/packages/simplejson/tests/test_unicode.py +++ /dev/null @@ -1,99 +0,0 @@ -from unittest import TestCase - -import simplejson as json - -class TestUnicode(TestCase): - def test_encoding1(self): - encoder = json.JSONEncoder(encoding='utf-8') - u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' - s = u.encode('utf-8') - ju = encoder.encode(u) - js = encoder.encode(s) - self.assertEquals(ju, js) - - def test_encoding2(self): - u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' - s = u.encode('utf-8') - ju = json.dumps(u, encoding='utf-8') - js = json.dumps(s, encoding='utf-8') - self.assertEquals(ju, js) - - def test_encoding3(self): - u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' - j = json.dumps(u) - self.assertEquals(j, '"\\u03b1\\u03a9"') - - def test_encoding4(self): - u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' - j = json.dumps([u]) - self.assertEquals(j, '["\\u03b1\\u03a9"]') - - def test_encoding5(self): - u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' - j = json.dumps(u, ensure_ascii=False) - self.assertEquals(j, u'"' + u + u'"') - - def test_encoding6(self): - u = u'\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}' - j = json.dumps([u], ensure_ascii=False) - self.assertEquals(j, u'["' + u + u'"]') - - def test_big_unicode_encode(self): - u = u'\U0001d120' - self.assertEquals(json.dumps(u), '"\\ud834\\udd20"') - self.assertEquals(json.dumps(u, ensure_ascii=False), u'"\U0001d120"') - - def test_big_unicode_decode(self): - u = u'z\U0001d120x' - self.assertEquals(json.loads('"' + u + '"'), u) - self.assertEquals(json.loads('"z\\ud834\\udd20x"'), u) - - def test_unicode_decode(self): - for i in range(0, 0xd7ff): - u = unichr(i) - #s = '"\\u{0:04x}"'.format(i) - s = '"\\u%04x"' % (i,) - self.assertEquals(json.loads(s), u) - - def test_object_pairs_hook_with_unicode(self): - s = u'{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}' - p = [(u"xkd", 1), (u"kcw", 2), (u"art", 3), (u"hxm", 4), - (u"qrt", 5), (u"pad", 6), (u"hoy", 7)] - self.assertEqual(json.loads(s), eval(s)) - self.assertEqual(json.loads(s, object_pairs_hook=lambda x: x), p) - od = json.loads(s, object_pairs_hook=json.OrderedDict) - self.assertEqual(od, json.OrderedDict(p)) - self.assertEqual(type(od), json.OrderedDict) - # the object_pairs_hook takes priority over the object_hook - self.assertEqual(json.loads(s, - object_pairs_hook=json.OrderedDict, - object_hook=lambda x: None), - json.OrderedDict(p)) - - - def test_default_encoding(self): - self.assertEquals(json.loads(u'{"a": "\xe9"}'.encode('utf-8')), - {'a': u'\xe9'}) - - def test_unicode_preservation(self): - self.assertEquals(type(json.loads(u'""')), unicode) - self.assertEquals(type(json.loads(u'"a"')), unicode) - self.assertEquals(type(json.loads(u'["a"]')[0]), unicode) - - def test_ensure_ascii_false_returns_unicode(self): - # http://code.google.com/p/simplejson/issues/detail?id=48 - self.assertEquals(type(json.dumps([], ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps(0, ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps({}, ensure_ascii=False)), unicode) - self.assertEquals(type(json.dumps("", ensure_ascii=False)), unicode) - - def test_ensure_ascii_false_bytestring_encoding(self): - # http://code.google.com/p/simplejson/issues/detail?id=48 - doc1 = {u'quux': 'Arr\xc3\xaat sur images'} - doc2 = {u'quux': u'Arr\xeat sur images'} - doc_ascii = '{"quux": "Arr\\u00eat sur images"}' - doc_unicode = u'{"quux": "Arr\xeat sur images"}' - self.assertEquals(json.dumps(doc1), doc_ascii) - self.assertEquals(json.dumps(doc2), doc_ascii) - self.assertEquals(json.dumps(doc1, ensure_ascii=False), doc_unicode) - self.assertEquals(json.dumps(doc2, ensure_ascii=False), doc_unicode) diff --git a/tablib/packages/simplejson/tool.py b/tablib/packages/simplejson/tool.py deleted file mode 100644 index 73370db..0000000 --- a/tablib/packages/simplejson/tool.py +++ /dev/null @@ -1,39 +0,0 @@ -r"""Command-line tool to validate and pretty-print JSON - -Usage:: - - $ echo '{"json":"obj"}' | python -m simplejson.tool - { - "json": "obj" - } - $ echo '{ 1.2:3.4}' | python -m simplejson.tool - Expecting property name: line 1 column 2 (char 2) - -""" -import sys -import simplejson as json - -def main(): - if len(sys.argv) == 1: - infile = sys.stdin - outfile = sys.stdout - elif len(sys.argv) == 2: - infile = open(sys.argv[1], 'rb') - outfile = sys.stdout - elif len(sys.argv) == 3: - infile = open(sys.argv[1], 'rb') - outfile = open(sys.argv[2], 'wb') - else: - raise SystemExit(sys.argv[0] + " [infile [outfile]]") - try: - obj = json.load(infile, - object_pairs_hook=json.OrderedDict, - use_decimal=True) - except ValueError, e: - raise SystemExit(e) - json.dump(obj, outfile, sort_keys=True, indent=' ', use_decimal=True) - outfile.write('\n') - - -if __name__ == '__main__': - main() diff --git a/tablib/packages/yaml/__init__.py b/tablib/packages/yaml/__init__.py deleted file mode 100644 index c0fd1f3..0000000 --- a/tablib/packages/yaml/__init__.py +++ /dev/null @@ -1,288 +0,0 @@ - -from error import * - -from tokens import * -from events import * -from nodes import * - -from loader import * -from dumper import * - -__version__ = '3.09' - -try: - from cyaml import * - __with_libyaml__ = True -except ImportError: - __with_libyaml__ = False - -def scan(stream, Loader=Loader): - """ - Scan a YAML stream and produce scanning tokens. - """ - loader = Loader(stream) - while loader.check_token(): - yield loader.get_token() - -def parse(stream, Loader=Loader): - """ - Parse a YAML stream and produce parsing events. - """ - loader = Loader(stream) - while loader.check_event(): - yield loader.get_event() - -def compose(stream, Loader=Loader): - """ - Parse the first YAML document in a stream - and produce the corresponding representation tree. - """ - loader = Loader(stream) - return loader.get_single_node() - -def compose_all(stream, Loader=Loader): - """ - Parse all YAML documents in a stream - and produce corresponding representation trees. - """ - loader = Loader(stream) - while loader.check_node(): - yield loader.get_node() - -def load(stream, Loader=Loader): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - """ - loader = Loader(stream) - return loader.get_single_data() - -def load_all(stream, Loader=Loader): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - """ - loader = Loader(stream) - while loader.check_data(): - yield loader.get_data() - -def safe_load(stream): - """ - Parse the first YAML document in a stream - and produce the corresponding Python object. - Resolve only basic YAML tags. - """ - return load(stream, SafeLoader) - -def safe_load_all(stream): - """ - Parse all YAML documents in a stream - and produce corresponding Python objects. - Resolve only basic YAML tags. - """ - return load_all(stream, SafeLoader) - -def emit(events, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - """ - Emit YAML parsing events into a stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - from StringIO import StringIO - stream = StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - for event in events: - dumper.emit(event) - if getvalue: - return getvalue() - -def serialize_all(nodes, stream=None, Dumper=Dumper, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding='utf-8', explicit_start=None, explicit_end=None, - version=None, tags=None): - """ - Serialize a sequence of representation trees into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - from StringIO import StringIO - else: - from cStringIO import StringIO - stream = StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end) - dumper.open() - for node in nodes: - dumper.serialize(node) - dumper.close() - if getvalue: - return getvalue() - -def serialize(node, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a representation tree into a YAML stream. - If stream is None, return the produced string instead. - """ - return serialize_all([node], stream, Dumper=Dumper, **kwds) - -def dump_all(documents, stream=None, Dumper=Dumper, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding='utf-8', explicit_start=None, explicit_end=None, - version=None, tags=None): - """ - Serialize a sequence of Python objects into a YAML stream. - If stream is None, return the produced string instead. - """ - getvalue = None - if stream is None: - if encoding is None: - from StringIO import StringIO - else: - from cStringIO import StringIO - stream = StringIO() - getvalue = stream.getvalue - dumper = Dumper(stream, default_style=default_style, - default_flow_style=default_flow_style, - canonical=canonical, indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break, - encoding=encoding, version=version, tags=tags, - explicit_start=explicit_start, explicit_end=explicit_end) - dumper.open() - for data in documents: - dumper.represent(data) - dumper.close() - if getvalue: - return getvalue() - -def dump(data, stream=None, Dumper=Dumper, **kwds): - """ - Serialize a Python object into a YAML stream. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=Dumper, **kwds) - -def safe_dump_all(documents, stream=None, **kwds): - """ - Serialize a sequence of Python objects into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all(documents, stream, Dumper=SafeDumper, **kwds) - -def safe_dump(data, stream=None, **kwds): - """ - Serialize a Python object into a YAML stream. - Produce only basic YAML tags. - If stream is None, return the produced string instead. - """ - return dump_all([data], stream, Dumper=SafeDumper, **kwds) - -def add_implicit_resolver(tag, regexp, first=None, - Loader=Loader, Dumper=Dumper): - """ - Add an implicit scalar detector. - If an implicit scalar value matches the given regexp, - the corresponding tag is assigned to the scalar. - first is a sequence of possible initial characters or None. - """ - Loader.add_implicit_resolver(tag, regexp, first) - Dumper.add_implicit_resolver(tag, regexp, first) - -def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): - """ - Add a path based resolver for the given tag. - A path is a list of keys that forms a path - to a node in the representation tree. - Keys can be string values, integers, or None. - """ - Loader.add_path_resolver(tag, path, kind) - Dumper.add_path_resolver(tag, path, kind) - -def add_constructor(tag, constructor, Loader=Loader): - """ - Add a constructor for the given tag. - Constructor is a function that accepts a Loader instance - and a node object and produces the corresponding Python object. - """ - Loader.add_constructor(tag, constructor) - -def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): - """ - Add a multi-constructor for the given tag prefix. - Multi-constructor is called for a node if its tag starts with tag_prefix. - Multi-constructor accepts a Loader instance, a tag suffix, - and a node object and produces the corresponding Python object. - """ - Loader.add_multi_constructor(tag_prefix, multi_constructor) - -def add_representer(data_type, representer, Dumper=Dumper): - """ - Add a representer for the given type. - Representer is a function accepting a Dumper instance - and an instance of the given data type - and producing the corresponding representation node. - """ - Dumper.add_representer(data_type, representer) - -def add_multi_representer(data_type, multi_representer, Dumper=Dumper): - """ - Add a representer for the given type. - Multi-representer is a function accepting a Dumper instance - and an instance of the given data type or subtype - and producing the corresponding representation node. - """ - Dumper.add_multi_representer(data_type, multi_representer) - -class YAMLObjectMetaclass(type): - """ - The metaclass for YAMLObject. - """ - def __init__(cls, name, bases, kwds): - super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) - if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: - cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) - cls.yaml_dumper.add_representer(cls, cls.to_yaml) - -class YAMLObject(object): - """ - An object that can dump itself to a YAML stream - and load itself from a YAML stream. - """ - - __metaclass__ = YAMLObjectMetaclass - __slots__ = () # no direct instantiation, so allow immutable subclasses - - yaml_loader = Loader - yaml_dumper = Dumper - - yaml_tag = None - yaml_flow_style = None - - def from_yaml(cls, loader, node): - """ - Convert a representation node to a Python object. - """ - return loader.construct_yaml_object(node, cls) - from_yaml = classmethod(from_yaml) - - def to_yaml(cls, dumper, data): - """ - Convert a Python object to a representation node. - """ - return dumper.represent_yaml_object(cls.yaml_tag, data, cls, - flow_style=cls.yaml_flow_style) - to_yaml = classmethod(to_yaml) - diff --git a/tablib/packages/yaml/composer.py b/tablib/packages/yaml/composer.py deleted file mode 100644 index 06e5ac7..0000000 --- a/tablib/packages/yaml/composer.py +++ /dev/null @@ -1,139 +0,0 @@ - -__all__ = ['Composer', 'ComposerError'] - -from error import MarkedYAMLError -from events import * -from nodes import * - -class ComposerError(MarkedYAMLError): - pass - -class Composer(object): - - def __init__(self): - self.anchors = {} - - def check_node(self): - # Drop the STREAM-START event. - if self.check_event(StreamStartEvent): - self.get_event() - - # If there are more documents available? - return not self.check_event(StreamEndEvent) - - def get_node(self): - # Get the root node of the next document. - if not self.check_event(StreamEndEvent): - return self.compose_document() - - def get_single_node(self): - # Drop the STREAM-START event. - self.get_event() - - # Compose a document if the stream is not empty. - document = None - if not self.check_event(StreamEndEvent): - document = self.compose_document() - - # Ensure that the stream contains no more documents. - if not self.check_event(StreamEndEvent): - event = self.get_event() - raise ComposerError("expected a single document in the stream", - document.start_mark, "but found another document", - event.start_mark) - - # Drop the STREAM-END event. - self.get_event() - - return document - - def compose_document(self): - # Drop the DOCUMENT-START event. - self.get_event() - - # Compose the root node. - node = self.compose_node(None, None) - - # Drop the DOCUMENT-END event. - self.get_event() - - self.anchors = {} - return node - - def compose_node(self, parent, index): - if self.check_event(AliasEvent): - event = self.get_event() - anchor = event.anchor - if anchor not in self.anchors: - raise ComposerError(None, None, "found undefined alias %r" - % anchor.encode('utf-8'), event.start_mark) - return self.anchors[anchor] - event = self.peek_event() - anchor = event.anchor - if anchor is not None: - if anchor in self.anchors: - raise ComposerError("found duplicate anchor %r; first occurence" - % anchor.encode('utf-8'), self.anchors[anchor].start_mark, - "second occurence", event.start_mark) - self.descend_resolver(parent, index) - if self.check_event(ScalarEvent): - node = self.compose_scalar_node(anchor) - elif self.check_event(SequenceStartEvent): - node = self.compose_sequence_node(anchor) - elif self.check_event(MappingStartEvent): - node = self.compose_mapping_node(anchor) - self.ascend_resolver() - return node - - def compose_scalar_node(self, anchor): - event = self.get_event() - tag = event.tag - if tag is None or tag == u'!': - tag = self.resolve(ScalarNode, event.value, event.implicit) - node = ScalarNode(tag, event.value, - event.start_mark, event.end_mark, style=event.style) - if anchor is not None: - self.anchors[anchor] = node - return node - - def compose_sequence_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == u'!': - tag = self.resolve(SequenceNode, None, start_event.implicit) - node = SequenceNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style) - if anchor is not None: - self.anchors[anchor] = node - index = 0 - while not self.check_event(SequenceEndEvent): - node.value.append(self.compose_node(node, index)) - index += 1 - end_event = self.get_event() - node.end_mark = end_event.end_mark - return node - - def compose_mapping_node(self, anchor): - start_event = self.get_event() - tag = start_event.tag - if tag is None or tag == u'!': - tag = self.resolve(MappingNode, None, start_event.implicit) - node = MappingNode(tag, [], - start_event.start_mark, None, - flow_style=start_event.flow_style) - if anchor is not None: - self.anchors[anchor] = node - while not self.check_event(MappingEndEvent): - #key_event = self.peek_event() - item_key = self.compose_node(node, None) - #if item_key in node.value: - # raise ComposerError("while composing a mapping", start_event.start_mark, - # "found duplicate key", key_event.start_mark) - item_value = self.compose_node(node, item_key) - #node.value[item_key] = item_value - node.value.append((item_key, item_value)) - end_event = self.get_event() - node.end_mark = end_event.end_mark - return node - diff --git a/tablib/packages/yaml/constructor.py b/tablib/packages/yaml/constructor.py deleted file mode 100644 index 420c434..0000000 --- a/tablib/packages/yaml/constructor.py +++ /dev/null @@ -1,684 +0,0 @@ - -__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', - 'ConstructorError'] - -from error import * -from nodes import * - -import datetime - -try: - set -except NameError: - from sets import Set as set - -import binascii, re, sys, types - -class ConstructorError(MarkedYAMLError): - pass - -class BaseConstructor(object): - - yaml_constructors = {} - yaml_multi_constructors = {} - - def __init__(self): - self.constructed_objects = {} - self.recursive_objects = {} - self.state_generators = [] - self.deep_construct = False - - def check_data(self): - # If there are more documents available? - return self.check_node() - - def get_data(self): - # Construct and return the next document. - if self.check_node(): - return self.construct_document(self.get_node()) - - def get_single_data(self): - # Ensure that the stream contains a single document and construct it. - node = self.get_single_node() - if node is not None: - return self.construct_document(node) - return None - - def construct_document(self, node): - data = self.construct_object(node) - while self.state_generators: - state_generators = self.state_generators - self.state_generators = [] - for generator in state_generators: - for dummy in generator: - pass - self.constructed_objects = {} - self.recursive_objects = {} - self.deep_construct = False - return data - - def construct_object(self, node, deep=False): - if deep: - old_deep = self.deep_construct - self.deep_construct = True - if node in self.constructed_objects: - return self.constructed_objects[node] - if node in self.recursive_objects: - raise ConstructorError(None, None, - "found unconstructable recursive node", node.start_mark) - self.recursive_objects[node] = None - constructor = None - tag_suffix = None - if node.tag in self.yaml_constructors: - constructor = self.yaml_constructors[node.tag] - else: - for tag_prefix in self.yaml_multi_constructors: - if node.tag.startswith(tag_prefix): - tag_suffix = node.tag[len(tag_prefix):] - constructor = self.yaml_multi_constructors[tag_prefix] - break - else: - if None in self.yaml_multi_constructors: - tag_suffix = node.tag - constructor = self.yaml_multi_constructors[None] - elif None in self.yaml_constructors: - constructor = self.yaml_constructors[None] - elif isinstance(node, ScalarNode): - constructor = self.__class__.construct_scalar - elif isinstance(node, SequenceNode): - constructor = self.__class__.construct_sequence - elif isinstance(node, MappingNode): - constructor = self.__class__.construct_mapping - if tag_suffix is None: - data = constructor(self, node) - else: - data = constructor(self, tag_suffix, node) - if isinstance(data, types.GeneratorType): - generator = data - data = generator.next() - if self.deep_construct: - for dummy in generator: - pass - else: - self.state_generators.append(generator) - self.constructed_objects[node] = data - del self.recursive_objects[node] - if deep: - self.deep_construct = old_deep - return data - - def construct_scalar(self, node): - if not isinstance(node, ScalarNode): - raise ConstructorError(None, None, - "expected a scalar node, but found %s" % node.id, - node.start_mark) - return node.value - - def construct_sequence(self, node, deep=False): - if not isinstance(node, SequenceNode): - raise ConstructorError(None, None, - "expected a sequence node, but found %s" % node.id, - node.start_mark) - return [self.construct_object(child, deep=deep) - for child in node.value] - - def construct_mapping(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - mapping = {} - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - try: - hash(key) - except TypeError, exc: - raise ConstructorError("while constructing a mapping", node.start_mark, - "found unacceptable key (%s)" % exc, key_node.start_mark) - value = self.construct_object(value_node, deep=deep) - mapping[key] = value - return mapping - - def construct_pairs(self, node, deep=False): - if not isinstance(node, MappingNode): - raise ConstructorError(None, None, - "expected a mapping node, but found %s" % node.id, - node.start_mark) - pairs = [] - for key_node, value_node in node.value: - key = self.construct_object(key_node, deep=deep) - value = self.construct_object(value_node, deep=deep) - pairs.append((key, value)) - return pairs - - def add_constructor(cls, tag, constructor): - if not 'yaml_constructors' in cls.__dict__: - cls.yaml_constructors = cls.yaml_constructors.copy() - cls.yaml_constructors[tag] = constructor - add_constructor = classmethod(add_constructor) - - def add_multi_constructor(cls, tag_prefix, multi_constructor): - if not 'yaml_multi_constructors' in cls.__dict__: - cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() - cls.yaml_multi_constructors[tag_prefix] = multi_constructor - add_multi_constructor = classmethod(add_multi_constructor) - -class SafeConstructor(BaseConstructor): - - def construct_scalar(self, node): - if isinstance(node, MappingNode): - for key_node, value_node in node.value: - if key_node.tag == u'tag:yaml.org,2002:value': - return self.construct_scalar(value_node) - return BaseConstructor.construct_scalar(self, node) - - def flatten_mapping(self, node): - merge = [] - index = 0 - while index < len(node.value): - key_node, value_node = node.value[index] - if key_node.tag == u'tag:yaml.org,2002:merge': - del node.value[index] - if isinstance(value_node, MappingNode): - self.flatten_mapping(value_node) - merge.extend(value_node.value) - elif isinstance(value_node, SequenceNode): - submerge = [] - for subnode in value_node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing a mapping", - node.start_mark, - "expected a mapping for merging, but found %s" - % subnode.id, subnode.start_mark) - self.flatten_mapping(subnode) - submerge.append(subnode.value) - submerge.reverse() - for value in submerge: - merge.extend(value) - else: - raise ConstructorError("while constructing a mapping", node.start_mark, - "expected a mapping or list of mappings for merging, but found %s" - % value_node.id, value_node.start_mark) - elif key_node.tag == u'tag:yaml.org,2002:value': - key_node.tag = u'tag:yaml.org,2002:str' - index += 1 - else: - index += 1 - if merge: - node.value = merge + node.value - - def construct_mapping(self, node, deep=False): - if isinstance(node, MappingNode): - self.flatten_mapping(node) - return BaseConstructor.construct_mapping(self, node, deep=deep) - - def construct_yaml_null(self, node): - self.construct_scalar(node) - return None - - bool_values = { - u'yes': True, - u'no': False, - u'true': True, - u'false': False, - u'on': True, - u'off': False, - } - - def construct_yaml_bool(self, node): - value = self.construct_scalar(node) - return self.bool_values[value.lower()] - - def construct_yaml_int(self, node): - value = str(self.construct_scalar(node)) - value = value.replace('_', '') - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '0': - return 0 - elif value.startswith('0b'): - return sign*int(value[2:], 2) - elif value.startswith('0x'): - return sign*int(value[2:], 16) - elif value[0] == '0': - return sign*int(value, 8) - elif ':' in value: - digits = [int(part) for part in value.split(':')] - digits.reverse() - base = 1 - value = 0 - for digit in digits: - value += digit*base - base *= 60 - return sign*value - else: - return sign*int(value) - - inf_value = 1e300 - while inf_value != inf_value*inf_value: - inf_value *= inf_value - nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). - - def construct_yaml_float(self, node): - value = str(self.construct_scalar(node)) - value = value.replace('_', '').lower() - sign = +1 - if value[0] == '-': - sign = -1 - if value[0] in '+-': - value = value[1:] - if value == '.inf': - return sign*self.inf_value - elif value == '.nan': - return self.nan_value - elif ':' in value: - digits = [float(part) for part in value.split(':')] - digits.reverse() - base = 1 - value = 0.0 - for digit in digits: - value += digit*base - base *= 60 - return sign*value - else: - return sign*float(value) - - def construct_yaml_binary(self, node): - value = self.construct_scalar(node) - try: - return str(value).decode('base64') - except (binascii.Error, UnicodeEncodeError), exc: - raise ConstructorError(None, None, - "failed to decode base64 data: %s" % exc, node.start_mark) - - timestamp_regexp = re.compile( - ur'''^(?P[0-9][0-9][0-9][0-9]) - -(?P[0-9][0-9]?) - -(?P[0-9][0-9]?) - (?:(?:[Tt]|[ \t]+) - (?P[0-9][0-9]?) - :(?P[0-9][0-9]) - :(?P[0-9][0-9]) - (?:\.(?P[0-9]*))? - (?:[ \t]*(?PZ|(?P[-+])(?P[0-9][0-9]?) - (?::(?P[0-9][0-9]))?))?)?$''', re.X) - - def construct_yaml_timestamp(self, node): - value = self.construct_scalar(node) - match = self.timestamp_regexp.match(node.value) - values = match.groupdict() - year = int(values['year']) - month = int(values['month']) - day = int(values['day']) - if not values['hour']: - return datetime.date(year, month, day) - hour = int(values['hour']) - minute = int(values['minute']) - second = int(values['second']) - fraction = 0 - if values['fraction']: - fraction = values['fraction'][:6] - while len(fraction) < 6: - fraction += '0' - fraction = int(fraction) - delta = None - if values['tz_sign']: - tz_hour = int(values['tz_hour']) - tz_minute = int(values['tz_minute'] or 0) - delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) - if values['tz_sign'] == '-': - delta = -delta - data = datetime.datetime(year, month, day, hour, minute, second, fraction) - if delta: - data -= delta - return data - - def construct_yaml_omap(self, node): - # Note: we do not check for duplicate keys, because it's too - # CPU-expensive. - omap = [] - yield omap - if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError("while constructing an ordered map", node.start_mark, - "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - omap.append((key, value)) - - def construct_yaml_pairs(self, node): - # Note: the same code as `construct_yaml_omap`. - pairs = [] - yield pairs - if not isinstance(node, SequenceNode): - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a sequence, but found %s" % node.id, node.start_mark) - for subnode in node.value: - if not isinstance(subnode, MappingNode): - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a mapping of length 1, but found %s" % subnode.id, - subnode.start_mark) - if len(subnode.value) != 1: - raise ConstructorError("while constructing pairs", node.start_mark, - "expected a single mapping item, but found %d items" % len(subnode.value), - subnode.start_mark) - key_node, value_node = subnode.value[0] - key = self.construct_object(key_node) - value = self.construct_object(value_node) - pairs.append((key, value)) - - def construct_yaml_set(self, node): - data = set() - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_str(self, node): - value = self.construct_scalar(node) - try: - return value.encode('ascii') - except UnicodeEncodeError: - return value - - def construct_yaml_seq(self, node): - data = [] - yield data - data.extend(self.construct_sequence(node)) - - def construct_yaml_map(self, node): - data = {} - yield data - value = self.construct_mapping(node) - data.update(value) - - def construct_yaml_object(self, node, cls): - data = cls.__new__(cls) - yield data - if hasattr(data, '__setstate__'): - state = self.construct_mapping(node, deep=True) - data.__setstate__(state) - else: - state = self.construct_mapping(node) - data.__dict__.update(state) - - def construct_undefined(self, node): - raise ConstructorError(None, None, - "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), - node.start_mark) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:null', - SafeConstructor.construct_yaml_null) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:bool', - SafeConstructor.construct_yaml_bool) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:int', - SafeConstructor.construct_yaml_int) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:float', - SafeConstructor.construct_yaml_float) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:binary', - SafeConstructor.construct_yaml_binary) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:timestamp', - SafeConstructor.construct_yaml_timestamp) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:omap', - SafeConstructor.construct_yaml_omap) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:pairs', - SafeConstructor.construct_yaml_pairs) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:set', - SafeConstructor.construct_yaml_set) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:str', - SafeConstructor.construct_yaml_str) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:seq', - SafeConstructor.construct_yaml_seq) - -SafeConstructor.add_constructor( - u'tag:yaml.org,2002:map', - SafeConstructor.construct_yaml_map) - -SafeConstructor.add_constructor(None, - SafeConstructor.construct_undefined) - -class Constructor(SafeConstructor): - - def construct_python_str(self, node): - return self.construct_scalar(node).encode('utf-8') - - def construct_python_unicode(self, node): - return self.construct_scalar(node) - - def construct_python_long(self, node): - return long(self.construct_yaml_int(node)) - - def construct_python_complex(self, node): - return complex(self.construct_scalar(node)) - - def construct_python_tuple(self, node): - return tuple(self.construct_sequence(node)) - - def find_python_module(self, name, mark): - if not name: - raise ConstructorError("while constructing a Python module", mark, - "expected non-empty name appended to the tag", mark) - try: - __import__(name) - except ImportError, exc: - raise ConstructorError("while constructing a Python module", mark, - "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) - return sys.modules[name] - - def find_python_name(self, name, mark): - if not name: - raise ConstructorError("while constructing a Python object", mark, - "expected non-empty name appended to the tag", mark) - if u'.' in name: - # Python 2.4 only - #module_name, object_name = name.rsplit('.', 1) - items = name.split('.') - object_name = items.pop() - module_name = '.'.join(items) - else: - module_name = '__builtin__' - object_name = name - try: - __import__(module_name) - except ImportError, exc: - raise ConstructorError("while constructing a Python object", mark, - "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) - module = sys.modules[module_name] - if not hasattr(module, object_name): - raise ConstructorError("while constructing a Python object", mark, - "cannot find %r in the module %r" % (object_name.encode('utf-8'), - module.__name__), mark) - return getattr(module, object_name) - - def construct_python_name(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError("while constructing a Python name", node.start_mark, - "expected the empty value, but found %r" % value.encode('utf-8'), - node.start_mark) - return self.find_python_name(suffix, node.start_mark) - - def construct_python_module(self, suffix, node): - value = self.construct_scalar(node) - if value: - raise ConstructorError("while constructing a Python module", node.start_mark, - "expected the empty value, but found %r" % value.encode('utf-8'), - node.start_mark) - return self.find_python_module(suffix, node.start_mark) - - class classobj: pass - - def make_python_instance(self, suffix, node, - args=None, kwds=None, newobj=False): - if not args: - args = [] - if not kwds: - kwds = {} - cls = self.find_python_name(suffix, node.start_mark) - if newobj and isinstance(cls, type(self.classobj)) \ - and not args and not kwds: - instance = self.classobj() - instance.__class__ = cls - return instance - elif newobj and isinstance(cls, type): - return cls.__new__(cls, *args, **kwds) - else: - return cls(*args, **kwds) - - def set_python_instance_state(self, instance, state): - if hasattr(instance, '__setstate__'): - instance.__setstate__(state) - else: - slotstate = {} - if isinstance(state, tuple) and len(state) == 2: - state, slotstate = state - if hasattr(instance, '__dict__'): - instance.__dict__.update(state) - elif state: - slotstate.update(state) - for key, value in slotstate.items(): - setattr(object, key, value) - - def construct_python_object(self, suffix, node): - # Format: - # !!python/object:module.name { ... state ... } - instance = self.make_python_instance(suffix, node, newobj=True) - yield instance - deep = hasattr(instance, '__setstate__') - state = self.construct_mapping(node, deep=deep) - self.set_python_instance_state(instance, state) - - def construct_python_object_apply(self, suffix, node, newobj=False): - # Format: - # !!python/object/apply # (or !!python/object/new) - # args: [ ... arguments ... ] - # kwds: { ... keywords ... } - # state: ... state ... - # listitems: [ ... listitems ... ] - # dictitems: { ... dictitems ... } - # or short format: - # !!python/object/apply [ ... arguments ... ] - # The difference between !!python/object/apply and !!python/object/new - # is how an object is created, check make_python_instance for details. - if isinstance(node, SequenceNode): - args = self.construct_sequence(node, deep=True) - kwds = {} - state = {} - listitems = [] - dictitems = {} - else: - value = self.construct_mapping(node, deep=True) - args = value.get('args', []) - kwds = value.get('kwds', {}) - state = value.get('state', {}) - listitems = value.get('listitems', []) - dictitems = value.get('dictitems', {}) - instance = self.make_python_instance(suffix, node, args, kwds, newobj) - if state: - self.set_python_instance_state(instance, state) - if listitems: - instance.extend(listitems) - if dictitems: - for key in dictitems: - instance[key] = dictitems[key] - return instance - - def construct_python_object_new(self, suffix, node): - return self.construct_python_object_apply(suffix, node, newobj=True) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/none', - Constructor.construct_yaml_null) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/bool', - Constructor.construct_yaml_bool) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/str', - Constructor.construct_python_str) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/unicode', - Constructor.construct_python_unicode) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/int', - Constructor.construct_yaml_int) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/long', - Constructor.construct_python_long) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/float', - Constructor.construct_yaml_float) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/complex', - Constructor.construct_python_complex) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/list', - Constructor.construct_yaml_seq) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/tuple', - Constructor.construct_python_tuple) - -Constructor.add_constructor( - u'tag:yaml.org,2002:python/dict', - Constructor.construct_yaml_map) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/name:', - Constructor.construct_python_name) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/module:', - Constructor.construct_python_module) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object:', - Constructor.construct_python_object) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/apply:', - Constructor.construct_python_object_apply) - -Constructor.add_multi_constructor( - u'tag:yaml.org,2002:python/object/new:', - Constructor.construct_python_object_new) - diff --git a/tablib/packages/yaml/cyaml.py b/tablib/packages/yaml/cyaml.py deleted file mode 100644 index 68dcd75..0000000 --- a/tablib/packages/yaml/cyaml.py +++ /dev/null @@ -1,85 +0,0 @@ - -__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', - 'CBaseDumper', 'CSafeDumper', 'CDumper'] - -from _yaml import CParser, CEmitter - -from constructor import * - -from serializer import * -from representer import * - -from resolver import * - -class CBaseLoader(CParser, BaseConstructor, BaseResolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - -class CSafeLoader(CParser, SafeConstructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - SafeConstructor.__init__(self) - Resolver.__init__(self) - -class CLoader(CParser, Constructor, Resolver): - - def __init__(self, stream): - CParser.__init__(self, stream) - Constructor.__init__(self) - Resolver.__init__(self) - -class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): - - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - -class CSafeDumper(CEmitter, SafeRepresenter, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - -class CDumper(CEmitter, Serializer, Representer, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - CEmitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, encoding=encoding, - allow_unicode=allow_unicode, line_break=line_break, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - diff --git a/tablib/packages/yaml/dumper.py b/tablib/packages/yaml/dumper.py deleted file mode 100644 index f811d2c..0000000 --- a/tablib/packages/yaml/dumper.py +++ /dev/null @@ -1,62 +0,0 @@ - -__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] - -from emitter import * -from serializer import * -from representer import * -from resolver import * - -class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): - - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - -class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - SafeRepresenter.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - -class Dumper(Emitter, Serializer, Representer, Resolver): - - def __init__(self, stream, - default_style=None, default_flow_style=None, - canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None, - encoding=None, explicit_start=None, explicit_end=None, - version=None, tags=None): - Emitter.__init__(self, stream, canonical=canonical, - indent=indent, width=width, - allow_unicode=allow_unicode, line_break=line_break) - Serializer.__init__(self, encoding=encoding, - explicit_start=explicit_start, explicit_end=explicit_end, - version=version, tags=tags) - Representer.__init__(self, default_style=default_style, - default_flow_style=default_flow_style) - Resolver.__init__(self) - diff --git a/tablib/packages/yaml/emitter.py b/tablib/packages/yaml/emitter.py deleted file mode 100644 index 4cb2c8a..0000000 --- a/tablib/packages/yaml/emitter.py +++ /dev/null @@ -1,1135 +0,0 @@ - -# Emitter expects events obeying the following grammar: -# stream ::= STREAM-START document* STREAM-END -# document ::= DOCUMENT-START node DOCUMENT-END -# node ::= SCALAR | sequence | mapping -# sequence ::= SEQUENCE-START node* SEQUENCE-END -# mapping ::= MAPPING-START (node node)* MAPPING-END - -__all__ = ['Emitter', 'EmitterError'] - -from error import YAMLError -from events import * - -class EmitterError(YAMLError): - pass - -class ScalarAnalysis(object): - def __init__(self, scalar, empty, multiline, - allow_flow_plain, allow_block_plain, - allow_single_quoted, allow_double_quoted, - allow_block): - self.scalar = scalar - self.empty = empty - self.multiline = multiline - self.allow_flow_plain = allow_flow_plain - self.allow_block_plain = allow_block_plain - self.allow_single_quoted = allow_single_quoted - self.allow_double_quoted = allow_double_quoted - self.allow_block = allow_block - -class Emitter(object): - - DEFAULT_TAG_PREFIXES = { - u'!' : u'!', - u'tag:yaml.org,2002:' : u'!!', - } - - def __init__(self, stream, canonical=None, indent=None, width=None, - allow_unicode=None, line_break=None): - - # The stream should have the methods `write` and possibly `flush`. - self.stream = stream - - # Encoding can be overriden by STREAM-START. - self.encoding = None - - # Emitter is a state machine with a stack of states to handle nested - # structures. - self.states = [] - self.state = self.expect_stream_start - - # Current event and the event queue. - self.events = [] - self.event = None - - # The current indentation level and the stack of previous indents. - self.indents = [] - self.indent = None - - # Flow level. - self.flow_level = 0 - - # Contexts. - self.root_context = False - self.sequence_context = False - self.mapping_context = False - self.simple_key_context = False - - # Characteristics of the last emitted character: - # - current position. - # - is it a whitespace? - # - is it an indention character - # (indentation space, '-', '?', or ':')? - self.line = 0 - self.column = 0 - self.whitespace = True - self.indention = True - - # Whether the document requires an explicit document indicator - self.open_ended = False - - # Formatting details. - self.canonical = canonical - self.allow_unicode = allow_unicode - self.best_indent = 2 - if indent and 1 < indent < 10: - self.best_indent = indent - self.best_width = 80 - if width and width > self.best_indent*2: - self.best_width = width - self.best_line_break = u'\n' - if line_break in [u'\r', u'\n', u'\r\n']: - self.best_line_break = line_break - - # Tag prefixes. - self.tag_prefixes = None - - # Prepared anchor and tag. - self.prepared_anchor = None - self.prepared_tag = None - - # Scalar analysis and style. - self.analysis = None - self.style = None - - def emit(self, event): - self.events.append(event) - while not self.need_more_events(): - self.event = self.events.pop(0) - self.state() - self.event = None - - # In some cases, we wait for a few next events before emitting. - - def need_more_events(self): - if not self.events: - return True - event = self.events[0] - if isinstance(event, DocumentStartEvent): - return self.need_events(1) - elif isinstance(event, SequenceStartEvent): - return self.need_events(2) - elif isinstance(event, MappingStartEvent): - return self.need_events(3) - else: - return False - - def need_events(self, count): - level = 0 - for event in self.events[1:]: - if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): - level += 1 - elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): - level -= 1 - elif isinstance(event, StreamEndEvent): - level = -1 - if level < 0: - return False - return (len(self.events) < count+1) - - def increase_indent(self, flow=False, indentless=False): - self.indents.append(self.indent) - if self.indent is None: - if flow: - self.indent = self.best_indent - else: - self.indent = 0 - elif not indentless: - self.indent += self.best_indent - - # States. - - # Stream handlers. - - def expect_stream_start(self): - if isinstance(self.event, StreamStartEvent): - if self.event.encoding and not getattr(self.stream, 'encoding', None): - self.encoding = self.event.encoding - self.write_stream_start() - self.state = self.expect_first_document_start - else: - raise EmitterError("expected StreamStartEvent, but got %s" - % self.event) - - def expect_nothing(self): - raise EmitterError("expected nothing, but got %s" % self.event) - - # Document handlers. - - def expect_first_document_start(self): - return self.expect_document_start(first=True) - - def expect_document_start(self, first=False): - if isinstance(self.event, DocumentStartEvent): - if (self.event.version or self.event.tags) and self.open_ended: - self.write_indicator(u'...', True) - self.write_indent() - if self.event.version: - version_text = self.prepare_version(self.event.version) - self.write_version_directive(version_text) - self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() - if self.event.tags: - handles = self.event.tags.keys() - handles.sort() - for handle in handles: - prefix = self.event.tags[handle] - self.tag_prefixes[prefix] = handle - handle_text = self.prepare_tag_handle(handle) - prefix_text = self.prepare_tag_prefix(prefix) - self.write_tag_directive(handle_text, prefix_text) - implicit = (first and not self.event.explicit and not self.canonical - and not self.event.version and not self.event.tags - and not self.check_empty_document()) - if not implicit: - self.write_indent() - self.write_indicator(u'---', True) - if self.canonical: - self.write_indent() - self.state = self.expect_document_root - elif isinstance(self.event, StreamEndEvent): - if self.open_ended: - self.write_indicator(u'...', True) - self.write_indent() - self.write_stream_end() - self.state = self.expect_nothing - else: - raise EmitterError("expected DocumentStartEvent, but got %s" - % self.event) - - def expect_document_end(self): - if isinstance(self.event, DocumentEndEvent): - self.write_indent() - if self.event.explicit: - self.write_indicator(u'...', True) - self.write_indent() - self.flush_stream() - self.state = self.expect_document_start - else: - raise EmitterError("expected DocumentEndEvent, but got %s" - % self.event) - - def expect_document_root(self): - self.states.append(self.expect_document_end) - self.expect_node(root=True) - - # Node handlers. - - def expect_node(self, root=False, sequence=False, mapping=False, - simple_key=False): - self.root_context = root - self.sequence_context = sequence - self.mapping_context = mapping - self.simple_key_context = simple_key - if isinstance(self.event, AliasEvent): - self.expect_alias() - elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): - self.process_anchor(u'&') - self.process_tag() - if isinstance(self.event, ScalarEvent): - self.expect_scalar() - elif isinstance(self.event, SequenceStartEvent): - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_sequence(): - self.expect_flow_sequence() - else: - self.expect_block_sequence() - elif isinstance(self.event, MappingStartEvent): - if self.flow_level or self.canonical or self.event.flow_style \ - or self.check_empty_mapping(): - self.expect_flow_mapping() - else: - self.expect_block_mapping() - else: - raise EmitterError("expected NodeEvent, but got %s" % self.event) - - def expect_alias(self): - if self.event.anchor is None: - raise EmitterError("anchor is not specified for alias") - self.process_anchor(u'*') - self.state = self.states.pop() - - def expect_scalar(self): - self.increase_indent(flow=True) - self.process_scalar() - self.indent = self.indents.pop() - self.state = self.states.pop() - - # Flow sequence handlers. - - def expect_flow_sequence(self): - self.write_indicator(u'[', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_sequence_item - - def expect_first_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator(u']', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - def expect_flow_sequence_item(self): - if isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(u',', False) - self.write_indent() - self.write_indicator(u']', False) - self.state = self.states.pop() - else: - self.write_indicator(u',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - self.states.append(self.expect_flow_sequence_item) - self.expect_node(sequence=True) - - # Flow mapping handlers. - - def expect_flow_mapping(self): - self.write_indicator(u'{', True, whitespace=True) - self.flow_level += 1 - self.increase_indent(flow=True) - self.state = self.expect_first_flow_mapping_key - - def expect_first_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - self.write_indicator(u'}', False) - self.state = self.states.pop() - else: - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_key(self): - if isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.flow_level -= 1 - if self.canonical: - self.write_indicator(u',', False) - self.write_indent() - self.write_indicator(u'}', False) - self.state = self.states.pop() - else: - self.write_indicator(u',', False) - if self.canonical or self.column > self.best_width: - self.write_indent() - if not self.canonical and self.check_simple_key(): - self.states.append(self.expect_flow_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True) - self.states.append(self.expect_flow_mapping_value) - self.expect_node(mapping=True) - - def expect_flow_mapping_simple_value(self): - self.write_indicator(u':', False) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - def expect_flow_mapping_value(self): - if self.canonical or self.column > self.best_width: - self.write_indent() - self.write_indicator(u':', True) - self.states.append(self.expect_flow_mapping_key) - self.expect_node(mapping=True) - - # Block sequence handlers. - - def expect_block_sequence(self): - indentless = (self.mapping_context and not self.indention) - self.increase_indent(flow=False, indentless=indentless) - self.state = self.expect_first_block_sequence_item - - def expect_first_block_sequence_item(self): - return self.expect_block_sequence_item(first=True) - - def expect_block_sequence_item(self, first=False): - if not first and isinstance(self.event, SequenceEndEvent): - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - self.write_indicator(u'-', True, indention=True) - self.states.append(self.expect_block_sequence_item) - self.expect_node(sequence=True) - - # Block mapping handlers. - - def expect_block_mapping(self): - self.increase_indent(flow=False) - self.state = self.expect_first_block_mapping_key - - def expect_first_block_mapping_key(self): - return self.expect_block_mapping_key(first=True) - - def expect_block_mapping_key(self, first=False): - if not first and isinstance(self.event, MappingEndEvent): - self.indent = self.indents.pop() - self.state = self.states.pop() - else: - self.write_indent() - if self.check_simple_key(): - self.states.append(self.expect_block_mapping_simple_value) - self.expect_node(mapping=True, simple_key=True) - else: - self.write_indicator(u'?', True, indention=True) - self.states.append(self.expect_block_mapping_value) - self.expect_node(mapping=True) - - def expect_block_mapping_simple_value(self): - self.write_indicator(u':', False) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - def expect_block_mapping_value(self): - self.write_indent() - self.write_indicator(u':', True, indention=True) - self.states.append(self.expect_block_mapping_key) - self.expect_node(mapping=True) - - # Checkers. - - def check_empty_sequence(self): - return (isinstance(self.event, SequenceStartEvent) and self.events - and isinstance(self.events[0], SequenceEndEvent)) - - def check_empty_mapping(self): - return (isinstance(self.event, MappingStartEvent) and self.events - and isinstance(self.events[0], MappingEndEvent)) - - def check_empty_document(self): - if not isinstance(self.event, DocumentStartEvent) or not self.events: - return False - event = self.events[0] - return (isinstance(event, ScalarEvent) and event.anchor is None - and event.tag is None and event.implicit and event.value == u'') - - def check_simple_key(self): - length = 0 - if isinstance(self.event, NodeEvent) and self.event.anchor is not None: - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - length += len(self.prepared_anchor) - if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ - and self.event.tag is not None: - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(self.event.tag) - length += len(self.prepared_tag) - if isinstance(self.event, ScalarEvent): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - length += len(self.analysis.scalar) - return (length < 128 and (isinstance(self.event, AliasEvent) - or (isinstance(self.event, ScalarEvent) - and not self.analysis.empty and not self.analysis.multiline) - or self.check_empty_sequence() or self.check_empty_mapping())) - - # Anchor, Tag, and Scalar processors. - - def process_anchor(self, indicator): - if self.event.anchor is None: - self.prepared_anchor = None - return - if self.prepared_anchor is None: - self.prepared_anchor = self.prepare_anchor(self.event.anchor) - if self.prepared_anchor: - self.write_indicator(indicator+self.prepared_anchor, True) - self.prepared_anchor = None - - def process_tag(self): - tag = self.event.tag - if isinstance(self.event, ScalarEvent): - if self.style is None: - self.style = self.choose_scalar_style() - if ((not self.canonical or tag is None) and - ((self.style == '' and self.event.implicit[0]) - or (self.style != '' and self.event.implicit[1]))): - self.prepared_tag = None - return - if self.event.implicit[0] and tag is None: - tag = u'!' - self.prepared_tag = None - else: - if (not self.canonical or tag is None) and self.event.implicit: - self.prepared_tag = None - return - if tag is None: - raise EmitterError("tag is not specified") - if self.prepared_tag is None: - self.prepared_tag = self.prepare_tag(tag) - if self.prepared_tag: - self.write_indicator(self.prepared_tag, True) - self.prepared_tag = None - - def choose_scalar_style(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.event.style == '"' or self.canonical: - return '"' - if not self.event.style and self.event.implicit[0]: - if (not (self.simple_key_context and - (self.analysis.empty or self.analysis.multiline)) - and (self.flow_level and self.analysis.allow_flow_plain - or (not self.flow_level and self.analysis.allow_block_plain))): - return '' - if self.event.style and self.event.style in '|>': - if (not self.flow_level and not self.simple_key_context - and self.analysis.allow_block): - return self.event.style - if not self.event.style or self.event.style == '\'': - if (self.analysis.allow_single_quoted and - not (self.simple_key_context and self.analysis.multiline)): - return '\'' - return '"' - - def process_scalar(self): - if self.analysis is None: - self.analysis = self.analyze_scalar(self.event.value) - if self.style is None: - self.style = self.choose_scalar_style() - split = (not self.simple_key_context) - #if self.analysis.multiline and split \ - # and (not self.style or self.style in '\'\"'): - # self.write_indent() - if self.style == '"': - self.write_double_quoted(self.analysis.scalar, split) - elif self.style == '\'': - self.write_single_quoted(self.analysis.scalar, split) - elif self.style == '>': - self.write_folded(self.analysis.scalar) - elif self.style == '|': - self.write_literal(self.analysis.scalar) - else: - self.write_plain(self.analysis.scalar, split) - self.analysis = None - self.style = None - - # Analyzers. - - def prepare_version(self, version): - major, minor = version - if major != 1: - raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) - return u'%d.%d' % (major, minor) - - def prepare_tag_handle(self, handle): - if not handle: - raise EmitterError("tag handle must not be empty") - if handle[0] != u'!' or handle[-1] != u'!': - raise EmitterError("tag handle must start and end with '!': %r" - % (handle.encode('utf-8'))) - for ch in handle[1:-1]: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_'): - raise EmitterError("invalid character %r in the tag handle: %r" - % (ch.encode('utf-8'), handle.encode('utf-8'))) - return handle - - def prepare_tag_prefix(self, prefix): - if not prefix: - raise EmitterError("tag prefix must not be empty") - chunks = [] - start = end = 0 - if prefix[0] == u'!': - end = 1 - while end < len(prefix): - ch = prefix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?!:@&=+$,_.~*\'()[]': - end += 1 - else: - if start < end: - chunks.append(prefix[start:end]) - start = end = end+1 - data = ch.encode('utf-8') - for ch in data: - chunks.append(u'%%%02X' % ord(ch)) - if start < end: - chunks.append(prefix[start:end]) - return u''.join(chunks) - - def prepare_tag(self, tag): - if not tag: - raise EmitterError("tag must not be empty") - if tag == u'!': - return tag - handle = None - suffix = tag - prefixes = self.tag_prefixes.keys() - prefixes.sort() - for prefix in prefixes: - if tag.startswith(prefix) \ - and (prefix == u'!' or len(prefix) < len(tag)): - handle = self.tag_prefixes[prefix] - suffix = tag[len(prefix):] - chunks = [] - start = end = 0 - while end < len(suffix): - ch = suffix[end] - if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.~*\'()[]' \ - or (ch == u'!' and handle != u'!'): - end += 1 - else: - if start < end: - chunks.append(suffix[start:end]) - start = end = end+1 - data = ch.encode('utf-8') - for ch in data: - chunks.append(u'%%%02X' % ord(ch)) - if start < end: - chunks.append(suffix[start:end]) - suffix_text = u''.join(chunks) - if handle: - return u'%s%s' % (handle, suffix_text) - else: - return u'!<%s>' % suffix_text - - def prepare_anchor(self, anchor): - if not anchor: - raise EmitterError("anchor must not be empty") - for ch in anchor: - if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_'): - raise EmitterError("invalid character %r in the anchor: %r" - % (ch.encode('utf-8'), anchor.encode('utf-8'))) - return anchor - - def analyze_scalar(self, scalar): - - # Empty scalar is a special case. - if not scalar: - return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, - allow_flow_plain=False, allow_block_plain=True, - allow_single_quoted=True, allow_double_quoted=True, - allow_block=False) - - # Indicators and special characters. - block_indicators = False - flow_indicators = False - line_breaks = False - special_characters = False - - # Important whitespace combinations. - leading_space = False - leading_break = False - trailing_space = False - trailing_break = False - break_space = False - space_break = False - - # Check document indicators. - if scalar.startswith(u'---') or scalar.startswith(u'...'): - block_indicators = True - flow_indicators = True - - # First character or preceded by a whitespace. - preceeded_by_whitespace = True - - # Last character or followed by a whitespace. - followed_by_whitespace = (len(scalar) == 1 or - scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') - - # The previous character is a space. - previous_space = False - - # The previous character is a break. - previous_break = False - - index = 0 - while index < len(scalar): - ch = scalar[index] - - # Check for indicators. - if index == 0: - # Leading indicators are special characters. - if ch in u'#,[]{}&*!|>\'\"%@`': - flow_indicators = True - block_indicators = True - if ch in u'?:': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == u'-' and followed_by_whitespace: - flow_indicators = True - block_indicators = True - else: - # Some indicators cannot appear within a scalar as well. - if ch in u',?[]{}': - flow_indicators = True - if ch == u':': - flow_indicators = True - if followed_by_whitespace: - block_indicators = True - if ch == u'#' and preceeded_by_whitespace: - flow_indicators = True - block_indicators = True - - # Check for line breaks, special, and unicode characters. - if ch in u'\n\x85\u2028\u2029': - line_breaks = True - if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): - if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': - unicode_characters = True - if not self.allow_unicode: - special_characters = True - else: - special_characters = True - - # Detect important whitespace combinations. - if ch == u' ': - if index == 0: - leading_space = True - if index == len(scalar)-1: - trailing_space = True - if previous_break: - break_space = True - previous_space = True - previous_break = False - elif ch in u'\n\x85\u2028\u2029': - if index == 0: - leading_break = True - if index == len(scalar)-1: - trailing_break = True - if previous_space: - space_break = True - previous_space = False - previous_break = True - else: - previous_space = False - previous_break = False - - # Prepare for the next character. - index += 1 - preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') - followed_by_whitespace = (index+1 >= len(scalar) or - scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') - - # Let's decide what styles are allowed. - allow_flow_plain = True - allow_block_plain = True - allow_single_quoted = True - allow_double_quoted = True - allow_block = True - - # Leading and trailing whitespaces are bad for plain scalars. - if (leading_space or leading_break - or trailing_space or trailing_break): - allow_flow_plain = allow_block_plain = False - - # We do not permit trailing spaces for block scalars. - if trailing_space: - allow_block = False - - # Spaces at the beginning of a new line are only acceptable for block - # scalars. - if break_space: - allow_flow_plain = allow_block_plain = allow_single_quoted = False - - # Spaces followed by breaks, as well as special character are only - # allowed for double quoted scalars. - if space_break or special_characters: - allow_flow_plain = allow_block_plain = \ - allow_single_quoted = allow_block = False - - # Although the plain scalar writer supports breaks, we never emit - # multiline plain scalars. - if line_breaks: - allow_flow_plain = allow_block_plain = False - - # Flow indicators are forbidden for flow plain scalars. - if flow_indicators: - allow_flow_plain = False - - # Block indicators are forbidden for block plain scalars. - if block_indicators: - allow_block_plain = False - - return ScalarAnalysis(scalar=scalar, - empty=False, multiline=line_breaks, - allow_flow_plain=allow_flow_plain, - allow_block_plain=allow_block_plain, - allow_single_quoted=allow_single_quoted, - allow_double_quoted=allow_double_quoted, - allow_block=allow_block) - - # Writers. - - def flush_stream(self): - if hasattr(self.stream, 'flush'): - self.stream.flush() - - def write_stream_start(self): - # Write BOM if needed. - if self.encoding and self.encoding.startswith('utf-16'): - self.stream.write(u'\uFEFF'.encode(self.encoding)) - - def write_stream_end(self): - self.flush_stream() - - def write_indicator(self, indicator, need_whitespace, - whitespace=False, indention=False): - if self.whitespace or not need_whitespace: - data = indicator - else: - data = u' '+indicator - self.whitespace = whitespace - self.indention = self.indention and indention - self.column += len(data) - self.open_ended = False - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_indent(self): - indent = self.indent or 0 - if not self.indention or self.column > indent \ - or (self.column == indent and not self.whitespace): - self.write_line_break() - if self.column < indent: - self.whitespace = True - data = u' '*(indent-self.column) - self.column = indent - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_line_break(self, data=None): - if data is None: - data = self.best_line_break - self.whitespace = True - self.indention = True - self.line += 1 - self.column = 0 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - - def write_version_directive(self, version_text): - data = u'%%YAML %s' % version_text - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - def write_tag_directive(self, handle_text, prefix_text): - data = u'%%TAG %s %s' % (handle_text, prefix_text) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_line_break() - - # Scalar streams. - - def write_single_quoted(self, text, split=True): - self.write_indicator(u'\'', True) - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch is None or ch != u' ': - if start+1 == end and self.column > self.best_width and split \ - and start != 0 and end != len(text): - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': - self.write_line_break() - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch == u'\'': - data = u'\'\'' - self.column += 2 - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end + 1 - if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 - self.write_indicator(u'\'', False) - - ESCAPE_REPLACEMENTS = { - u'\0': u'0', - u'\x07': u'a', - u'\x08': u'b', - u'\x09': u't', - u'\x0A': u'n', - u'\x0B': u'v', - u'\x0C': u'f', - u'\x0D': u'r', - u'\x1B': u'e', - u'\"': u'\"', - u'\\': u'\\', - u'\x85': u'N', - u'\xA0': u'_', - u'\u2028': u'L', - u'\u2029': u'P', - } - - def write_double_quoted(self, text, split=True): - self.write_indicator(u'"', True) - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ - or not (u'\x20' <= ch <= u'\x7E' - or (self.allow_unicode - and (u'\xA0' <= ch <= u'\uD7FF' - or u'\uE000' <= ch <= u'\uFFFD'))): - if start < end: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - if ch in self.ESCAPE_REPLACEMENTS: - data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] - elif ch <= u'\xFF': - data = u'\\x%02X' % ord(ch) - elif ch <= u'\uFFFF': - data = u'\\u%04X' % ord(ch) - else: - data = u'\\U%08X' % ord(ch) - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end+1 - if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ - and self.column+(end-start) > self.best_width and split: - data = text[start:end]+u'\\' - if start < end: - start = end - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.write_indent() - self.whitespace = False - self.indention = False - if text[start] == u' ': - data = u'\\' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - end += 1 - self.write_indicator(u'"', False) - - def determine_block_hints(self, text): - hints = u'' - if text: - if text[0] in u' \n\x85\u2028\u2029': - hints += unicode(self.best_indent) - if text[-1] not in u'\n\x85\u2028\u2029': - hints += u'-' - elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': - hints += u'+' - return hints - - def write_folded(self, text): - hints = self.determine_block_hints(text) - self.write_indicator(u'>'+hints, True) - if hints[-1:] == u'+': - self.open_ended = True - self.write_line_break() - leading_space = True - spaces = False - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - if not leading_space and ch is not None and ch != u' ' \ - and text[start] == u'\n': - self.write_line_break() - leading_space = (ch == u' ') - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - elif spaces: - if ch != u' ': - if start+1 == end and self.column > self.best_width: - self.write_indent() - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') - spaces = (ch == u' ') - end += 1 - - def write_literal(self, text): - hints = self.determine_block_hints(text) - self.write_indicator(u'|'+hints, True) - if hints[-1:] == u'+': - self.open_ended = True - self.write_line_break() - breaks = True - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if breaks: - if ch is None or ch not in u'\n\x85\u2028\u2029': - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - if ch is not None: - self.write_indent() - start = end - else: - if ch is None or ch in u'\n\x85\u2028\u2029': - data = text[start:end] - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - if ch is None: - self.write_line_break() - start = end - if ch is not None: - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 - - def write_plain(self, text, split=True): - if self.root_context: - self.open_ended = True - if not text: - return - if not self.whitespace: - data = u' ' - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - self.whitespace = False - self.indention = False - spaces = False - breaks = False - start = end = 0 - while end <= len(text): - ch = None - if end < len(text): - ch = text[end] - if spaces: - if ch != u' ': - if start+1 == end and self.column > self.best_width and split: - self.write_indent() - self.whitespace = False - self.indention = False - else: - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - elif breaks: - if ch not in u'\n\x85\u2028\u2029': - if text[start] == u'\n': - self.write_line_break() - for br in text[start:end]: - if br == u'\n': - self.write_line_break() - else: - self.write_line_break(br) - self.write_indent() - self.whitespace = False - self.indention = False - start = end - else: - if ch is None or ch in u' \n\x85\u2028\u2029': - data = text[start:end] - self.column += len(data) - if self.encoding: - data = data.encode(self.encoding) - self.stream.write(data) - start = end - if ch is not None: - spaces = (ch == u' ') - breaks = (ch in u'\n\x85\u2028\u2029') - end += 1 - diff --git a/tablib/packages/yaml/error.py b/tablib/packages/yaml/error.py deleted file mode 100644 index 577686d..0000000 --- a/tablib/packages/yaml/error.py +++ /dev/null @@ -1,75 +0,0 @@ - -__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] - -class Mark(object): - - def __init__(self, name, index, line, column, buffer, pointer): - self.name = name - self.index = index - self.line = line - self.column = column - self.buffer = buffer - self.pointer = pointer - - def get_snippet(self, indent=4, max_length=75): - if self.buffer is None: - return None - head = '' - start = self.pointer - while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029': - start -= 1 - if self.pointer-start > max_length/2-1: - head = ' ... ' - start += 5 - break - tail = '' - end = self.pointer - while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029': - end += 1 - if end-self.pointer > max_length/2-1: - tail = ' ... ' - end -= 5 - break - snippet = self.buffer[start:end].encode('utf-8') - return ' '*indent + head + snippet + tail + '\n' \ - + ' '*(indent+self.pointer-start+len(head)) + '^' - - def __str__(self): - snippet = self.get_snippet() - where = " in \"%s\", line %d, column %d" \ - % (self.name, self.line+1, self.column+1) - if snippet is not None: - where += ":\n"+snippet - return where - -class YAMLError(Exception): - pass - -class MarkedYAMLError(YAMLError): - - def __init__(self, context=None, context_mark=None, - problem=None, problem_mark=None, note=None): - self.context = context - self.context_mark = context_mark - self.problem = problem - self.problem_mark = problem_mark - self.note = note - - def __str__(self): - lines = [] - if self.context is not None: - lines.append(self.context) - if self.context_mark is not None \ - and (self.problem is None or self.problem_mark is None - or self.context_mark.name != self.problem_mark.name - or self.context_mark.line != self.problem_mark.line - or self.context_mark.column != self.problem_mark.column): - lines.append(str(self.context_mark)) - if self.problem is not None: - lines.append(self.problem) - if self.problem_mark is not None: - lines.append(str(self.problem_mark)) - if self.note is not None: - lines.append(self.note) - return '\n'.join(lines) - diff --git a/tablib/packages/yaml/events.py b/tablib/packages/yaml/events.py deleted file mode 100644 index f79ad38..0000000 --- a/tablib/packages/yaml/events.py +++ /dev/null @@ -1,86 +0,0 @@ - -# Abstract classes. - -class Event(object): - def __init__(self, start_mark=None, end_mark=None): - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] - if hasattr(self, key)] - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - -class NodeEvent(Event): - def __init__(self, anchor, start_mark=None, end_mark=None): - self.anchor = anchor - self.start_mark = start_mark - self.end_mark = end_mark - -class CollectionStartEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, - flow_style=None): - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.start_mark = start_mark - self.end_mark = end_mark - self.flow_style = flow_style - -class CollectionEndEvent(Event): - pass - -# Implementations. - -class StreamStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, encoding=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.encoding = encoding - -class StreamEndEvent(Event): - pass - -class DocumentStartEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None, version=None, tags=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.explicit = explicit - self.version = version - self.tags = tags - -class DocumentEndEvent(Event): - def __init__(self, start_mark=None, end_mark=None, - explicit=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.explicit = explicit - -class AliasEvent(NodeEvent): - pass - -class ScalarEvent(NodeEvent): - def __init__(self, anchor, tag, implicit, value, - start_mark=None, end_mark=None, style=None): - self.anchor = anchor - self.tag = tag - self.implicit = implicit - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - -class SequenceStartEvent(CollectionStartEvent): - pass - -class SequenceEndEvent(CollectionEndEvent): - pass - -class MappingStartEvent(CollectionStartEvent): - pass - -class MappingEndEvent(CollectionEndEvent): - pass - diff --git a/tablib/packages/yaml/loader.py b/tablib/packages/yaml/loader.py deleted file mode 100644 index 293ff46..0000000 --- a/tablib/packages/yaml/loader.py +++ /dev/null @@ -1,40 +0,0 @@ - -__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] - -from reader import * -from scanner import * -from parser import * -from composer import * -from constructor import * -from resolver import * - -class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - BaseConstructor.__init__(self) - BaseResolver.__init__(self) - -class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - SafeConstructor.__init__(self) - Resolver.__init__(self) - -class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): - - def __init__(self, stream): - Reader.__init__(self, stream) - Scanner.__init__(self) - Parser.__init__(self) - Composer.__init__(self) - Constructor.__init__(self) - Resolver.__init__(self) - diff --git a/tablib/packages/yaml/nodes.py b/tablib/packages/yaml/nodes.py deleted file mode 100644 index c4f070c..0000000 --- a/tablib/packages/yaml/nodes.py +++ /dev/null @@ -1,49 +0,0 @@ - -class Node(object): - def __init__(self, tag, value, start_mark, end_mark): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - value = self.value - #if isinstance(value, list): - # if len(value) == 0: - # value = '' - # elif len(value) == 1: - # value = '<1 item>' - # else: - # value = '<%d items>' % len(value) - #else: - # if len(value) > 75: - # value = repr(value[:70]+u' ... ') - # else: - # value = repr(value) - value = repr(value) - return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) - -class ScalarNode(Node): - id = 'scalar' - def __init__(self, tag, value, - start_mark=None, end_mark=None, style=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style - -class CollectionNode(Node): - def __init__(self, tag, value, - start_mark=None, end_mark=None, flow_style=None): - self.tag = tag - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - self.flow_style = flow_style - -class SequenceNode(CollectionNode): - id = 'sequence' - -class MappingNode(CollectionNode): - id = 'mapping' - diff --git a/tablib/packages/yaml/parser.py b/tablib/packages/yaml/parser.py deleted file mode 100644 index b6a7416..0000000 --- a/tablib/packages/yaml/parser.py +++ /dev/null @@ -1,584 +0,0 @@ - -# The following YAML grammar is LL(1) and is parsed by a recursive descent -# parser. -# -# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END -# implicit_document ::= block_node DOCUMENT-END* -# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* -# block_node_or_indentless_sequence ::= -# ALIAS -# | properties (block_content | indentless_block_sequence)? -# | block_content -# | indentless_block_sequence -# block_node ::= ALIAS -# | properties block_content? -# | block_content -# flow_node ::= ALIAS -# | properties flow_content? -# | flow_content -# properties ::= TAG ANCHOR? | ANCHOR TAG? -# block_content ::= block_collection | flow_collection | SCALAR -# flow_content ::= flow_collection | SCALAR -# block_collection ::= block_sequence | block_mapping -# flow_collection ::= flow_sequence | flow_mapping -# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END -# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ -# block_mapping ::= BLOCK-MAPPING_START -# ((KEY block_node_or_indentless_sequence?)? -# (VALUE block_node_or_indentless_sequence?)?)* -# BLOCK-END -# flow_sequence ::= FLOW-SEQUENCE-START -# (flow_sequence_entry FLOW-ENTRY)* -# flow_sequence_entry? -# FLOW-SEQUENCE-END -# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# flow_mapping ::= FLOW-MAPPING-START -# (flow_mapping_entry FLOW-ENTRY)* -# flow_mapping_entry? -# FLOW-MAPPING-END -# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? -# -# FIRST sets: -# -# stream: { STREAM-START } -# explicit_document: { DIRECTIVE DOCUMENT-START } -# implicit_document: FIRST(block_node) -# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } -# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# block_sequence: { BLOCK-SEQUENCE-START } -# block_mapping: { BLOCK-MAPPING-START } -# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } -# indentless_sequence: { ENTRY } -# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } -# flow_sequence: { FLOW-SEQUENCE-START } -# flow_mapping: { FLOW-MAPPING-START } -# flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } -# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } - -__all__ = ['Parser', 'ParserError'] - -from error import MarkedYAMLError -from tokens import * -from events import * -from scanner import * - -class ParserError(MarkedYAMLError): - pass - -class Parser(object): - # Since writing a recursive-descendant parser is a straightforward task, we - # do not give many comments here. - - DEFAULT_TAGS = { - u'!': u'!', - u'!!': u'tag:yaml.org,2002:', - } - - def __init__(self): - self.current_event = None - self.yaml_version = None - self.tag_handles = {} - self.states = [] - self.marks = [] - self.state = self.parse_stream_start - - def check_event(self, *choices): - # Check the type of the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - if self.current_event is not None: - if not choices: - return True - for choice in choices: - if isinstance(self.current_event, choice): - return True - return False - - def peek_event(self): - # Get the next event. - if self.current_event is None: - if self.state: - self.current_event = self.state() - return self.current_event - - def get_event(self): - # Get the next event and proceed further. - if self.current_event is None: - if self.state: - self.current_event = self.state() - value = self.current_event - self.current_event = None - return value - - # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END - # implicit_document ::= block_node DOCUMENT-END* - # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* - - def parse_stream_start(self): - - # Parse the stream start. - token = self.get_token() - event = StreamStartEvent(token.start_mark, token.end_mark, - encoding=token.encoding) - - # Prepare the next state. - self.state = self.parse_implicit_document_start - - return event - - def parse_implicit_document_start(self): - - # Parse an implicit document. - if not self.check_token(DirectiveToken, DocumentStartToken, - StreamEndToken): - self.tag_handles = self.DEFAULT_TAGS - token = self.peek_token() - start_mark = end_mark = token.start_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=False) - - # Prepare the next state. - self.states.append(self.parse_document_end) - self.state = self.parse_block_node - - return event - - else: - return self.parse_document_start() - - def parse_document_start(self): - - # Parse any extra document end indicators. - while self.check_token(DocumentEndToken): - self.get_token() - - # Parse an explicit document. - if not self.check_token(StreamEndToken): - token = self.peek_token() - start_mark = token.start_mark - version, tags = self.process_directives() - if not self.check_token(DocumentStartToken): - raise ParserError(None, None, - "expected '', but found %r" - % self.peek_token().id, - self.peek_token().start_mark) - token = self.get_token() - end_mark = token.end_mark - event = DocumentStartEvent(start_mark, end_mark, - explicit=True, version=version, tags=tags) - self.states.append(self.parse_document_end) - self.state = self.parse_document_content - else: - # Parse the end of the stream. - token = self.get_token() - event = StreamEndEvent(token.start_mark, token.end_mark) - assert not self.states - assert not self.marks - self.state = None - return event - - def parse_document_end(self): - - # Parse the document end. - token = self.peek_token() - start_mark = end_mark = token.start_mark - explicit = False - if self.check_token(DocumentEndToken): - token = self.get_token() - end_mark = token.end_mark - explicit = True - event = DocumentEndEvent(start_mark, end_mark, - explicit=explicit) - - # Prepare the next state. - self.state = self.parse_document_start - - return event - - def parse_document_content(self): - if self.check_token(DirectiveToken, - DocumentStartToken, DocumentEndToken, StreamEndToken): - event = self.process_empty_scalar(self.peek_token().start_mark) - self.state = self.states.pop() - return event - else: - return self.parse_block_node() - - def process_directives(self): - self.yaml_version = None - self.tag_handles = {} - while self.check_token(DirectiveToken): - token = self.get_token() - if token.name == u'YAML': - if self.yaml_version is not None: - raise ParserError(None, None, - "found duplicate YAML directive", token.start_mark) - major, minor = token.value - if major != 1: - raise ParserError(None, None, - "found incompatible YAML document (version 1.* is required)", - token.start_mark) - self.yaml_version = token.value - elif token.name == u'TAG': - handle, prefix = token.value - if handle in self.tag_handles: - raise ParserError(None, None, - "duplicate tag handle %r" % handle.encode('utf-8'), - token.start_mark) - self.tag_handles[handle] = prefix - if self.tag_handles: - value = self.yaml_version, self.tag_handles.copy() - else: - value = self.yaml_version, None - for key in self.DEFAULT_TAGS: - if key not in self.tag_handles: - self.tag_handles[key] = self.DEFAULT_TAGS[key] - return value - - # block_node_or_indentless_sequence ::= ALIAS - # | properties (block_content | indentless_block_sequence)? - # | block_content - # | indentless_block_sequence - # block_node ::= ALIAS - # | properties block_content? - # | block_content - # flow_node ::= ALIAS - # | properties flow_content? - # | flow_content - # properties ::= TAG ANCHOR? | ANCHOR TAG? - # block_content ::= block_collection | flow_collection | SCALAR - # flow_content ::= flow_collection | SCALAR - # block_collection ::= block_sequence | block_mapping - # flow_collection ::= flow_sequence | flow_mapping - - def parse_block_node(self): - return self.parse_node(block=True) - - def parse_flow_node(self): - return self.parse_node() - - def parse_block_node_or_indentless_sequence(self): - return self.parse_node(block=True, indentless_sequence=True) - - def parse_node(self, block=False, indentless_sequence=False): - if self.check_token(AliasToken): - token = self.get_token() - event = AliasEvent(token.value, token.start_mark, token.end_mark) - self.state = self.states.pop() - else: - anchor = None - tag = None - start_mark = end_mark = tag_mark = None - if self.check_token(AnchorToken): - token = self.get_token() - start_mark = token.start_mark - end_mark = token.end_mark - anchor = token.value - if self.check_token(TagToken): - token = self.get_token() - tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - elif self.check_token(TagToken): - token = self.get_token() - start_mark = tag_mark = token.start_mark - end_mark = token.end_mark - tag = token.value - if self.check_token(AnchorToken): - token = self.get_token() - end_mark = token.end_mark - anchor = token.value - if tag is not None: - handle, suffix = tag - if handle is not None: - if handle not in self.tag_handles: - raise ParserError("while parsing a node", start_mark, - "found undefined tag handle %r" % handle.encode('utf-8'), - tag_mark) - tag = self.tag_handles[handle]+suffix - else: - tag = suffix - #if tag == u'!': - # raise ParserError("while parsing a node", start_mark, - # "found non-specific tag '!'", tag_mark, - # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") - if start_mark is None: - start_mark = end_mark = self.peek_token().start_mark - event = None - implicit = (tag is None or tag == u'!') - if indentless_sequence and self.check_token(BlockEntryToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark) - self.state = self.parse_indentless_sequence_entry - else: - if self.check_token(ScalarToken): - token = self.get_token() - end_mark = token.end_mark - if (token.plain and tag is None) or tag == u'!': - implicit = (True, False) - elif tag is None: - implicit = (False, True) - else: - implicit = (False, False) - event = ScalarEvent(anchor, tag, implicit, token.value, - start_mark, end_mark, style=token.style) - self.state = self.states.pop() - elif self.check_token(FlowSequenceStartToken): - end_mark = self.peek_token().end_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_sequence_first_entry - elif self.check_token(FlowMappingStartToken): - end_mark = self.peek_token().end_mark - event = MappingStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=True) - self.state = self.parse_flow_mapping_first_key - elif block and self.check_token(BlockSequenceStartToken): - end_mark = self.peek_token().start_mark - event = SequenceStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=False) - self.state = self.parse_block_sequence_first_entry - elif block and self.check_token(BlockMappingStartToken): - end_mark = self.peek_token().start_mark - event = MappingStartEvent(anchor, tag, implicit, - start_mark, end_mark, flow_style=False) - self.state = self.parse_block_mapping_first_key - elif anchor is not None or tag is not None: - # Empty scalars are allowed even if a tag or an anchor is - # specified. - event = ScalarEvent(anchor, tag, (implicit, False), u'', - start_mark, end_mark) - self.state = self.states.pop() - else: - if block: - node = 'block' - else: - node = 'flow' - token = self.peek_token() - raise ParserError("while parsing a %s node" % node, start_mark, - "expected the node content, but found %r" % token.id, - token.start_mark) - return event - - # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END - - def parse_block_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_sequence_entry() - - def parse_block_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - if not self.check_token(BlockEntryToken, BlockEndToken): - self.states.append(self.parse_block_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_block_sequence_entry - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError("while parsing a block collection", self.marks[-1], - "expected , but found %r" % token.id, token.start_mark) - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ - - def parse_indentless_sequence_entry(self): - if self.check_token(BlockEntryToken): - token = self.get_token() - if not self.check_token(BlockEntryToken, - KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_indentless_sequence_entry) - return self.parse_block_node() - else: - self.state = self.parse_indentless_sequence_entry - return self.process_empty_scalar(token.end_mark) - token = self.peek_token() - event = SequenceEndEvent(token.start_mark, token.start_mark) - self.state = self.states.pop() - return event - - # block_mapping ::= BLOCK-MAPPING_START - # ((KEY block_node_or_indentless_sequence?)? - # (VALUE block_node_or_indentless_sequence?)?)* - # BLOCK-END - - def parse_block_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_block_mapping_key() - - def parse_block_mapping_key(self): - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_value) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_value - return self.process_empty_scalar(token.end_mark) - if not self.check_token(BlockEndToken): - token = self.peek_token() - raise ParserError("while parsing a block mapping", self.marks[-1], - "expected , but found %r" % token.id, token.start_mark) - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_block_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(KeyToken, ValueToken, BlockEndToken): - self.states.append(self.parse_block_mapping_key) - return self.parse_block_node_or_indentless_sequence() - else: - self.state = self.parse_block_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_block_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - # flow_sequence ::= FLOW-SEQUENCE-START - # (flow_sequence_entry FLOW-ENTRY)* - # flow_sequence_entry? - # FLOW-SEQUENCE-END - # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - # - # Note that while production rules for both flow_sequence_entry and - # flow_mapping_entry are equal, their interpretations are different. - # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` - # generate an inline mapping (set syntax). - - def parse_flow_sequence_first_entry(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_sequence_entry(first=True) - - def parse_flow_sequence_entry(self, first=False): - if not self.check_token(FlowSequenceEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError("while parsing a flow sequence", self.marks[-1], - "expected ',' or ']', but got %r" % token.id, token.start_mark) - - if self.check_token(KeyToken): - token = self.peek_token() - event = MappingStartEvent(None, None, True, - token.start_mark, token.end_mark, - flow_style=True) - self.state = self.parse_flow_sequence_entry_mapping_key - return event - elif not self.check_token(FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry) - return self.parse_flow_node() - token = self.get_token() - event = SequenceEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_sequence_entry_mapping_key(self): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_value - return self.process_empty_scalar(token.end_mark) - - def parse_flow_sequence_entry_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowSequenceEndToken): - self.states.append(self.parse_flow_sequence_entry_mapping_end) - return self.parse_flow_node() - else: - self.state = self.parse_flow_sequence_entry_mapping_end - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_sequence_entry_mapping_end - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_sequence_entry_mapping_end(self): - self.state = self.parse_flow_sequence_entry - token = self.peek_token() - return MappingEndEvent(token.start_mark, token.start_mark) - - # flow_mapping ::= FLOW-MAPPING-START - # (flow_mapping_entry FLOW-ENTRY)* - # flow_mapping_entry? - # FLOW-MAPPING-END - # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? - - def parse_flow_mapping_first_key(self): - token = self.get_token() - self.marks.append(token.start_mark) - return self.parse_flow_mapping_key(first=True) - - def parse_flow_mapping_key(self, first=False): - if not self.check_token(FlowMappingEndToken): - if not first: - if self.check_token(FlowEntryToken): - self.get_token() - else: - token = self.peek_token() - raise ParserError("while parsing a flow mapping", self.marks[-1], - "expected ',' or '}', but got %r" % token.id, token.start_mark) - if self.check_token(KeyToken): - token = self.get_token() - if not self.check_token(ValueToken, - FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_value) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_value - return self.process_empty_scalar(token.end_mark) - elif not self.check_token(FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_empty_value) - return self.parse_flow_node() - token = self.get_token() - event = MappingEndEvent(token.start_mark, token.end_mark) - self.state = self.states.pop() - self.marks.pop() - return event - - def parse_flow_mapping_value(self): - if self.check_token(ValueToken): - token = self.get_token() - if not self.check_token(FlowEntryToken, FlowMappingEndToken): - self.states.append(self.parse_flow_mapping_key) - return self.parse_flow_node() - else: - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(token.end_mark) - else: - self.state = self.parse_flow_mapping_key - token = self.peek_token() - return self.process_empty_scalar(token.start_mark) - - def parse_flow_mapping_empty_value(self): - self.state = self.parse_flow_mapping_key - return self.process_empty_scalar(self.peek_token().start_mark) - - def process_empty_scalar(self, mark): - return ScalarEvent(None, None, (True, False), u'', mark, mark) - diff --git a/tablib/packages/yaml/reader.py b/tablib/packages/yaml/reader.py deleted file mode 100644 index 1e7a4db..0000000 --- a/tablib/packages/yaml/reader.py +++ /dev/null @@ -1,225 +0,0 @@ -# This module contains abstractions for the input stream. You don't have to -# looks further, there are no pretty code. -# -# We define two classes here. -# -# Mark(source, line, column) -# It's just a record and its only use is producing nice error messages. -# Parser does not use it for any other purposes. -# -# Reader(source, data) -# Reader determines the encoding of `data` and converts it to unicode. -# Reader provides the following methods and attributes: -# reader.peek(length=1) - return the next `length` characters -# reader.forward(length=1) - move the current position to `length` characters. -# reader.index - the number of the current character. -# reader.line, stream.column - the line and the column of the current character. - -__all__ = ['Reader', 'ReaderError'] - -from error import YAMLError, Mark - -import codecs, re - -# Unfortunately, codec functions in Python 2.3 does not support the `finish` -# arguments, so we have to write our own wrappers. - -try: - codecs.utf_8_decode('', 'strict', False) - from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode - -except TypeError: - - def utf_16_le_decode(data, errors, finish=False): - if not finish and len(data) % 2 == 1: - data = data[:-1] - return codecs.utf_16_le_decode(data, errors) - - def utf_16_be_decode(data, errors, finish=False): - if not finish and len(data) % 2 == 1: - data = data[:-1] - return codecs.utf_16_be_decode(data, errors) - - def utf_8_decode(data, errors, finish=False): - if not finish: - # We are trying to remove a possible incomplete multibyte character - # from the suffix of the data. - # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd. - # All further bytes are in the range 0x80 to 0xbf. - # UTF-8 encoded UCS characters may be up to six bytes long. - count = 0 - while count < 5 and count < len(data) \ - and '\x80' <= data[-count-1] <= '\xBF': - count -= 1 - if count < 5 and count < len(data) \ - and '\xC0' <= data[-count-1] <= '\xFD': - data = data[:-count-1] - return codecs.utf_8_decode(data, errors) - -class ReaderError(YAMLError): - - def __init__(self, name, position, character, encoding, reason): - self.name = name - self.character = character - self.position = position - self.encoding = encoding - self.reason = reason - - def __str__(self): - if isinstance(self.character, str): - return "'%s' codec can't decode byte #x%02x: %s\n" \ - " in \"%s\", position %d" \ - % (self.encoding, ord(self.character), self.reason, - self.name, self.position) - else: - return "unacceptable character #x%04x: %s\n" \ - " in \"%s\", position %d" \ - % (self.character, self.reason, - self.name, self.position) - -class Reader(object): - # Reader: - # - determines the data encoding and converts it to unicode, - # - checks if characters are in allowed range, - # - adds '\0' to the end. - - # Reader accepts - # - a `str` object, - # - a `unicode` object, - # - a file-like object with its `read` method returning `str`, - # - a file-like object with its `read` method returning `unicode`. - - # Yeah, it's ugly and slow. - - def __init__(self, stream): - self.name = None - self.stream = None - self.stream_pointer = 0 - self.eof = True - self.buffer = u'' - self.pointer = 0 - self.raw_buffer = None - self.raw_decode = None - self.encoding = None - self.index = 0 - self.line = 0 - self.column = 0 - if isinstance(stream, unicode): - self.name = "" - self.check_printable(stream) - self.buffer = stream+u'\0' - elif isinstance(stream, str): - self.name = "" - self.raw_buffer = stream - self.determine_encoding() - else: - self.stream = stream - self.name = getattr(stream, 'name', "") - self.eof = False - self.raw_buffer = '' - self.determine_encoding() - - def peek(self, index=0): - try: - return self.buffer[self.pointer+index] - except IndexError: - self.update(index+1) - return self.buffer[self.pointer+index] - - def prefix(self, length=1): - if self.pointer+length >= len(self.buffer): - self.update(length) - return self.buffer[self.pointer:self.pointer+length] - - def forward(self, length=1): - if self.pointer+length+1 >= len(self.buffer): - self.update(length+1) - while length: - ch = self.buffer[self.pointer] - self.pointer += 1 - self.index += 1 - if ch in u'\n\x85\u2028\u2029' \ - or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): - self.line += 1 - self.column = 0 - elif ch != u'\uFEFF': - self.column += 1 - length -= 1 - - def get_mark(self): - if self.stream is None: - return Mark(self.name, self.index, self.line, self.column, - self.buffer, self.pointer) - else: - return Mark(self.name, self.index, self.line, self.column, - None, None) - - def determine_encoding(self): - while not self.eof and len(self.raw_buffer) < 2: - self.update_raw() - if not isinstance(self.raw_buffer, unicode): - if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): - self.raw_decode = utf_16_le_decode - self.encoding = 'utf-16-le' - elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): - self.raw_decode = utf_16_be_decode - self.encoding = 'utf-16-be' - else: - self.raw_decode = utf_8_decode - self.encoding = 'utf-8' - self.update(1) - - NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') - def check_printable(self, data): - match = self.NON_PRINTABLE.search(data) - if match: - character = match.group() - position = self.index+(len(self.buffer)-self.pointer)+match.start() - raise ReaderError(self.name, position, ord(character), - 'unicode', "special characters are not allowed") - - def update(self, length): - if self.raw_buffer is None: - return - self.buffer = self.buffer[self.pointer:] - self.pointer = 0 - while len(self.buffer) < length: - if not self.eof: - self.update_raw() - if self.raw_decode is not None: - try: - data, converted = self.raw_decode(self.raw_buffer, - 'strict', self.eof) - except UnicodeDecodeError, exc: - character = exc.object[exc.start] - if self.stream is not None: - position = self.stream_pointer-len(self.raw_buffer)+exc.start - else: - position = exc.start - raise ReaderError(self.name, position, character, - exc.encoding, exc.reason) - else: - data = self.raw_buffer - converted = len(data) - self.check_printable(data) - self.buffer += data - self.raw_buffer = self.raw_buffer[converted:] - if self.eof: - self.buffer += u'\0' - self.raw_buffer = None - break - - def update_raw(self, size=1024): - data = self.stream.read(size) - if data: - self.raw_buffer += data - self.stream_pointer += len(data) - else: - self.eof = True - -#try: -# import psyco -# psyco.bind(Reader) -#except ImportError: -# pass - diff --git a/tablib/packages/yaml/representer.py b/tablib/packages/yaml/representer.py deleted file mode 100644 index f5606ec..0000000 --- a/tablib/packages/yaml/representer.py +++ /dev/null @@ -1,489 +0,0 @@ - -__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', - 'RepresenterError'] - -from error import * -from nodes import * - -import datetime - -try: - set -except NameError: - from sets import Set as set - -import sys, copy_reg, types - -class RepresenterError(YAMLError): - pass - -class BaseRepresenter(object): - - yaml_representers = {} - yaml_multi_representers = {} - - def __init__(self, default_style=None, default_flow_style=None): - self.default_style = default_style - self.default_flow_style = default_flow_style - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def represent(self, data): - node = self.represent_data(data) - self.serialize(node) - self.represented_objects = {} - self.object_keeper = [] - self.alias_key = None - - def get_classobj_bases(self, cls): - bases = [cls] - for base in cls.__bases__: - bases.extend(self.get_classobj_bases(base)) - return bases - - def represent_data(self, data): - if self.ignore_aliases(data): - self.alias_key = None - else: - self.alias_key = id(data) - if self.alias_key is not None: - if self.alias_key in self.represented_objects: - node = self.represented_objects[self.alias_key] - #if node is None: - # raise RepresenterError("recursive objects are not allowed: %r" % data) - return node - #self.represented_objects[alias_key] = None - self.object_keeper.append(data) - data_types = type(data).__mro__ - if type(data) is types.InstanceType: - data_types = self.get_classobj_bases(data.__class__)+list(data_types) - if data_types[0] in self.yaml_representers: - node = self.yaml_representers[data_types[0]](self, data) - else: - for data_type in data_types: - if data_type in self.yaml_multi_representers: - node = self.yaml_multi_representers[data_type](self, data) - break - else: - if None in self.yaml_multi_representers: - node = self.yaml_multi_representers[None](self, data) - elif None in self.yaml_representers: - node = self.yaml_representers[None](self, data) - else: - node = ScalarNode(None, unicode(data)) - #if alias_key is not None: - # self.represented_objects[alias_key] = node - return node - - def add_representer(cls, data_type, representer): - if not 'yaml_representers' in cls.__dict__: - cls.yaml_representers = cls.yaml_representers.copy() - cls.yaml_representers[data_type] = representer - add_representer = classmethod(add_representer) - - def add_multi_representer(cls, data_type, representer): - if not 'yaml_multi_representers' in cls.__dict__: - cls.yaml_multi_representers = cls.yaml_multi_representers.copy() - cls.yaml_multi_representers[data_type] = representer - add_multi_representer = classmethod(add_multi_representer) - - def represent_scalar(self, tag, value, style=None): - if style is None: - style = self.default_style - node = ScalarNode(tag, value, style=style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - return node - - def represent_sequence(self, tag, sequence, flow_style=None): - value = [] - node = SequenceNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - for item in sequence: - node_item = self.represent_data(item) - if not (isinstance(node_item, ScalarNode) and not node_item.style): - best_style = False - value.append(node_item) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def represent_mapping(self, tag, mapping, flow_style=None): - value = [] - node = MappingNode(tag, value, flow_style=flow_style) - if self.alias_key is not None: - self.represented_objects[self.alias_key] = node - best_style = True - if hasattr(mapping, 'items'): - mapping = mapping.items() - mapping.sort() - for item_key, item_value in mapping: - node_key = self.represent_data(item_key) - node_value = self.represent_data(item_value) - if not (isinstance(node_key, ScalarNode) and not node_key.style): - best_style = False - if not (isinstance(node_value, ScalarNode) and not node_value.style): - best_style = False - value.append((node_key, node_value)) - if flow_style is None: - if self.default_flow_style is not None: - node.flow_style = self.default_flow_style - else: - node.flow_style = best_style - return node - - def ignore_aliases(self, data): - return False - -class SafeRepresenter(BaseRepresenter): - - def ignore_aliases(self, data): - if data in [None, ()]: - return True - if isinstance(data, (str, unicode, bool, int, float)): - return True - - def represent_none(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:null', - u'null') - - def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) - - def represent_unicode(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:str', data) - - def represent_bool(self, data): - if data: - value = u'true' - else: - value = u'false' - return self.represent_scalar(u'tag:yaml.org,2002:bool', value) - - def represent_int(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) - - def represent_long(self, data): - return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data)) - - inf_value = 1e300 - while repr(inf_value) != repr(inf_value*inf_value): - inf_value *= inf_value - - def represent_float(self, data): - if data != data or (data == 0.0 and data == 1.0): - value = u'.nan' - elif data == self.inf_value: - value = u'.inf' - elif data == -self.inf_value: - value = u'-.inf' - else: - value = unicode(repr(data)).lower() - # Note that in some cases `repr(data)` represents a float number - # without the decimal parts. For instance: - # >>> repr(1e17) - # '1e17' - # Unfortunately, this is not a valid float representation according - # to the definition of the `!!float` tag. We fix this by adding - # '.0' before the 'e' symbol. - if u'.' not in value and u'e' in value: - value = value.replace(u'e', u'.0e', 1) - return self.represent_scalar(u'tag:yaml.org,2002:float', value) - - def represent_list(self, data): - #pairs = (len(data) > 0 and isinstance(data, list)) - #if pairs: - # for item in data: - # if not isinstance(item, tuple) or len(item) != 2: - # pairs = False - # break - #if not pairs: - return self.represent_sequence(u'tag:yaml.org,2002:seq', data) - #value = [] - #for item_key, item_value in data: - # value.append(self.represent_mapping(u'tag:yaml.org,2002:map', - # [(item_key, item_value)])) - #return SequenceNode(u'tag:yaml.org,2002:pairs', value) - - def represent_dict(self, data): - return self.represent_mapping(u'tag:yaml.org,2002:map', data) - - def represent_set(self, data): - value = {} - for key in data: - value[key] = None - return self.represent_mapping(u'tag:yaml.org,2002:set', value) - - def represent_date(self, data): - value = unicode(data.isoformat()) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) - - def represent_datetime(self, data): - value = unicode(data.isoformat(' ')) - return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value) - - def represent_yaml_object(self, tag, data, cls, flow_style=None): - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__.copy() - return self.represent_mapping(tag, state, flow_style=flow_style) - - def represent_undefined(self, data): - raise RepresenterError("cannot represent an object: %s" % data) - -SafeRepresenter.add_representer(type(None), - SafeRepresenter.represent_none) - -SafeRepresenter.add_representer(str, - SafeRepresenter.represent_str) - -SafeRepresenter.add_representer(unicode, - SafeRepresenter.represent_unicode) - -SafeRepresenter.add_representer(bool, - SafeRepresenter.represent_bool) - -SafeRepresenter.add_representer(int, - SafeRepresenter.represent_int) - -SafeRepresenter.add_representer(long, - SafeRepresenter.represent_long) - -SafeRepresenter.add_representer(float, - SafeRepresenter.represent_float) - -SafeRepresenter.add_representer(list, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(tuple, - SafeRepresenter.represent_list) - -SafeRepresenter.add_representer(dict, - SafeRepresenter.represent_dict) - -SafeRepresenter.add_representer(set, - SafeRepresenter.represent_set) - -SafeRepresenter.add_representer(datetime.date, - SafeRepresenter.represent_date) - -SafeRepresenter.add_representer(datetime.datetime, - SafeRepresenter.represent_datetime) - -SafeRepresenter.add_representer(None, - SafeRepresenter.represent_undefined) - -class Representer(SafeRepresenter): - - def represent_str(self, data): - tag = None - style = None - try: - data = unicode(data, 'ascii') - tag = u'tag:yaml.org,2002:str' - except UnicodeDecodeError: - try: - data = unicode(data, 'utf-8') - tag = u'tag:yaml.org,2002:python/str' - except UnicodeDecodeError: - data = data.encode('base64') - tag = u'tag:yaml.org,2002:binary' - style = '|' - return self.represent_scalar(tag, data, style=style) - - def represent_unicode(self, data): - tag = None - try: - data.encode('ascii') - tag = u'tag:yaml.org,2002:python/unicode' - except UnicodeEncodeError: - tag = u'tag:yaml.org,2002:str' - return self.represent_scalar(tag, data) - - def represent_long(self, data): - tag = u'tag:yaml.org,2002:int' - if int(data) is not data: - tag = u'tag:yaml.org,2002:python/long' - return self.represent_scalar(tag, unicode(data)) - - def represent_complex(self, data): - if data.imag == 0.0: - data = u'%r' % data.real - elif data.real == 0.0: - data = u'%rj' % data.imag - elif data.imag > 0: - data = u'%r+%rj' % (data.real, data.imag) - else: - data = u'%r%rj' % (data.real, data.imag) - return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data) - - def represent_tuple(self, data): - return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data) - - def represent_name(self, data): - name = u'%s.%s' % (data.__module__, data.__name__) - return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'') - - def represent_module(self, data): - return self.represent_scalar( - u'tag:yaml.org,2002:python/module:'+data.__name__, u'') - - def represent_instance(self, data): - # For instances of classic classes, we use __getinitargs__ and - # __getstate__ to serialize the data. - - # If data.__getinitargs__ exists, the object must be reconstructed by - # calling cls(**args), where args is a tuple returned by - # __getinitargs__. Otherwise, the cls.__init__ method should never be - # called and the class instance is created by instantiating a trivial - # class and assigning to the instance's __class__ variable. - - # If data.__getstate__ exists, it returns the state of the object. - # Otherwise, the state of the object is data.__dict__. - - # We produce either a !!python/object or !!python/object/new node. - # If data.__getinitargs__ does not exist and state is a dictionary, we - # produce a !!python/object node . Otherwise we produce a - # !!python/object/new node. - - cls = data.__class__ - class_name = u'%s.%s' % (cls.__module__, cls.__name__) - args = None - state = None - if hasattr(data, '__getinitargs__'): - args = list(data.__getinitargs__()) - if hasattr(data, '__getstate__'): - state = data.__getstate__() - else: - state = data.__dict__ - if args is None and isinstance(state, dict): - return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+class_name, state) - if isinstance(state, dict) and not state: - return self.represent_sequence( - u'tag:yaml.org,2002:python/object/new:'+class_name, args) - value = {} - if args: - value['args'] = args - value['state'] = state - return self.represent_mapping( - u'tag:yaml.org,2002:python/object/new:'+class_name, value) - - def represent_object(self, data): - # We use __reduce__ API to save the data. data.__reduce__ returns - # a tuple of length 2-5: - # (function, args, state, listitems, dictitems) - - # For reconstructing, we calls function(*args), then set its state, - # listitems, and dictitems if they are not None. - - # A special case is when function.__name__ == '__newobj__'. In this - # case we create the object with args[0].__new__(*args). - - # Another special case is when __reduce__ returns a string - we don't - # support it. - - # We produce a !!python/object, !!python/object/new or - # !!python/object/apply node. - - cls = type(data) - if cls in copy_reg.dispatch_table: - reduce = copy_reg.dispatch_table[cls](data) - elif hasattr(data, '__reduce_ex__'): - reduce = data.__reduce_ex__(2) - elif hasattr(data, '__reduce__'): - reduce = data.__reduce__() - else: - raise RepresenterError("cannot represent object: %r" % data) - reduce = (list(reduce)+[None]*5)[:5] - function, args, state, listitems, dictitems = reduce - args = list(args) - if state is None: - state = {} - if listitems is not None: - listitems = list(listitems) - if dictitems is not None: - dictitems = dict(dictitems) - if function.__name__ == '__newobj__': - function = args[0] - args = args[1:] - tag = u'tag:yaml.org,2002:python/object/new:' - newobj = True - else: - tag = u'tag:yaml.org,2002:python/object/apply:' - newobj = False - function_name = u'%s.%s' % (function.__module__, function.__name__) - if not args and not listitems and not dictitems \ - and isinstance(state, dict) and newobj: - return self.represent_mapping( - u'tag:yaml.org,2002:python/object:'+function_name, state) - if not listitems and not dictitems \ - and isinstance(state, dict) and not state: - return self.represent_sequence(tag+function_name, args) - value = {} - if args: - value['args'] = args - if state or not isinstance(state, dict): - value['state'] = state - if listitems: - value['listitems'] = listitems - if dictitems: - value['dictitems'] = dictitems - return self.represent_mapping(tag+function_name, value) - -Representer.add_representer(str, - Representer.represent_str) - -Representer.add_representer(unicode, - Representer.represent_unicode) - -Representer.add_representer(long, - Representer.represent_long) - -Representer.add_representer(complex, - Representer.represent_complex) - -Representer.add_representer(tuple, - Representer.represent_tuple) - -Representer.add_representer(type, - Representer.represent_name) - -Representer.add_representer(types.ClassType, - Representer.represent_name) - -Representer.add_representer(types.FunctionType, - Representer.represent_name) - -Representer.add_representer(types.BuiltinFunctionType, - Representer.represent_name) - -Representer.add_representer(types.ModuleType, - Representer.represent_module) - -Representer.add_multi_representer(types.InstanceType, - Representer.represent_instance) - -Representer.add_multi_representer(object, - Representer.represent_object) - diff --git a/tablib/packages/yaml/resolver.py b/tablib/packages/yaml/resolver.py deleted file mode 100644 index 6b5ab87..0000000 --- a/tablib/packages/yaml/resolver.py +++ /dev/null @@ -1,224 +0,0 @@ - -__all__ = ['BaseResolver', 'Resolver'] - -from error import * -from nodes import * - -import re - -class ResolverError(YAMLError): - pass - -class BaseResolver(object): - - DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str' - DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq' - DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map' - - yaml_implicit_resolvers = {} - yaml_path_resolvers = {} - - def __init__(self): - self.resolver_exact_paths = [] - self.resolver_prefix_paths = [] - - def add_implicit_resolver(cls, tag, regexp, first): - if not 'yaml_implicit_resolvers' in cls.__dict__: - cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() - if first is None: - first = [None] - for ch in first: - cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp)) - add_implicit_resolver = classmethod(add_implicit_resolver) - - def add_path_resolver(cls, tag, path, kind=None): - # Note: `add_path_resolver` is experimental. The API could be changed. - # `new_path` is a pattern that is matched against the path from the - # root to the node that is being considered. `node_path` elements are - # tuples `(node_check, index_check)`. `node_check` is a node class: - # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`. `None` - # matches any kind of a node. `index_check` could be `None`, a boolean - # value, a string value, or a number. `None` and `False` match against - # any _value_ of sequence and mapping nodes. `True` matches against - # any _key_ of a mapping node. A string `index_check` matches against - # a mapping value that corresponds to a scalar key which content is - # equal to the `index_check` value. An integer `index_check` matches - # against a sequence value with the index equal to `index_check`. - if not 'yaml_path_resolvers' in cls.__dict__: - cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy() - new_path = [] - for element in path: - if isinstance(element, (list, tuple)): - if len(element) == 2: - node_check, index_check = element - elif len(element) == 1: - node_check = element[0] - index_check = True - else: - raise ResolverError("Invalid path element: %s" % element) - else: - node_check = None - index_check = element - if node_check is str: - node_check = ScalarNode - elif node_check is list: - node_check = SequenceNode - elif node_check is dict: - node_check = MappingNode - elif node_check not in [ScalarNode, SequenceNode, MappingNode] \ - and not isinstance(node_check, basestring) \ - and node_check is not None: - raise ResolverError("Invalid node checker: %s" % node_check) - if not isinstance(index_check, (basestring, int)) \ - and index_check is not None: - raise ResolverError("Invalid index checker: %s" % index_check) - new_path.append((node_check, index_check)) - if kind is str: - kind = ScalarNode - elif kind is list: - kind = SequenceNode - elif kind is dict: - kind = MappingNode - elif kind not in [ScalarNode, SequenceNode, MappingNode] \ - and kind is not None: - raise ResolverError("Invalid node kind: %s" % kind) - cls.yaml_path_resolvers[tuple(new_path), kind] = tag - add_path_resolver = classmethod(add_path_resolver) - - def descend_resolver(self, current_node, current_index): - if not self.yaml_path_resolvers: - return - exact_paths = {} - prefix_paths = [] - if current_node: - depth = len(self.resolver_prefix_paths) - for path, kind in self.resolver_prefix_paths[-1]: - if self.check_resolver_prefix(depth, path, kind, - current_node, current_index): - if len(path) > depth: - prefix_paths.append((path, kind)) - else: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - for path, kind in self.yaml_path_resolvers: - if not path: - exact_paths[kind] = self.yaml_path_resolvers[path, kind] - else: - prefix_paths.append((path, kind)) - self.resolver_exact_paths.append(exact_paths) - self.resolver_prefix_paths.append(prefix_paths) - - def ascend_resolver(self): - if not self.yaml_path_resolvers: - return - self.resolver_exact_paths.pop() - self.resolver_prefix_paths.pop() - - def check_resolver_prefix(self, depth, path, kind, - current_node, current_index): - node_check, index_check = path[depth-1] - if isinstance(node_check, basestring): - if current_node.tag != node_check: - return - elif node_check is not None: - if not isinstance(current_node, node_check): - return - if index_check is True and current_index is not None: - return - if (index_check is False or index_check is None) \ - and current_index is None: - return - if isinstance(index_check, basestring): - if not (isinstance(current_index, ScalarNode) - and index_check == current_index.value): - return - elif isinstance(index_check, int) and not isinstance(index_check, bool): - if index_check != current_index: - return - return True - - def resolve(self, kind, value, implicit): - if kind is ScalarNode and implicit[0]: - if value == u'': - resolvers = self.yaml_implicit_resolvers.get(u'', []) - else: - resolvers = self.yaml_implicit_resolvers.get(value[0], []) - resolvers += self.yaml_implicit_resolvers.get(None, []) - for tag, regexp in resolvers: - if regexp.match(value): - return tag - implicit = implicit[1] - if self.yaml_path_resolvers: - exact_paths = self.resolver_exact_paths[-1] - if kind in exact_paths: - return exact_paths[kind] - if None in exact_paths: - return exact_paths[None] - if kind is ScalarNode: - return self.DEFAULT_SCALAR_TAG - elif kind is SequenceNode: - return self.DEFAULT_SEQUENCE_TAG - elif kind is MappingNode: - return self.DEFAULT_MAPPING_TAG - -class Resolver(BaseResolver): - pass - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:bool', - re.compile(ur'''^(?:yes|Yes|YES|no|No|NO - |true|True|TRUE|false|False|FALSE - |on|On|ON|off|Off|OFF)$''', re.X), - list(u'yYnNtTfFoO')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:float', - re.compile(ur'''^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)? - |\.[0-9_]+(?:[eE][-+][0-9]+)? - |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]* - |[-+]?\.(?:inf|Inf|INF) - |\.(?:nan|NaN|NAN))$''', re.X), - list(u'-+0123456789.')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:int', - re.compile(ur'''^(?:[-+]?0b[0-1_]+ - |[-+]?0[0-7_]+ - |[-+]?(?:0|[1-9][0-9_]*) - |[-+]?0x[0-9a-fA-F_]+ - |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$''', re.X), - list(u'-+0123456789')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:merge', - re.compile(ur'^(?:<<)$'), - [u'<']) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:null', - re.compile(ur'''^(?: ~ - |null|Null|NULL - | )$''', re.X), - [u'~', u'n', u'N', u'']) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:timestamp', - re.compile(ur'''^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] - |[0-9][0-9][0-9][0-9] -[0-9][0-9]? -[0-9][0-9]? - (?:[Tt]|[ \t]+)[0-9][0-9]? - :[0-9][0-9] :[0-9][0-9] (?:\.[0-9]*)? - (?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$''', re.X), - list(u'0123456789')) - -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:value', - re.compile(ur'^(?:=)$'), - [u'=']) - -# The following resolver is only for documentation purposes. It cannot work -# because plain scalars cannot start with '!', '&', or '*'. -Resolver.add_implicit_resolver( - u'tag:yaml.org,2002:yaml', - re.compile(ur'^(?:!|&|\*)$'), - list(u'!&*')) - diff --git a/tablib/packages/yaml/scanner.py b/tablib/packages/yaml/scanner.py deleted file mode 100644 index 5228fad..0000000 --- a/tablib/packages/yaml/scanner.py +++ /dev/null @@ -1,1457 +0,0 @@ - -# Scanner produces tokens of the following types: -# STREAM-START -# STREAM-END -# DIRECTIVE(name, value) -# DOCUMENT-START -# DOCUMENT-END -# BLOCK-SEQUENCE-START -# BLOCK-MAPPING-START -# BLOCK-END -# FLOW-SEQUENCE-START -# FLOW-MAPPING-START -# FLOW-SEQUENCE-END -# FLOW-MAPPING-END -# BLOCK-ENTRY -# FLOW-ENTRY -# KEY -# VALUE -# ALIAS(value) -# ANCHOR(value) -# TAG(value) -# SCALAR(value, plain, style) -# -# Read comments in the Scanner code for more details. -# - -__all__ = ['Scanner', 'ScannerError'] - -from error import MarkedYAMLError -from tokens import * - -class ScannerError(MarkedYAMLError): - pass - -class SimpleKey(object): - # See below simple keys treatment. - - def __init__(self, token_number, required, index, line, column, mark): - self.token_number = token_number - self.required = required - self.index = index - self.line = line - self.column = column - self.mark = mark - -class Scanner(object): - - def __init__(self): - """Initialize the scanner.""" - # It is assumed that Scanner and Reader will have a common descendant. - # Reader do the dirty work of checking for BOM and converting the - # input data to Unicode. It also adds NUL to the end. - # - # Reader supports the following methods - # self.peek(i=0) # peek the next i-th character - # self.prefix(l=1) # peek the next l characters - # self.forward(l=1) # read the next l characters and move the pointer. - - # Had we reached the end of the stream? - self.done = False - - # The number of unclosed '{' and '['. `flow_level == 0` means block - # context. - self.flow_level = 0 - - # List of processed tokens that are not yet emitted. - self.tokens = [] - - # Add the STREAM-START token. - self.fetch_stream_start() - - # Number of tokens that were emitted through the `get_token` method. - self.tokens_taken = 0 - - # The current indentation level. - self.indent = -1 - - # Past indentation levels. - self.indents = [] - - # Variables related to simple keys treatment. - - # A simple key is a key that is not denoted by the '?' indicator. - # Example of simple keys: - # --- - # block simple key: value - # ? not a simple key: - # : { flow simple key: value } - # We emit the KEY token before all keys, so when we find a potential - # simple key, we try to locate the corresponding ':' indicator. - # Simple keys should be limited to a single line and 1024 characters. - - # Can a simple key start at the current position? A simple key may - # start: - # - at the beginning of the line, not counting indentation spaces - # (in block context), - # - after '{', '[', ',' (in the flow context), - # - after '?', ':', '-' (in the block context). - # In the block context, this flag also signifies if a block collection - # may start at the current position. - self.allow_simple_key = True - - # Keep track of possible simple keys. This is a dictionary. The key - # is `flow_level`; there can be no more that one possible simple key - # for each level. The value is a SimpleKey record: - # (token_number, required, index, line, column, mark) - # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), - # '[', or '{' tokens. - self.possible_simple_keys = {} - - # Public methods. - - def check_token(self, *choices): - # Check if the next token is one of the given types. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - if not choices: - return True - for choice in choices: - if isinstance(self.tokens[0], choice): - return True - return False - - def peek_token(self): - # Return the next token, but do not delete if from the queue. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - return self.tokens[0] - - def get_token(self): - # Return the next token. - while self.need_more_tokens(): - self.fetch_more_tokens() - if self.tokens: - self.tokens_taken += 1 - return self.tokens.pop(0) - - # Private methods. - - def need_more_tokens(self): - if self.done: - return False - if not self.tokens: - return True - # The current token may be a potential simple key, so we - # need to look further. - self.stale_possible_simple_keys() - if self.next_possible_simple_key() == self.tokens_taken: - return True - - def fetch_more_tokens(self): - - # Eat whitespaces and comments until we reach the next token. - self.scan_to_next_token() - - # Remove obsolete possible simple keys. - self.stale_possible_simple_keys() - - # Compare the current indentation and column. It may add some tokens - # and decrease the current indentation level. - self.unwind_indent(self.column) - - # Peek the next character. - ch = self.peek() - - # Is it the end of stream? - if ch == u'\0': - return self.fetch_stream_end() - - # Is it a directive? - if ch == u'%' and self.check_directive(): - return self.fetch_directive() - - # Is it the document start? - if ch == u'-' and self.check_document_start(): - return self.fetch_document_start() - - # Is it the document end? - if ch == u'.' and self.check_document_end(): - return self.fetch_document_end() - - # TODO: support for BOM within a stream. - #if ch == u'\uFEFF': - # return self.fetch_bom() <-- issue BOMToken - - # Note: the order of the following checks is NOT significant. - - # Is it the flow sequence start indicator? - if ch == u'[': - return self.fetch_flow_sequence_start() - - # Is it the flow mapping start indicator? - if ch == u'{': - return self.fetch_flow_mapping_start() - - # Is it the flow sequence end indicator? - if ch == u']': - return self.fetch_flow_sequence_end() - - # Is it the flow mapping end indicator? - if ch == u'}': - return self.fetch_flow_mapping_end() - - # Is it the flow entry indicator? - if ch == u',': - return self.fetch_flow_entry() - - # Is it the block entry indicator? - if ch == u'-' and self.check_block_entry(): - return self.fetch_block_entry() - - # Is it the key indicator? - if ch == u'?' and self.check_key(): - return self.fetch_key() - - # Is it the value indicator? - if ch == u':' and self.check_value(): - return self.fetch_value() - - # Is it an alias? - if ch == u'*': - return self.fetch_alias() - - # Is it an anchor? - if ch == u'&': - return self.fetch_anchor() - - # Is it a tag? - if ch == u'!': - return self.fetch_tag() - - # Is it a literal scalar? - if ch == u'|' and not self.flow_level: - return self.fetch_literal() - - # Is it a folded scalar? - if ch == u'>' and not self.flow_level: - return self.fetch_folded() - - # Is it a single quoted scalar? - if ch == u'\'': - return self.fetch_single() - - # Is it a double quoted scalar? - if ch == u'\"': - return self.fetch_double() - - # It must be a plain scalar then. - if self.check_plain(): - return self.fetch_plain() - - # No? It's an error. Let's produce a nice error message. - raise ScannerError("while scanning for the next token", None, - "found character %r that cannot start any token" - % ch.encode('utf-8'), self.get_mark()) - - # Simple keys treatment. - - def next_possible_simple_key(self): - # Return the number of the nearest possible simple key. Actually we - # don't need to loop through the whole dictionary. We may replace it - # with the following code: - # if not self.possible_simple_keys: - # return None - # return self.possible_simple_keys[ - # min(self.possible_simple_keys.keys())].token_number - min_token_number = None - for level in self.possible_simple_keys: - key = self.possible_simple_keys[level] - if min_token_number is None or key.token_number < min_token_number: - min_token_number = key.token_number - return min_token_number - - def stale_possible_simple_keys(self): - # Remove entries that are no longer possible simple keys. According to - # the YAML specification, simple keys - # - should be limited to a single line, - # - should be no longer than 1024 characters. - # Disabling this procedure will allow simple keys of any length and - # height (may cause problems if indentation is broken though). - for level in self.possible_simple_keys.keys(): - key = self.possible_simple_keys[level] - if key.line != self.line \ - or self.index-key.index > 1024: - if key.required: - raise ScannerError("while scanning a simple key", key.mark, - "could not found expected ':'", self.get_mark()) - del self.possible_simple_keys[level] - - def save_possible_simple_key(self): - # The next token may start a simple key. We check if it's possible - # and save its position. This function is called for - # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. - - # Check if a simple key is required at the current position. - required = not self.flow_level and self.indent == self.column - - # A simple key is required only if it is the first token in the current - # line. Therefore it is always allowed. - assert self.allow_simple_key or not required - - # The next token might be a simple key. Let's save it's number and - # position. - if self.allow_simple_key: - self.remove_possible_simple_key() - token_number = self.tokens_taken+len(self.tokens) - key = SimpleKey(token_number, required, - self.index, self.line, self.column, self.get_mark()) - self.possible_simple_keys[self.flow_level] = key - - def remove_possible_simple_key(self): - # Remove the saved possible key position at the current flow level. - if self.flow_level in self.possible_simple_keys: - key = self.possible_simple_keys[self.flow_level] - - if key.required: - raise ScannerError("while scanning a simple key", key.mark, - "could not found expected ':'", self.get_mark()) - - del self.possible_simple_keys[self.flow_level] - - # Indentation functions. - - def unwind_indent(self, column): - - ## In flow context, tokens should respect indentation. - ## Actually the condition should be `self.indent >= column` according to - ## the spec. But this condition will prohibit intuitively correct - ## constructions such as - ## key : { - ## } - #if self.flow_level and self.indent > column: - # raise ScannerError(None, None, - # "invalid intendation or unclosed '[' or '{'", - # self.get_mark()) - - # In the flow context, indentation is ignored. We make the scanner less - # restrictive then specification requires. - if self.flow_level: - return - - # In block context, we may need to issue the BLOCK-END tokens. - while self.indent > column: - mark = self.get_mark() - self.indent = self.indents.pop() - self.tokens.append(BlockEndToken(mark, mark)) - - def add_indent(self, column): - # Check if we need to increase indentation. - if self.indent < column: - self.indents.append(self.indent) - self.indent = column - return True - return False - - # Fetchers. - - def fetch_stream_start(self): - # We always add STREAM-START as the first token and STREAM-END as the - # last token. - - # Read the token. - mark = self.get_mark() - - # Add STREAM-START. - self.tokens.append(StreamStartToken(mark, mark, - encoding=self.encoding)) - - - def fetch_stream_end(self): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. - self.remove_possible_simple_key() - self.allow_simple_key = False - self.possible_simple_keys = {} - - # Read the token. - mark = self.get_mark() - - # Add STREAM-END. - self.tokens.append(StreamEndToken(mark, mark)) - - # The steam is finished. - self.done = True - - def fetch_directive(self): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. - self.remove_possible_simple_key() - self.allow_simple_key = False - - # Scan and add DIRECTIVE. - self.tokens.append(self.scan_directive()) - - def fetch_document_start(self): - self.fetch_document_indicator(DocumentStartToken) - - def fetch_document_end(self): - self.fetch_document_indicator(DocumentEndToken) - - def fetch_document_indicator(self, TokenClass): - - # Set the current intendation to -1. - self.unwind_indent(-1) - - # Reset simple keys. Note that there could not be a block collection - # after '---'. - self.remove_possible_simple_key() - self.allow_simple_key = False - - # Add DOCUMENT-START or DOCUMENT-END. - start_mark = self.get_mark() - self.forward(3) - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_start(self): - self.fetch_flow_collection_start(FlowSequenceStartToken) - - def fetch_flow_mapping_start(self): - self.fetch_flow_collection_start(FlowMappingStartToken) - - def fetch_flow_collection_start(self, TokenClass): - - # '[' and '{' may start a simple key. - self.save_possible_simple_key() - - # Increase the flow level. - self.flow_level += 1 - - # Simple keys are allowed after '[' and '{'. - self.allow_simple_key = True - - # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_sequence_end(self): - self.fetch_flow_collection_end(FlowSequenceEndToken) - - def fetch_flow_mapping_end(self): - self.fetch_flow_collection_end(FlowMappingEndToken) - - def fetch_flow_collection_end(self, TokenClass): - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Decrease the flow level. - self.flow_level -= 1 - - # No simple keys after ']' or '}'. - self.allow_simple_key = False - - # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(TokenClass(start_mark, end_mark)) - - def fetch_flow_entry(self): - - # Simple keys are allowed after ','. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add FLOW-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(FlowEntryToken(start_mark, end_mark)) - - def fetch_block_entry(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a new entry? - if not self.allow_simple_key: - raise ScannerError(None, None, - "sequence entries are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-SEQUENCE-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockSequenceStartToken(mark, mark)) - - # It's an error for the block entry to occur in the flow context, - # but we let the parser detect this. - else: - pass - - # Simple keys are allowed after '-'. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add BLOCK-ENTRY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(BlockEntryToken(start_mark, end_mark)) - - def fetch_key(self): - - # Block context needs additional checks. - if not self.flow_level: - - # Are we allowed to start a key (not nessesary a simple)? - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping keys are not allowed here", - self.get_mark()) - - # We may need to add BLOCK-MAPPING-START. - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after '?' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add KEY. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(KeyToken(start_mark, end_mark)) - - def fetch_value(self): - - # Do we determine a simple key? - if self.flow_level in self.possible_simple_keys: - - # Add KEY. - key = self.possible_simple_keys[self.flow_level] - del self.possible_simple_keys[self.flow_level] - self.tokens.insert(key.token_number-self.tokens_taken, - KeyToken(key.mark, key.mark)) - - # If this key starts a new block mapping, we need to add - # BLOCK-MAPPING-START. - if not self.flow_level: - if self.add_indent(key.column): - self.tokens.insert(key.token_number-self.tokens_taken, - BlockMappingStartToken(key.mark, key.mark)) - - # There cannot be two simple keys one after another. - self.allow_simple_key = False - - # It must be a part of a complex key. - else: - - # Block context needs additional checks. - # (Do we really need them? They will be catched by the parser - # anyway.) - if not self.flow_level: - - # We are allowed to start a complex value if and only if - # we can start a simple key. - if not self.allow_simple_key: - raise ScannerError(None, None, - "mapping values are not allowed here", - self.get_mark()) - - # If this value starts a new block mapping, we need to add - # BLOCK-MAPPING-START. It will be detected as an error later by - # the parser. - if not self.flow_level: - if self.add_indent(self.column): - mark = self.get_mark() - self.tokens.append(BlockMappingStartToken(mark, mark)) - - # Simple keys are allowed after ':' in the block context. - self.allow_simple_key = not self.flow_level - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Add VALUE. - start_mark = self.get_mark() - self.forward() - end_mark = self.get_mark() - self.tokens.append(ValueToken(start_mark, end_mark)) - - def fetch_alias(self): - - # ALIAS could be a simple key. - self.save_possible_simple_key() - - # No simple keys after ALIAS. - self.allow_simple_key = False - - # Scan and add ALIAS. - self.tokens.append(self.scan_anchor(AliasToken)) - - def fetch_anchor(self): - - # ANCHOR could start a simple key. - self.save_possible_simple_key() - - # No simple keys after ANCHOR. - self.allow_simple_key = False - - # Scan and add ANCHOR. - self.tokens.append(self.scan_anchor(AnchorToken)) - - def fetch_tag(self): - - # TAG could start a simple key. - self.save_possible_simple_key() - - # No simple keys after TAG. - self.allow_simple_key = False - - # Scan and add TAG. - self.tokens.append(self.scan_tag()) - - def fetch_literal(self): - self.fetch_block_scalar(style='|') - - def fetch_folded(self): - self.fetch_block_scalar(style='>') - - def fetch_block_scalar(self, style): - - # A simple key may follow a block scalar. - self.allow_simple_key = True - - # Reset possible simple key on the current level. - self.remove_possible_simple_key() - - # Scan and add SCALAR. - self.tokens.append(self.scan_block_scalar(style)) - - def fetch_single(self): - self.fetch_flow_scalar(style='\'') - - def fetch_double(self): - self.fetch_flow_scalar(style='"') - - def fetch_flow_scalar(self, style): - - # A flow scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after flow scalars. - self.allow_simple_key = False - - # Scan and add SCALAR. - self.tokens.append(self.scan_flow_scalar(style)) - - def fetch_plain(self): - - # A plain scalar could be a simple key. - self.save_possible_simple_key() - - # No simple keys after plain scalars. But note that `scan_plain` will - # change this flag if the scan is finished at the beginning of the - # line. - self.allow_simple_key = False - - # Scan and add SCALAR. May change `allow_simple_key`. - self.tokens.append(self.scan_plain()) - - # Checkers. - - def check_directive(self): - - # DIRECTIVE: ^ '%' ... - # The '%' indicator is already checked. - if self.column == 0: - return True - - def check_document_start(self): - - # DOCUMENT-START: ^ '---' (' '|'\n') - if self.column == 0: - if self.prefix(3) == u'---' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return True - - def check_document_end(self): - - # DOCUMENT-END: ^ '...' (' '|'\n') - if self.column == 0: - if self.prefix(3) == u'...' \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return True - - def check_block_entry(self): - - # BLOCK-ENTRY: '-' (' '|'\n') - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_key(self): - - # KEY(flow context): '?' - if self.flow_level: - return True - - # KEY(block context): '?' (' '|'\n') - else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_value(self): - - # VALUE(flow context): ':' - if self.flow_level: - return True - - # VALUE(block context): ':' (' '|'\n') - else: - return self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029' - - def check_plain(self): - - # A plain scalar may start with any non-space character except: - # '-', '?', ':', ',', '[', ']', '{', '}', - # '#', '&', '*', '!', '|', '>', '\'', '\"', - # '%', '@', '`'. - # - # It may also start with - # '-', '?', ':' - # if it is followed by a non-space character. - # - # Note that we limit the last rule to the block context (except the - # '-' character) because we want the flow context to be space - # independent. - ch = self.peek() - return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \ - or (self.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029' - and (ch == u'-' or (not self.flow_level and ch in u'?:'))) - - # Scanners. - - def scan_to_next_token(self): - # We ignore spaces, line breaks and comments. - # If we find a line break in the block context, we set the flag - # `allow_simple_key` on. - # The byte order mark is stripped if it's the first character in the - # stream. We do not yet support BOM inside the stream as the - # specification requires. Any such mark will be considered as a part - # of the document. - # - # TODO: We need to make tab handling rules more sane. A good rule is - # Tabs cannot precede tokens - # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END, - # KEY(block), VALUE(block), BLOCK-ENTRY - # So the checking code is - # if : - # self.allow_simple_keys = False - # We also need to add the check for `allow_simple_keys == True` to - # `unwind_indent` before issuing BLOCK-END. - # Scanners for block, flow, and plain scalars need to be modified. - - if self.index == 0 and self.peek() == u'\uFEFF': - self.forward() - found = False - while not found: - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - if self.scan_line_break(): - if not self.flow_level: - self.allow_simple_key = True - else: - found = True - - def scan_directive(self): - # See the specification for details. - start_mark = self.get_mark() - self.forward() - name = self.scan_directive_name(start_mark) - value = None - if name == u'YAML': - value = self.scan_yaml_directive_value(start_mark) - end_mark = self.get_mark() - elif name == u'TAG': - value = self.scan_tag_directive_value(start_mark) - end_mark = self.get_mark() - else: - end_mark = self.get_mark() - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - self.scan_directive_ignored_line(start_mark) - return DirectiveToken(name, value, start_mark, end_mark) - - def scan_directive_name(self, start_mark): - # See the specification for details. - length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError("while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) - return value - - def scan_yaml_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - major = self.scan_yaml_directive_number(start_mark) - if self.peek() != '.': - raise ScannerError("while scanning a directive", start_mark, - "expected a digit or '.', but found %r" - % self.peek().encode('utf-8'), - self.get_mark()) - self.forward() - minor = self.scan_yaml_directive_number(start_mark) - if self.peek() not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected a digit or ' ', but found %r" - % self.peek().encode('utf-8'), - self.get_mark()) - return (major, minor) - - def scan_yaml_directive_number(self, start_mark): - # See the specification for details. - ch = self.peek() - if not (u'0' <= ch <= u'9'): - raise ScannerError("while scanning a directive", start_mark, - "expected a digit, but found %r" % ch.encode('utf-8'), - self.get_mark()) - length = 0 - while u'0' <= self.peek(length) <= u'9': - length += 1 - value = int(self.prefix(length)) - self.forward(length) - return value - - def scan_tag_directive_value(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - handle = self.scan_tag_directive_handle(start_mark) - while self.peek() == u' ': - self.forward() - prefix = self.scan_tag_directive_prefix(start_mark) - return (handle, prefix) - - def scan_tag_directive_handle(self, start_mark): - # See the specification for details. - value = self.scan_tag_handle('directive', start_mark) - ch = self.peek() - if ch != u' ': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) - return value - - def scan_tag_directive_prefix(self, start_mark): - # See the specification for details. - value = self.scan_tag_uri('directive', start_mark) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) - return value - - def scan_directive_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a directive", start_mark, - "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.get_mark()) - self.scan_line_break() - - def scan_anchor(self, TokenClass): - # The specification does not restrict characters for anchors and - # aliases. This may lead to problems, for instance, the document: - # [ *alias, value ] - # can be interpteted in two ways, as - # [ "value" ] - # and - # [ *alias , "value" ] - # Therefore we restrict aliases to numbers and ASCII letters. - start_mark = self.get_mark() - indicator = self.peek() - if indicator == u'*': - name = 'alias' - else: - name = 'anchor' - self.forward() - length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': - length += 1 - ch = self.peek(length) - if not length: - raise ScannerError("while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) - value = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': - raise ScannerError("while scanning an %s" % name, start_mark, - "expected alphabetic or numeric character, but found %r" - % ch.encode('utf-8'), self.get_mark()) - end_mark = self.get_mark() - return TokenClass(value, start_mark, end_mark) - - def scan_tag(self): - # See the specification for details. - start_mark = self.get_mark() - ch = self.peek(1) - if ch == u'<': - handle = None - self.forward(2) - suffix = self.scan_tag_uri('tag', start_mark) - if self.peek() != u'>': - raise ScannerError("while parsing a tag", start_mark, - "expected '>', but found %r" % self.peek().encode('utf-8'), - self.get_mark()) - self.forward() - elif ch in u'\0 \t\r\n\x85\u2028\u2029': - handle = None - suffix = u'!' - self.forward() - else: - length = 1 - use_handle = False - while ch not in u'\0 \r\n\x85\u2028\u2029': - if ch == u'!': - use_handle = True - break - length += 1 - ch = self.peek(length) - handle = u'!' - if use_handle: - handle = self.scan_tag_handle('tag', start_mark) - else: - handle = u'!' - self.forward() - suffix = self.scan_tag_uri('tag', start_mark) - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a tag", start_mark, - "expected ' ', but found %r" % ch.encode('utf-8'), - self.get_mark()) - value = (handle, suffix) - end_mark = self.get_mark() - return TagToken(value, start_mark, end_mark) - - def scan_block_scalar(self, style): - # See the specification for details. - - if style == '>': - folded = True - else: - folded = False - - chunks = [] - start_mark = self.get_mark() - - # Scan the header. - self.forward() - chomping, increment = self.scan_block_scalar_indicators(start_mark) - self.scan_block_scalar_ignored_line(start_mark) - - # Determine the indentation level and go to the first non-empty line. - min_indent = self.indent+1 - if min_indent < 1: - min_indent = 1 - if increment is None: - breaks, max_indent, end_mark = self.scan_block_scalar_indentation() - indent = max(min_indent, max_indent) - else: - indent = min_indent+increment-1 - breaks, end_mark = self.scan_block_scalar_breaks(indent) - line_break = u'' - - # Scan the inner part of the block scalar. - while self.column == indent and self.peek() != u'\0': - chunks.extend(breaks) - leading_non_space = self.peek() not in u' \t' - length = 0 - while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': - length += 1 - chunks.append(self.prefix(length)) - self.forward(length) - line_break = self.scan_line_break() - breaks, end_mark = self.scan_block_scalar_breaks(indent) - if self.column == indent and self.peek() != u'\0': - - # Unfortunately, folding rules are ambiguous. - # - # This is the folding according to the specification: - - if folded and line_break == u'\n' \ - and leading_non_space and self.peek() not in u' \t': - if not breaks: - chunks.append(u' ') - else: - chunks.append(line_break) - - # This is Clark Evans's interpretation (also in the spec - # examples): - # - #if folded and line_break == u'\n': - # if not breaks: - # if self.peek() not in ' \t': - # chunks.append(u' ') - # else: - # chunks.append(line_break) - #else: - # chunks.append(line_break) - else: - break - - # Chomp the tail. - if chomping is not False: - chunks.append(line_break) - if chomping is True: - chunks.extend(breaks) - - # We are done. - return ScalarToken(u''.join(chunks), False, start_mark, end_mark, - style) - - def scan_block_scalar_indicators(self, start_mark): - # See the specification for details. - chomping = None - increment = None - ch = self.peek() - if ch in u'+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch in u'0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - self.get_mark()) - self.forward() - elif ch in u'0123456789': - increment = int(ch) - if increment == 0: - raise ScannerError("while scanning a block scalar", start_mark, - "expected indentation indicator in the range 1-9, but found 0", - self.get_mark()) - self.forward() - ch = self.peek() - if ch in u'+-': - if ch == '+': - chomping = True - else: - chomping = False - self.forward() - ch = self.peek() - if ch not in u'\0 \r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_mark, - "expected chomping or indentation indicators, but found %r" - % ch.encode('utf-8'), self.get_mark()) - return chomping, increment - - def scan_block_scalar_ignored_line(self, start_mark): - # See the specification for details. - while self.peek() == u' ': - self.forward() - if self.peek() == u'#': - while self.peek() not in u'\0\r\n\x85\u2028\u2029': - self.forward() - ch = self.peek() - if ch not in u'\0\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a block scalar", start_mark, - "expected a comment or a line break, but found %r" - % ch.encode('utf-8'), self.get_mark()) - self.scan_line_break() - - def scan_block_scalar_indentation(self): - # See the specification for details. - chunks = [] - max_indent = 0 - end_mark = self.get_mark() - while self.peek() in u' \r\n\x85\u2028\u2029': - if self.peek() != u' ': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - else: - self.forward() - if self.column > max_indent: - max_indent = self.column - return chunks, max_indent, end_mark - - def scan_block_scalar_breaks(self, indent): - # See the specification for details. - chunks = [] - end_mark = self.get_mark() - while self.column < indent and self.peek() == u' ': - self.forward() - while self.peek() in u'\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - end_mark = self.get_mark() - while self.column < indent and self.peek() == u' ': - self.forward() - return chunks, end_mark - - def scan_flow_scalar(self, style): - # See the specification for details. - # Note that we loose indentation rules for quoted scalars. Quoted - # scalars don't need to adhere indentation because " and ' clearly - # mark the beginning and the end of them. Therefore we are less - # restrictive then the specification requires. We only need to check - # that document separators are not included in scalars. - if style == '"': - double = True - else: - double = False - chunks = [] - start_mark = self.get_mark() - quote = self.peek() - self.forward() - chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - while self.peek() != quote: - chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) - chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) - self.forward() - end_mark = self.get_mark() - return ScalarToken(u''.join(chunks), False, start_mark, end_mark, - style) - - ESCAPE_REPLACEMENTS = { - u'0': u'\0', - u'a': u'\x07', - u'b': u'\x08', - u't': u'\x09', - u'\t': u'\x09', - u'n': u'\x0A', - u'v': u'\x0B', - u'f': u'\x0C', - u'r': u'\x0D', - u'e': u'\x1B', - u' ': u'\x20', - u'\"': u'\"', - u'\\': u'\\', - u'N': u'\x85', - u'_': u'\xA0', - u'L': u'\u2028', - u'P': u'\u2029', - } - - ESCAPE_CODES = { - u'x': 2, - u'u': 4, - u'U': 8, - } - - def scan_flow_scalar_non_spaces(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - length = 0 - while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': - length += 1 - if length: - chunks.append(self.prefix(length)) - self.forward(length) - ch = self.peek() - if not double and ch == u'\'' and self.peek(1) == u'\'': - chunks.append(u'\'') - self.forward(2) - elif (double and ch == u'\'') or (not double and ch in u'\"\\'): - chunks.append(ch) - self.forward() - elif double and ch == u'\\': - self.forward() - ch = self.peek() - if ch in self.ESCAPE_REPLACEMENTS: - chunks.append(self.ESCAPE_REPLACEMENTS[ch]) - self.forward() - elif ch in self.ESCAPE_CODES: - length = self.ESCAPE_CODES[ch] - self.forward() - for k in range(length): - if self.peek(k) not in u'0123456789ABCDEFabcdef': - raise ScannerError("while scanning a double-quoted scalar", start_mark, - "expected escape sequence of %d hexdecimal numbers, but found %r" % - (length, self.peek(k).encode('utf-8')), self.get_mark()) - code = int(self.prefix(length), 16) - chunks.append(unichr(code)) - self.forward(length) - elif ch in u'\r\n\x85\u2028\u2029': - self.scan_line_break() - chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) - else: - raise ScannerError("while scanning a double-quoted scalar", start_mark, - "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) - else: - return chunks - - def scan_flow_scalar_spaces(self, double, start_mark): - # See the specification for details. - chunks = [] - length = 0 - while self.peek(length) in u' \t': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch == u'\0': - raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected end of stream", self.get_mark()) - elif ch in u'\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - breaks = self.scan_flow_scalar_breaks(double, start_mark) - if line_break != u'\n': - chunks.append(line_break) - elif not breaks: - chunks.append(u' ') - chunks.extend(breaks) - else: - chunks.append(whitespaces) - return chunks - - def scan_flow_scalar_breaks(self, double, start_mark): - # See the specification for details. - chunks = [] - while True: - # Instead of checking indentation, we check for document - # separators. - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - raise ScannerError("while scanning a quoted scalar", start_mark, - "found unexpected document separator", self.get_mark()) - while self.peek() in u' \t': - self.forward() - if self.peek() in u'\r\n\x85\u2028\u2029': - chunks.append(self.scan_line_break()) - else: - return chunks - - def scan_plain(self): - # See the specification for details. - # We add an additional restriction for the flow context: - # plain scalars in the flow context cannot contain ',', ':' and '?'. - # We also keep track of the `allow_simple_key` flag here. - # Indentation rules are loosed for the flow context. - chunks = [] - start_mark = self.get_mark() - end_mark = start_mark - indent = self.indent+1 - # We allow zero indentation for scalars, but then we need to check for - # document separators at the beginning of the line. - #if indent == 0: - # indent = 1 - spaces = [] - while True: - length = 0 - if self.peek() == u'#': - break - while True: - ch = self.peek(length) - if ch in u'\0 \t\r\n\x85\u2028\u2029' \ - or (not self.flow_level and ch == u':' and - self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ - or (self.flow_level and ch in u',:?[]{}'): - break - length += 1 - # It's not clear what we should do with ':' in the flow context. - if (self.flow_level and ch == u':' - and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): - self.forward(length) - raise ScannerError("while scanning a plain scalar", start_mark, - "found unexpected ':'", self.get_mark(), - "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") - if length == 0: - break - self.allow_simple_key = False - chunks.extend(spaces) - chunks.append(self.prefix(length)) - self.forward(length) - end_mark = self.get_mark() - spaces = self.scan_plain_spaces(indent, start_mark) - if not spaces or self.peek() == u'#' \ - or (not self.flow_level and self.column < indent): - break - return ScalarToken(u''.join(chunks), True, start_mark, end_mark) - - def scan_plain_spaces(self, indent, start_mark): - # See the specification for details. - # The specification is really confusing about tabs in plain scalars. - # We just forbid them completely. Do not use tabs in YAML! - chunks = [] - length = 0 - while self.peek(length) in u' ': - length += 1 - whitespaces = self.prefix(length) - self.forward(length) - ch = self.peek() - if ch in u'\r\n\x85\u2028\u2029': - line_break = self.scan_line_break() - self.allow_simple_key = True - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return - breaks = [] - while self.peek() in u' \r\n\x85\u2028\u2029': - if self.peek() == ' ': - self.forward() - else: - breaks.append(self.scan_line_break()) - prefix = self.prefix(3) - if (prefix == u'---' or prefix == u'...') \ - and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': - return - if line_break != u'\n': - chunks.append(line_break) - elif not breaks: - chunks.append(u' ') - chunks.extend(breaks) - elif whitespaces: - chunks.append(whitespaces) - return chunks - - def scan_tag_handle(self, name, start_mark): - # See the specification for details. - # For some strange reasons, the specification does not allow '_' in - # tag handles. I have allowed it anyway. - ch = self.peek() - if ch != u'!': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch.encode('utf-8'), - self.get_mark()) - length = 1 - ch = self.peek(length) - if ch != u' ': - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-_': - length += 1 - ch = self.peek(length) - if ch != u'!': - self.forward(length) - raise ScannerError("while scanning a %s" % name, start_mark, - "expected '!', but found %r" % ch.encode('utf-8'), - self.get_mark()) - length += 1 - value = self.prefix(length) - self.forward(length) - return value - - def scan_tag_uri(self, name, start_mark): - # See the specification for details. - # Note: we do not check if URI is well-formed. - chunks = [] - length = 0 - ch = self.peek(length) - while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ - or ch in u'-;/?:@&=+$,_.!~*\'()[]%': - if ch == u'%': - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - chunks.append(self.scan_uri_escapes(name, start_mark)) - else: - length += 1 - ch = self.peek(length) - if length: - chunks.append(self.prefix(length)) - self.forward(length) - length = 0 - if not chunks: - raise ScannerError("while parsing a %s" % name, start_mark, - "expected URI, but found %r" % ch.encode('utf-8'), - self.get_mark()) - return u''.join(chunks) - - def scan_uri_escapes(self, name, start_mark): - # See the specification for details. - bytes = [] - mark = self.get_mark() - while self.peek() == u'%': - self.forward() - for k in range(2): - if self.peek(k) not in u'0123456789ABCDEFabcdef': - raise ScannerError("while scanning a %s" % name, start_mark, - "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % - (self.peek(k).encode('utf-8')), self.get_mark()) - bytes.append(chr(int(self.prefix(2), 16))) - self.forward(2) - try: - value = unicode(''.join(bytes), 'utf-8') - except UnicodeDecodeError, exc: - raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) - return value - - def scan_line_break(self): - # Transforms: - # '\r\n' : '\n' - # '\r' : '\n' - # '\n' : '\n' - # '\x85' : '\n' - # '\u2028' : '\u2028' - # '\u2029 : '\u2029' - # default : '' - ch = self.peek() - if ch in u'\r\n\x85': - if self.prefix(2) == u'\r\n': - self.forward(2) - else: - self.forward() - return u'\n' - elif ch in u'\u2028\u2029': - self.forward() - return ch - return u'' - -#try: -# import psyco -# psyco.bind(Scanner) -#except ImportError: -# pass - diff --git a/tablib/packages/yaml/serializer.py b/tablib/packages/yaml/serializer.py deleted file mode 100644 index 0bf1e96..0000000 --- a/tablib/packages/yaml/serializer.py +++ /dev/null @@ -1,111 +0,0 @@ - -__all__ = ['Serializer', 'SerializerError'] - -from error import YAMLError -from events import * -from nodes import * - -class SerializerError(YAMLError): - pass - -class Serializer(object): - - ANCHOR_TEMPLATE = u'id%03d' - - def __init__(self, encoding=None, - explicit_start=None, explicit_end=None, version=None, tags=None): - self.use_encoding = encoding - self.use_explicit_start = explicit_start - self.use_explicit_end = explicit_end - self.use_version = version - self.use_tags = tags - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - self.closed = None - - def open(self): - if self.closed is None: - self.emit(StreamStartEvent(encoding=self.use_encoding)) - self.closed = False - elif self.closed: - raise SerializerError("serializer is closed") - else: - raise SerializerError("serializer is already opened") - - def close(self): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif not self.closed: - self.emit(StreamEndEvent()) - self.closed = True - - #def __del__(self): - # self.close() - - def serialize(self, node): - if self.closed is None: - raise SerializerError("serializer is not opened") - elif self.closed: - raise SerializerError("serializer is closed") - self.emit(DocumentStartEvent(explicit=self.use_explicit_start, - version=self.use_version, tags=self.use_tags)) - self.anchor_node(node) - self.serialize_node(node, None, None) - self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) - self.serialized_nodes = {} - self.anchors = {} - self.last_anchor_id = 0 - - def anchor_node(self, node): - if node in self.anchors: - if self.anchors[node] is None: - self.anchors[node] = self.generate_anchor(node) - else: - self.anchors[node] = None - if isinstance(node, SequenceNode): - for item in node.value: - self.anchor_node(item) - elif isinstance(node, MappingNode): - for key, value in node.value: - self.anchor_node(key) - self.anchor_node(value) - - def generate_anchor(self, node): - self.last_anchor_id += 1 - return self.ANCHOR_TEMPLATE % self.last_anchor_id - - def serialize_node(self, node, parent, index): - alias = self.anchors[node] - if node in self.serialized_nodes: - self.emit(AliasEvent(alias)) - else: - self.serialized_nodes[node] = True - self.descend_resolver(parent, index) - if isinstance(node, ScalarNode): - detected_tag = self.resolve(ScalarNode, node.value, (True, False)) - default_tag = self.resolve(ScalarNode, node.value, (False, True)) - implicit = (node.tag == detected_tag), (node.tag == default_tag) - self.emit(ScalarEvent(alias, node.tag, implicit, node.value, - style=node.style)) - elif isinstance(node, SequenceNode): - implicit = (node.tag - == self.resolve(SequenceNode, node.value, True)) - self.emit(SequenceStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style)) - index = 0 - for item in node.value: - self.serialize_node(item, node, index) - index += 1 - self.emit(SequenceEndEvent()) - elif isinstance(node, MappingNode): - implicit = (node.tag - == self.resolve(MappingNode, node.value, True)) - self.emit(MappingStartEvent(alias, node.tag, implicit, - flow_style=node.flow_style)) - for key, value in node.value: - self.serialize_node(key, node, None) - self.serialize_node(value, node, key) - self.emit(MappingEndEvent()) - self.ascend_resolver() - diff --git a/tablib/packages/yaml/tokens.py b/tablib/packages/yaml/tokens.py deleted file mode 100644 index 4d0b48a..0000000 --- a/tablib/packages/yaml/tokens.py +++ /dev/null @@ -1,104 +0,0 @@ - -class Token(object): - def __init__(self, start_mark, end_mark): - self.start_mark = start_mark - self.end_mark = end_mark - def __repr__(self): - attributes = [key for key in self.__dict__ - if not key.endswith('_mark')] - attributes.sort() - arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) - for key in attributes]) - return '%s(%s)' % (self.__class__.__name__, arguments) - -#class BOMToken(Token): -# id = '' - -class DirectiveToken(Token): - id = '' - def __init__(self, name, value, start_mark, end_mark): - self.name = name - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class DocumentStartToken(Token): - id = '' - -class DocumentEndToken(Token): - id = '' - -class StreamStartToken(Token): - id = '' - def __init__(self, start_mark=None, end_mark=None, - encoding=None): - self.start_mark = start_mark - self.end_mark = end_mark - self.encoding = encoding - -class StreamEndToken(Token): - id = '' - -class BlockSequenceStartToken(Token): - id = '' - -class BlockMappingStartToken(Token): - id = '' - -class BlockEndToken(Token): - id = '' - -class FlowSequenceStartToken(Token): - id = '[' - -class FlowMappingStartToken(Token): - id = '{' - -class FlowSequenceEndToken(Token): - id = ']' - -class FlowMappingEndToken(Token): - id = '}' - -class KeyToken(Token): - id = '?' - -class ValueToken(Token): - id = ':' - -class BlockEntryToken(Token): - id = '-' - -class FlowEntryToken(Token): - id = ',' - -class AliasToken(Token): - id = '' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class AnchorToken(Token): - id = '' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class TagToken(Token): - id = '' - def __init__(self, value, start_mark, end_mark): - self.value = value - self.start_mark = start_mark - self.end_mark = end_mark - -class ScalarToken(Token): - id = '' - def __init__(self, value, plain, start_mark, end_mark, style=None): - self.value = value - self.plain = plain - self.start_mark = start_mark - self.end_mark = end_mark - self.style = style -