From 59d1f9fdedac38032b55649eb8e416c21f0eb688 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 29 Aug 2010 20:12:39 -0400 Subject: [PATCH 01/23] Changed Data class to Dataset. --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 77d7dcb..dda5e3a 100644 --- a/README.rst +++ b/README.rst @@ -54,7 +54,7 @@ Populate fresh data files: :: ('Henry', 'Ford', 2.3) ] - data = tablib.Data(*data, headers=headers) + data = tablib.Dataset(*data, headers=headers) # Establish file location and save data.save('test.xls') @@ -83,7 +83,7 @@ Slice columns by header: :: Manipulate rows by index: :: - data.delRow(0) + del data[0] print data[0:1] # >>> [('George', 'Washington', 2.6), ('Henry', 'Ford', 2.3)] From 95c98861daef57091347343f9238bdb25bfc5c4d Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 29 Aug 2010 22:41:34 -0400 Subject: [PATCH 02/23] Object structure in place --- tablib/core.py | 116 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 3 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index aa97ebc..8ba5c43 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -6,14 +6,124 @@ # / /_ / /_/ / _ /_/ /_ /_/ // __// /_/ / # \__/ \__,_/ /_.___/ /_.___/ \___/ \__,_/ + +import csv + + + __version__ = '0.0.2' __build__ = '0x000002' __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' +__all__ = ['Dataset', 'source'] -def cheese(): + + +class Dataset(object): + """Amazing Tabular Dataset object. 
""" + + def __init__(self, *args, **kwargs): + + self._data = [].append(args) + + try: + self.headers = kwargs['headers'] + except KeyError, why: + self.headers = None + + + def __len__(self): + return self.height + + + def __getitem__(self, key): + return self._data[key] + + + def __setitem__(self, key, value): + self.validate(value) + self._data[key] = value + + + def __delitem__(self, key): + del self._data[key] + + + def __repr__(self): + return '' + + + def validate(self, row=None, safety=False): + """Assures size of every row in dataset is of proper proportions.""" + if row: + is_valid = (len(row) == self.width) if self.width else True + else: + is_valid = all((len(x)== self.width for x in self._data)) + + if is_valid: + return True + + else: + if not safety: + raise InvalidDimensions + return False + + def digest(self): + """Retruns digest information of dataset in human-readable format.""" + pass + + + @property + def height(self): + """Returns the height of the Dataset.""" + return len(self._data) + + + @property + def width(self): + """Returns the width of the Dataset.""" + + try: + len(self._data[0]) + except Exception, why: + raise why + + + @property + def json(self): + pass + + + @property + def yaml(self): + pass + + + @property + def csv(self): + pass + + + @property + def xls(self): + pass + + + def add_row(self, index=None): + pass + + def del_row(self): + pass + + def save(self): + pass + +class InvalidDimensions(Exception): + "Invalid size" + + +def source(): """docstring for import""" - pass - + pass \ No newline at end of file From 4d3a31e19f5472ae27a39841e8e535a2f9507850 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 29 Aug 2010 23:20:59 -0400 Subject: [PATCH 03/23] API change. 
--- README.rst | 12 ++++++------ tablib/core.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index dda5e3a..7ac287f 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,7 @@ Features Convert datafile formats via API: :: - tablib.import(filename='data.csv').export('data.json') + tablib.source(filename='data.csv').export('data.json') Convert datafile formats via CLI: :: @@ -75,11 +75,11 @@ Slice rows: :: # >>> [('John', 'Adams', 4.0), ('George', 'Washington', 2.6)] -Slice columns by header: :: - - print data['first_name'] - # >>> ['John', 'George', 'Henry'] - +.. Slice columns by header: :: +.. +.. print data['first_name'] +.. # >>> ['John', 'George', 'Henry'] +.. Manipulate rows by index: :: diff --git a/tablib/core.py b/tablib/core.py index 8ba5c43..a9ce1e2 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -111,7 +111,7 @@ class Dataset(object): pass - def add_row(self, index=None): + def append(self, row, index=None): pass def del_row(self): From a2f59584e499c27464e1d1e36b442541a52418d6 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 29 Aug 2010 23:21:37 -0400 Subject: [PATCH 04/23] Version bump (v0.0.3) --- tablib/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index a9ce1e2..1032013 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -11,8 +11,8 @@ import csv -__version__ = '0.0.2' -__build__ = '0x000002' +__version__ = '0.0.3' +__build__ = '0x000003' __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' From 2b3f2771384a8de615fab485d8541d38fc529c0d Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Sun, 29 Aug 2010 23:38:03 -0400 Subject: [PATCH 05/23] Time for some testing. 
--- tablib/core.py | 17 +++++++++++++---- tablib/helpers.py | 7 ++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 1032013..0a8fce7 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -9,7 +9,9 @@ import csv +from helpers import * +__all__ = ['Dataset', 'source'] __version__ = '0.0.3' __build__ = '0x000003' @@ -17,8 +19,6 @@ __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' -__all__ = ['Dataset', 'source'] - class Dataset(object): @@ -39,7 +39,15 @@ class Dataset(object): def __getitem__(self, key): - return self._data[key] + + if is_string(key): + if key in self.headers: + pos = self.headers.index(key) # get 'key' index from each data + return [row[pos] for row in self._data] + else: + raise KeyError + else: + return self._data[key] def __setitem__(self, key, value): @@ -93,7 +101,8 @@ class Dataset(object): @property def json(self): - pass + if self.headers: + pass @property diff --git a/tablib/helpers.py b/tablib/helpers.py index 0f5232c..1afbc4c 100644 --- a/tablib/helpers.py +++ b/tablib/helpers.py @@ -15,6 +15,11 @@ class Object(object): def piped(): """Returns piped input via stdin, else False""" - with sys.stdin as stdin: return stdin.read() if not stdin.isatty() else None + + +def is_string(obj): + """Tests if an object is a string""" + + return True if type(obj).__name__ == 'str' else False \ No newline at end of file From 254ce62b2aebbb962055c129bdde12ab8e307593 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 00:22:11 -0400 Subject: [PATCH 06/23] .append --- tablib/core.py | 25 +++++++++++++++++-------- tablib/tests/tests.py | 17 +++++++++++++++++ 2 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 tablib/tests/tests.py diff --git a/tablib/core.py b/tablib/core.py index 0a8fce7..8f08714 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -25,8 +25,10 @@ class Dataset(object): """Amazing Tabular Dataset object. 
""" def __init__(self, *args, **kwargs): + self._data = None + self._filename = None - self._data = [].append(args) + self._data = list(args) try: self.headers = kwargs['headers'] @@ -37,7 +39,6 @@ class Dataset(object): def __len__(self): return self.height - def __getitem__(self, key): if is_string(key): @@ -94,9 +95,9 @@ class Dataset(object): """Returns the width of the Dataset.""" try: - len(self._data[0]) - except Exception, why: - raise why + return len(self._data[0]) + except KeyError, why: + return 0 @property @@ -121,18 +122,26 @@ class Dataset(object): def append(self, row, index=None): - pass + # todo: impliment index + self.validate(row) + self._data.append(row) - def del_row(self): + def sort_by(self, key): + """Returns datastet sorted by given key""" + # todo: accpept string if headers, or index nubmer pass def save(self): pass + # note export format + # open file, save the bitch + class InvalidDimensions(Exception): "Invalid size" -def source(): +def source(io_string=None, filename=None): """docstring for import""" + #open by filename pass \ No newline at end of file diff --git a/tablib/tests/tests.py b/tablib/tests/tests.py new file mode 100644 index 0000000..f457dfb --- /dev/null +++ b/tablib/tests/tests.py @@ -0,0 +1,17 @@ +import tablib + +headers = ('first_name', 'last_name', 'gpa') + +data = [ + ('John', 'Adams', 4.0), + ('George', 'Washington', 2.6), + ('Henry', 'Ford', 2.3) +] + +data = tablib.Dataset(*data, headers=headers) + +print data[1] +data.append(['kenneth' ,'reitz', 4.3]) + + +print data._data \ No newline at end of file From 687670762f43ba3f72f5d7690c069764b65d5e3d Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 00:28:47 -0400 Subject: [PATCH 07/23] Added PyYaml into vendorized packages. 
--- tablib/packages/yaml/__init__.py | 288 ++++++ tablib/packages/yaml/composer.py | 139 +++ tablib/packages/yaml/constructor.py | 684 +++++++++++++ tablib/packages/yaml/cyaml.py | 85 ++ tablib/packages/yaml/dumper.py | 62 ++ tablib/packages/yaml/emitter.py | 1135 +++++++++++++++++++++ tablib/packages/yaml/error.py | 75 ++ tablib/packages/yaml/events.py | 86 ++ tablib/packages/yaml/loader.py | 40 + tablib/packages/yaml/nodes.py | 49 + tablib/packages/yaml/parser.py | 584 +++++++++++ tablib/packages/yaml/reader.py | 225 +++++ tablib/packages/yaml/representer.py | 489 +++++++++ tablib/packages/yaml/resolver.py | 224 ++++ tablib/packages/yaml/scanner.py | 1457 +++++++++++++++++++++++++++ tablib/packages/yaml/serializer.py | 111 ++ tablib/packages/yaml/tokens.py | 104 ++ 17 files changed, 5837 insertions(+) create mode 100644 tablib/packages/yaml/__init__.py create mode 100644 tablib/packages/yaml/composer.py create mode 100644 tablib/packages/yaml/constructor.py create mode 100644 tablib/packages/yaml/cyaml.py create mode 100644 tablib/packages/yaml/dumper.py create mode 100644 tablib/packages/yaml/emitter.py create mode 100644 tablib/packages/yaml/error.py create mode 100644 tablib/packages/yaml/events.py create mode 100644 tablib/packages/yaml/loader.py create mode 100644 tablib/packages/yaml/nodes.py create mode 100644 tablib/packages/yaml/parser.py create mode 100644 tablib/packages/yaml/reader.py create mode 100644 tablib/packages/yaml/representer.py create mode 100644 tablib/packages/yaml/resolver.py create mode 100644 tablib/packages/yaml/scanner.py create mode 100644 tablib/packages/yaml/serializer.py create mode 100644 tablib/packages/yaml/tokens.py diff --git a/tablib/packages/yaml/__init__.py b/tablib/packages/yaml/__init__.py new file mode 100644 index 0000000..c0fd1f3 --- /dev/null +++ b/tablib/packages/yaml/__init__.py @@ -0,0 +1,288 @@ + +from error import * + +from tokens import * +from events import * +from nodes import * + +from loader import * +from 
dumper import * + +__version__ = '3.09' + +try: + from cyaml import * + __with_libyaml__ = True +except ImportError: + __with_libyaml__ = False + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + while loader.check_token(): + yield loader.get_token() + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + while loader.check_event(): + yield loader.get_event() + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + return loader.get_single_node() + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + while loader.check_node(): + yield loader.get_node() + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + return loader.get_single_data() + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + while loader.check_data(): + yield loader.get_data() + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. 
+ """ + getvalue = None + if stream is None: + from StringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + for event in events: + dumper.emit(event) + if getvalue: + return getvalue() + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + from StringIO import StringIO + else: + from cStringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + if getvalue: + return getvalue() + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding='utf-8', explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. 
+ """ + getvalue = None + if stream is None: + if encoding is None: + from StringIO import StringIO + else: + from cStringIO import StringIO + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + if getvalue: + return getvalue() + +def dump(data, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=Dumper, **kwds) + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. 
+ A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. 
+ """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + +class YAMLObject(object): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + + __metaclass__ = YAMLObjectMetaclass + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + from_yaml = classmethod(from_yaml) + + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + to_yaml = classmethod(to_yaml) + diff --git a/tablib/packages/yaml/composer.py b/tablib/packages/yaml/composer.py new file mode 100644 index 0000000..06e5ac7 --- /dev/null +++ b/tablib/packages/yaml/composer.py @@ -0,0 +1,139 @@ + +__all__ = ['Composer', 'ComposerError'] + +from error import MarkedYAMLError +from events import * +from nodes import * + +class ComposerError(MarkedYAMLError): + pass + +class Composer(object): + + def __init__(self): + self.anchors = {} + + def check_node(self): + # Drop the STREAM-START event. + if self.check_event(StreamStartEvent): + self.get_event() + + # If there are more documents available? + return not self.check_event(StreamEndEvent) + + def get_node(self): + # Get the root node of the next document. + if not self.check_event(StreamEndEvent): + return self.compose_document() + + def get_single_node(self): + # Drop the STREAM-START event. + self.get_event() + + # Compose a document if the stream is not empty. 
+ document = None + if not self.check_event(StreamEndEvent): + document = self.compose_document() + + # Ensure that the stream contains no more documents. + if not self.check_event(StreamEndEvent): + event = self.get_event() + raise ComposerError("expected a single document in the stream", + document.start_mark, "but found another document", + event.start_mark) + + # Drop the STREAM-END event. + self.get_event() + + return document + + def compose_document(self): + # Drop the DOCUMENT-START event. + self.get_event() + + # Compose the root node. + node = self.compose_node(None, None) + + # Drop the DOCUMENT-END event. + self.get_event() + + self.anchors = {} + return node + + def compose_node(self, parent, index): + if self.check_event(AliasEvent): + event = self.get_event() + anchor = event.anchor + if anchor not in self.anchors: + raise ComposerError(None, None, "found undefined alias %r" + % anchor.encode('utf-8'), event.start_mark) + return self.anchors[anchor] + event = self.peek_event() + anchor = event.anchor + if anchor is not None: + if anchor in self.anchors: + raise ComposerError("found duplicate anchor %r; first occurence" + % anchor.encode('utf-8'), self.anchors[anchor].start_mark, + "second occurence", event.start_mark) + self.descend_resolver(parent, index) + if self.check_event(ScalarEvent): + node = self.compose_scalar_node(anchor) + elif self.check_event(SequenceStartEvent): + node = self.compose_sequence_node(anchor) + elif self.check_event(MappingStartEvent): + node = self.compose_mapping_node(anchor) + self.ascend_resolver() + return node + + def compose_scalar_node(self, anchor): + event = self.get_event() + tag = event.tag + if tag is None or tag == u'!': + tag = self.resolve(ScalarNode, event.value, event.implicit) + node = ScalarNode(tag, event.value, + event.start_mark, event.end_mark, style=event.style) + if anchor is not None: + self.anchors[anchor] = node + return node + + def compose_sequence_node(self, anchor): + start_event = 
self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(SequenceNode, None, start_event.implicit) + node = SequenceNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + index = 0 + while not self.check_event(SequenceEndEvent): + node.value.append(self.compose_node(node, index)) + index += 1 + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + + def compose_mapping_node(self, anchor): + start_event = self.get_event() + tag = start_event.tag + if tag is None or tag == u'!': + tag = self.resolve(MappingNode, None, start_event.implicit) + node = MappingNode(tag, [], + start_event.start_mark, None, + flow_style=start_event.flow_style) + if anchor is not None: + self.anchors[anchor] = node + while not self.check_event(MappingEndEvent): + #key_event = self.peek_event() + item_key = self.compose_node(node, None) + #if item_key in node.value: + # raise ComposerError("while composing a mapping", start_event.start_mark, + # "found duplicate key", key_event.start_mark) + item_value = self.compose_node(node, item_key) + #node.value[item_key] = item_value + node.value.append((item_key, item_value)) + end_event = self.get_event() + node.end_mark = end_event.end_mark + return node + diff --git a/tablib/packages/yaml/constructor.py b/tablib/packages/yaml/constructor.py new file mode 100644 index 0000000..420c434 --- /dev/null +++ b/tablib/packages/yaml/constructor.py @@ -0,0 +1,684 @@ + +__all__ = ['BaseConstructor', 'SafeConstructor', 'Constructor', + 'ConstructorError'] + +from error import * +from nodes import * + +import datetime + +try: + set +except NameError: + from sets import Set as set + +import binascii, re, sys, types + +class ConstructorError(MarkedYAMLError): + pass + +class BaseConstructor(object): + + yaml_constructors = {} + yaml_multi_constructors = {} + + def __init__(self): + self.constructed_objects = {} + 
self.recursive_objects = {} + self.state_generators = [] + self.deep_construct = False + + def check_data(self): + # If there are more documents available? + return self.check_node() + + def get_data(self): + # Construct and return the next document. + if self.check_node(): + return self.construct_document(self.get_node()) + + def get_single_data(self): + # Ensure that the stream contains a single document and construct it. + node = self.get_single_node() + if node is not None: + return self.construct_document(node) + return None + + def construct_document(self, node): + data = self.construct_object(node) + while self.state_generators: + state_generators = self.state_generators + self.state_generators = [] + for generator in state_generators: + for dummy in generator: + pass + self.constructed_objects = {} + self.recursive_objects = {} + self.deep_construct = False + return data + + def construct_object(self, node, deep=False): + if deep: + old_deep = self.deep_construct + self.deep_construct = True + if node in self.constructed_objects: + return self.constructed_objects[node] + if node in self.recursive_objects: + raise ConstructorError(None, None, + "found unconstructable recursive node", node.start_mark) + self.recursive_objects[node] = None + constructor = None + tag_suffix = None + if node.tag in self.yaml_constructors: + constructor = self.yaml_constructors[node.tag] + else: + for tag_prefix in self.yaml_multi_constructors: + if node.tag.startswith(tag_prefix): + tag_suffix = node.tag[len(tag_prefix):] + constructor = self.yaml_multi_constructors[tag_prefix] + break + else: + if None in self.yaml_multi_constructors: + tag_suffix = node.tag + constructor = self.yaml_multi_constructors[None] + elif None in self.yaml_constructors: + constructor = self.yaml_constructors[None] + elif isinstance(node, ScalarNode): + constructor = self.__class__.construct_scalar + elif isinstance(node, SequenceNode): + constructor = self.__class__.construct_sequence + elif 
isinstance(node, MappingNode): + constructor = self.__class__.construct_mapping + if tag_suffix is None: + data = constructor(self, node) + else: + data = constructor(self, tag_suffix, node) + if isinstance(data, types.GeneratorType): + generator = data + data = generator.next() + if self.deep_construct: + for dummy in generator: + pass + else: + self.state_generators.append(generator) + self.constructed_objects[node] = data + del self.recursive_objects[node] + if deep: + self.deep_construct = old_deep + return data + + def construct_scalar(self, node): + if not isinstance(node, ScalarNode): + raise ConstructorError(None, None, + "expected a scalar node, but found %s" % node.id, + node.start_mark) + return node.value + + def construct_sequence(self, node, deep=False): + if not isinstance(node, SequenceNode): + raise ConstructorError(None, None, + "expected a sequence node, but found %s" % node.id, + node.start_mark) + return [self.construct_object(child, deep=deep) + for child in node.value] + + def construct_mapping(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + try: + hash(key) + except TypeError, exc: + raise ConstructorError("while constructing a mapping", node.start_mark, + "found unacceptable key (%s)" % exc, key_node.start_mark) + value = self.construct_object(value_node, deep=deep) + mapping[key] = value + return mapping + + def construct_pairs(self, node, deep=False): + if not isinstance(node, MappingNode): + raise ConstructorError(None, None, + "expected a mapping node, but found %s" % node.id, + node.start_mark) + pairs = [] + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + pairs.append((key, value)) + return pairs + + def 
add_constructor(cls, tag, constructor): + if not 'yaml_constructors' in cls.__dict__: + cls.yaml_constructors = cls.yaml_constructors.copy() + cls.yaml_constructors[tag] = constructor + add_constructor = classmethod(add_constructor) + + def add_multi_constructor(cls, tag_prefix, multi_constructor): + if not 'yaml_multi_constructors' in cls.__dict__: + cls.yaml_multi_constructors = cls.yaml_multi_constructors.copy() + cls.yaml_multi_constructors[tag_prefix] = multi_constructor + add_multi_constructor = classmethod(add_multi_constructor) + +class SafeConstructor(BaseConstructor): + + def construct_scalar(self, node): + if isinstance(node, MappingNode): + for key_node, value_node in node.value: + if key_node.tag == u'tag:yaml.org,2002:value': + return self.construct_scalar(value_node) + return BaseConstructor.construct_scalar(self, node) + + def flatten_mapping(self, node): + merge = [] + index = 0 + while index < len(node.value): + key_node, value_node = node.value[index] + if key_node.tag == u'tag:yaml.org,2002:merge': + del node.value[index] + if isinstance(value_node, MappingNode): + self.flatten_mapping(value_node) + merge.extend(value_node.value) + elif isinstance(value_node, SequenceNode): + submerge = [] + for subnode in value_node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing a mapping", + node.start_mark, + "expected a mapping for merging, but found %s" + % subnode.id, subnode.start_mark) + self.flatten_mapping(subnode) + submerge.append(subnode.value) + submerge.reverse() + for value in submerge: + merge.extend(value) + else: + raise ConstructorError("while constructing a mapping", node.start_mark, + "expected a mapping or list of mappings for merging, but found %s" + % value_node.id, value_node.start_mark) + elif key_node.tag == u'tag:yaml.org,2002:value': + key_node.tag = u'tag:yaml.org,2002:str' + index += 1 + else: + index += 1 + if merge: + node.value = merge + node.value + + def construct_mapping(self, 
node, deep=False): + if isinstance(node, MappingNode): + self.flatten_mapping(node) + return BaseConstructor.construct_mapping(self, node, deep=deep) + + def construct_yaml_null(self, node): + self.construct_scalar(node) + return None + + bool_values = { + u'yes': True, + u'no': False, + u'true': True, + u'false': False, + u'on': True, + u'off': False, + } + + def construct_yaml_bool(self, node): + value = self.construct_scalar(node) + return self.bool_values[value.lower()] + + def construct_yaml_int(self, node): + value = str(self.construct_scalar(node)) + value = value.replace('_', '') + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '0': + return 0 + elif value.startswith('0b'): + return sign*int(value[2:], 2) + elif value.startswith('0x'): + return sign*int(value[2:], 16) + elif value[0] == '0': + return sign*int(value, 8) + elif ':' in value: + digits = [int(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*int(value) + + inf_value = 1e300 + while inf_value != inf_value*inf_value: + inf_value *= inf_value + nan_value = -inf_value/inf_value # Trying to make a quiet NaN (like C99). 
+ + def construct_yaml_float(self, node): + value = str(self.construct_scalar(node)) + value = value.replace('_', '').lower() + sign = +1 + if value[0] == '-': + sign = -1 + if value[0] in '+-': + value = value[1:] + if value == '.inf': + return sign*self.inf_value + elif value == '.nan': + return self.nan_value + elif ':' in value: + digits = [float(part) for part in value.split(':')] + digits.reverse() + base = 1 + value = 0.0 + for digit in digits: + value += digit*base + base *= 60 + return sign*value + else: + return sign*float(value) + + def construct_yaml_binary(self, node): + value = self.construct_scalar(node) + try: + return str(value).decode('base64') + except (binascii.Error, UnicodeEncodeError), exc: + raise ConstructorError(None, None, + "failed to decode base64 data: %s" % exc, node.start_mark) + + timestamp_regexp = re.compile( + ur'''^(?P[0-9][0-9][0-9][0-9]) + -(?P[0-9][0-9]?) + -(?P[0-9][0-9]?) + (?:(?:[Tt]|[ \t]+) + (?P[0-9][0-9]?) + :(?P[0-9][0-9]) + :(?P[0-9][0-9]) + (?:\.(?P[0-9]*))? + (?:[ \t]*(?PZ|(?P[-+])(?P[0-9][0-9]?) 
+ (?::(?P[0-9][0-9]))?))?)?$''', re.X) + + def construct_yaml_timestamp(self, node): + value = self.construct_scalar(node) + match = self.timestamp_regexp.match(node.value) + values = match.groupdict() + year = int(values['year']) + month = int(values['month']) + day = int(values['day']) + if not values['hour']: + return datetime.date(year, month, day) + hour = int(values['hour']) + minute = int(values['minute']) + second = int(values['second']) + fraction = 0 + if values['fraction']: + fraction = values['fraction'][:6] + while len(fraction) < 6: + fraction += '0' + fraction = int(fraction) + delta = None + if values['tz_sign']: + tz_hour = int(values['tz_hour']) + tz_minute = int(values['tz_minute'] or 0) + delta = datetime.timedelta(hours=tz_hour, minutes=tz_minute) + if values['tz_sign'] == '-': + delta = -delta + data = datetime.datetime(year, month, day, hour, minute, second, fraction) + if delta: + data -= delta + return data + + def construct_yaml_omap(self, node): + # Note: we do not check for duplicate keys, because it's too + # CPU-expensive. + omap = [] + yield omap + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing an ordered map", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + omap.append((key, value)) + + def construct_yaml_pairs(self, node): + # Note: the same code as `construct_yaml_omap`. 
+ pairs = [] + yield pairs + if not isinstance(node, SequenceNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a sequence, but found %s" % node.id, node.start_mark) + for subnode in node.value: + if not isinstance(subnode, MappingNode): + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a mapping of length 1, but found %s" % subnode.id, + subnode.start_mark) + if len(subnode.value) != 1: + raise ConstructorError("while constructing pairs", node.start_mark, + "expected a single mapping item, but found %d items" % len(subnode.value), + subnode.start_mark) + key_node, value_node = subnode.value[0] + key = self.construct_object(key_node) + value = self.construct_object(value_node) + pairs.append((key, value)) + + def construct_yaml_set(self, node): + data = set() + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_str(self, node): + value = self.construct_scalar(node) + try: + return value.encode('ascii') + except UnicodeEncodeError: + return value + + def construct_yaml_seq(self, node): + data = [] + yield data + data.extend(self.construct_sequence(node)) + + def construct_yaml_map(self, node): + data = {} + yield data + value = self.construct_mapping(node) + data.update(value) + + def construct_yaml_object(self, node, cls): + data = cls.__new__(cls) + yield data + if hasattr(data, '__setstate__'): + state = self.construct_mapping(node, deep=True) + data.__setstate__(state) + else: + state = self.construct_mapping(node) + data.__dict__.update(state) + + def construct_undefined(self, node): + raise ConstructorError(None, None, + "could not determine a constructor for the tag %r" % node.tag.encode('utf-8'), + node.start_mark) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:null', + SafeConstructor.construct_yaml_null) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:bool', + SafeConstructor.construct_yaml_bool) + 
+SafeConstructor.add_constructor( + u'tag:yaml.org,2002:int', + SafeConstructor.construct_yaml_int) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:float', + SafeConstructor.construct_yaml_float) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:binary', + SafeConstructor.construct_yaml_binary) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:timestamp', + SafeConstructor.construct_yaml_timestamp) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:omap', + SafeConstructor.construct_yaml_omap) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:pairs', + SafeConstructor.construct_yaml_pairs) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:set', + SafeConstructor.construct_yaml_set) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:str', + SafeConstructor.construct_yaml_str) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:seq', + SafeConstructor.construct_yaml_seq) + +SafeConstructor.add_constructor( + u'tag:yaml.org,2002:map', + SafeConstructor.construct_yaml_map) + +SafeConstructor.add_constructor(None, + SafeConstructor.construct_undefined) + +class Constructor(SafeConstructor): + + def construct_python_str(self, node): + return self.construct_scalar(node).encode('utf-8') + + def construct_python_unicode(self, node): + return self.construct_scalar(node) + + def construct_python_long(self, node): + return long(self.construct_yaml_int(node)) + + def construct_python_complex(self, node): + return complex(self.construct_scalar(node)) + + def construct_python_tuple(self, node): + return tuple(self.construct_sequence(node)) + + def find_python_module(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python module", mark, + "expected non-empty name appended to the tag", mark) + try: + __import__(name) + except ImportError, exc: + raise ConstructorError("while constructing a Python module", mark, + "cannot find module %r (%s)" % (name.encode('utf-8'), exc), mark) + return 
sys.modules[name] + + def find_python_name(self, name, mark): + if not name: + raise ConstructorError("while constructing a Python object", mark, + "expected non-empty name appended to the tag", mark) + if u'.' in name: + # Python 2.4 only + #module_name, object_name = name.rsplit('.', 1) + items = name.split('.') + object_name = items.pop() + module_name = '.'.join(items) + else: + module_name = '__builtin__' + object_name = name + try: + __import__(module_name) + except ImportError, exc: + raise ConstructorError("while constructing a Python object", mark, + "cannot find module %r (%s)" % (module_name.encode('utf-8'), exc), mark) + module = sys.modules[module_name] + if not hasattr(module, object_name): + raise ConstructorError("while constructing a Python object", mark, + "cannot find %r in the module %r" % (object_name.encode('utf-8'), + module.__name__), mark) + return getattr(module, object_name) + + def construct_python_name(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python name", node.start_mark, + "expected the empty value, but found %r" % value.encode('utf-8'), + node.start_mark) + return self.find_python_name(suffix, node.start_mark) + + def construct_python_module(self, suffix, node): + value = self.construct_scalar(node) + if value: + raise ConstructorError("while constructing a Python module", node.start_mark, + "expected the empty value, but found %r" % value.encode('utf-8'), + node.start_mark) + return self.find_python_module(suffix, node.start_mark) + + class classobj: pass + + def make_python_instance(self, suffix, node, + args=None, kwds=None, newobj=False): + if not args: + args = [] + if not kwds: + kwds = {} + cls = self.find_python_name(suffix, node.start_mark) + if newobj and isinstance(cls, type(self.classobj)) \ + and not args and not kwds: + instance = self.classobj() + instance.__class__ = cls + return instance + elif newobj and isinstance(cls, type): + return 
cls.__new__(cls, *args, **kwds) + else: + return cls(*args, **kwds) + + def set_python_instance_state(self, instance, state): + if hasattr(instance, '__setstate__'): + instance.__setstate__(state) + else: + slotstate = {} + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state + if hasattr(instance, '__dict__'): + instance.__dict__.update(state) + elif state: + slotstate.update(state) + for key, value in slotstate.items(): + setattr(object, key, value) + + def construct_python_object(self, suffix, node): + # Format: + # !!python/object:module.name { ... state ... } + instance = self.make_python_instance(suffix, node, newobj=True) + yield instance + deep = hasattr(instance, '__setstate__') + state = self.construct_mapping(node, deep=deep) + self.set_python_instance_state(instance, state) + + def construct_python_object_apply(self, suffix, node, newobj=False): + # Format: + # !!python/object/apply # (or !!python/object/new) + # args: [ ... arguments ... ] + # kwds: { ... keywords ... } + # state: ... state ... + # listitems: [ ... listitems ... ] + # dictitems: { ... dictitems ... } + # or short format: + # !!python/object/apply [ ... arguments ... ] + # The difference between !!python/object/apply and !!python/object/new + # is how an object is created, check make_python_instance for details. 
+ if isinstance(node, SequenceNode): + args = self.construct_sequence(node, deep=True) + kwds = {} + state = {} + listitems = [] + dictitems = {} + else: + value = self.construct_mapping(node, deep=True) + args = value.get('args', []) + kwds = value.get('kwds', {}) + state = value.get('state', {}) + listitems = value.get('listitems', []) + dictitems = value.get('dictitems', {}) + instance = self.make_python_instance(suffix, node, args, kwds, newobj) + if state: + self.set_python_instance_state(instance, state) + if listitems: + instance.extend(listitems) + if dictitems: + for key in dictitems: + instance[key] = dictitems[key] + return instance + + def construct_python_object_new(self, suffix, node): + return self.construct_python_object_apply(suffix, node, newobj=True) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/none', + Constructor.construct_yaml_null) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/bool', + Constructor.construct_yaml_bool) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/str', + Constructor.construct_python_str) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/unicode', + Constructor.construct_python_unicode) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/int', + Constructor.construct_yaml_int) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/long', + Constructor.construct_python_long) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/float', + Constructor.construct_yaml_float) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/complex', + Constructor.construct_python_complex) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/list', + Constructor.construct_yaml_seq) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/tuple', + Constructor.construct_python_tuple) + +Constructor.add_constructor( + u'tag:yaml.org,2002:python/dict', + Constructor.construct_yaml_map) + +Constructor.add_multi_constructor( + 
u'tag:yaml.org,2002:python/name:', + Constructor.construct_python_name) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/module:', + Constructor.construct_python_module) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object:', + Constructor.construct_python_object) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/apply:', + Constructor.construct_python_object_apply) + +Constructor.add_multi_constructor( + u'tag:yaml.org,2002:python/object/new:', + Constructor.construct_python_object_new) + diff --git a/tablib/packages/yaml/cyaml.py b/tablib/packages/yaml/cyaml.py new file mode 100644 index 0000000..68dcd75 --- /dev/null +++ b/tablib/packages/yaml/cyaml.py @@ -0,0 +1,85 @@ + +__all__ = ['CBaseLoader', 'CSafeLoader', 'CLoader', + 'CBaseDumper', 'CSafeDumper', 'CDumper'] + +from _yaml import CParser, CEmitter + +from constructor import * + +from serializer import * +from representer import * + +from resolver import * + +class CBaseLoader(CParser, BaseConstructor, BaseResolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class CSafeLoader(CParser, SafeConstructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class CLoader(CParser, Constructor, Resolver): + + def __init__(self, stream): + CParser.__init__(self, stream) + Constructor.__init__(self) + Resolver.__init__(self) + +class CBaseDumper(CEmitter, BaseRepresenter, BaseResolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, 
+ explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CSafeDumper(CEmitter, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class CDumper(CEmitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + CEmitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, encoding=encoding, + allow_unicode=allow_unicode, line_break=line_break, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/tablib/packages/yaml/dumper.py b/tablib/packages/yaml/dumper.py new file mode 100644 index 0000000..f811d2c --- /dev/null +++ b/tablib/packages/yaml/dumper.py @@ -0,0 +1,62 @@ + +__all__ = ['BaseDumper', 'SafeDumper', 'Dumper'] + +from emitter import * +from serializer import * +from representer import * +from resolver import * + +class BaseDumper(Emitter, Serializer, BaseRepresenter, BaseResolver): + + 
def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class SafeDumper(Emitter, Serializer, SafeRepresenter, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + SafeRepresenter.__init__(self, default_style=default_style, + default_flow_style=default_flow_style) + Resolver.__init__(self) + +class Dumper(Emitter, Serializer, Representer, Resolver): + + def __init__(self, stream, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=None, explicit_start=None, explicit_end=None, + version=None, tags=None): + Emitter.__init__(self, stream, canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + Serializer.__init__(self, encoding=encoding, + explicit_start=explicit_start, explicit_end=explicit_end, + version=version, tags=tags) + Representer.__init__(self, default_style=default_style, + 
default_flow_style=default_flow_style) + Resolver.__init__(self) + diff --git a/tablib/packages/yaml/emitter.py b/tablib/packages/yaml/emitter.py new file mode 100644 index 0000000..4cb2c8a --- /dev/null +++ b/tablib/packages/yaml/emitter.py @@ -0,0 +1,1135 @@ + +# Emitter expects events obeying the following grammar: +# stream ::= STREAM-START document* STREAM-END +# document ::= DOCUMENT-START node DOCUMENT-END +# node ::= SCALAR | sequence | mapping +# sequence ::= SEQUENCE-START node* SEQUENCE-END +# mapping ::= MAPPING-START (node node)* MAPPING-END + +__all__ = ['Emitter', 'EmitterError'] + +from error import YAMLError +from events import * + +class EmitterError(YAMLError): + pass + +class ScalarAnalysis(object): + def __init__(self, scalar, empty, multiline, + allow_flow_plain, allow_block_plain, + allow_single_quoted, allow_double_quoted, + allow_block): + self.scalar = scalar + self.empty = empty + self.multiline = multiline + self.allow_flow_plain = allow_flow_plain + self.allow_block_plain = allow_block_plain + self.allow_single_quoted = allow_single_quoted + self.allow_double_quoted = allow_double_quoted + self.allow_block = allow_block + +class Emitter(object): + + DEFAULT_TAG_PREFIXES = { + u'!' : u'!', + u'tag:yaml.org,2002:' : u'!!', + } + + def __init__(self, stream, canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + + # The stream should have the methods `write` and possibly `flush`. + self.stream = stream + + # Encoding can be overriden by STREAM-START. + self.encoding = None + + # Emitter is a state machine with a stack of states to handle nested + # structures. + self.states = [] + self.state = self.expect_stream_start + + # Current event and the event queue. + self.events = [] + self.event = None + + # The current indentation level and the stack of previous indents. + self.indents = [] + self.indent = None + + # Flow level. + self.flow_level = 0 + + # Contexts. 
+ self.root_context = False + self.sequence_context = False + self.mapping_context = False + self.simple_key_context = False + + # Characteristics of the last emitted character: + # - current position. + # - is it a whitespace? + # - is it an indention character + # (indentation space, '-', '?', or ':')? + self.line = 0 + self.column = 0 + self.whitespace = True + self.indention = True + + # Whether the document requires an explicit document indicator + self.open_ended = False + + # Formatting details. + self.canonical = canonical + self.allow_unicode = allow_unicode + self.best_indent = 2 + if indent and 1 < indent < 10: + self.best_indent = indent + self.best_width = 80 + if width and width > self.best_indent*2: + self.best_width = width + self.best_line_break = u'\n' + if line_break in [u'\r', u'\n', u'\r\n']: + self.best_line_break = line_break + + # Tag prefixes. + self.tag_prefixes = None + + # Prepared anchor and tag. + self.prepared_anchor = None + self.prepared_tag = None + + # Scalar analysis and style. + self.analysis = None + self.style = None + + def emit(self, event): + self.events.append(event) + while not self.need_more_events(): + self.event = self.events.pop(0) + self.state() + self.event = None + + # In some cases, we wait for a few next events before emitting. 
+ + def need_more_events(self): + if not self.events: + return True + event = self.events[0] + if isinstance(event, DocumentStartEvent): + return self.need_events(1) + elif isinstance(event, SequenceStartEvent): + return self.need_events(2) + elif isinstance(event, MappingStartEvent): + return self.need_events(3) + else: + return False + + def need_events(self, count): + level = 0 + for event in self.events[1:]: + if isinstance(event, (DocumentStartEvent, CollectionStartEvent)): + level += 1 + elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)): + level -= 1 + elif isinstance(event, StreamEndEvent): + level = -1 + if level < 0: + return False + return (len(self.events) < count+1) + + def increase_indent(self, flow=False, indentless=False): + self.indents.append(self.indent) + if self.indent is None: + if flow: + self.indent = self.best_indent + else: + self.indent = 0 + elif not indentless: + self.indent += self.best_indent + + # States. + + # Stream handlers. + + def expect_stream_start(self): + if isinstance(self.event, StreamStartEvent): + if self.event.encoding and not getattr(self.stream, 'encoding', None): + self.encoding = self.event.encoding + self.write_stream_start() + self.state = self.expect_first_document_start + else: + raise EmitterError("expected StreamStartEvent, but got %s" + % self.event) + + def expect_nothing(self): + raise EmitterError("expected nothing, but got %s" % self.event) + + # Document handlers. 
+ + def expect_first_document_start(self): + return self.expect_document_start(first=True) + + def expect_document_start(self, first=False): + if isinstance(self.event, DocumentStartEvent): + if (self.event.version or self.event.tags) and self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + if self.event.version: + version_text = self.prepare_version(self.event.version) + self.write_version_directive(version_text) + self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy() + if self.event.tags: + handles = self.event.tags.keys() + handles.sort() + for handle in handles: + prefix = self.event.tags[handle] + self.tag_prefixes[prefix] = handle + handle_text = self.prepare_tag_handle(handle) + prefix_text = self.prepare_tag_prefix(prefix) + self.write_tag_directive(handle_text, prefix_text) + implicit = (first and not self.event.explicit and not self.canonical + and not self.event.version and not self.event.tags + and not self.check_empty_document()) + if not implicit: + self.write_indent() + self.write_indicator(u'---', True) + if self.canonical: + self.write_indent() + self.state = self.expect_document_root + elif isinstance(self.event, StreamEndEvent): + if self.open_ended: + self.write_indicator(u'...', True) + self.write_indent() + self.write_stream_end() + self.state = self.expect_nothing + else: + raise EmitterError("expected DocumentStartEvent, but got %s" + % self.event) + + def expect_document_end(self): + if isinstance(self.event, DocumentEndEvent): + self.write_indent() + if self.event.explicit: + self.write_indicator(u'...', True) + self.write_indent() + self.flush_stream() + self.state = self.expect_document_start + else: + raise EmitterError("expected DocumentEndEvent, but got %s" + % self.event) + + def expect_document_root(self): + self.states.append(self.expect_document_end) + self.expect_node(root=True) + + # Node handlers. 
+ + def expect_node(self, root=False, sequence=False, mapping=False, + simple_key=False): + self.root_context = root + self.sequence_context = sequence + self.mapping_context = mapping + self.simple_key_context = simple_key + if isinstance(self.event, AliasEvent): + self.expect_alias() + elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)): + self.process_anchor(u'&') + self.process_tag() + if isinstance(self.event, ScalarEvent): + self.expect_scalar() + elif isinstance(self.event, SequenceStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_sequence(): + self.expect_flow_sequence() + else: + self.expect_block_sequence() + elif isinstance(self.event, MappingStartEvent): + if self.flow_level or self.canonical or self.event.flow_style \ + or self.check_empty_mapping(): + self.expect_flow_mapping() + else: + self.expect_block_mapping() + else: + raise EmitterError("expected NodeEvent, but got %s" % self.event) + + def expect_alias(self): + if self.event.anchor is None: + raise EmitterError("anchor is not specified for alias") + self.process_anchor(u'*') + self.state = self.states.pop() + + def expect_scalar(self): + self.increase_indent(flow=True) + self.process_scalar() + self.indent = self.indents.pop() + self.state = self.states.pop() + + # Flow sequence handlers. 
+ + def expect_flow_sequence(self): + self.write_indicator(u'[', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_sequence_item + + def expect_first_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + def expect_flow_sequence_item(self): + if isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u']', False) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + self.states.append(self.expect_flow_sequence_item) + self.expect_node(sequence=True) + + # Flow mapping handlers. 
+ + def expect_flow_mapping(self): + self.write_indicator(u'{', True, whitespace=True) + self.flow_level += 1 + self.increase_indent(flow=True) + self.state = self.expect_first_flow_mapping_key + + def expect_first_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + self.write_indicator(u'}', False) + self.state = self.states.pop() + else: + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_key(self): + if isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.flow_level -= 1 + if self.canonical: + self.write_indicator(u',', False) + self.write_indent() + self.write_indicator(u'}', False) + self.state = self.states.pop() + else: + self.write_indicator(u',', False) + if self.canonical or self.column > self.best_width: + self.write_indent() + if not self.canonical and self.check_simple_key(): + self.states.append(self.expect_flow_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True) + self.states.append(self.expect_flow_mapping_value) + self.expect_node(mapping=True) + + def expect_flow_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + def expect_flow_mapping_value(self): + if self.canonical or self.column > self.best_width: + self.write_indent() + self.write_indicator(u':', True) + self.states.append(self.expect_flow_mapping_key) + self.expect_node(mapping=True) + + # Block sequence handlers. 
+ + def expect_block_sequence(self): + indentless = (self.mapping_context and not self.indention) + self.increase_indent(flow=False, indentless=indentless) + self.state = self.expect_first_block_sequence_item + + def expect_first_block_sequence_item(self): + return self.expect_block_sequence_item(first=True) + + def expect_block_sequence_item(self, first=False): + if not first and isinstance(self.event, SequenceEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + self.write_indicator(u'-', True, indention=True) + self.states.append(self.expect_block_sequence_item) + self.expect_node(sequence=True) + + # Block mapping handlers. + + def expect_block_mapping(self): + self.increase_indent(flow=False) + self.state = self.expect_first_block_mapping_key + + def expect_first_block_mapping_key(self): + return self.expect_block_mapping_key(first=True) + + def expect_block_mapping_key(self, first=False): + if not first and isinstance(self.event, MappingEndEvent): + self.indent = self.indents.pop() + self.state = self.states.pop() + else: + self.write_indent() + if self.check_simple_key(): + self.states.append(self.expect_block_mapping_simple_value) + self.expect_node(mapping=True, simple_key=True) + else: + self.write_indicator(u'?', True, indention=True) + self.states.append(self.expect_block_mapping_value) + self.expect_node(mapping=True) + + def expect_block_mapping_simple_value(self): + self.write_indicator(u':', False) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + def expect_block_mapping_value(self): + self.write_indent() + self.write_indicator(u':', True, indention=True) + self.states.append(self.expect_block_mapping_key) + self.expect_node(mapping=True) + + # Checkers. 
+ + def check_empty_sequence(self): + return (isinstance(self.event, SequenceStartEvent) and self.events + and isinstance(self.events[0], SequenceEndEvent)) + + def check_empty_mapping(self): + return (isinstance(self.event, MappingStartEvent) and self.events + and isinstance(self.events[0], MappingEndEvent)) + + def check_empty_document(self): + if not isinstance(self.event, DocumentStartEvent) or not self.events: + return False + event = self.events[0] + return (isinstance(event, ScalarEvent) and event.anchor is None + and event.tag is None and event.implicit and event.value == u'') + + def check_simple_key(self): + length = 0 + if isinstance(self.event, NodeEvent) and self.event.anchor is not None: + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + length += len(self.prepared_anchor) + if isinstance(self.event, (ScalarEvent, CollectionStartEvent)) \ + and self.event.tag is not None: + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(self.event.tag) + length += len(self.prepared_tag) + if isinstance(self.event, ScalarEvent): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + length += len(self.analysis.scalar) + return (length < 128 and (isinstance(self.event, AliasEvent) + or (isinstance(self.event, ScalarEvent) + and not self.analysis.empty and not self.analysis.multiline) + or self.check_empty_sequence() or self.check_empty_mapping())) + + # Anchor, Tag, and Scalar processors. 
+ + def process_anchor(self, indicator): + if self.event.anchor is None: + self.prepared_anchor = None + return + if self.prepared_anchor is None: + self.prepared_anchor = self.prepare_anchor(self.event.anchor) + if self.prepared_anchor: + self.write_indicator(indicator+self.prepared_anchor, True) + self.prepared_anchor = None + + def process_tag(self): + tag = self.event.tag + if isinstance(self.event, ScalarEvent): + if self.style is None: + self.style = self.choose_scalar_style() + if ((not self.canonical or tag is None) and + ((self.style == '' and self.event.implicit[0]) + or (self.style != '' and self.event.implicit[1]))): + self.prepared_tag = None + return + if self.event.implicit[0] and tag is None: + tag = u'!' + self.prepared_tag = None + else: + if (not self.canonical or tag is None) and self.event.implicit: + self.prepared_tag = None + return + if tag is None: + raise EmitterError("tag is not specified") + if self.prepared_tag is None: + self.prepared_tag = self.prepare_tag(tag) + if self.prepared_tag: + self.write_indicator(self.prepared_tag, True) + self.prepared_tag = None + + def choose_scalar_style(self): + if self.analysis is None: + self.analysis = self.analyze_scalar(self.event.value) + if self.event.style == '"' or self.canonical: + return '"' + if not self.event.style and self.event.implicit[0]: + if (not (self.simple_key_context and + (self.analysis.empty or self.analysis.multiline)) + and (self.flow_level and self.analysis.allow_flow_plain + or (not self.flow_level and self.analysis.allow_block_plain))): + return '' + if self.event.style and self.event.style in '|>': + if (not self.flow_level and not self.simple_key_context + and self.analysis.allow_block): + return self.event.style + if not self.event.style or self.event.style == '\'': + if (self.analysis.allow_single_quoted and + not (self.simple_key_context and self.analysis.multiline)): + return '\'' + return '"' + + def process_scalar(self): + if self.analysis is None: + self.analysis 
= self.analyze_scalar(self.event.value) + if self.style is None: + self.style = self.choose_scalar_style() + split = (not self.simple_key_context) + #if self.analysis.multiline and split \ + # and (not self.style or self.style in '\'\"'): + # self.write_indent() + if self.style == '"': + self.write_double_quoted(self.analysis.scalar, split) + elif self.style == '\'': + self.write_single_quoted(self.analysis.scalar, split) + elif self.style == '>': + self.write_folded(self.analysis.scalar) + elif self.style == '|': + self.write_literal(self.analysis.scalar) + else: + self.write_plain(self.analysis.scalar, split) + self.analysis = None + self.style = None + + # Analyzers. + + def prepare_version(self, version): + major, minor = version + if major != 1: + raise EmitterError("unsupported YAML version: %d.%d" % (major, minor)) + return u'%d.%d' % (major, minor) + + def prepare_tag_handle(self, handle): + if not handle: + raise EmitterError("tag handle must not be empty") + if handle[0] != u'!' 
or handle[-1] != u'!': + raise EmitterError("tag handle must start and end with '!': %r" + % (handle.encode('utf-8'))) + for ch in handle[1:-1]: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_'): + raise EmitterError("invalid character %r in the tag handle: %r" + % (ch.encode('utf-8'), handle.encode('utf-8'))) + return handle + + def prepare_tag_prefix(self, prefix): + if not prefix: + raise EmitterError("tag prefix must not be empty") + chunks = [] + start = end = 0 + if prefix[0] == u'!': + end = 1 + while end < len(prefix): + ch = prefix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?!:@&=+$,_.~*\'()[]': + end += 1 + else: + if start < end: + chunks.append(prefix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(prefix[start:end]) + return u''.join(chunks) + + def prepare_tag(self, tag): + if not tag: + raise EmitterError("tag must not be empty") + if tag == u'!': + return tag + handle = None + suffix = tag + prefixes = self.tag_prefixes.keys() + prefixes.sort() + for prefix in prefixes: + if tag.startswith(prefix) \ + and (prefix == u'!' or len(prefix) < len(tag)): + handle = self.tag_prefixes[prefix] + suffix = tag[len(prefix):] + chunks = [] + start = end = 0 + while end < len(suffix): + ch = suffix[end] + if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.~*\'()[]' \ + or (ch == u'!' 
and handle != u'!'): + end += 1 + else: + if start < end: + chunks.append(suffix[start:end]) + start = end = end+1 + data = ch.encode('utf-8') + for ch in data: + chunks.append(u'%%%02X' % ord(ch)) + if start < end: + chunks.append(suffix[start:end]) + suffix_text = u''.join(chunks) + if handle: + return u'%s%s' % (handle, suffix_text) + else: + return u'!<%s>' % suffix_text + + def prepare_anchor(self, anchor): + if not anchor: + raise EmitterError("anchor must not be empty") + for ch in anchor: + if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_'): + raise EmitterError("invalid character %r in the anchor: %r" + % (ch.encode('utf-8'), anchor.encode('utf-8'))) + return anchor + + def analyze_scalar(self, scalar): + + # Empty scalar is a special case. + if not scalar: + return ScalarAnalysis(scalar=scalar, empty=True, multiline=False, + allow_flow_plain=False, allow_block_plain=True, + allow_single_quoted=True, allow_double_quoted=True, + allow_block=False) + + # Indicators and special characters. + block_indicators = False + flow_indicators = False + line_breaks = False + special_characters = False + + # Important whitespace combinations. + leading_space = False + leading_break = False + trailing_space = False + trailing_break = False + break_space = False + space_break = False + + # Check document indicators. + if scalar.startswith(u'---') or scalar.startswith(u'...'): + block_indicators = True + flow_indicators = True + + # First character or preceded by a whitespace. + preceeded_by_whitespace = True + + # Last character or followed by a whitespace. + followed_by_whitespace = (len(scalar) == 1 or + scalar[1] in u'\0 \t\r\n\x85\u2028\u2029') + + # The previous character is a space. + previous_space = False + + # The previous character is a break. + previous_break = False + + index = 0 + while index < len(scalar): + ch = scalar[index] + + # Check for indicators. + if index == 0: + # Leading indicators are special characters. 
+ if ch in u'#,[]{}&*!|>\'\"%@`': + flow_indicators = True + block_indicators = True + if ch in u'?:': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'-' and followed_by_whitespace: + flow_indicators = True + block_indicators = True + else: + # Some indicators cannot appear within a scalar as well. + if ch in u',?[]{}': + flow_indicators = True + if ch == u':': + flow_indicators = True + if followed_by_whitespace: + block_indicators = True + if ch == u'#' and preceeded_by_whitespace: + flow_indicators = True + block_indicators = True + + # Check for line breaks, special, and unicode characters. + if ch in u'\n\x85\u2028\u2029': + line_breaks = True + if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'): + if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF': + unicode_characters = True + if not self.allow_unicode: + special_characters = True + else: + special_characters = True + + # Detect important whitespace combinations. + if ch == u' ': + if index == 0: + leading_space = True + if index == len(scalar)-1: + trailing_space = True + if previous_break: + break_space = True + previous_space = True + previous_break = False + elif ch in u'\n\x85\u2028\u2029': + if index == 0: + leading_break = True + if index == len(scalar)-1: + trailing_break = True + if previous_space: + space_break = True + previous_space = False + previous_break = True + else: + previous_space = False + previous_break = False + + # Prepare for the next character. + index += 1 + preceeded_by_whitespace = (ch in u'\0 \t\r\n\x85\u2028\u2029') + followed_by_whitespace = (index+1 >= len(scalar) or + scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029') + + # Let's decide what styles are allowed. + allow_flow_plain = True + allow_block_plain = True + allow_single_quoted = True + allow_double_quoted = True + allow_block = True + + # Leading and trailing whitespaces are bad for plain scalars. 
+ if (leading_space or leading_break + or trailing_space or trailing_break): + allow_flow_plain = allow_block_plain = False + + # We do not permit trailing spaces for block scalars. + if trailing_space: + allow_block = False + + # Spaces at the beginning of a new line are only acceptable for block + # scalars. + if break_space: + allow_flow_plain = allow_block_plain = allow_single_quoted = False + + # Spaces followed by breaks, as well as special character are only + # allowed for double quoted scalars. + if space_break or special_characters: + allow_flow_plain = allow_block_plain = \ + allow_single_quoted = allow_block = False + + # Although the plain scalar writer supports breaks, we never emit + # multiline plain scalars. + if line_breaks: + allow_flow_plain = allow_block_plain = False + + # Flow indicators are forbidden for flow plain scalars. + if flow_indicators: + allow_flow_plain = False + + # Block indicators are forbidden for block plain scalars. + if block_indicators: + allow_block_plain = False + + return ScalarAnalysis(scalar=scalar, + empty=False, multiline=line_breaks, + allow_flow_plain=allow_flow_plain, + allow_block_plain=allow_block_plain, + allow_single_quoted=allow_single_quoted, + allow_double_quoted=allow_double_quoted, + allow_block=allow_block) + + # Writers. + + def flush_stream(self): + if hasattr(self.stream, 'flush'): + self.stream.flush() + + def write_stream_start(self): + # Write BOM if needed. 
+ if self.encoding and self.encoding.startswith('utf-16'): + self.stream.write(u'\uFEFF'.encode(self.encoding)) + + def write_stream_end(self): + self.flush_stream() + + def write_indicator(self, indicator, need_whitespace, + whitespace=False, indention=False): + if self.whitespace or not need_whitespace: + data = indicator + else: + data = u' '+indicator + self.whitespace = whitespace + self.indention = self.indention and indention + self.column += len(data) + self.open_ended = False + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_indent(self): + indent = self.indent or 0 + if not self.indention or self.column > indent \ + or (self.column == indent and not self.whitespace): + self.write_line_break() + if self.column < indent: + self.whitespace = True + data = u' '*(indent-self.column) + self.column = indent + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_line_break(self, data=None): + if data is None: + data = self.best_line_break + self.whitespace = True + self.indention = True + self.line += 1 + self.column = 0 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + + def write_version_directive(self, version_text): + data = u'%%YAML %s' % version_text + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + def write_tag_directive(self, handle_text, prefix_text): + data = u'%%TAG %s %s' % (handle_text, prefix_text) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_line_break() + + # Scalar streams. 
+ + def write_single_quoted(self, text, split=True): + self.write_indicator(u'\'', True) + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if spaces: + if ch is None or ch != u' ': + if start+1 == end and self.column > self.best_width and split \ + and start != 0 and end != len(text): + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'': + if start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch == u'\'': + data = u'\'\'' + self.column += 2 + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + 1 + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + self.write_indicator(u'\'', False) + + ESCAPE_REPLACEMENTS = { + u'\0': u'0', + u'\x07': u'a', + u'\x08': u'b', + u'\x09': u't', + u'\x0A': u'n', + u'\x0B': u'v', + u'\x0C': u'f', + u'\x0D': u'r', + u'\x1B': u'e', + u'\"': u'\"', + u'\\': u'\\', + u'\x85': u'N', + u'\xA0': u'_', + u'\u2028': u'L', + u'\u2029': u'P', + } + + def write_double_quoted(self, text, split=True): + self.write_indicator(u'"', True) + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \ + or not (u'\x20' <= ch <= u'\x7E' + or (self.allow_unicode + and (u'\xA0' <= ch <= u'\uD7FF' + or u'\uE000' <= ch <= u'\uFFFD'))): + if 
start < end: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + if ch in self.ESCAPE_REPLACEMENTS: + data = u'\\'+self.ESCAPE_REPLACEMENTS[ch] + elif ch <= u'\xFF': + data = u'\\x%02X' % ord(ch) + elif ch <= u'\uFFFF': + data = u'\\u%04X' % ord(ch) + else: + data = u'\\U%08X' % ord(ch) + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end+1 + if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \ + and self.column+(end-start) > self.best_width and split: + data = text[start:end]+u'\\' + if start < end: + start = end + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.write_indent() + self.whitespace = False + self.indention = False + if text[start] == u' ': + data = u'\\' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + end += 1 + self.write_indicator(u'"', False) + + def determine_block_hints(self, text): + hints = u'' + if text: + if text[0] in u' \n\x85\u2028\u2029': + hints += unicode(self.best_indent) + if text[-1] not in u'\n\x85\u2028\u2029': + hints += u'-' + elif len(text) == 1 or text[-2] in u'\n\x85\u2028\u2029': + hints += u'+' + return hints + + def write_folded(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'>'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + leading_space = True + spaces = False + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + if not leading_space and ch is not None and ch != u' ' \ + and text[start] == u'\n': + self.write_line_break() + leading_space = (ch == u' ') + for br in text[start:end]: + if br == u'\n': + 
self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + elif spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width: + self.write_indent() + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + spaces = (ch == u' ') + end += 1 + + def write_literal(self, text): + hints = self.determine_block_hints(text) + self.write_indicator(u'|'+hints, True) + if hints[-1:] == u'+': + self.open_ended = True + self.write_line_break() + breaks = True + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = text[end] + if breaks: + if ch is None or ch not in u'\n\x85\u2028\u2029': + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + if ch is not None: + self.write_indent() + start = end + else: + if ch is None or ch in u'\n\x85\u2028\u2029': + data = text[start:end] + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + if ch is None: + self.write_line_break() + start = end + if ch is not None: + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + + def write_plain(self, text, split=True): + if self.root_context: + self.open_ended = True + if not text: + return + if not self.whitespace: + data = u' ' + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + self.whitespace = False + self.indention = False + spaces = False + breaks = False + start = end = 0 + while end <= len(text): + ch = None + if end < len(text): + ch = 
text[end] + if spaces: + if ch != u' ': + if start+1 == end and self.column > self.best_width and split: + self.write_indent() + self.whitespace = False + self.indention = False + else: + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + elif breaks: + if ch not in u'\n\x85\u2028\u2029': + if text[start] == u'\n': + self.write_line_break() + for br in text[start:end]: + if br == u'\n': + self.write_line_break() + else: + self.write_line_break(br) + self.write_indent() + self.whitespace = False + self.indention = False + start = end + else: + if ch is None or ch in u' \n\x85\u2028\u2029': + data = text[start:end] + self.column += len(data) + if self.encoding: + data = data.encode(self.encoding) + self.stream.write(data) + start = end + if ch is not None: + spaces = (ch == u' ') + breaks = (ch in u'\n\x85\u2028\u2029') + end += 1 + diff --git a/tablib/packages/yaml/error.py b/tablib/packages/yaml/error.py new file mode 100644 index 0000000..577686d --- /dev/null +++ b/tablib/packages/yaml/error.py @@ -0,0 +1,75 @@ + +__all__ = ['Mark', 'YAMLError', 'MarkedYAMLError'] + +class Mark(object): + + def __init__(self, name, index, line, column, buffer, pointer): + self.name = name + self.index = index + self.line = line + self.column = column + self.buffer = buffer + self.pointer = pointer + + def get_snippet(self, indent=4, max_length=75): + if self.buffer is None: + return None + head = '' + start = self.pointer + while start > 0 and self.buffer[start-1] not in u'\0\r\n\x85\u2028\u2029': + start -= 1 + if self.pointer-start > max_length/2-1: + head = ' ... ' + start += 5 + break + tail = '' + end = self.pointer + while end < len(self.buffer) and self.buffer[end] not in u'\0\r\n\x85\u2028\u2029': + end += 1 + if end-self.pointer > max_length/2-1: + tail = ' ... 
' + end -= 5 + break + snippet = self.buffer[start:end].encode('utf-8') + return ' '*indent + head + snippet + tail + '\n' \ + + ' '*(indent+self.pointer-start+len(head)) + '^' + + def __str__(self): + snippet = self.get_snippet() + where = " in \"%s\", line %d, column %d" \ + % (self.name, self.line+1, self.column+1) + if snippet is not None: + where += ":\n"+snippet + return where + +class YAMLError(Exception): + pass + +class MarkedYAMLError(YAMLError): + + def __init__(self, context=None, context_mark=None, + problem=None, problem_mark=None, note=None): + self.context = context + self.context_mark = context_mark + self.problem = problem + self.problem_mark = problem_mark + self.note = note + + def __str__(self): + lines = [] + if self.context is not None: + lines.append(self.context) + if self.context_mark is not None \ + and (self.problem is None or self.problem_mark is None + or self.context_mark.name != self.problem_mark.name + or self.context_mark.line != self.problem_mark.line + or self.context_mark.column != self.problem_mark.column): + lines.append(str(self.context_mark)) + if self.problem is not None: + lines.append(self.problem) + if self.problem_mark is not None: + lines.append(str(self.problem_mark)) + if self.note is not None: + lines.append(self.note) + return '\n'.join(lines) + diff --git a/tablib/packages/yaml/events.py b/tablib/packages/yaml/events.py new file mode 100644 index 0000000..f79ad38 --- /dev/null +++ b/tablib/packages/yaml/events.py @@ -0,0 +1,86 @@ + +# Abstract classes. 
+ +class Event(object): + def __init__(self, start_mark=None, end_mark=None): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in ['anchor', 'tag', 'implicit', 'value'] + if hasattr(self, key)] + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +class NodeEvent(Event): + def __init__(self, anchor, start_mark=None, end_mark=None): + self.anchor = anchor + self.start_mark = start_mark + self.end_mark = end_mark + +class CollectionStartEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, start_mark=None, end_mark=None, + flow_style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class CollectionEndEvent(Event): + pass + +# Implementations. + +class StreamStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndEvent(Event): + pass + +class DocumentStartEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None, version=None, tags=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + self.version = version + self.tags = tags + +class DocumentEndEvent(Event): + def __init__(self, start_mark=None, end_mark=None, + explicit=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.explicit = explicit + +class AliasEvent(NodeEvent): + pass + +class ScalarEvent(NodeEvent): + def __init__(self, anchor, tag, implicit, value, + start_mark=None, end_mark=None, style=None): + self.anchor = anchor + self.tag = tag + self.implicit = implicit + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class SequenceStartEvent(CollectionStartEvent): + pass 
+ +class SequenceEndEvent(CollectionEndEvent): + pass + +class MappingStartEvent(CollectionStartEvent): + pass + +class MappingEndEvent(CollectionEndEvent): + pass + diff --git a/tablib/packages/yaml/loader.py b/tablib/packages/yaml/loader.py new file mode 100644 index 0000000..293ff46 --- /dev/null +++ b/tablib/packages/yaml/loader.py @@ -0,0 +1,40 @@ + +__all__ = ['BaseLoader', 'SafeLoader', 'Loader'] + +from reader import * +from scanner import * +from parser import * +from composer import * +from constructor import * +from resolver import * + +class BaseLoader(Reader, Scanner, Parser, Composer, BaseConstructor, BaseResolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + BaseConstructor.__init__(self) + BaseResolver.__init__(self) + +class SafeLoader(Reader, Scanner, Parser, Composer, SafeConstructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + SafeConstructor.__init__(self) + Resolver.__init__(self) + +class Loader(Reader, Scanner, Parser, Composer, Constructor, Resolver): + + def __init__(self, stream): + Reader.__init__(self, stream) + Scanner.__init__(self) + Parser.__init__(self) + Composer.__init__(self) + Constructor.__init__(self) + Resolver.__init__(self) + diff --git a/tablib/packages/yaml/nodes.py b/tablib/packages/yaml/nodes.py new file mode 100644 index 0000000..c4f070c --- /dev/null +++ b/tablib/packages/yaml/nodes.py @@ -0,0 +1,49 @@ + +class Node(object): + def __init__(self, tag, value, start_mark, end_mark): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + value = self.value + #if isinstance(value, list): + # if len(value) == 0: + # value = '' + # elif len(value) == 1: + # value = '<1 item>' + # else: + # value = '<%d items>' % len(value) + #else: + # if len(value) > 75: + # 
value = repr(value[:70]+u' ... ') + # else: + # value = repr(value) + value = repr(value) + return '%s(tag=%r, value=%s)' % (self.__class__.__name__, self.tag, value) + +class ScalarNode(Node): + id = 'scalar' + def __init__(self, tag, value, + start_mark=None, end_mark=None, style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + +class CollectionNode(Node): + def __init__(self, tag, value, + start_mark=None, end_mark=None, flow_style=None): + self.tag = tag + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + self.flow_style = flow_style + +class SequenceNode(CollectionNode): + id = 'sequence' + +class MappingNode(CollectionNode): + id = 'mapping' + diff --git a/tablib/packages/yaml/parser.py b/tablib/packages/yaml/parser.py new file mode 100644 index 0000000..b6a7416 --- /dev/null +++ b/tablib/packages/yaml/parser.py @@ -0,0 +1,584 @@ + +# The following YAML grammar is LL(1) and is parsed by a recursive descent +# parser. +# +# stream ::= STREAM-START implicit_document? explicit_document* STREAM-END +# implicit_document ::= block_node DOCUMENT-END* +# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* +# block_node_or_indentless_sequence ::= +# ALIAS +# | properties (block_content | indentless_block_sequence)? +# | block_content +# | indentless_block_sequence +# block_node ::= ALIAS +# | properties block_content? +# | block_content +# flow_node ::= ALIAS +# | properties flow_content? +# | flow_content +# properties ::= TAG ANCHOR? | ANCHOR TAG? 
+# block_content ::= block_collection | flow_collection | SCALAR +# flow_content ::= flow_collection | SCALAR +# block_collection ::= block_sequence | block_mapping +# flow_collection ::= flow_sequence | flow_mapping +# block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END +# indentless_sequence ::= (BLOCK-ENTRY block_node?)+ +# block_mapping ::= BLOCK-MAPPING_START +# ((KEY block_node_or_indentless_sequence?)? +# (VALUE block_node_or_indentless_sequence?)?)* +# BLOCK-END +# flow_sequence ::= FLOW-SEQUENCE-START +# (flow_sequence_entry FLOW-ENTRY)* +# flow_sequence_entry? +# FLOW-SEQUENCE-END +# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# flow_mapping ::= FLOW-MAPPING-START +# (flow_mapping_entry FLOW-ENTRY)* +# flow_mapping_entry? +# FLOW-MAPPING-END +# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? +# +# FIRST sets: +# +# stream: { STREAM-START } +# explicit_document: { DIRECTIVE DOCUMENT-START } +# implicit_document: FIRST(block_node) +# block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR } +# block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# block_sequence: { BLOCK-SEQUENCE-START } +# block_mapping: { BLOCK-MAPPING-START } +# block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY } +# indentless_sequence: { ENTRY } +# flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START } +# flow_sequence: { FLOW-SEQUENCE-START } +# flow_mapping: { FLOW-MAPPING-START } +# flow_sequence_entry: { ALIAS ANCHOR 
TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } +# flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY } + +__all__ = ['Parser', 'ParserError'] + +from error import MarkedYAMLError +from tokens import * +from events import * +from scanner import * + +class ParserError(MarkedYAMLError): + pass + +class Parser(object): + # Since writing a recursive-descendant parser is a straightforward task, we + # do not give many comments here. + + DEFAULT_TAGS = { + u'!': u'!', + u'!!': u'tag:yaml.org,2002:', + } + + def __init__(self): + self.current_event = None + self.yaml_version = None + self.tag_handles = {} + self.states = [] + self.marks = [] + self.state = self.parse_stream_start + + def check_event(self, *choices): + # Check the type of the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + if self.current_event is not None: + if not choices: + return True + for choice in choices: + if isinstance(self.current_event, choice): + return True + return False + + def peek_event(self): + # Get the next event. + if self.current_event is None: + if self.state: + self.current_event = self.state() + return self.current_event + + def get_event(self): + # Get the next event and proceed further. + if self.current_event is None: + if self.state: + self.current_event = self.state() + value = self.current_event + self.current_event = None + return value + + # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END + # implicit_document ::= block_node DOCUMENT-END* + # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END* + + def parse_stream_start(self): + + # Parse the stream start. + token = self.get_token() + event = StreamStartEvent(token.start_mark, token.end_mark, + encoding=token.encoding) + + # Prepare the next state. 
+ self.state = self.parse_implicit_document_start + + return event + + def parse_implicit_document_start(self): + + # Parse an implicit document. + if not self.check_token(DirectiveToken, DocumentStartToken, + StreamEndToken): + self.tag_handles = self.DEFAULT_TAGS + token = self.peek_token() + start_mark = end_mark = token.start_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=False) + + # Prepare the next state. + self.states.append(self.parse_document_end) + self.state = self.parse_block_node + + return event + + else: + return self.parse_document_start() + + def parse_document_start(self): + + # Parse any extra document end indicators. + while self.check_token(DocumentEndToken): + self.get_token() + + # Parse an explicit document. + if not self.check_token(StreamEndToken): + token = self.peek_token() + start_mark = token.start_mark + version, tags = self.process_directives() + if not self.check_token(DocumentStartToken): + raise ParserError(None, None, + "expected '', but found %r" + % self.peek_token().id, + self.peek_token().start_mark) + token = self.get_token() + end_mark = token.end_mark + event = DocumentStartEvent(start_mark, end_mark, + explicit=True, version=version, tags=tags) + self.states.append(self.parse_document_end) + self.state = self.parse_document_content + else: + # Parse the end of the stream. + token = self.get_token() + event = StreamEndEvent(token.start_mark, token.end_mark) + assert not self.states + assert not self.marks + self.state = None + return event + + def parse_document_end(self): + + # Parse the document end. + token = self.peek_token() + start_mark = end_mark = token.start_mark + explicit = False + if self.check_token(DocumentEndToken): + token = self.get_token() + end_mark = token.end_mark + explicit = True + event = DocumentEndEvent(start_mark, end_mark, + explicit=explicit) + + # Prepare the next state. 
+ self.state = self.parse_document_start + + return event + + def parse_document_content(self): + if self.check_token(DirectiveToken, + DocumentStartToken, DocumentEndToken, StreamEndToken): + event = self.process_empty_scalar(self.peek_token().start_mark) + self.state = self.states.pop() + return event + else: + return self.parse_block_node() + + def process_directives(self): + self.yaml_version = None + self.tag_handles = {} + while self.check_token(DirectiveToken): + token = self.get_token() + if token.name == u'YAML': + if self.yaml_version is not None: + raise ParserError(None, None, + "found duplicate YAML directive", token.start_mark) + major, minor = token.value + if major != 1: + raise ParserError(None, None, + "found incompatible YAML document (version 1.* is required)", + token.start_mark) + self.yaml_version = token.value + elif token.name == u'TAG': + handle, prefix = token.value + if handle in self.tag_handles: + raise ParserError(None, None, + "duplicate tag handle %r" % handle.encode('utf-8'), + token.start_mark) + self.tag_handles[handle] = prefix + if self.tag_handles: + value = self.yaml_version, self.tag_handles.copy() + else: + value = self.yaml_version, None + for key in self.DEFAULT_TAGS: + if key not in self.tag_handles: + self.tag_handles[key] = self.DEFAULT_TAGS[key] + return value + + # block_node_or_indentless_sequence ::= ALIAS + # | properties (block_content | indentless_block_sequence)? + # | block_content + # | indentless_block_sequence + # block_node ::= ALIAS + # | properties block_content? + # | block_content + # flow_node ::= ALIAS + # | properties flow_content? + # | flow_content + # properties ::= TAG ANCHOR? | ANCHOR TAG? 
+ # block_content ::= block_collection | flow_collection | SCALAR + # flow_content ::= flow_collection | SCALAR + # block_collection ::= block_sequence | block_mapping + # flow_collection ::= flow_sequence | flow_mapping + + def parse_block_node(self): + return self.parse_node(block=True) + + def parse_flow_node(self): + return self.parse_node() + + def parse_block_node_or_indentless_sequence(self): + return self.parse_node(block=True, indentless_sequence=True) + + def parse_node(self, block=False, indentless_sequence=False): + if self.check_token(AliasToken): + token = self.get_token() + event = AliasEvent(token.value, token.start_mark, token.end_mark) + self.state = self.states.pop() + else: + anchor = None + tag = None + start_mark = end_mark = tag_mark = None + if self.check_token(AnchorToken): + token = self.get_token() + start_mark = token.start_mark + end_mark = token.end_mark + anchor = token.value + if self.check_token(TagToken): + token = self.get_token() + tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + elif self.check_token(TagToken): + token = self.get_token() + start_mark = tag_mark = token.start_mark + end_mark = token.end_mark + tag = token.value + if self.check_token(AnchorToken): + token = self.get_token() + end_mark = token.end_mark + anchor = token.value + if tag is not None: + handle, suffix = tag + if handle is not None: + if handle not in self.tag_handles: + raise ParserError("while parsing a node", start_mark, + "found undefined tag handle %r" % handle.encode('utf-8'), + tag_mark) + tag = self.tag_handles[handle]+suffix + else: + tag = suffix + #if tag == u'!': + # raise ParserError("while parsing a node", start_mark, + # "found non-specific tag '!'", tag_mark, + # "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.") + if start_mark is None: + start_mark = end_mark = self.peek_token().start_mark + event = None + implicit = (tag is None or tag == u'!') + if indentless_sequence and 
self.check_token(BlockEntryToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark) + self.state = self.parse_indentless_sequence_entry + else: + if self.check_token(ScalarToken): + token = self.get_token() + end_mark = token.end_mark + if (token.plain and tag is None) or tag == u'!': + implicit = (True, False) + elif tag is None: + implicit = (False, True) + else: + implicit = (False, False) + event = ScalarEvent(anchor, tag, implicit, token.value, + start_mark, end_mark, style=token.style) + self.state = self.states.pop() + elif self.check_token(FlowSequenceStartToken): + end_mark = self.peek_token().end_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_sequence_first_entry + elif self.check_token(FlowMappingStartToken): + end_mark = self.peek_token().end_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=True) + self.state = self.parse_flow_mapping_first_key + elif block and self.check_token(BlockSequenceStartToken): + end_mark = self.peek_token().start_mark + event = SequenceStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_sequence_first_entry + elif block and self.check_token(BlockMappingStartToken): + end_mark = self.peek_token().start_mark + event = MappingStartEvent(anchor, tag, implicit, + start_mark, end_mark, flow_style=False) + self.state = self.parse_block_mapping_first_key + elif anchor is not None or tag is not None: + # Empty scalars are allowed even if a tag or an anchor is + # specified. 
+ event = ScalarEvent(anchor, tag, (implicit, False), u'', + start_mark, end_mark) + self.state = self.states.pop() + else: + if block: + node = 'block' + else: + node = 'flow' + token = self.peek_token() + raise ParserError("while parsing a %s node" % node, start_mark, + "expected the node content, but found %r" % token.id, + token.start_mark) + return event + + # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END + + def parse_block_sequence_first_entry(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_sequence_entry() + + def parse_block_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, BlockEndToken): + self.states.append(self.parse_block_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_block_sequence_entry + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block collection", self.marks[-1], + "expected , but found %r" % token.id, token.start_mark) + token = self.get_token() + event = SequenceEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + # indentless_sequence ::= (BLOCK-ENTRY block_node?)+ + + def parse_indentless_sequence_entry(self): + if self.check_token(BlockEntryToken): + token = self.get_token() + if not self.check_token(BlockEntryToken, + KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_indentless_sequence_entry) + return self.parse_block_node() + else: + self.state = self.parse_indentless_sequence_entry + return self.process_empty_scalar(token.end_mark) + token = self.peek_token() + event = SequenceEndEvent(token.start_mark, token.start_mark) + self.state = self.states.pop() + return event + + # block_mapping ::= BLOCK-MAPPING_START + # ((KEY 
block_node_or_indentless_sequence?)? + # (VALUE block_node_or_indentless_sequence?)?)* + # BLOCK-END + + def parse_block_mapping_first_key(self): + token = self.get_token() + self.marks.append(token.start_mark) + return self.parse_block_mapping_key() + + def parse_block_mapping_key(self): + if self.check_token(KeyToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_value) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_value + return self.process_empty_scalar(token.end_mark) + if not self.check_token(BlockEndToken): + token = self.peek_token() + raise ParserError("while parsing a block mapping", self.marks[-1], + "expected , but found %r" % token.id, token.start_mark) + token = self.get_token() + event = MappingEndEvent(token.start_mark, token.end_mark) + self.state = self.states.pop() + self.marks.pop() + return event + + def parse_block_mapping_value(self): + if self.check_token(ValueToken): + token = self.get_token() + if not self.check_token(KeyToken, ValueToken, BlockEndToken): + self.states.append(self.parse_block_mapping_key) + return self.parse_block_node_or_indentless_sequence() + else: + self.state = self.parse_block_mapping_key + return self.process_empty_scalar(token.end_mark) + else: + self.state = self.parse_block_mapping_key + token = self.peek_token() + return self.process_empty_scalar(token.start_mark) + + # flow_sequence ::= FLOW-SEQUENCE-START + # (flow_sequence_entry FLOW-ENTRY)* + # flow_sequence_entry? + # FLOW-SEQUENCE-END + # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)? + # + # Note that while production rules for both flow_sequence_entry and + # flow_mapping_entry are equal, their interpretations are different. + # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?` + # generate an inline mapping (set syntax). 

    def parse_flow_sequence_first_entry(self):
        """Consume the flow-sequence start token, remember its mark and
        parse the first entry."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_sequence_entry(first=True)

    def parse_flow_sequence_entry(self, first=False):
        """Parse one entry of a flow sequence, or its end.

        Every entry but the first must be preceded by a flow entry token,
        otherwise ParserError is raised.  A key token starts a single-pair
        mapping inside the sequence.
        """
        if not self.check_token(FlowSequenceEndToken):
            if not first:
                if self.check_token(FlowEntryToken):
                    self.get_token()
                else:
                    token = self.peek_token()
                    raise ParserError("while parsing a flow sequence", self.marks[-1],
                            "expected ',' or ']', but got %r" % token.id, token.start_mark)

            if self.check_token(KeyToken):
                # The key token is only peeked here; it is consumed by
                # parse_flow_sequence_entry_mapping_key.
                token = self.peek_token()
                event = MappingStartEvent(None, None, True,
                        token.start_mark, token.end_mark,
                        flow_style=True)
                self.state = self.parse_flow_sequence_entry_mapping_key
                return event
            elif not self.check_token(FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry)
                return self.parse_flow_node()
        token = self.get_token()
        event = SequenceEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_sequence_entry_mapping_key(self):
        """Parse the key of a single-pair mapping inside a flow sequence.

        Consumes the key token; a key with no content yields an empty scalar.
        """
        token = self.get_token()
        if not self.check_token(ValueToken,
                FlowEntryToken, FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry_mapping_value)
            return self.parse_flow_node()
        else:
            self.state = self.parse_flow_sequence_entry_mapping_value
            return self.process_empty_scalar(token.end_mark)

    def parse_flow_sequence_entry_mapping_value(self):
        """Parse the value of a single-pair mapping inside a flow sequence.

        A missing value token, or a value token with no content, yields an
        empty scalar.
        """
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
                self.states.append(self.parse_flow_sequence_entry_mapping_end)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_sequence_entry_mapping_end
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_flow_sequence_entry_mapping_end
            token = self.peek_token()
            return self.process_empty_scalar(token.start_mark)

    def parse_flow_sequence_entry_mapping_end(self):
        """Close the single-pair mapping and go back to the sequence.

        No token is consumed; the event is zero-width at the next token.
        """
        self.state = self.parse_flow_sequence_entry
        token = self.peek_token()
        return MappingEndEvent(token.start_mark, token.start_mark)

    # flow_mapping  ::= FLOW-MAPPING-START
    #                   (flow_mapping_entry FLOW-ENTRY)*
    #                   flow_mapping_entry?
    #                   FLOW-MAPPING-END
    # flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?

    def parse_flow_mapping_first_key(self):
        """Consume the flow-mapping start token, remember its mark and parse
        the first key."""
        token = self.get_token()
        self.marks.append(token.start_mark)
        return self.parse_flow_mapping_key(first=True)

    def parse_flow_mapping_key(self, first=False):
        """Parse a key of a flow mapping, or the end of the mapping.

        Every entry but the first must be preceded by a flow entry token,
        otherwise ParserError is raised.  A node that is not introduced by a
        key token is taken as a key whose value is an empty scalar.
        """
        if not self.check_token(FlowMappingEndToken):
            if not first:
                if self.check_token(FlowEntryToken):
                    self.get_token()
                else:
                    token = self.peek_token()
                    raise ParserError("while parsing a flow mapping", self.marks[-1],
                            "expected ',' or '}', but got %r" % token.id, token.start_mark)
            if self.check_token(KeyToken):
                token = self.get_token()
                if not self.check_token(ValueToken,
                        FlowEntryToken, FlowMappingEndToken):
                    self.states.append(self.parse_flow_mapping_value)
                    return self.parse_flow_node()
                else:
                    self.state = self.parse_flow_mapping_value
                    return self.process_empty_scalar(token.end_mark)
            elif not self.check_token(FlowMappingEndToken):
                self.states.append(self.parse_flow_mapping_empty_value)
                return self.parse_flow_node()
        token = self.get_token()
        event = MappingEndEvent(token.start_mark, token.end_mark)
        self.state = self.states.pop()
        self.marks.pop()
        return event

    def parse_flow_mapping_value(self):
        """Parse the value that follows a flow mapping key.

        A missing value token, or a value token with no content, yields an
        empty scalar.
        """
        if self.check_token(ValueToken):
            token = self.get_token()
            if not self.check_token(FlowEntryToken, FlowMappingEndToken):
                self.states.append(self.parse_flow_mapping_key)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_mapping_key
                return self.process_empty_scalar(token.end_mark)
        else:
            self.state = self.parse_flow_mapping_key
            token = self.peek_token()
            return self.process_empty_scalar(token.start_mark)

    def parse_flow_mapping_empty_value(self):
        """Emit the empty value of a flow mapping key given without a key
        token."""
        self.state = self.parse_flow_mapping_key
        return self.process_empty_scalar(self.peek_token().start_mark)

    def process_empty_scalar(self, mark):
        """Return a zero-width ScalarEvent with an empty value at `mark`."""
        return ScalarEvent(None, None, (True, False), u'', mark, mark)

diff --git a/tablib/packages/yaml/reader.py b/tablib/packages/yaml/reader.py
new file mode 100644
index 0000000..1e7a4db
--- /dev/null
+++ b/tablib/packages/yaml/reader.py
@@ -0,0 +1,225 @@
# This module contains abstractions for the input stream. You don't have to
# look any further; there is no pretty code here.
#
# Two classes are used here.
#
#   Mark (imported from the `error` module)
# It's just a record and its only use is producing nice error messages.
# Parser does not use it for any other purposes.
#
#   Reader(stream)
# Reader determines the encoding of the input and converts it to unicode.
# Reader provides the following methods and attributes:
#   reader.peek(index=0) - return the character `index` positions ahead.
#   reader.prefix(length=1) - return the next `length` characters.
#   reader.forward(length=1) - move the current position forward by `length`
#       characters.
#   reader.index - the number of the current character.
#   reader.line, reader.column - the line and the column of the current character.

__all__ = ['Reader', 'ReaderError']

from error import YAMLError, Mark

import codecs, re

# Unfortunately, codec functions in Python 2.3 do not support the `finish`
# argument, so we have to write our own wrappers.
+ +try: + codecs.utf_8_decode('', 'strict', False) + from codecs import utf_8_decode, utf_16_le_decode, utf_16_be_decode + +except TypeError: + + def utf_16_le_decode(data, errors, finish=False): + if not finish and len(data) % 2 == 1: + data = data[:-1] + return codecs.utf_16_le_decode(data, errors) + + def utf_16_be_decode(data, errors, finish=False): + if not finish and len(data) % 2 == 1: + data = data[:-1] + return codecs.utf_16_be_decode(data, errors) + + def utf_8_decode(data, errors, finish=False): + if not finish: + # We are trying to remove a possible incomplete multibyte character + # from the suffix of the data. + # The first byte of a multi-byte sequence is in the range 0xc0 to 0xfd. + # All further bytes are in the range 0x80 to 0xbf. + # UTF-8 encoded UCS characters may be up to six bytes long. + count = 0 + while count < 5 and count < len(data) \ + and '\x80' <= data[-count-1] <= '\xBF': + count -= 1 + if count < 5 and count < len(data) \ + and '\xC0' <= data[-count-1] <= '\xFD': + data = data[:-count-1] + return codecs.utf_8_decode(data, errors) + +class ReaderError(YAMLError): + + def __init__(self, name, position, character, encoding, reason): + self.name = name + self.character = character + self.position = position + self.encoding = encoding + self.reason = reason + + def __str__(self): + if isinstance(self.character, str): + return "'%s' codec can't decode byte #x%02x: %s\n" \ + " in \"%s\", position %d" \ + % (self.encoding, ord(self.character), self.reason, + self.name, self.position) + else: + return "unacceptable character #x%04x: %s\n" \ + " in \"%s\", position %d" \ + % (self.character, self.reason, + self.name, self.position) + +class Reader(object): + # Reader: + # - determines the data encoding and converts it to unicode, + # - checks if characters are in allowed range, + # - adds '\0' to the end. 
+ + # Reader accepts + # - a `str` object, + # - a `unicode` object, + # - a file-like object with its `read` method returning `str`, + # - a file-like object with its `read` method returning `unicode`. + + # Yeah, it's ugly and slow. + + def __init__(self, stream): + self.name = None + self.stream = None + self.stream_pointer = 0 + self.eof = True + self.buffer = u'' + self.pointer = 0 + self.raw_buffer = None + self.raw_decode = None + self.encoding = None + self.index = 0 + self.line = 0 + self.column = 0 + if isinstance(stream, unicode): + self.name = "" + self.check_printable(stream) + self.buffer = stream+u'\0' + elif isinstance(stream, str): + self.name = "" + self.raw_buffer = stream + self.determine_encoding() + else: + self.stream = stream + self.name = getattr(stream, 'name', "") + self.eof = False + self.raw_buffer = '' + self.determine_encoding() + + def peek(self, index=0): + try: + return self.buffer[self.pointer+index] + except IndexError: + self.update(index+1) + return self.buffer[self.pointer+index] + + def prefix(self, length=1): + if self.pointer+length >= len(self.buffer): + self.update(length) + return self.buffer[self.pointer:self.pointer+length] + + def forward(self, length=1): + if self.pointer+length+1 >= len(self.buffer): + self.update(length+1) + while length: + ch = self.buffer[self.pointer] + self.pointer += 1 + self.index += 1 + if ch in u'\n\x85\u2028\u2029' \ + or (ch == u'\r' and self.buffer[self.pointer] != u'\n'): + self.line += 1 + self.column = 0 + elif ch != u'\uFEFF': + self.column += 1 + length -= 1 + + def get_mark(self): + if self.stream is None: + return Mark(self.name, self.index, self.line, self.column, + self.buffer, self.pointer) + else: + return Mark(self.name, self.index, self.line, self.column, + None, None) + + def determine_encoding(self): + while not self.eof and len(self.raw_buffer) < 2: + self.update_raw() + if not isinstance(self.raw_buffer, unicode): + if self.raw_buffer.startswith(codecs.BOM_UTF16_LE): + 
self.raw_decode = utf_16_le_decode + self.encoding = 'utf-16-le' + elif self.raw_buffer.startswith(codecs.BOM_UTF16_BE): + self.raw_decode = utf_16_be_decode + self.encoding = 'utf-16-be' + else: + self.raw_decode = utf_8_decode + self.encoding = 'utf-8' + self.update(1) + + NON_PRINTABLE = re.compile(u'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]') + def check_printable(self, data): + match = self.NON_PRINTABLE.search(data) + if match: + character = match.group() + position = self.index+(len(self.buffer)-self.pointer)+match.start() + raise ReaderError(self.name, position, ord(character), + 'unicode', "special characters are not allowed") + + def update(self, length): + if self.raw_buffer is None: + return + self.buffer = self.buffer[self.pointer:] + self.pointer = 0 + while len(self.buffer) < length: + if not self.eof: + self.update_raw() + if self.raw_decode is not None: + try: + data, converted = self.raw_decode(self.raw_buffer, + 'strict', self.eof) + except UnicodeDecodeError, exc: + character = exc.object[exc.start] + if self.stream is not None: + position = self.stream_pointer-len(self.raw_buffer)+exc.start + else: + position = exc.start + raise ReaderError(self.name, position, character, + exc.encoding, exc.reason) + else: + data = self.raw_buffer + converted = len(data) + self.check_printable(data) + self.buffer += data + self.raw_buffer = self.raw_buffer[converted:] + if self.eof: + self.buffer += u'\0' + self.raw_buffer = None + break + + def update_raw(self, size=1024): + data = self.stream.read(size) + if data: + self.raw_buffer += data + self.stream_pointer += len(data) + else: + self.eof = True + +#try: +# import psyco +# psyco.bind(Reader) +#except ImportError: +# pass + diff --git a/tablib/packages/yaml/representer.py b/tablib/packages/yaml/representer.py new file mode 100644 index 0000000..f5606ec --- /dev/null +++ b/tablib/packages/yaml/representer.py @@ -0,0 +1,489 @@ + +__all__ = ['BaseRepresenter', 'SafeRepresenter', 'Representer', + 
    'RepresenterError']

from error import *
from nodes import *

import datetime

try:
    set
except NameError:
    from sets import Set as set

import sys, copy_reg, types

class RepresenterError(YAMLError):
    """Raised when an object cannot be represented."""
    pass

class BaseRepresenter(object):
    """Turns Python objects into nodes using registered representer
    functions."""

    # Representers looked up by the exact type of the object.
    yaml_representers = {}
    # Representers looked up through the base types of the object.
    yaml_multi_representers = {}

    def __init__(self, default_style=None, default_flow_style=None):
        self.default_style = default_style
        self.default_flow_style = default_flow_style
        # Maps id(object) to the node already built for it.
        self.represented_objects = {}
        # Holds the objects whose ids are used as keys above.
        self.object_keeper = []
        self.alias_key = None

    def represent(self, data):
        """Build the node for `data`, pass it to `self.serialize` and reset
        the per-document state."""
        node = self.represent_data(data)
        self.serialize(node)
        self.represented_objects = {}
        self.object_keeper = []
        self.alias_key = None

    def get_classobj_bases(self, cls):
        """Return `cls` followed by all of its bases, depth first."""
        bases = [cls]
        for base in cls.__bases__:
            bases.extend(self.get_classobj_bases(base))
        return bases

    def represent_data(self, data):
        """Return the node for `data`.

        An object that was already represented (and is not excluded by
        `ignore_aliases`) gets the same node again.  Otherwise the
        representer is chosen in this order: exact type, first matching base
        type, the `None` multi representer, the `None` representer, and
        finally a plain ScalarNode of `unicode(data)`.
        """
        if self.ignore_aliases(data):
            self.alias_key = None
        else:
            self.alias_key = id(data)
        if self.alias_key is not None:
            if self.alias_key in self.represented_objects:
                node = self.represented_objects[self.alias_key]
                #if node is None:
                #    raise RepresenterError("recursive objects are not allowed: %r" % data)
                return node
            #self.represented_objects[alias_key] = None
            self.object_keeper.append(data)
        data_types = type(data).__mro__
        if type(data) is types.InstanceType:
            data_types = self.get_classobj_bases(data.__class__)+list(data_types)
        if data_types[0] in self.yaml_representers:
            node = self.yaml_representers[data_types[0]](self, data)
        else:
            for data_type in data_types:
                if data_type in self.yaml_multi_representers:
                    node = self.yaml_multi_representers[data_type](self, data)
                    break
            else:
                if None in self.yaml_multi_representers:
                    node = self.yaml_multi_representers[None](self, data)
                elif None in self.yaml_representers:
                    node = self.yaml_representers[None](self, data)
                else:
                    node = ScalarNode(None, unicode(data))
        #if alias_key is not None:
        #    self.represented_objects[alias_key] = node
        return node

    def add_representer(cls, data_type, representer):
        """Register `representer` for objects whose type is exactly
        `data_type`."""
        # Copy the inherited table first so the parent class is not changed.
        if not 'yaml_representers' in cls.__dict__:
            cls.yaml_representers = cls.yaml_representers.copy()
        cls.yaml_representers[data_type] = representer
    add_representer = classmethod(add_representer)

    def add_multi_representer(cls, data_type, representer):
        """Register `representer` for `data_type` in the table that is
        searched through the base types of an object."""
        # Copy the inherited table first so the parent class is not changed.
        if not 'yaml_multi_representers' in cls.__dict__:
            cls.yaml_multi_representers = cls.yaml_multi_representers.copy()
        cls.yaml_multi_representers[data_type] = representer
    add_multi_representer = classmethod(add_multi_representer)

    def represent_scalar(self, tag, value, style=None):
        """Return a ScalarNode; `style` falls back to `self.default_style`."""
        if style is None:
            style = self.default_style
        node = ScalarNode(tag, value, style=style)
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node
        return node

    def represent_sequence(self, tag, sequence, flow_style=None):
        """Return a SequenceNode with one child node per item.

        When `flow_style` is None it is taken from `self.default_flow_style`
        or, failing that, is true only if every item is a scalar node with
        no style.
        """
        value = []
        node = SequenceNode(tag, value, flow_style=flow_style)
        # Registered before the items are visited, so an item that refers
        # back to this object finds the node.
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node
        best_style = True
        for item in sequence:
            node_item = self.represent_data(item)
            if not (isinstance(node_item, ScalarNode) and not node_item.style):
                best_style = False
            value.append(node_item)
        if flow_style is None:
            if self.default_flow_style is not None:
                node.flow_style = self.default_flow_style
            else:
                node.flow_style = best_style
        return node

    def represent_mapping(self, tag, mapping, flow_style=None):
        """Return a MappingNode with one (key node, value node) pair per
        item.

        `mapping` is an object with an `items` method, whose items are
        sorted, or an iterable of pairs, used as given.  `flow_style` is
        resolved as in `represent_sequence`.
        """
        value = []
        node = MappingNode(tag, value, flow_style=flow_style)
        # Registered before the items are visited, so an item that refers
        # back to this object finds the node.
        if self.alias_key is not None:
            self.represented_objects[self.alias_key] = node
        best_style = True
        if hasattr(mapping, 'items'):
            mapping = mapping.items()
            mapping.sort()
        for item_key, item_value in mapping:
            node_key = self.represent_data(item_key)
            node_value = self.represent_data(item_value)
            if not (isinstance(node_key, ScalarNode) and not node_key.style):
                best_style = False
            if not (isinstance(node_value, ScalarNode) and not node_value.style):
                best_style = False
            value.append((node_key, node_value))
        if flow_style is None:
            if self.default_flow_style is not None:
                node.flow_style = self.default_flow_style
            else:
                node.flow_style = best_style
        return node

    def ignore_aliases(self, data):
        """Return true if `data` must never be remembered for reuse of its
        node; the base class remembers everything."""
        return False

class SafeRepresenter(BaseRepresenter):
    """Representer for basic Python types using the standard
    `tag:yaml.org,2002:` tags."""

    def ignore_aliases(self, data):
        """Return True for None, the empty tuple and str, unicode, bool, int
        and float values; None (false) otherwise."""
        if data in [None, ()]:
            return True
        if isinstance(data, (str, unicode, bool, int, float)):
            return True

    def represent_none(self, data):
        return self.represent_scalar(u'tag:yaml.org,2002:null',
                u'null')

    def represent_str(self, data):
        """Represent a byte string as `str` when it decodes as ASCII or
        UTF-8, otherwise as base64 `binary` in literal style."""
        tag = None
        style = None
        try:
            data = unicode(data, 'ascii')
            tag = u'tag:yaml.org,2002:str'
        except UnicodeDecodeError:
            try:
                data = unicode(data, 'utf-8')
                tag = u'tag:yaml.org,2002:str'
            except UnicodeDecodeError:
                data = data.encode('base64')
                tag = u'tag:yaml.org,2002:binary'
                style = '|'
        return self.represent_scalar(tag, data, style=style)

    def represent_unicode(self, data):
        return self.represent_scalar(u'tag:yaml.org,2002:str', data)

    def represent_bool(self, data):
        if data:
            value = u'true'
        else:
            value = u'false'
        return self.represent_scalar(u'tag:yaml.org,2002:bool', value)

    def represent_int(self, data):
        return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))

    def represent_long(self, data):
        return self.represent_scalar(u'tag:yaml.org,2002:int', unicode(data))

    # Square a large float until its repr stops changing; the result is
    # compared against in represent_float to detect infinities.
    inf_value = 1e300
    while repr(inf_value) != repr(inf_value*inf_value):
        inf_value *= inf_value

    def represent_float(self, data):
        """Represent a float, writing `.nan`, `.inf` and `-.inf` for the
        special values."""
        # NOTE(review): the second test (equal to both 0.0 and 1.0) looks
        # like a NaN check for platforms where `data != data` is not
        # reliable -- confirm.
        if data != data or (data == 0.0 and data == 1.0):
            value = u'.nan'
        elif data == self.inf_value:
            value = u'.inf'
        elif data == -self.inf_value:
            value = u'-.inf'
        else:
            value = unicode(repr(data)).lower()
            # Note that in some cases `repr(data)` represents a float number
            # without the decimal parts.  For instance:
            #   >>> repr(1e17)
            #   '1e17'
            # Unfortunately, this is not a valid float representation according
            # to the definition of the `!!float` tag.  We fix this by adding
            # '.0' before the 'e' symbol.
            if u'.' not in value and u'e' in value:
                value = value.replace(u'e', u'.0e', 1)
        return self.represent_scalar(u'tag:yaml.org,2002:float', value)

    def represent_list(self, data):
        #pairs = (len(data) > 0 and isinstance(data, list))
        #if pairs:
        #    for item in data:
        #        if not isinstance(item, tuple) or len(item) != 2:
        #            pairs = False
        #            break
        #if not pairs:
        return self.represent_sequence(u'tag:yaml.org,2002:seq', data)
        #value = []
        #for item_key, item_value in data:
        #    value.append(self.represent_mapping(u'tag:yaml.org,2002:map',
        #        [(item_key, item_value)]))
        #return SequenceNode(u'tag:yaml.org,2002:pairs', value)

    def represent_dict(self, data):
        return self.represent_mapping(u'tag:yaml.org,2002:map', data)

    def represent_set(self, data):
        """Represent a set as a mapping from each member to None."""
        value = {}
        for key in data:
            value[key] = None
        return self.represent_mapping(u'tag:yaml.org,2002:set', value)

    def represent_date(self, data):
        value = unicode(data.isoformat())
        return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)

    def represent_datetime(self, data):
        value = unicode(data.isoformat(' '))
        return self.represent_scalar(u'tag:yaml.org,2002:timestamp', value)

    def represent_yaml_object(self, tag, data, cls, flow_style=None):
        """Represent `data` as a mapping of its state: the result of
        `__getstate__` if defined, else a copy of `__dict__`.  `cls` is not
        used here."""
        if hasattr(data, '__getstate__'):
            state = data.__getstate__()
        else:
            state = data.__dict__.copy()
        return self.represent_mapping(tag, state, flow_style=flow_style)

    def represent_undefined(self, data):
        """Reject an object for which no representer is registered."""
        raise RepresenterError("cannot represent an object: %s" % data)

SafeRepresenter.add_representer(type(None),
        SafeRepresenter.represent_none)

SafeRepresenter.add_representer(str,
        SafeRepresenter.represent_str)

SafeRepresenter.add_representer(unicode,
        SafeRepresenter.represent_unicode)

SafeRepresenter.add_representer(bool,
        SafeRepresenter.represent_bool)

SafeRepresenter.add_representer(int,
        SafeRepresenter.represent_int)

SafeRepresenter.add_representer(long,
        SafeRepresenter.represent_long)

SafeRepresenter.add_representer(float,
        SafeRepresenter.represent_float)

SafeRepresenter.add_representer(list,
        SafeRepresenter.represent_list)

SafeRepresenter.add_representer(tuple,
        SafeRepresenter.represent_list)

SafeRepresenter.add_representer(dict,
        SafeRepresenter.represent_dict)

SafeRepresenter.add_representer(set,
        SafeRepresenter.represent_set)

SafeRepresenter.add_representer(datetime.date,
        SafeRepresenter.represent_date)

SafeRepresenter.add_representer(datetime.datetime,
        SafeRepresenter.represent_datetime)

# Registered under None: the fallback used by represent_data when no other
# representer matches.
SafeRepresenter.add_representer(None,
        SafeRepresenter.represent_undefined)

class Representer(SafeRepresenter):
    """Representer that also handles Python-specific types and arbitrary
    objects, using `tag:yaml.org,2002:python/...` tags."""

    def represent_str(self, data):
        """Represent a byte string: `str` for ASCII, `python/str` for other
        valid UTF-8, base64 `binary` in literal style otherwise."""
        tag = None
        style = None
        try:
            data = unicode(data, 'ascii')
            tag = u'tag:yaml.org,2002:str'
        except UnicodeDecodeError:
            try:
                data = unicode(data, 'utf-8')
                tag = u'tag:yaml.org,2002:python/str'
            except UnicodeDecodeError:
                data = data.encode('base64')
                tag = u'tag:yaml.org,2002:binary'
                style = '|'
        return self.represent_scalar(tag, data, style=style)

    def represent_unicode(self, data):
        """Represent a unicode string: `python/unicode` when it is pure
        ASCII, `str` otherwise."""
        tag = None
        try:
            data.encode('ascii')
            tag = u'tag:yaml.org,2002:python/unicode'
        except UnicodeEncodeError:
            tag = u'tag:yaml.org,2002:str'
        return self.represent_scalar(tag, data)

    def represent_long(self, data):
        """Represent a long: `int` when `int(data)` is the same object as
        `data`, `python/long` otherwise."""
        tag = u'tag:yaml.org,2002:int'
        if int(data) is not data:
            tag = u'tag:yaml.org,2002:python/long'
        return self.represent_scalar(tag, unicode(data))

    def represent_complex(self, data):
        """Represent a complex number, omitting a zero real or imaginary
        part."""
        if data.imag == 0.0:
            data = u'%r' % data.real
        elif data.real == 0.0:
            data = u'%rj' % data.imag
        elif data.imag > 0:
            data = u'%r+%rj' % (data.real, data.imag)
        else:
            # The repr of a negative imaginary part already carries its sign.
            data = u'%r%rj' % (data.real, data.imag)
        return self.represent_scalar(u'tag:yaml.org,2002:python/complex', data)

    def represent_tuple(self, data):
        return self.represent_sequence(u'tag:yaml.org,2002:python/tuple', data)

    def represent_name(self, data):
        """Represent an object by its dotted `module.name` path, carried in
        the tag; the scalar value is empty."""
        name = u'%s.%s' % (data.__module__, data.__name__)
        return self.represent_scalar(u'tag:yaml.org,2002:python/name:'+name, u'')

    def represent_module(self, data):
        """Represent a module by its name, carried in the tag; the scalar
        value is empty."""
        return self.represent_scalar(
                u'tag:yaml.org,2002:python/module:'+data.__name__, u'')

    def represent_instance(self, data):
        # For instances of classic classes, we use __getinitargs__ and
        # __getstate__ to serialize the data.

        # If data.__getinitargs__ exists, the object must be reconstructed by
        # calling cls(*args), where args is a tuple returned by
        # __getinitargs__. Otherwise, the cls.__init__ method should never be
        # called and the class instance is created by instantiating a trivial
        # class and assigning to the instance's __class__ variable.

        # If data.__getstate__ exists, it returns the state of the object.
        # Otherwise, the state of the object is data.__dict__.

        # We produce either a !!python/object or !!python/object/new node.
        # If data.__getinitargs__ does not exist and state is a dictionary, we
        # produce a !!python/object node . Otherwise we produce a
        # !!python/object/new node.

        cls = data.__class__
        class_name = u'%s.%s' % (cls.__module__, cls.__name__)
        args = None
        state = None
        if hasattr(data, '__getinitargs__'):
            args = list(data.__getinitargs__())
        if hasattr(data, '__getstate__'):
            state = data.__getstate__()
        else:
            state = data.__dict__
        if args is None and isinstance(state, dict):
            return self.represent_mapping(
                    u'tag:yaml.org,2002:python/object:'+class_name, state)
        # Only constructor arguments to store: use the short sequence form.
        if isinstance(state, dict) and not state:
            return self.represent_sequence(
                    u'tag:yaml.org,2002:python/object/new:'+class_name, args)
        value = {}
        if args:
            value['args'] = args
        value['state'] = state
        return self.represent_mapping(
                u'tag:yaml.org,2002:python/object/new:'+class_name, value)

    def represent_object(self, data):
        # We use __reduce__ API to save the data. data.__reduce__ returns
        # a tuple of length 2-5:
        #   (function, args, state, listitems, dictitems)

        # For reconstructing, we call function(*args), then set its state,
        # listitems, and dictitems if they are not None.

        # A special case is when function.__name__ == '__newobj__'. In this
        # case we create the object with args[0].__new__(*args).

        # Another special case is when __reduce__ returns a string - we don't
        # support it.

        # We produce a !!python/object, !!python/object/new or
        # !!python/object/apply node.

        cls = type(data)
        if cls in copy_reg.dispatch_table:
            reduce = copy_reg.dispatch_table[cls](data)
        elif hasattr(data, '__reduce_ex__'):
            reduce = data.__reduce_ex__(2)
        elif hasattr(data, '__reduce__'):
            reduce = data.__reduce__()
        else:
            raise RepresenterError("cannot represent object: %r" % data)
        # Pad the reduce value with None up to five items.
        reduce = (list(reduce)+[None]*5)[:5]
        function, args, state, listitems, dictitems = reduce
        args = list(args)
        if state is None:
            state = {}
        if listitems is not None:
            listitems = list(listitems)
        if dictitems is not None:
            dictitems = dict(dictitems)
        if function.__name__ == '__newobj__':
            # The first argument is the class to instantiate.
            function = args[0]
            args = args[1:]
            tag = u'tag:yaml.org,2002:python/object/new:'
            newobj = True
        else:
            tag = u'tag:yaml.org,2002:python/object/apply:'
            newobj = False
        function_name = u'%s.%s' % (function.__module__, function.__name__)
        # State only, created through __newobj__: plain !!python/object.
        if not args and not listitems and not dictitems \
                and isinstance(state, dict) and newobj:
            return self.represent_mapping(
                    u'tag:yaml.org,2002:python/object:'+function_name, state)
        # Arguments only: use the short sequence form.
        if not listitems and not dictitems  \
                and isinstance(state, dict) and not state:
            return self.represent_sequence(tag+function_name, args)
        value = {}
        if args:
            value['args'] = args
        if state or not isinstance(state, dict):
            value['state'] = state
        if listitems:
            value['listitems'] = listitems
        if dictitems:
            value['dictitems'] = dictitems
        return self.represent_mapping(tag+function_name, value)

Representer.add_representer(str,
        Representer.represent_str)

Representer.add_representer(unicode,
        Representer.represent_unicode)

Representer.add_representer(long,
        Representer.represent_long)

Representer.add_representer(complex,
        Representer.represent_complex)

Representer.add_representer(tuple,
        Representer.represent_tuple)

Representer.add_representer(type,
        Representer.represent_name)

Representer.add_representer(types.ClassType,
        Representer.represent_name)

class BaseResolver(object):
    """Resolve the tag of a node from implicit (regexp) and path rules.

    Implicit resolvers are stored per class in ``yaml_implicit_resolvers``,
    a dict mapping a first character (or ``None`` for "any character") to
    a list of ``(tag, regexp)`` pairs. Path resolvers are stored in
    ``yaml_path_resolvers``, a dict mapping ``(path, kind)`` to a tag.
    """

    DEFAULT_SCALAR_TAG = u'tag:yaml.org,2002:str'
    DEFAULT_SEQUENCE_TAG = u'tag:yaml.org,2002:seq'
    DEFAULT_MAPPING_TAG = u'tag:yaml.org,2002:map'

    yaml_implicit_resolvers = {}
    yaml_path_resolvers = {}

    def __init__(self):
        # Stacks maintained by descend_resolver() / ascend_resolver().
        self.resolver_exact_paths = []
        self.resolver_prefix_paths = []

    def add_implicit_resolver(cls, tag, regexp, first):
        """Register `regexp` as an implicit resolver for `tag` on `cls`.

        `first` is an iterable of the first characters the rule applies
        to; ``None`` registers the rule under the ``None`` key.
        """
        if not 'yaml_implicit_resolvers' in cls.__dict__:
            # Give the class its own registry. The per-character lists
            # must be copied too: a shallow dict copy would share them
            # with the parent class, and the append below would then
            # leak this rule into the parent.
            cls.yaml_implicit_resolvers = dict(
                    (key, list(rules))
                    for key, rules in cls.yaml_implicit_resolvers.items())
        if first is None:
            first = [None]
        for ch in first:
            cls.yaml_implicit_resolvers.setdefault(ch, []).append((tag, regexp))
    add_implicit_resolver = classmethod(add_implicit_resolver)

    def add_path_resolver(cls, tag, path, kind=None):
        """Register a path-based resolver for `tag` on `cls`.

        Raises ResolverError for a malformed path element, node checker,
        index checker or node kind.
        """
        # Note: `add_path_resolver` is experimental.  The API could be changed.
        # `path` is a pattern that is matched against the path from the
        # root to the node that is being considered.  `path` elements are
        # tuples `(node_check, index_check)`.  `node_check` is a node class:
        # `ScalarNode`, `SequenceNode`, `MappingNode` or `None`.  `None`
        # matches any kind of a node.  `index_check` could be `None`, a boolean
        # value, a string value, or a number.  `None` and `False` match against
        # any _value_ of sequence and mapping nodes.  `True` matches against
        # any _key_ of a mapping node.  A string `index_check` matches against
        # a mapping value that corresponds to a scalar key which content is
        # equal to the `index_check` value.  An integer `index_check` matches
        # against a sequence value with the index equal to `index_check`.
        if not 'yaml_path_resolvers' in cls.__dict__:
            # Values are tags, so a shallow copy is sufficient here.
            cls.yaml_path_resolvers = cls.yaml_path_resolvers.copy()
        new_path = []
        for element in path:
            if isinstance(element, (list, tuple)):
                if len(element) == 2:
                    node_check, index_check = element
                elif len(element) == 1:
                    node_check = element[0]
                    index_check = True
                else:
                    # Wrap in a 1-tuple: `element` may itself be a tuple,
                    # which "%s" % element would try to unpack.
                    raise ResolverError("Invalid path element: %s" % (element,))
            else:
                node_check = None
                index_check = element
            if node_check is str:
                node_check = ScalarNode
            elif node_check is list:
                node_check = SequenceNode
            elif node_check is dict:
                node_check = MappingNode
            elif node_check not in [ScalarNode, SequenceNode, MappingNode] \
                    and not isinstance(node_check, basestring) \
                    and node_check is not None:
                raise ResolverError("Invalid node checker: %s" % (node_check,))
            if not isinstance(index_check, (basestring, int)) \
                    and index_check is not None:
                raise ResolverError("Invalid index checker: %s" % (index_check,))
            new_path.append((node_check, index_check))
        if kind is str:
            kind = ScalarNode
        elif kind is list:
            kind = SequenceNode
        elif kind is dict:
            kind = MappingNode
        elif kind not in [ScalarNode, SequenceNode, MappingNode] \
                and kind is not None:
            raise ResolverError("Invalid node kind: %s" % (kind,))
        cls.yaml_path_resolvers[tuple(new_path), kind] = tag
    add_path_resolver = classmethod(add_path_resolver)

    def descend_resolver(self, current_node, current_index):
        """Push the path-resolver state for the level being entered.

        Does nothing when no path resolvers are registered.
        """
        if not self.yaml_path_resolvers:
            return
        exact_paths = {}
        prefix_paths = []
        if current_node:
            depth = len(self.resolver_prefix_paths)
            # Keep only the candidates of the enclosing level that still
            # match at this node.
            for path, kind in self.resolver_prefix_paths[-1]:
                if self.check_resolver_prefix(depth, path, kind,
                        current_node, current_index):
                    if len(path) > depth:
                        prefix_paths.append((path, kind))
                    else:
                        exact_paths[kind] = self.yaml_path_resolvers[path, kind]
        else:
            # No current node: seed the stacks from all registered paths.
            for path, kind in self.yaml_path_resolvers:
                if not path:
                    exact_paths[kind] = self.yaml_path_resolvers[path, kind]
                else:
                    prefix_paths.append((path, kind))
        self.resolver_exact_paths.append(exact_paths)
        self.resolver_prefix_paths.append(prefix_paths)

    def ascend_resolver(self):
        """Pop the state pushed by the matching descend_resolver() call."""
        if not self.yaml_path_resolvers:
            return
        self.resolver_exact_paths.pop()
        self.resolver_prefix_paths.pop()

    def check_resolver_prefix(self, depth, path, kind,
            current_node, current_index):
        """Return True if `path[depth-1]` accepts the current node/index.

        Returns None (falsy) on any mismatch.
        """
        node_check, index_check = path[depth-1]
        if isinstance(node_check, basestring):
            # A string node check is compared against the node's tag.
            if current_node.tag != node_check:
                return
        elif node_check is not None:
            if not isinstance(current_node, node_check):
                return
        if index_check is True and current_index is not None:
            return
        if (index_check is False or index_check is None) \
                and current_index is None:
            return
        if isinstance(index_check, basestring):
            if not (isinstance(current_index, ScalarNode)
                    and index_check == current_index.value):
                return
        elif isinstance(index_check, int) and not isinstance(index_check, bool):
            if index_check != current_index:
                return
        return True

    def resolve(self, kind, value, implicit):
        """Return the tag for a node of the given `kind` and `value`.

        For scalars with ``implicit[0]`` set, the first implicit resolver
        whose regexp matches `value` wins. Otherwise path resolvers are
        consulted, then the default tag for `kind` is returned.
        """
        if kind is ScalarNode and implicit[0]:
            if value == u'':
                resolvers = self.yaml_implicit_resolvers.get(u'', [])
            else:
                resolvers = self.yaml_implicit_resolvers.get(value[0], [])
            wildcard_resolvers = self.yaml_implicit_resolvers.get(None, [])
            # Concatenate into a new list. `resolvers += ...` would extend
            # the list stored in the registry on every call.
            for tag, regexp in resolvers + wildcard_resolvers:
                if regexp.match(value):
                    return tag
            implicit = implicit[1]
        if self.yaml_path_resolvers:
            exact_paths = self.resolver_exact_paths[-1]
            if kind in exact_paths:
                return exact_paths[kind]
            if None in exact_paths:
                return exact_paths[None]
        if kind is ScalarNode:
            return self.DEFAULT_SCALAR_TAG
        elif kind is SequenceNode:
            return self.DEFAULT_SEQUENCE_TAG
        elif kind is MappingNode:
            return self.DEFAULT_MAPPING_TAG
class SimpleKey(object):
    """Plain record describing one candidate simple key.

    The scanner stores one of these per flow level; see the simple keys
    treatment in the Scanner.
    """

    def __init__(self, token_number, required, index, line, column, mark):
        # Identification of the candidate key token.
        self.token_number, self.required = token_number, required
        # Position of the candidate in the input.
        self.index, self.line, self.column = index, line, column
        self.mark = mark
def check_token(self, *choices):
    """Tell whether the next token is an instance of one of `choices`.

    With no `choices`, report whether any token is available at all.
    """
    # Make sure the head of the queue is final before inspecting it.
    while self.need_more_tokens():
        self.fetch_more_tokens()
    if not self.tokens:
        return False
    if not choices:
        return True
    # `choices` is already a tuple, so one isinstance call tries each
    # class in order.
    return isinstance(self.tokens[0], choices)
def need_more_tokens(self):
    """Tell whether another token must be fetched before one is handed out.

    Returns False once the stream is done, True when the queue is empty
    or its head may still turn out to be a simple key, and None otherwise.
    """
    if not self.done:
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        if self.next_possible_simple_key() == self.tokens_taken:
            return True
        return None
    return False
def next_possible_simple_key(self):
    """Return the smallest token number among the possible simple keys.

    Returns None when no possible simple key is recorded.
    """
    token_numbers = [key.token_number
            for key in self.possible_simple_keys.values()]
    if not token_numbers:
        return None
    return min(token_numbers)
def unwind_indent(self, column):
    """Close every block collection indented deeper than `column`.

    One BLOCK-END token is queued for each indentation level popped.
    """
    # In the flow context, indentation is ignored. This makes the scanner
    # less restrictive than the specification requires: the spec would
    # reject flow content indented no deeper than the enclosing block,
    # which would prohibit intuitively correct constructions such as
    #   key : {
    #   }
    if not self.flow_level:
        # In block context, we may need to issue the BLOCK-END tokens.
        while column < self.indent:
            here = self.get_mark()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(here, here))
def fetch_document_indicator(self, TokenClass):
    """Queue a DOCUMENT-START or DOCUMENT-END token built by `TokenClass`."""
    # Set the current indentation to -1.
    self.unwind_indent(-1)

    # Reset simple keys. Note that there could not be a block collection
    # after '---'.
    self.remove_possible_simple_key()
    self.allow_simple_key = False

    # The indicator is three characters long; the token spans the marks
    # taken before and after skipping it.
    begin = self.get_mark()
    self.forward(3)
    self.tokens.append(TokenClass(begin, self.get_mark()))
def fetch_flow_collection_end(self, TokenClass):
    """Queue a FLOW-SEQUENCE-END or FLOW-MAPPING-END token via `TokenClass`."""
    # Reset the possible simple key on the level being closed, then
    # leave that level.
    self.remove_possible_simple_key()
    self.flow_level -= 1

    # No simple keys after ']' or '}'.
    self.allow_simple_key = False

    # The indicator is one character long.
    begin = self.get_mark()
    self.forward()
    self.tokens.append(TokenClass(begin, self.get_mark()))
def fetch_value(self):
    """Queue a VALUE token, first emitting the KEY it completes if any.

    Raises ScannerError when a value indicator appears in block context
    where neither a simple key nor a complex value may start.
    """
    level = self.flow_level
    in_block = not level

    if level in self.possible_simple_keys:
        # The pending candidate on this level really was a simple key:
        # insert KEY retroactively in front of the key's first token.
        key = self.possible_simple_keys.pop(level)
        position = key.token_number - self.tokens_taken
        self.tokens.insert(position, KeyToken(key.mark, key.mark))

        # If this key starts a new block mapping, BLOCK-MAPPING-START
        # goes at the same position, i.e. immediately before KEY.
        if in_block and self.add_indent(key.column):
            self.tokens.insert(position,
                    BlockMappingStartToken(key.mark, key.mark))

        # There cannot be two simple keys one after another.
        self.allow_simple_key = False

    else:
        # It must be a part of a complex key.
        if in_block:
            # We are allowed to start a complex value if and only if
            # we can start a simple key. (The parser would catch this
            # anyway.)
            if not self.allow_simple_key:
                raise ScannerError(None, None,
                        "mapping values are not allowed here",
                        self.get_mark())

            # If this value starts a new block mapping, we need to add
            # BLOCK-MAPPING-START. It will be detected as an error later
            # by the parser.
            if self.add_indent(self.column):
                here = self.get_mark()
                self.tokens.append(BlockMappingStartToken(here, here))

        # Simple keys are allowed after ':' in the block context.
        self.allow_simple_key = in_block

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

    # Add VALUE; the ':' indicator is one character long.
    begin = self.get_mark()
    self.forward()
    self.tokens.append(ValueToken(begin, self.get_mark()))
def check_document_start(self):
    """Return True if a DOCUMENT-START indicator begins here, else None."""
    # DOCUMENT-START:   ^ '---' (' '|'\n')
    if self.column != 0:
        return None
    if self.prefix(3) != u'---':
        return None
    # The three dashes must be followed by a blank, a line break or the
    # end of the stream.
    if self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
        return True
    return None
def check_plain(self):
    """Tell whether a plain scalar may start at the current character."""
    # A plain scalar may start with any non-space character except:
    #   '-', '?', ':', ',', '[', ']', '{', '}',
    #   '#', '&', '*', '!', '|', '>', '\'', '\"',
    #   '%', '@', '`'.
    first = self.peek()
    if first not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`':
        return True

    # It may also start with '-', '?' or ':' if it is followed by a
    # non-space character.
    if self.peek(1) in u'\0 \t\r\n\x85\u2028\u2029':
        return False
    if first == u'-':
        return True

    # Note that '?' and ':' are limited to the block context because we
    # want the flow context to be space independent.
    return not self.flow_level and first in u'?:'
def scan_directive_name(self, start_mark):
    """Scan and consume a directive name, returning it.

    Raises ScannerError when no name character is present, or when the
    name is not followed by a space, a line break or the end of stream.
    """
    # See the specification for details.
    length = 0
    while True:
        ch = self.peek(length)
        if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z'
                or u'a' <= ch <= u'z' or ch in u'-_'):
            break
        length += 1
    if length == 0:
        raise ScannerError("while scanning a directive", start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch.encode('utf-8'), self.get_mark())
    name = self.prefix(length)
    self.forward(length)
    following = self.peek()
    if following not in u'\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a directive", start_mark,
                "expected alphabetic or numeric character, but found %r"
                % following.encode('utf-8'), self.get_mark())
    return name
def scan_yaml_directive_number(self, start_mark):
    """Scan and consume a run of decimal digits, returning it as an int.

    Raises ScannerError when the current character is not a digit.
    """
    # See the specification for details.
    first = self.peek()
    if first < u'0' or first > u'9':
        raise ScannerError("while scanning a directive", start_mark,
                "expected a digit, but found %r" % first.encode('utf-8'),
                self.get_mark())
    # Measure the run of digits before consuming it.
    length = 0
    while True:
        digit = self.peek(length)
        if digit < u'0' or digit > u'9':
            break
        length += 1
    number = int(self.prefix(length))
    self.forward(length)
    return number
+ value = self.scan_tag_uri('directive', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + return value + + def scan_directive_ignored_line(self, start_mark): + # See the specification for details. + while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a directive", start_mark, + "expected a comment or a line break, but found %r" + % ch.encode('utf-8'), self.get_mark()) + self.scan_line_break() + + def scan_anchor(self, TokenClass): + # The specification does not restrict characters for anchors and + # aliases. This may lead to problems, for instance, the document: + # [ *alias, value ] + # can be interpteted in two ways, as + # [ "value" ] + # and + # [ *alias , "value" ] + # Therefore we restrict aliases to numbers and ASCII letters. 
+ start_mark = self.get_mark() + indicator = self.peek() + if indicator == u'*': + name = 'alias' + else: + name = 'anchor' + self.forward() + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if not length: + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + value = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`': + raise ScannerError("while scanning an %s" % name, start_mark, + "expected alphabetic or numeric character, but found %r" + % ch.encode('utf-8'), self.get_mark()) + end_mark = self.get_mark() + return TokenClass(value, start_mark, end_mark) + + def scan_tag(self): + # See the specification for details. + start_mark = self.get_mark() + ch = self.peek(1) + if ch == u'<': + handle = None + self.forward(2) + suffix = self.scan_tag_uri('tag', start_mark) + if self.peek() != u'>': + raise ScannerError("while parsing a tag", start_mark, + "expected '>', but found %r" % self.peek().encode('utf-8'), + self.get_mark()) + self.forward() + elif ch in u'\0 \t\r\n\x85\u2028\u2029': + handle = None + suffix = u'!' + self.forward() + else: + length = 1 + use_handle = False + while ch not in u'\0 \r\n\x85\u2028\u2029': + if ch == u'!': + use_handle = True + break + length += 1 + ch = self.peek(length) + handle = u'!' + if use_handle: + handle = self.scan_tag_handle('tag', start_mark) + else: + handle = u'!' 
+ self.forward() + suffix = self.scan_tag_uri('tag', start_mark) + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a tag", start_mark, + "expected ' ', but found %r" % ch.encode('utf-8'), + self.get_mark()) + value = (handle, suffix) + end_mark = self.get_mark() + return TagToken(value, start_mark, end_mark) + + def scan_block_scalar(self, style): + # See the specification for details. + + if style == '>': + folded = True + else: + folded = False + + chunks = [] + start_mark = self.get_mark() + + # Scan the header. + self.forward() + chomping, increment = self.scan_block_scalar_indicators(start_mark) + self.scan_block_scalar_ignored_line(start_mark) + + # Determine the indentation level and go to the first non-empty line. + min_indent = self.indent+1 + if min_indent < 1: + min_indent = 1 + if increment is None: + breaks, max_indent, end_mark = self.scan_block_scalar_indentation() + indent = max(min_indent, max_indent) + else: + indent = min_indent+increment-1 + breaks, end_mark = self.scan_block_scalar_breaks(indent) + line_break = u'' + + # Scan the inner part of the block scalar. + while self.column == indent and self.peek() != u'\0': + chunks.extend(breaks) + leading_non_space = self.peek() not in u' \t' + length = 0 + while self.peek(length) not in u'\0\r\n\x85\u2028\u2029': + length += 1 + chunks.append(self.prefix(length)) + self.forward(length) + line_break = self.scan_line_break() + breaks, end_mark = self.scan_block_scalar_breaks(indent) + if self.column == indent and self.peek() != u'\0': + + # Unfortunately, folding rules are ambiguous. 
+ # + # This is the folding according to the specification: + + if folded and line_break == u'\n' \ + and leading_non_space and self.peek() not in u' \t': + if not breaks: + chunks.append(u' ') + else: + chunks.append(line_break) + + # This is Clark Evans's interpretation (also in the spec + # examples): + # + #if folded and line_break == u'\n': + # if not breaks: + # if self.peek() not in ' \t': + # chunks.append(u' ') + # else: + # chunks.append(line_break) + #else: + # chunks.append(line_break) + else: + break + + # Chomp the tail. + if chomping is not False: + chunks.append(line_break) + if chomping is True: + chunks.extend(breaks) + + # We are done. + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + + def scan_block_scalar_indicators(self, start_mark): + # See the specification for details. + chomping = None + increment = None + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + elif ch in u'0123456789': + increment = int(ch) + if increment == 0: + raise ScannerError("while scanning a block scalar", start_mark, + "expected indentation indicator in the range 1-9, but found 0", + self.get_mark()) + self.forward() + ch = self.peek() + if ch in u'+-': + if ch == '+': + chomping = True + else: + chomping = False + self.forward() + ch = self.peek() + if ch not in u'\0 \r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected chomping or indentation indicators, but found %r" + % ch.encode('utf-8'), self.get_mark()) + return chomping, increment + + def scan_block_scalar_ignored_line(self, start_mark): + # See the specification for details. 
+ while self.peek() == u' ': + self.forward() + if self.peek() == u'#': + while self.peek() not in u'\0\r\n\x85\u2028\u2029': + self.forward() + ch = self.peek() + if ch not in u'\0\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a block scalar", start_mark, + "expected a comment or a line break, but found %r" + % ch.encode('utf-8'), self.get_mark()) + self.scan_line_break() + + def scan_block_scalar_indentation(self): + # See the specification for details. + chunks = [] + max_indent = 0 + end_mark = self.get_mark() + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() != u' ': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + else: + self.forward() + if self.column > max_indent: + max_indent = self.column + return chunks, max_indent, end_mark + + def scan_block_scalar_breaks(self, indent): + # See the specification for details. + chunks = [] + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + while self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + end_mark = self.get_mark() + while self.column < indent and self.peek() == u' ': + self.forward() + return chunks, end_mark + + def scan_flow_scalar(self, style): + # See the specification for details. + # Note that we loose indentation rules for quoted scalars. Quoted + # scalars don't need to adhere indentation because " and ' clearly + # mark the beginning and the end of them. Therefore we are less + # restrictive then the specification requires. We only need to check + # that document separators are not included in scalars. 
+ if style == '"': + double = True + else: + double = False + chunks = [] + start_mark = self.get_mark() + quote = self.peek() + self.forward() + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + while self.peek() != quote: + chunks.extend(self.scan_flow_scalar_spaces(double, start_mark)) + chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark)) + self.forward() + end_mark = self.get_mark() + return ScalarToken(u''.join(chunks), False, start_mark, end_mark, + style) + + ESCAPE_REPLACEMENTS = { + u'0': u'\0', + u'a': u'\x07', + u'b': u'\x08', + u't': u'\x09', + u'\t': u'\x09', + u'n': u'\x0A', + u'v': u'\x0B', + u'f': u'\x0C', + u'r': u'\x0D', + u'e': u'\x1B', + u' ': u'\x20', + u'\"': u'\"', + u'\\': u'\\', + u'N': u'\x85', + u'_': u'\xA0', + u'L': u'\u2028', + u'P': u'\u2029', + } + + ESCAPE_CODES = { + u'x': 2, + u'u': 4, + u'U': 8, + } + + def scan_flow_scalar_non_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + length = 0 + while self.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029': + length += 1 + if length: + chunks.append(self.prefix(length)) + self.forward(length) + ch = self.peek() + if not double and ch == u'\'' and self.peek(1) == u'\'': + chunks.append(u'\'') + self.forward(2) + elif (double and ch == u'\'') or (not double and ch in u'\"\\'): + chunks.append(ch) + self.forward() + elif double and ch == u'\\': + self.forward() + ch = self.peek() + if ch in self.ESCAPE_REPLACEMENTS: + chunks.append(self.ESCAPE_REPLACEMENTS[ch]) + self.forward() + elif ch in self.ESCAPE_CODES: + length = self.ESCAPE_CODES[ch] + self.forward() + for k in range(length): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "expected escape sequence of %d hexdecimal numbers, but found %r" % + (length, self.peek(k).encode('utf-8')), self.get_mark()) + code = int(self.prefix(length), 16) + 
chunks.append(unichr(code)) + self.forward(length) + elif ch in u'\r\n\x85\u2028\u2029': + self.scan_line_break() + chunks.extend(self.scan_flow_scalar_breaks(double, start_mark)) + else: + raise ScannerError("while scanning a double-quoted scalar", start_mark, + "found unknown escape character %r" % ch.encode('utf-8'), self.get_mark()) + else: + return chunks + + def scan_flow_scalar_spaces(self, double, start_mark): + # See the specification for details. + chunks = [] + length = 0 + while self.peek(length) in u' \t': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch == u'\0': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected end of stream", self.get_mark()) + elif ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + breaks = self.scan_flow_scalar_breaks(double, start_mark) + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + else: + chunks.append(whitespaces) + return chunks + + def scan_flow_scalar_breaks(self, double, start_mark): + # See the specification for details. + chunks = [] + while True: + # Instead of checking indentation, we check for document + # separators. + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + raise ScannerError("while scanning a quoted scalar", start_mark, + "found unexpected document separator", self.get_mark()) + while self.peek() in u' \t': + self.forward() + if self.peek() in u'\r\n\x85\u2028\u2029': + chunks.append(self.scan_line_break()) + else: + return chunks + + def scan_plain(self): + # See the specification for details. + # We add an additional restriction for the flow context: + # plain scalars in the flow context cannot contain ',', ':' and '?'. + # We also keep track of the `allow_simple_key` flag here. + # Indentation rules are loosed for the flow context. 
+ chunks = [] + start_mark = self.get_mark() + end_mark = start_mark + indent = self.indent+1 + # We allow zero indentation for scalars, but then we need to check for + # document separators at the beginning of the line. + #if indent == 0: + # indent = 1 + spaces = [] + while True: + length = 0 + if self.peek() == u'#': + break + while True: + ch = self.peek(length) + if ch in u'\0 \t\r\n\x85\u2028\u2029' \ + or (not self.flow_level and ch == u':' and + self.peek(length+1) in u'\0 \t\r\n\x85\u2028\u2029') \ + or (self.flow_level and ch in u',:?[]{}'): + break + length += 1 + # It's not clear what we should do with ':' in the flow context. + if (self.flow_level and ch == u':' + and self.peek(length+1) not in u'\0 \t\r\n\x85\u2028\u2029,[]{}'): + self.forward(length) + raise ScannerError("while scanning a plain scalar", start_mark, + "found unexpected ':'", self.get_mark(), + "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.") + if length == 0: + break + self.allow_simple_key = False + chunks.extend(spaces) + chunks.append(self.prefix(length)) + self.forward(length) + end_mark = self.get_mark() + spaces = self.scan_plain_spaces(indent, start_mark) + if not spaces or self.peek() == u'#' \ + or (not self.flow_level and self.column < indent): + break + return ScalarToken(u''.join(chunks), True, start_mark, end_mark) + + def scan_plain_spaces(self, indent, start_mark): + # See the specification for details. + # The specification is really confusing about tabs in plain scalars. + # We just forbid them completely. Do not use tabs in YAML! 
+ chunks = [] + length = 0 + while self.peek(length) in u' ': + length += 1 + whitespaces = self.prefix(length) + self.forward(length) + ch = self.peek() + if ch in u'\r\n\x85\u2028\u2029': + line_break = self.scan_line_break() + self.allow_simple_key = True + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + breaks = [] + while self.peek() in u' \r\n\x85\u2028\u2029': + if self.peek() == ' ': + self.forward() + else: + breaks.append(self.scan_line_break()) + prefix = self.prefix(3) + if (prefix == u'---' or prefix == u'...') \ + and self.peek(3) in u'\0 \t\r\n\x85\u2028\u2029': + return + if line_break != u'\n': + chunks.append(line_break) + elif not breaks: + chunks.append(u' ') + chunks.extend(breaks) + elif whitespaces: + chunks.append(whitespaces) + return chunks + + def scan_tag_handle(self, name, start_mark): + # See the specification for details. + # For some strange reasons, the specification does not allow '_' in + # tag handles. I have allowed it anyway. + ch = self.peek() + if ch != u'!': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch.encode('utf-8'), + self.get_mark()) + length = 1 + ch = self.peek(length) + if ch != u' ': + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-_': + length += 1 + ch = self.peek(length) + if ch != u'!': + self.forward(length) + raise ScannerError("while scanning a %s" % name, start_mark, + "expected '!', but found %r" % ch.encode('utf-8'), + self.get_mark()) + length += 1 + value = self.prefix(length) + self.forward(length) + return value + + def scan_tag_uri(self, name, start_mark): + # See the specification for details. + # Note: we do not check if URI is well-formed. 
+ chunks = [] + length = 0 + ch = self.peek(length) + while u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \ + or ch in u'-;/?:@&=+$,_.!~*\'()[]%': + if ch == u'%': + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + chunks.append(self.scan_uri_escapes(name, start_mark)) + else: + length += 1 + ch = self.peek(length) + if length: + chunks.append(self.prefix(length)) + self.forward(length) + length = 0 + if not chunks: + raise ScannerError("while parsing a %s" % name, start_mark, + "expected URI, but found %r" % ch.encode('utf-8'), + self.get_mark()) + return u''.join(chunks) + + def scan_uri_escapes(self, name, start_mark): + # See the specification for details. + bytes = [] + mark = self.get_mark() + while self.peek() == u'%': + self.forward() + for k in range(2): + if self.peek(k) not in u'0123456789ABCDEFabcdef': + raise ScannerError("while scanning a %s" % name, start_mark, + "expected URI escape sequence of 2 hexdecimal numbers, but found %r" % + (self.peek(k).encode('utf-8')), self.get_mark()) + bytes.append(chr(int(self.prefix(2), 16))) + self.forward(2) + try: + value = unicode(''.join(bytes), 'utf-8') + except UnicodeDecodeError, exc: + raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark) + return value + + def scan_line_break(self): + # Transforms: + # '\r\n' : '\n' + # '\r' : '\n' + # '\n' : '\n' + # '\x85' : '\n' + # '\u2028' : '\u2028' + # '\u2029 : '\u2029' + # default : '' + ch = self.peek() + if ch in u'\r\n\x85': + if self.prefix(2) == u'\r\n': + self.forward(2) + else: + self.forward() + return u'\n' + elif ch in u'\u2028\u2029': + self.forward() + return ch + return u'' + +#try: +# import psyco +# psyco.bind(Scanner) +#except ImportError: +# pass + diff --git a/tablib/packages/yaml/serializer.py b/tablib/packages/yaml/serializer.py new file mode 100644 index 0000000..0bf1e96 --- /dev/null +++ b/tablib/packages/yaml/serializer.py @@ -0,0 +1,111 @@ + +__all__ = ['Serializer', 
'SerializerError'] + +from error import YAMLError +from events import * +from nodes import * + +class SerializerError(YAMLError): + pass + +class Serializer(object): + + ANCHOR_TEMPLATE = u'id%03d' + + def __init__(self, encoding=None, + explicit_start=None, explicit_end=None, version=None, tags=None): + self.use_encoding = encoding + self.use_explicit_start = explicit_start + self.use_explicit_end = explicit_end + self.use_version = version + self.use_tags = tags + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + self.closed = None + + def open(self): + if self.closed is None: + self.emit(StreamStartEvent(encoding=self.use_encoding)) + self.closed = False + elif self.closed: + raise SerializerError("serializer is closed") + else: + raise SerializerError("serializer is already opened") + + def close(self): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif not self.closed: + self.emit(StreamEndEvent()) + self.closed = True + + #def __del__(self): + # self.close() + + def serialize(self, node): + if self.closed is None: + raise SerializerError("serializer is not opened") + elif self.closed: + raise SerializerError("serializer is closed") + self.emit(DocumentStartEvent(explicit=self.use_explicit_start, + version=self.use_version, tags=self.use_tags)) + self.anchor_node(node) + self.serialize_node(node, None, None) + self.emit(DocumentEndEvent(explicit=self.use_explicit_end)) + self.serialized_nodes = {} + self.anchors = {} + self.last_anchor_id = 0 + + def anchor_node(self, node): + if node in self.anchors: + if self.anchors[node] is None: + self.anchors[node] = self.generate_anchor(node) + else: + self.anchors[node] = None + if isinstance(node, SequenceNode): + for item in node.value: + self.anchor_node(item) + elif isinstance(node, MappingNode): + for key, value in node.value: + self.anchor_node(key) + self.anchor_node(value) + + def generate_anchor(self, node): + self.last_anchor_id += 1 + return 
self.ANCHOR_TEMPLATE % self.last_anchor_id + + def serialize_node(self, node, parent, index): + alias = self.anchors[node] + if node in self.serialized_nodes: + self.emit(AliasEvent(alias)) + else: + self.serialized_nodes[node] = True + self.descend_resolver(parent, index) + if isinstance(node, ScalarNode): + detected_tag = self.resolve(ScalarNode, node.value, (True, False)) + default_tag = self.resolve(ScalarNode, node.value, (False, True)) + implicit = (node.tag == detected_tag), (node.tag == default_tag) + self.emit(ScalarEvent(alias, node.tag, implicit, node.value, + style=node.style)) + elif isinstance(node, SequenceNode): + implicit = (node.tag + == self.resolve(SequenceNode, node.value, True)) + self.emit(SequenceStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + index = 0 + for item in node.value: + self.serialize_node(item, node, index) + index += 1 + self.emit(SequenceEndEvent()) + elif isinstance(node, MappingNode): + implicit = (node.tag + == self.resolve(MappingNode, node.value, True)) + self.emit(MappingStartEvent(alias, node.tag, implicit, + flow_style=node.flow_style)) + for key, value in node.value: + self.serialize_node(key, node, None) + self.serialize_node(value, node, key) + self.emit(MappingEndEvent()) + self.ascend_resolver() + diff --git a/tablib/packages/yaml/tokens.py b/tablib/packages/yaml/tokens.py new file mode 100644 index 0000000..4d0b48a --- /dev/null +++ b/tablib/packages/yaml/tokens.py @@ -0,0 +1,104 @@ + +class Token(object): + def __init__(self, start_mark, end_mark): + self.start_mark = start_mark + self.end_mark = end_mark + def __repr__(self): + attributes = [key for key in self.__dict__ + if not key.endswith('_mark')] + attributes.sort() + arguments = ', '.join(['%s=%r' % (key, getattr(self, key)) + for key in attributes]) + return '%s(%s)' % (self.__class__.__name__, arguments) + +#class BOMToken(Token): +# id = '' + +class DirectiveToken(Token): + id = '' + def __init__(self, name, value, start_mark, 
end_mark): + self.name = name + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class DocumentStartToken(Token): + id = '' + +class DocumentEndToken(Token): + id = '' + +class StreamStartToken(Token): + id = '' + def __init__(self, start_mark=None, end_mark=None, + encoding=None): + self.start_mark = start_mark + self.end_mark = end_mark + self.encoding = encoding + +class StreamEndToken(Token): + id = '' + +class BlockSequenceStartToken(Token): + id = '' + +class BlockMappingStartToken(Token): + id = '' + +class BlockEndToken(Token): + id = '' + +class FlowSequenceStartToken(Token): + id = '[' + +class FlowMappingStartToken(Token): + id = '{' + +class FlowSequenceEndToken(Token): + id = ']' + +class FlowMappingEndToken(Token): + id = '}' + +class KeyToken(Token): + id = '?' + +class ValueToken(Token): + id = ':' + +class BlockEntryToken(Token): + id = '-' + +class FlowEntryToken(Token): + id = ',' + +class AliasToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class AnchorToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class TagToken(Token): + id = '' + def __init__(self, value, start_mark, end_mark): + self.value = value + self.start_mark = start_mark + self.end_mark = end_mark + +class ScalarToken(Token): + id = '' + def __init__(self, value, plain, start_mark, end_mark, style=None): + self.value = value + self.plain = plain + self.start_mark = start_mark + self.end_mark = end_mark + self.style = style + From 0bea48ccc376eaba938e7339c8e88d992341cebc Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 00:28:54 -0400 Subject: [PATCH 08/23] Imports. 
--- tablib/core.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index 8f08714..4ea6b94 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -10,6 +10,9 @@ import csv from helpers import * +from packages import simplejson as json +from packages import xlwt + __all__ = ['Dataset', 'source'] @@ -108,16 +111,19 @@ class Dataset(object): @property def yaml(self): + # TODO: YAML Export pass @property def csv(self): + # TODO: CSV Export pass @property def xls(self): + # TODO: XLS Export pass @@ -126,11 +132,13 @@ class Dataset(object): self.validate(row) self._data.append(row) + def sort_by(self, key): - """Returns datastet sorted by given key""" + """SORTS datastet by given key""" # todo: accpept string if headers, or index nubmer pass + def save(self): pass From ea64e4cfacc7b640b65a4b23a7b18606aba8f138 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 00:33:44 -0400 Subject: [PATCH 09/23] Dataset Title Metatdata. Improved dataset repr. 
--- tablib/core.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 4ea6b94..ae19740 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -8,10 +8,13 @@ import csv +import os from helpers import * from packages import simplejson as json from packages import xlwt +from packages import yaml + __all__ = ['Dataset', 'source'] @@ -38,6 +41,10 @@ class Dataset(object): except KeyError, why: self.headers = None + try: + self.title = kwargs['title'] + except KeyError, why: + self.title = None def __len__(self): return self.height @@ -64,7 +71,10 @@ class Dataset(object): def __repr__(self): - return '' + if self.title: + return '<%s dataset>' % (self.title.lower()) + else: + return '' def validate(self, row=None, safety=False): @@ -84,7 +94,7 @@ class Dataset(object): def digest(self): """Retruns digest information of dataset in human-readable format.""" - pass + 'Height: Width: ' @property From 6405ec3bafcfca3ec33a7f533a8fee53afa2cd19 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 01:01:32 -0400 Subject: [PATCH 10/23] General improvments --- tablib/core.py | 44 ++++++++++++++++++++++++++++++++++++++----- tablib/tests/tests.py | 2 +- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index ae19740..530551d 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -13,9 +13,13 @@ import os from helpers import * from packages import simplejson as json from packages import xlwt -from packages import yaml +try: + import yaml +except ImportError, why: + from packages import yaml + __all__ = ['Dataset', 'source'] @@ -26,13 +30,18 @@ __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' +FILE_EXTENTIONS = ('csv', 'json', 'xls', 'yaml') + + class Dataset(object): """Amazing Tabular Dataset object. 
""" def __init__(self, *args, **kwargs): self._data = None - self._filename = None + self._saved_file = None + self._saved_format = None + self._data = list(args) @@ -94,7 +103,18 @@ class Dataset(object): def digest(self): """Retruns digest information of dataset in human-readable format.""" - 'Height: Width: ' + + digest_text = '' + + if self.title: + digest_text += 'Title: %s \n' % (self.title) + if self.headers: + digest_text += 'Headers: %s\n' % [self.headers] + + digest_text += 'Height: %s\nWidth: %s\n' % (self.height, self.width) + + + return digest_text @property @@ -149,16 +169,30 @@ class Dataset(object): pass - def save(self): - pass + def save(self, filename=None, format=None): + + if not format: + # set format from filename +# format = filename + pass + + if format not in FILE_EXTENTIONS: + raise UnsupportedFormat + # note export format # open file, save the bitch + class InvalidDimensions(Exception): "Invalid size" +class UnsupportedFormat(NotImplemented): + "Format is not supported" + + + def source(io_string=None, filename=None): """docstring for import""" #open by filename diff --git a/tablib/tests/tests.py b/tablib/tests/tests.py index f457dfb..9a8c417 100644 --- a/tablib/tests/tests.py +++ b/tablib/tests/tests.py @@ -14,4 +14,4 @@ print data[1] data.append(['kenneth' ,'reitz', 4.3]) -print data._data \ No newline at end of file +print data.digest() \ No newline at end of file From 1850a934aa4f11c04df32676090546f88aa83483 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 01:39:38 -0400 Subject: [PATCH 11/23] JSON support working well. 
--- tablib/core.py | 20 ++++++++++++++++---- tablib/tests/tests.py | 10 ++++++++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 530551d..53f4616 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -8,6 +8,7 @@ import csv +import itertools import os from helpers import * @@ -72,7 +73,7 @@ class Dataset(object): def __setitem__(self, key, value): self.validate(value) - self._data[key] = value + self._data[key] = tuple(value) def __delitem__(self, key): @@ -135,24 +136,35 @@ class Dataset(object): @property def json(self): + """Returns JSON representation of Dataset.""" + data = [] + if self.headers: - pass +# for (i, header) in enumerate() + data = [dict(zip(self.headers, data_row)) for data_row in self ._data] + else: + data = self._data + + return json.dumps(data) @property def yaml(self): + """Returns YAML representation of Dataset.""" # TODO: YAML Export pass @property def csv(self): + """Returns CSV representation of Dataset.""" # TODO: CSV Export pass @property def xls(self): + """Returns XLS representation of Dataset.""" # TODO: XLS Export pass @@ -160,7 +172,7 @@ class Dataset(object): def append(self, row, index=None): # todo: impliment index self.validate(row) - self._data.append(row) + self._data.append(tuple(row)) def sort_by(self, key): @@ -188,7 +200,7 @@ class InvalidDimensions(Exception): "Invalid size" -class UnsupportedFormat(NotImplemented): +class UnsupportedFormat(NotImplementedError): "Format is not supported" diff --git a/tablib/tests/tests.py b/tablib/tests/tests.py index 9a8c417..4e17065 100644 --- a/tablib/tests/tests.py +++ b/tablib/tests/tests.py @@ -10,8 +10,14 @@ data = [ data = tablib.Dataset(*data, headers=headers) -print data[1] +#print data[1] data.append(['kenneth' ,'reitz', 4.3]) -print data.digest() \ No newline at end of file +#print data.digest() + +print data.json + +data.headers = None + +print data.json \ No newline at end of file From 
8d44ad8a12bca0abf875606049f8793ada71a698 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 02:32:57 -0400 Subject: [PATCH 12/23] YAML export support complete. --- tablib/core.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 53f4616..ddcd9a6 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -132,27 +132,31 @@ class Dataset(object): return len(self._data[0]) except KeyError, why: return 0 - + + def _package(self): + """Packages Dataset into lists of dictionaries for transmission.""" + if self.headers: + data = [dict(zip(self.headers, data_row)) for data_row in self ._data] + else: + data = [list(row) for row in self._data] + + return data + + @property def json(self): """Returns JSON representation of Dataset.""" data = [] - - if self.headers: -# for (i, header) in enumerate() - data = [dict(zip(self.headers, data_row)) for data_row in self ._data] - else: - data = self._data - return json.dumps(data) + return json.dumps(self._package()) @property def yaml(self): """Returns YAML representation of Dataset.""" - # TODO: YAML Export - pass + + return yaml.dump(self._package()) @property From 6dd71c88301cd18d59bd7146fb84459c024484f7 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 02:35:56 -0400 Subject: [PATCH 13/23] Updated history.rst formatting --- HISTORY.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index e69de29..a119b99 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -0,0 +1,7 @@ +History +======= + +0.1.0 (2010-09-??) +------------------ + +* Initial Release \ No newline at end of file From b7a8b65c00686a0589b6d85426430c472cb3492d Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 02:37:49 -0400 Subject: [PATCH 14/23] Validate method refactor. Removal of Digest method. 
--- tablib/core.py | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index ddcd9a6..23cf8d2 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -72,7 +72,7 @@ class Dataset(object): def __setitem__(self, key, value): - self.validate(value) + self._validate(value) self._data[key] = tuple(value) @@ -87,7 +87,7 @@ class Dataset(object): return '' - def validate(self, row=None, safety=False): + def _validate(self, row=None, safety=False): """Assures size of every row in dataset is of proper proportions.""" if row: is_valid = (len(row) == self.width) if self.width else True @@ -101,22 +101,16 @@ class Dataset(object): if not safety: raise InvalidDimensions return False - - def digest(self): - """Retruns digest information of dataset in human-readable format.""" - digest_text = '' - - if self.title: - digest_text += 'Title: %s \n' % (self.title) + + def _package(self): + """Packages Dataset into lists of dictionaries for transmission.""" if self.headers: - digest_text += 'Headers: %s\n' % [self.headers] - - digest_text += 'Height: %s\nWidth: %s\n' % (self.height, self.width) - - - return digest_text + data = [dict(zip(self.headers, data_row)) for data_row in self ._data] + else: + data = [list(row) for row in self._data] + return data @property def height(self): @@ -134,35 +128,25 @@ class Dataset(object): return 0 - def _package(self): - """Packages Dataset into lists of dictionaries for transmission.""" - if self.headers: - data = [dict(zip(self.headers, data_row)) for data_row in self ._data] - else: - data = [list(row) for row in self._data] - return data @property def json(self): """Returns JSON representation of Dataset.""" - data = [] - return json.dumps(self._package()) @property def yaml(self): """Returns YAML representation of Dataset.""" - return yaml.dump(self._package()) @property def csv(self): """Returns CSV representation of Dataset.""" - # TODO: CSV Export + pass 
@@ -175,12 +159,12 @@ class Dataset(object): def append(self, row, index=None): # todo: impliment index - self.validate(row) + self._validate(row) self._data.append(tuple(row)) def sort_by(self, key): - """SORTS datastet by given key""" + """Sorts datastet by given key""" # todo: accpept string if headers, or index nubmer pass From 61231b38ac67e5e51102b86d105115af5be4e1f7 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 02:38:59 -0400 Subject: [PATCH 15/23] Epic. --- tablib/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tablib/core.py b/tablib/core.py index 23cf8d2..81edcd9 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -36,7 +36,7 @@ FILE_EXTENTIONS = ('csv', 'json', 'xls', 'yaml') class Dataset(object): - """Amazing Tabular Dataset object. """ + """Epic Tabular-Dataset object. """ def __init__(self, *args, **kwargs): self._data = None @@ -55,10 +55,12 @@ class Dataset(object): self.title = kwargs['title'] except KeyError, why: self.title = None + def __len__(self): return self.height + def __getitem__(self, key): if is_string(key): From 9a65b8deed895ad4665da9aeb363aff8886a42e1 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 02:59:22 -0400 Subject: [PATCH 16/23] Added CSV Export Support. 
--- tablib/core.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 81edcd9..0e985db 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -8,7 +8,7 @@ import csv -import itertools +import cStringIO import os from helpers import * @@ -31,7 +31,7 @@ __license__ = 'MIT' __copyright__ = 'Copyright 2010 Kenneth Reitz' -FILE_EXTENTIONS = ('csv', 'json', 'xls', 'yaml') +FILE_EXTENSIONS = ('csv', 'json', 'xls', 'yaml') @@ -62,7 +62,6 @@ class Dataset(object): def __getitem__(self, key): - if is_string(key): if key in self.headers: pos = self.headers.index(key) # get 'key' index from each data @@ -98,7 +97,6 @@ class Dataset(object): if is_valid: return True - else: if not safety: raise InvalidDimensions @@ -123,15 +121,11 @@ class Dataset(object): @property def width(self): """Returns the width of the Dataset.""" - try: return len(self._data[0]) except KeyError, why: return 0 - - - @property def json(self): @@ -148,8 +142,16 @@ class Dataset(object): @property def csv(self): """Returns CSV representation of Dataset.""" - - pass + stream = cStringIO.StringIO() + _csv = csv.writer(stream) + + if self.headers: + _csv.writerow(self.headers) + + for row in self._data: + _csv.writerow(row) + + return stream.getvalue() @property @@ -178,7 +180,7 @@ class Dataset(object): # format = filename pass - if format not in FILE_EXTENTIONS: + if format not in FILE_EXTENSIONS: raise UnsupportedFormat From fecfecfd167f16381b987bab0e481b7be6cacba9 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 03:22:50 -0400 Subject: [PATCH 17/23] xlwt patch for compatibility reasons (may be unnecessary) --- tablib/packages/xlwt/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tablib/packages/xlwt/__init__.py b/tablib/packages/xlwt/__init__.py index 6f2a1b5..5053413 100644 --- a/tablib/packages/xlwt/__init__.py +++ b/tablib/packages/xlwt/__init__.py @@ -14,3 +14,5 @@ from Column import Column 
from Formatting import Font, Alignment, Borders, Pattern, Protection from Style import XFStyle, easyxf from ExcelFormula import * + +import CompoundDoc From f486b77a7a0d96e7d069bfcd0a519a8a25b2562f Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 03:23:56 -0400 Subject: [PATCH 18/23] Added XLS export support. --- tablib/core.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index 0e985db..a3441d8 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -103,10 +103,14 @@ class Dataset(object): return False - def _package(self): + def _package(self, dicts=True): """Packages Dataset into lists of dictionaries for transmission.""" + if self.headers: - data = [dict(zip(self.headers, data_row)) for data_row in self ._data] + if dicts: + data = [dict(zip(self.headers, data_row)) for data_row in self ._data] + else: + data = [list(self.headers)] + list(self._data) else: data = [list(row) for row in self._data] @@ -145,10 +149,7 @@ class Dataset(object): stream = cStringIO.StringIO() _csv = csv.writer(stream) - if self.headers: - _csv.writerow(self.headers) - - for row in self._data: + for row in self._package(dicts=False): _csv.writerow(row) return stream.getvalue() @@ -157,9 +158,20 @@ class Dataset(object): @property def xls(self): """Returns XLS representation of Dataset.""" - # TODO: XLS Export - pass + stream = cStringIO.StringIO() + + wb = xlwt.Workbook() + ws = wb.add_sheet(self.title if self.title else 'Tabbed Dataset') +# for row in self._package(dicts=False): + for i, row in enumerate(self._package(dicts=False)): + for j, col in enumerate(row): + ws.write(i, j, col) +# wb.save('elllo') + doc = xlwt.CompoundDoc.XlsDoc() + doc.save(stream, wb.get_biff_data()) + + return stream.getvalue() def append(self, row, index=None): # todo: impliment index From 54a83602e49da4ca5fad61254c27a1132b995bda Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 03:38:45 
-0400 Subject: [PATCH 19/23] Removed unnecessary typecheck module. --- tablib/packages/typecheck/__init__.py | 1542 ------------------ tablib/packages/typecheck/doctest_support.py | 36 - tablib/packages/typecheck/mixins.py | 84 - tablib/packages/typecheck/sets.py | 62 - tablib/packages/typecheck/typeclasses.py | 35 - 5 files changed, 1759 deletions(-) delete mode 100644 tablib/packages/typecheck/__init__.py delete mode 100644 tablib/packages/typecheck/doctest_support.py delete mode 100644 tablib/packages/typecheck/mixins.py delete mode 100644 tablib/packages/typecheck/sets.py delete mode 100644 tablib/packages/typecheck/typeclasses.py diff --git a/tablib/packages/typecheck/__init__.py b/tablib/packages/typecheck/__init__.py deleted file mode 100644 index 66ce74e..0000000 --- a/tablib/packages/typecheck/__init__.py +++ /dev/null @@ -1,1542 +0,0 @@ -__all__ = ['accepts', 'returns', 'yields', 'TypeCheckError', 'Length', 'Empty' - ,'TypeSignatureError', 'And', 'Any', 'Class', 'Exact', 'HasAttr' - ,'IsAllOf', 'IsCallable', 'IsIterable', 'IsNoneOf', 'IsOneOf' - ,'IsOnlyOneOf', 'Not', 'Or', 'Self', 'Xor', 'YieldSeq' - ,'register_type', 'is_registered_type', 'unregister_type' - ,'Function'] - -import inspect -import types - -from types import GeneratorType, FunctionType, MethodType, ClassType, TypeType - -# Controls whether typechecking is on (True) or off (False) -enable_checking = True - -# Pretty little wrapper function around __typecheck__ -def check_type(type, func, val): - type.__typecheck__(func, val) - -### Internal exception classes (these MUST NOT get out to the user) -### typecheck_{args,return,yield} should catch these and convert them to -### appropriate Type{Check,Signature}Error instances - -# We can't inherit from object because raise doesn't like new-style classes -# We can't use super() because we can't inherit from object -class _TC_Exception(Exception): - def error_message(self): - raise NotImplementedError("Incomplete _TC_Exception subclass (%s)" % 
str(self.__class__)) - - def format_bad_object(self, bad_object): - return ("for %s, " % str(bad_object), self) - -class _TC_LengthError(_TC_Exception): - def __init__(self, wrong, right=None): - _TC_Exception.__init__(self) - - self.wrong = wrong - self.right = right - - def error_message(self): - m = None - if self.right is not None: - m = ", expected %d" % self.right - return "length was %d%s" % (self.wrong, m or "") - -class _TC_TypeError(_TC_Exception): - def __init__(self, wrong, right): - _TC_Exception.__init__(self) - - self.wrong = calculate_type(wrong) - self.right = right - - def error_message(self): - return "expected %s, got %s" % (self.right, self.wrong) - -class _TC_NestedError(_TC_Exception): - def __init__(self, inner_exception): - self.inner = inner_exception - - def error_message(self): - try: - return ", " + self.inner.error_message() - except: - print "'%s'" % self.inner.message - raw_input() - raise - -class _TC_IndexError(_TC_NestedError): - def __init__(self, index, inner_exception): - _TC_NestedError.__init__(self, inner_exception) - - self.index = index - - def error_message(self): - return ("at index %d" % self.index) + _TC_NestedError.error_message(self) - -# _TC_DictError exists as a wrapper around dict-related exceptions. -# It provides a single place to sort the bad dictionary's keys in the error -# message. 
-class _TC_DictError(_TC_NestedError): - def format_bad_object(self, bad_object): - message = "for {%s}, " % ', '.join(["%s: %s" % (repr(k), repr(bad_object[k])) for k in sorted(bad_object.keys())]) - - if not isinstance(self.inner, _TC_LengthError): - return (message, self) - return (message, self.inner) - - def error_message(self): - raise NotImplementedError("Incomplete _TC_DictError subclass: " + str(self.__class__)) - -class _TC_KeyError(_TC_DictError): - def __init__(self, key, inner_exception): - _TC_NestedError.__init__(self, inner_exception) - - self.key = key - - def error_message(self): - return ("for key %s" % repr(self.key)) + _TC_NestedError.error_message(self) - -class _TC_KeyValError(_TC_KeyError): - def __init__(self, key, val, inner_exception): - _TC_KeyError.__init__(self, key, inner_exception) - - self.val = val - - def error_message(self): - return ("at key %s, value %s" % (repr(self.key), repr(self.val))) + _TC_NestedError.error_message(self) - -class _TC_GeneratorError(_TC_NestedError): - def __init__(self, yield_no, inner_exception): - _TC_NestedError.__init__(self, inner_exception) - - self.yield_no = yield_no - - def error_message(self): - raise RuntimeError("_TC_GeneratorError.message should never be called") - - def format_bad_object(self, bad_object): - bad_obj, start_message = self.inner.format_bad_object(bad_object) - message = "At yield #%d: %s" % (self.yield_no, bad_obj) - return (message, start_message) - -### These next three exceptions exist to give HasAttr better error messages -class _TC_AttrException(_TC_Exception): - def __init__(self, attr): - _TC_Exception.__init__(self, attr) - - self.attr = attr - -class _TC_AttrError(_TC_AttrException, _TC_NestedError): - def __init__(self, attr, inner_exception): - _TC_AttrException.__init__(self, attr) - _TC_NestedError.__init__(self, inner_exception) - - def error_message(self): - return ("as for attribute %s" % self.attr) + _TC_NestedError.error_message(self) - -class 
_TC_MissingAttrError(_TC_AttrException): - def error_message(self): - return "missing attribute %s" % self.attr - -# This is like _TC_LengthError for YieldSeq -class _TC_YieldCountError(_TC_Exception): - def __init__(self, expected): - _TC_Exception.__init__(self, expected) - - self.expected = expected - - def format_bad_object(self, bad_object): - return ("", self) - - def error_message(self): - plural = "s" - if self.expected == 1: - plural = "" - - return "only expected the generator to yield %d time%s" % (self.expected, plural) - -# This exists to provide more detailed error messages about why a given -# Xor() assertion failed -class _TC_XorError(_TC_NestedError): - def __init__(self, matched_conds, inner_exception): - assert matched_conds in (0, 2) - assert isinstance(inner_exception, _TC_TypeError) - - _TC_Exception.__init__(self, matched_conds, inner_exception) - _TC_NestedError.__init__(self, inner_exception) - self.matched_conds = matched_conds - - def error_message(self): - if self.matched_conds == 0: - m = "neither assertion" - else: - m = "both assertions" - - return _TC_NestedError.error_message(self) + " (matched %s)" % m - -class _TC_FunctionError(_TC_Exception): - def __init__(self, checking_func, obj): - self.checking_func = checking_func - self.rejected_obj = obj - - def error_message(self): - return " was rejected by %s" % self.checking_func - - def format_bad_object(self, bad_object): - return (str(bad_object), self) - -class _TC_ExactError(_TC_Exception): - def __init__(self, wrong, right): - self.wrong = wrong - self.right = right - - def error_message(self): - return "expected %s, got %s" % (self.right, self.wrong) - -### The following exist to provide detailed TypeSignatureErrors -class _TS_Exception(Exception): - def error_message(self): - raise NotImplementedError("Incomplete _TS_Exception subclass (%s)" % str(self.__class__)) - -# This is used when there was an error related to an auto-unpacked tuple -# in the function's signature -class 
_TS_TupleError(_TS_Exception): - def __init__(self, parameters, types): - parameters = _rec_tuple(parameters) - types = _rec_tuple(types) - _TS_Exception.__init__(self, parameters, types) - - self.parameters = parameters - self.types = types - - def error_message(self): - return "the signature type %s does not match %s" % (str(self.types), str(self.parameters)) - -class _TS_ExtraKeywordError(_TS_Exception): - def __init__(self, keyword): - _TS_Exception.__init__(self, keyword) - - self.keyword = keyword - - def error_message(self): - return "the keyword '%s' in the signature is not in the function" % self.keyword - -class _TS_ExtraPositionalError(_TS_Exception): - def __init__(self, type): - _TS_Exception.__init__(self, type) - - self.type = type - - def error_message(self): - return "an extra positional type has been supplied" - -class _TS_MissingTypeError(_TS_Exception): - def __init__(self, parameter): - _TS_Exception.__init__(self, parameter) - - self.parameter = parameter - - def error_message(self): - return "parameter '%s' lacks a type" % self.parameter - -# If the user has given a keyword parameter a type both positionally and -# with a keyword argument, this will be raised -class _TS_TwiceTypedError(_TS_Exception): - def __init__(self, parameter, kw_type, pos_type): - _TS_Exception.__init__(self, parameter, kw_type, pos_type) - - self.parameter = parameter - self.kw_type = kw_type - self.pos_type = pos_type - - def error_message(self): - return "parameter '%s' is provided two types (%s and %s)" % (self.parameter, str(self.kw_type), str(self.pos_type)) - -### The following functions are the way new type handlers are registered -### The Type function will iterate over all registered type handlers; -### the first handler to return a non-None value is considered the winner -######################################################################### - -_hooks = ("__typesig__", "__startchecking__", "__stopchecking__", "__switchchecking__") - -_registered_types = 
set() -_registered_hooks = dict([(_h, set()) for _h in _hooks]) - -def _manage_registration(add_remove, reg_type): - if not isinstance(reg_type, (types.ClassType, types.TypeType)): - raise ValueError("registered types must be classes or types") - - valid = False - for hook in _hooks: - if hasattr(reg_type, hook): - getattr(_registered_hooks[hook], add_remove)(reg_type) - valid = True - - if valid: - getattr(_registered_types, add_remove)(reg_type) - else: - raise ValueError("registered types must have at least one of the following methods: " + ", ".join(_hooks)) - -def register_type(reg_type): - _manage_registration('add', reg_type) - -def unregister_type(reg_type): - _manage_registration('remove', reg_type) - -def is_registered_type(reg_type): - return reg_type in _registered_types - -### Factory function; this is what should be used to dispatch -### type-checker class requests - -def Type(obj): - # Note that registered types cannot count on being run in a certain order; - # their __typesig__ methods must be sufficiently flexible to account for - # this - for reg_type in _registered_hooks['__typesig__']: - v = reg_type.__typesig__(obj) - if v is not None: - return v - - raise AssertionError("Object is of type '%s'; not a type" % str(type(obj))) - -def __checking(start_stop, *args): - attr = '__%schecking__' % start_stop - - for reg_type in _registered_hooks[attr]: - getattr(reg_type, attr)(*args) - -def start_checking(function): - __checking('start', function) - -def stop_checking(function): - __checking('stop', function) - -def switch_checking(from_func, to_func): - for reg_type in _registered_types: - if hasattr(reg_type, '__switchchecking__'): - getattr(reg_type, '__switchchecking__')(from_func, to_func) - else: - if hasattr(reg_type, '__stopchecking__'): - getattr(reg_type, '__stopchecking__')(from_func) - if hasattr(reg_type, '__startchecking__'): - getattr(reg_type, '__startchecking__')(to_func) - -### Deduce the type of a data structure -### -### XXX: Find 
a way to allow registered utility classes -### to hook into this -def calculate_type(obj): - if isinstance(obj, types.InstanceType): - return obj.__class__ - elif isinstance(obj, dict): - if len(obj) == 0: - return {} - - key_types = set() - val_types = set() - - for (k,v) in obj.items(): - key_types.add( calculate_type(k) ) - val_types.add( calculate_type(v) ) - - if len(key_types) == 1: - key_types = key_types.pop() - else: - key_types = Or(*key_types) - - if len(val_types) == 1: - val_types = val_types.pop() - else: - val_types = Or(*val_types) - - return {key_types: val_types} - elif isinstance(obj, tuple): - return tuple([calculate_type(t) for t in obj]) - elif isinstance(obj, list): - length = len(obj) - if length == 0: - return [] - obj = [calculate_type(o) for o in obj] - - partitions = [1] - partitions.extend([i for i in range(2, int(length/2)+1) if length%i==0]) - partitions.append(length) - - def evaluate(items_per): - parts = length / items_per - - for i in range(0, parts): - for j in range(0, items_per): - if obj[items_per * i + j] != obj[j]: - raise StopIteration - return obj[0:items_per] - - for items_per in partitions: - try: - return evaluate(items_per) - except StopIteration: - continue - else: - return type(obj) - -### The following classes are the work-horses of the typechecker - -# The base class for all the other utility classes -class CheckType(object): - def __repr__(self): - return type(self).name + '(' + ', '.join(sorted(repr(t) for t in self._types)) + ')' - - __str__ = __repr__ - - def __eq__(self, other): - return not self != other - - def __ne__(self, other): - return not self == other - - def __hash__(self): - raise NotImplementedError("Incomplete CheckType subclass: %s" % self.__class__) - - def __typecheck__(self, func, obj): - raise NotImplementedError("Incomplete CheckType subclass: %s" % self.__class__) - - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, CheckType): - return obj - -class Single(CheckType): - name = 
"Single" - - def __init__(self, type): - if not isinstance(type, (types.ClassType, types.TypeType)): - raise TypeError("Cannot type-check a %s" % type(type)) - else: - self.type = type - - self._types = [self.type] - - def __typecheck__(self, func, to_check): - if not isinstance(to_check, self.type): - raise _TC_TypeError(to_check, self.type) - - def __eq__(self, other): - if other.__class__ is not self.__class__: - return False - return self.type == other.type - - def __hash__(self): - return hash(str(hash(self.__class__)) + str(hash(self.type))) - - # XXX Is this really a good idea? - # Removing this only breaks 3 tests; that seems suspiciously low - def __repr__(self): - return repr(self.type) - - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, (types.ClassType, types.TypeType)): - return Single(obj) - -### Provide a way to enforce the empty-ness of iterators -class Empty(Single): - name = "Empty" - - def __init__(self, type): - if not hasattr(type, '__len__'): - raise TypeError("Can only assert emptyness for types with __len__ methods") - - Single.__init__(self, type) - - def __typecheck__(self, func, to_check): - Single.__typecheck__(self, func, to_check) - - if len(to_check) > 0: - err = _TC_LengthError(len(to_check), 0) - if isinstance(to_check, dict): - raise _TC_DictError(err) - raise err - -class Dict(CheckType): - name = "Dict" - - def __init__(self, key, val): - self.__check_key = Type(key) - self.__check_val = Type(val) - - self.type = {key: val} - self._types = [key, val] - - def __typecheck__(self, func, to_check): - if not isinstance(to_check, types.DictType): - raise _TC_TypeError(to_check, self.type) - - for (k, v) in to_check.items(): - # Check the key - try: - check_type(self.__check_key, func, k) - except _TC_Exception, inner: - raise _TC_KeyError(k, inner) - - # Check the value - try: - check_type(self.__check_val, func, v) - except _TC_Exception, inner: - raise _TC_KeyValError(k, v, inner) - - def __eq__(self, other): - if 
other.__class__ is not self.__class__: - return False - return self.type == other.type - - def __hash__(self): - cls = self.__class__ - key = self.__check_key - val = self.__check_val - - def strhash(obj): - return str(hash(obj)) - - return hash(''.join(map(strhash, [cls, key, val]))) - - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, dict): - if len(obj) == 0: - return Empty(dict) - return Dict(obj.keys()[0], obj.values()[0]) - -### Provide typechecking for the built-in list() type -class List(CheckType): - name = "List" - - def __init__(self, *type): - self._types = [Type(t) for t in type] - self.type = [t.type for t in self._types] - - def __typecheck__(self, func, to_check): - if not isinstance(to_check, list): - raise _TC_TypeError(to_check, self.type) - if len(to_check) % len(self._types): - raise _TC_LengthError(len(to_check)) - - # lists can be patterned, meaning that [int, float] - # requires that the to-be-checked list contain an alternating - # sequence of integers and floats. The pattern must be completed - # (e.g, [5, 5.0, 6, 6.0] but not [5, 5.0, 6]) for the list to - # typecheck successfully. 
- # - # A list with a single type, [int], is a sub-case of patterned - # lists - # - # XXX: Investigate speed increases by special-casing single-typed - # lists - pat_len = len(self._types) - type_tuples = [(i, val, self._types[i % pat_len]) for (i, val) - in enumerate(to_check)] - for (i, val, type) in type_tuples: - try: - check_type(type, func, val) - except _TC_Exception, e: - raise _TC_IndexError(i, e) - - def __eq__(self, other): - if other.__class__ is not self.__class__: - return False - - if len(self._types) != len(other._types): - return False - - for (s, o) in zip(self._types, other._types): - if s != o: - return False - return True - - def __hash__(self): - def strhash(obj): - return str(hash(obj)) - - return hash(''.join(map(strhash, [self.__class__] + self._types))) - - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, list): - if len(obj) == 0: - return Empty(list) - return List(*obj) - -### Provide typechecking for the built-in tuple() class -class Tuple(List): - name = "Tuple" - - def __init__(self, *type): - List.__init__(self, *type) - - self.type = tuple(self.type) - - def __typecheck__(self, func, to_check): - # Note that tuples of varying length (e.g., (int, int) and (int, int, int)) - # are separate types, not merely differences in length like lists - if not isinstance(to_check, types.TupleType) or len(to_check) != len(self._types): - raise _TC_TypeError(to_check, self.type) - - for (i, (val, type)) in enumerate(zip(to_check, self._types)): - try: - check_type(type, func, val) - except _TC_Exception, inner: - raise _TC_IndexError(i, inner) - - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, tuple): - return Tuple(*obj) - -class TypeVariables(CheckType): - # This is a stack of {typevariable -> type} mappings - # It is intentional that it is class-wide; it maintains - # the mappings of the outer functions if we descend into - # nested typechecked functions - __mapping_stack = [] - - # This is the {typevariable -> 
type} mapping for the function - # currently being checked - __active_mapping = None - - # This dict maps generators to their mappings - __gen_mappings = {} - - def __init__(self, name): - self.type = name - - def __str__(self): - return "TypeVariable(%s)" % self.type - - __repr__ = __str__ - - def __hash__(self): - return hash(''.join([str(o) for o in self.__class__ - , hash(type(self.type)) - , hash(self.type)])) - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - return type(self.type) is type(other.type) and self.type == other.type - - def __typecheck__(self, func, to_check): - name = self.type - if isinstance(func, GeneratorType): - active = self.__class__.__gen_mappings[func] - else: - active = self.__class__.__active_mapping - - # We have to do this because u'a' == 'a' - lookup = (name, type(name)) - if lookup in active: - check_type(active[lookup], func, to_check) - else: - # This is the first time we've encountered this - # typevariable for this function call. 
- # - # In this case, we automatically approve the object - active[lookup] = Type(calculate_type(to_check)) - - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, basestring): - return cls(obj) - - @classmethod - def __startchecking__(cls, func): - if isinstance(func, GeneratorType): - cls.__gen_mappings.setdefault(func, {}) - elif isinstance(func, FunctionType): - cls.__mapping_stack.append(cls.__active_mapping) - cls.__active_mapping = {} - else: - raise TypeError(func) - - @classmethod - def __switchchecking__(cls, from_func, to_func): - if isinstance(from_func, FunctionType): - if isinstance(to_func, GeneratorType): - cls.__gen_mappings[to_func] = cls.__active_mapping - cls.__stopchecking__(from_func) - elif isinstance(to_func, FunctionType): - cls.__stopchecking__(from_func) - cls.__startchecking__(to_func) - else: - raise TypeError(to_func) - else: - raise TypeError(from_func) - - @classmethod - def __stopchecking__(cls, func): - if isinstance(func, GeneratorType): - del cls.__gen_mappings[func] - elif isinstance(func, FunctionType): - cls.__active_mapping = cls.__mapping_stack.pop() - else: - raise TypeError(func) - -class Function(CheckType): - def __init__(self, func): - self._func = func - self.type = self - - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, (FunctionType, MethodType)): - return cls(obj) - - # Snag callable class instances (that aren't types or classes) - if type(obj) not in (types.ClassType, type) and callable(obj): - return cls(obj) - - def __typecheck__(self, func, to_check): - if False == self._func(to_check): - raise _TC_FunctionError(self._func, to_check) - - def __str__(self): - return "Function(%s)" % self._func - - def __repr__(self): - return str(self) - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - return self._func is other._func - - def __hash__(self): - return hash(str(self.__class__) + str(hash(self._func))) - -# Register some of the above types so that 
Type() knows about them -for c in (CheckType, List, Tuple, Dict, Single, TypeVariables, Function): - register_type(c) - -### The following are utility classes intended to make writing complex -### signatures easier. -###################################################################### - -### Instances of Any() automatically approve of the object they're supposed -### to be checking (ie, they don't actually check it; use this with caution) -class Any(CheckType): - name = "Any" - - def __init__(self): - self.type = object - - def __typecheck__(self, func, to_check): - pass - - def __str__(self): - return "Any()" - - __repr__ = __str__ - - # All instances of this class are equal - def __eq__(self, other): - return other.__class__ is self.__class__ - - def __hash__(self): - return hash(self.__class__) - -### Base class for Or() and And() -class _Boolean(CheckType): - def __init__(self, first_type, second_type, *types): - self._types = set() - - for t in (first_type, second_type)+types: - if type(t) is type(self): - self._types.update(t._types) - else: - self._types.add(Type(t)) - - if len(self._types) < 2: - raise TypeError("there must be at least 2 distinct parameters to __init__()") - - self.type = self - - def __eq__(self, other): - if other.__class__ is not self.__class__: - return False - - return self._types == other._types - - def __hash__(self): - return hash(str(hash(self.__class__)) + str(hash(frozenset(self._types)))) - -class Or(_Boolean): - name = "Or" - - def __typecheck__(self, func, to_check): - for type in self._types: - try: - check_type(type, func, to_check) - return - except _TC_Exception: - pass - - raise _TC_TypeError(to_check, self) - -class And(_Boolean): - name = "And" - - def __typecheck__(self, func, to_check): - for type in self._types: - try: - check_type(type, func, to_check) - except _TC_Exception, e: - raise _TC_TypeError(to_check, self) - -class Not(Or): - name = "Not" - - # We override _Boolean's __init__ so that we can accept a 
single - # condition - def __init__(self, first_type, *types): - self._types = set([Type(t) for t in (first_type,)+types]) - - self.type = self - - def __typecheck__(self, func, to_check): - # Or does our work for us, but we invert its result - try: - Or.__typecheck__(self, func, to_check) - except _TC_Exception: - return - raise _TC_TypeError(to_check, self) - -class Xor(_Boolean): - name = "Xor" - - def __typecheck__(self, func, to_check): - already_met_1_cond = False - - for typ in self._types: - try: - check_type(typ, func, to_check) - except _TC_Exception: - pass - else: - if already_met_1_cond: - raise _TC_XorError(2, _TC_TypeError(to_check, self)) - already_met_1_cond = True - - if not already_met_1_cond: - raise _TC_XorError(0, _TC_TypeError(to_check, self)) - -class IsCallable(CheckType): - def __init__(self): - self.type = self - - def __str__(self): - return "IsCallable()" - - __repr__ = __str__ - - # They're all the same - # XXX Change IsCallable to a singleton class - def __hash__(self): - return id(self.__class__) - - def __eq__(self, other): - return self.__class__ is other.__class__ - - def __typecheck__(self, func, to_check): - if not callable(to_check): - raise _TC_TypeError(to_check, 'a callable') - -class HasAttr(CheckType): - def __init__(self, set_1, set_2=None): - attr_sets = {list: [], dict: {}} - - for (arg_1, arg_2) in ((set_1, set_2), (set_2, set_1)): - for t in (list, dict): - if isinstance(arg_1, t): - attr_sets[t] = arg_1 - if isinstance(arg_2, t): - raise TypeError("can only have one list and/or one dict") - - self._attr_types = dict.fromkeys(attr_sets[list], Any()) - - for (attr, typ) in attr_sets[dict].items(): - self._attr_types[attr] = Type(typ) - - def __typecheck__(self, func, to_check): - for (attr, typ) in self._attr_types.items(): - if not hasattr(to_check, attr): - raise _TC_MissingAttrError(attr) - - try: - check_type(typ, func, getattr(to_check, attr)) - except _TC_Exception, e: - raise _TC_AttrError(attr, e) - - def 
__eq__(self, other): - if self.__class__ is not other.__class__: - return False - return self._attr_types == other._attr_types - - def __hash__(self): - return hash(str(hash(self.__class__)) + str(hash(str(self._attr_types)))) - - def __str__(self): - any_type = [] - spec_type = {} - - any = Any() - - for (attr, typ) in self._attr_types.items(): - if typ == any: - any_type.append(attr) - else: - spec_type[attr] = typ - - msg = [t for t in (any_type, spec_type) if len(t)] - - return "HasAttr(" + ', '.join(map(str, msg)) + ")" - - __repr__ = __str__ - -class IsIterable(CheckType): - def __init__(self): - self.type = self - - def __eq__(self, other): - return self.__class__ is other.__class__ - - # They're all the same - # XXX Change IsIterable to a singleton class - def __hash__(self): - return id(self.__class__) - - def __str__(self): - return "IsIterable()" - - __repr__ = __str__ - - def __typecheck__(self, func, to_check): - if not (hasattr(to_check, '__iter__') and callable(to_check.__iter__)): - raise _TC_TypeError(to_check, "an iterable") - -class YieldSeq(CheckType): - _index_map = {} - - def __init__(self, type_1, type_2, *types): - self.type = self - - self._type = [type_1, type_2] + list(types) - self._types = [Type(t) for t in self._type] - - def __hash__(self): - return id(self) - - def __str__(self): - return "YieldSeq(" + ", ".join(map(str, self._type)) + ")" - - __repr__ = __str__ - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - return self._types == other._types - - def __hash__(self): - return hash(str(self.__class__) + str([hash(t) for t in self._types])) - - # We have to use __{start,stop}checking__ so that the indexes get - # reset every time we run through the typechecking sequence - @classmethod - def __startchecking__(cls, gen): - if isinstance(gen, GeneratorType): - cls._index_map[gen] = {} - - @classmethod - def __stopchecking__(cls, gen): - if gen in cls._index_map: - del cls._index_map[gen] - - def 
__typecheck__(self, gen, to_check): - index_map = self.__class__._index_map - - # There might be multiple YieldSeq's per signature - if self not in index_map[gen]: - index_map[gen][self] = -1 - index = index_map[gen] - - if index[self] >= len(self._types)-1: - raise _TC_YieldCountError(len(self._types)) - - index[self] += 1 - check_type(self._types[index[self]], gen, to_check) - -register_type(YieldSeq) - -class Exact(CheckType): - def __init__(self, obj): - self.type = self - self._obj = obj - - def __hash__(self): - try: - obj_hash = str(hash(self._obj)) - except TypeError: - obj_hash = str(type(self._obj)) + str(self._obj) - - return hash(str(self.__class__) + obj_hash) - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - return self._obj == other._obj - - def __typecheck__(self, func, to_check): - if self._obj != to_check: - raise _TC_ExactError(to_check, self._obj) - -class Length(CheckType): - def __init__(self, length): - self.type = self - self._length = int(length) - - def __hash__(self): - return hash(str(self.__class__) + str(self._length)) - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - return self._length == other._length - - def __typecheck__(self, func, to_check): - try: - length = len(to_check) - except TypeError: - raise _TC_TypeError(to_check, "something with a __len__ method") - - if length != self._length: - raise _TC_LengthError(length, self._length) - -import sys -class Class(CheckType): - def __init__(self, class_name): - self.type = self - self.class_name = class_name - self.class_obj = None - self._frame = sys._getframe(1) - - def __hash__(self): - return hash(str(self.__class__) + self.class_name) - - def __str__(self): - return "Class('%s')" % self.class_name - - __repr__ = __str__ - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - return self.class_name == other.class_name - - def __typecheck__(self, func, to_check): - if 
self.class_obj is None: - class_name = self.class_name - frame = self._frame - - for f_dict in (frame.f_locals, frame.f_globals): - if class_name in frame.f_locals: - if self is not frame.f_locals[class_name]: - self.class_obj = frame.f_locals[class_name] - self._frame = None - break - else: - raise NameError("name '%s' is not defined" % class_name) - - if not isinstance(to_check, self.class_obj): - raise _TC_TypeError(to_check, self.class_obj) - -class Typeclass(CheckType): - bad_members = dict.fromkeys(['__class__', '__new__', '__init__'], True) - - def __init__(self, *types): - if len(types) == 0: - raise TypeError("Must supply at least one type to __init__()") - - self.type = self - - self._cache = set() - self._interface = set() - self._instances = set() - for t in types: - self.add_instance(t) - - self._calculate_interface() - - def recalculate_interface(self): - self._cache = self._instances.copy() - self._calculate_interface() - - def instances(self): - return list(self._instances) - - def interface(self): - return list(self._interface) - - def has_instance(self, instance): - return instance in self._instances - - def add_instance(self, instance): - if isinstance(instance, self.__class__): - for inst in instance.instances(): - self._instances.add(inst) - self._cache.add(inst) - elif isinstance(instance, (ClassType, TypeType)): - self._instances.add(instance) - self._cache.add(instance) - else: - raise TypeError("All instances must be classes or types") - - def intersect(self, other): - if isinstance(other, self.__class__): - new_instances = other.instances() - else: - new_instances = other - - self._instances.update(new_instances) - self._cache.update(new_instances) - self._calculate_interface() - - def _calculate_interface(self): - bad_members = self.bad_members - - for instance in self._instances: - inst_attrs = [] - - for attr, obj in instance.__dict__.items(): - if callable(obj) and attr not in bad_members: - inst_attrs.append(attr) - - if 
len(self._interface) == 0: - self._interface = set(inst_attrs) - else: - self._interface.intersection_update(inst_attrs) - - def __typecheck__(self, func, to_check): - if to_check.__class__ in self._cache: - return - - for method in self._interface: - if not hasattr(to_check, method): - raise _TC_MissingAttrError(method) - - attr = getattr(to_check, method) - if not callable(attr): - raise _TC_AttrError(method, _TC_TypeError(attr, IsCallable())) - - self._cache.add(to_check.__class__) - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - return self._instances == other._instances - - def __hash__(self): - return hash(str(self.__class__) + str(hash(frozenset(self._instances)))) - - def __repr__(self): - return object.__repr__(self) - - def __str__(self): - return 'Typeclass(' + ', '.join(map(str, self._instances)) + ')' - -# The current implementation of Self relies on the TypeVariables machinery -_Self = TypeVariables("this is the class of the invocant") -def Self(): - return _Self - -### Aliases -########### - -IsOneOf = Or -IsAllOf = And -IsNoneOf = Not -IsOnlyOneOf = Xor - -### This is the public side of the module -######################################### - -# This is for backwards compatibility with v0.1.6 and earlier -class TypeCheckException(Exception): - pass - -class TypeCheckError(TypeCheckException): - def __init__(self, prefix, bad_object, exception): - TypeCheckException.__init__(self, prefix, bad_object, exception) - - self.prefix = prefix - self.internal = exception - self.bad_object = bad_object - - (bad_obj_str, start_message) = exception.format_bad_object(bad_object) - self.__message = prefix + bad_obj_str + start_message.error_message() - - def __str__(self): - return self.__message - -class TypeSignatureError(Exception): - def __init__(self, internal_exc): - Exception.__init__(self, internal_exc) - - self.internal = internal_exc - self.__message = internal_exc.error_message() - - def __str__(self): - return 
self.__message - -### Begin helper classes/functions for typecheck_args -##################################################### -def _rec_tuple(obj): - if isinstance(obj, list): - return tuple(_rec_tuple(o) for o in obj) - return obj - -def _rec_tuple_str(obj): - if not isinstance(obj, (list, tuple)): - return obj - - if len(obj) == 1: - return '(%s,)' % obj - - return '(' + ', '.join(_rec_tuple_str(o) for o in obj) + ')' - -def _gen_arg_to_param(func, (posargs, varargs, varkw, defaults)): - sig_args = list() - dic_args = list() - - for obj in posargs: - if isinstance(obj, list): - rts = _rec_tuple_str(obj) - - sig_args.append(rts) - dic_args.append((_rec_tuple(obj), rts)) - else: - sig_args.append(str(obj)) - dic_args.append(('"%s"' % obj, obj)) - - func_code = '' - if varargs: - dic_args.append(('"%s"' % varargs, varargs)) - sig_args.append('*' + varargs) - func_code = '\n\t%s = list(%s)' % (varargs, varargs) - if varkw: - dic_args.append(('"%s"' % varkw, varkw)) - sig_args.append('**' + varkw) - - func_name = func.func_name + '_' - while func_name in dic_args: - func_name += '_' - - func_def = 'def %s(' % func.func_name - func_return = func_code \ - + '\n\treturn {' \ - + ', '.join('%s: %s' % kv for kv in dic_args) \ - + '}' - - locals = {} - exec func_def + ','.join(sig_args) + '):' + func_return in locals - func = locals[func.func_name] - func.func_defaults = defaults - return func - -def _validate_tuple(ref, obj): - if not isinstance(ref, (list, tuple)): - return - if not isinstance(obj, (list, tuple)): - raise _TS_TupleError(ref, obj) - - if len(ref) != len(obj): - raise _TS_TupleError(ref, obj) - - try: - for r, o in zip(ref, obj): - _validate_tuple(r, o) - except _TS_TupleError: - raise _TS_TupleError(ref, obj) - -def _param_to_type((params, varg_name, kwarg_name), vargs, kwargs): - vargs = list(vargs) - kwargs = dict(kwargs) - - # Make parameter names to values - param_value = dict() - - # There are excess positional arguments, but no *args parameter - if 
len(params) < len(vargs) and varg_name is None: - raise _TS_ExtraPositionalError(vargs[len(params)]) - # There are not enough position args and no kwargs to draw from - if len(params) > len(vargs) and len(kwargs) == 0: - raise _TS_MissingTypeError(params[len(vargs)]) - - # No reason to do this if there aren't any vargs - if len(vargs): - for p, a in zip(params, vargs): - # Make sure all auto-unpacked tuples match up - _validate_tuple(p, a) - param_value[_rec_tuple(p)] = a - - # No reason to do all this work if there aren't any kwargs - if len(kwargs) > 0: - # All params that still need values - params = set([k for k in params if k not in param_value]) - if kwarg_name and kwarg_name not in param_value: - params.add(kwarg_name) - if varg_name and varg_name not in param_value: - params.add(varg_name) - - # Lift this out of the loop - no_double_star = kwarg_name is None - - # All parameter slots have been filled, but there are still keyword - # args remaining with no **kwargs parameter present - if len(params) == 0 and no_double_star: - raise _TS_ExtraKeywordError(kwargs.keys()[0]) - - # Match up remaining keyword args with open parameter slots - for p, a in kwargs.items(): - if p in param_value: - raise _TS_TwiceTypedError(p, a, param_value[p]) - if p not in params and no_double_star: - raise _TS_ExtraKeywordError(p) - - # Make sure all auto-unpacked tuples match up - _validate_tuple(p, a) - - # Bookkeeping - params.remove(p) - param_value[p] = a - - # Any elements left in params indicate that the parameter is missing - # a value - if len(params): - raise _TS_MissingTypeError(params.pop()) - - return param_value - -def _make_fake_function(func): - def fake_function(*vargs, **kwargs): - # We call start_checking here, but __check_result - # has to call stop_checking on its own. 
The reason - # for this is so that typecheck_yield can call - # stop_checking on the function and then start_checking - # on the generator - start_checking(func) - - # If either one of these operations fails, we need to call - # stop_checking() - try: - fake_function.__check_args(vargs, kwargs) - result = func(*vargs, **kwargs) - except: - stop_checking(func) - raise - - return fake_function.__check_result(func, result) - - # These are the default implementations of __check_args - # and __check_results - def _pass_args(vargs, kwargs): - pass - def _pass_result(func, result): - stop_checking(func) - return result - - fake_function.__check_args = _pass_args - fake_function.__check_result = _pass_result - fake_function.__wrapped_func = func - - # Mock-up the fake function to look as much like the - # real function as possible - fake_function.__module__ = func.__module__ - fake_function.__name__ = func.__name__ - fake_function.__doc__ = func.__doc__ - - return fake_function - -################################################### -### End helper classes/functions for typecheck_args - -def typecheck_args(*v_sig, **kw_sig): - # typecheck_args is run to obtain the real decorator - def decorator(func): - if hasattr(func, '__wrapped_func'): - if hasattr(func, 'type_args'): - raise RuntimeError('Cannot use the same typecheck_* function more than once on the same function') - wrapped_func = func.__wrapped_func - else: - wrapped_func = func - - param_list, varg_name, kwarg_name, defaults = inspect.getargspec(wrapped_func) - args_to_params = _gen_arg_to_param(wrapped_func, (param_list, varg_name, kwarg_name, defaults)) - - try: - param_types = _param_to_type((param_list, varg_name, kwarg_name), v_sig, kw_sig) - except _TS_Exception, e: - raise TypeSignatureError(e) - - ### We need to fix-up the types of the *vargs and **kwargs parameters - ##################################################################### - if varg_name: - if not isinstance(param_types[varg_name], list): - 
param_types[varg_name] = [param_types[varg_name]] - - if kwarg_name: - if not isinstance(param_types[kwarg_name], dict): - param_types[kwarg_name] = {str: param_types[kwarg_name]} - - ##################################################################### - ### /Fix-up - - # Convert the signatures to types now, rather than rebuild them in every function call - check_param_types = dict() - for k, v in param_types.items(): - check_param_types[k] = Type(v) - - def __check_args(__vargs, __kwargs): - # Type-checking can be turned on and off by toggling the - # value of the global enable_checking variable - if enable_checking: - arg_dict = args_to_params(*__vargs, **__kwargs) - - # Type-check the keyword arguments - try: - for name, val in arg_dict.items(): - check_type(check_param_types[name], wrapped_func, val) - except _TC_Exception, e: - str_name = _rec_tuple_str(name) - raise TypeCheckError("Argument %s: " % str_name, val, e) - - if hasattr(func, '__check_result'): - # This is one of our wrapper functions, probably created by - # typecheck_yield or typecheck_return - fake_function = func - else: - # We need to build a wrapper - fake_function = _make_fake_function(func) - - # Specify how argument checking should be done - fake_function.__check_args = __check_args - - ### Add the publically-accessible signature information - fake_function.type_args = param_types - - return fake_function - return decorator - -# Refactor this out of typecheck_{return,yield} -def _decorator(signature, conflict_field, twice_field, check_result_func): - def decorator(func): - if hasattr(func, '__check_result'): - # This is one of our wrapper functions, probably created by - # typecheck_args - if hasattr(func, conflict_field): - raise RuntimeError("Cannot use typecheck_return and typecheck_yield on the same function") - elif hasattr(func, twice_field): - raise RuntimeError('Cannot use the same typecheck_* function more than once on the same function') - - fake_function = func - else: - 
fake_function = _make_fake_function(func) - - setattr(fake_function, twice_field, signature) - fake_function.__check_result = check_result_func - return fake_function - return decorator - -def typecheck_return(*signature): - if len(signature) == 1: - signature = signature[0] - sig_types = Type(signature) - - def __check_return(func, return_vals): - if enable_checking: - try: - check_type(sig_types, func, return_vals) - except _TC_Exception, e: - stop_checking(func) - raise TypeCheckError("Return value: ", return_vals, e) - - stop_checking(func) - return return_vals - return _decorator(signature, 'type_yield', 'type_return', __check_return) - -class Fake_generator(object): - def __init__(self, real_gen, signature): - # The generator should have the same yield signature - # as the function that produced it; however, we don't - # copy the args signature because the generator - # doesn't take arguments - self.type_yield = signature - - self.__yield_no = 0 - self.__real_gen = real_gen - self.__sig_types = Type(signature) - self.__needs_stopping = True - - def next(self): - gen = self.__real_gen - - self.__yield_no += 1 - - try: - return_vals = gen.next() - except StopIteration: - if self.__needs_stopping: - stop_checking(gen) - self.__needs_stopping = False - raise - - if enable_checking: - try: - check_type(self.__sig_types, gen, return_vals) - except _TC_Exception, e: - # Insert this error into the chain so we can know - # which yield the error occurred at - middle_exc = _TC_GeneratorError(self.__yield_no, e) - raise TypeCheckError("", return_vals, middle_exc) - - # Everything checks out. 
Return the results - return return_vals - - def __del__(self): - if self.__needs_stopping: - stop_checking(self.__real_gen) - -def typecheck_yield(*signature): - if len(signature) == 1: - signature = signature[0] - - def __check_yield(func, gen): - # If the return value isn't a generator, we blow up - if not isinstance(gen, types.GeneratorType): - stop_checking(func) - raise TypeError("typecheck_yield only works for generators") - - # Inform all listening classes that they might want to preserve any information - # from the function to the generator (*hint* TypeVariables *hint*) - # - # stop_checking() will not be invoked on the generator until it raises - # StopIteration or its refcount drops to 0 - switch_checking(func, gen) - - # Otherwise, we build ourselves a fake generator - return Fake_generator(gen, signature) - return _decorator(signature, 'type_return', 'type_yield', __check_yield) - -# Aliases -typecheck = typecheck_args -accepts = typecheck_args -returns = typecheck_return -yields = typecheck_yield diff --git a/tablib/packages/typecheck/doctest_support.py b/tablib/packages/typecheck/doctest_support.py deleted file mode 100644 index 0933dda..0000000 --- a/tablib/packages/typecheck/doctest_support.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -This module allows doctest to find typechecked functions. - -Currently, doctest verifies functions to make sure that their -globals() dict is the __dict__ of their module. In the case of -decorated functions, the globals() dict *is* not the right one. - -To enable support for doctest do: - - import typecheck.doctest_support - -This import must occur before any calls to doctest methods. -""" - -def __DocTestFinder_from_module(self, module, object): - """ - Return true if the given object is defined in the given - module. 
- """ - import inspect - - if module is None: - return True - elif inspect.isfunction(object) or inspect.isclass(object): - return module.__name__ == object.__module__ - elif inspect.getmodule(object) is not None: - return module is inspect.getmodule(object) - elif hasattr(object, '__module__'): - return module.__name__ == object.__module__ - elif isinstance(object, property): - return True # [XX] no way not be sure. - else: - raise ValueError("object must be a class or function") - -import doctest as __doctest -__doctest.DocTestFinder._from_module = __DocTestFinder_from_module \ No newline at end of file diff --git a/tablib/packages/typecheck/mixins.py b/tablib/packages/typecheck/mixins.py deleted file mode 100644 index df346c5..0000000 --- a/tablib/packages/typecheck/mixins.py +++ /dev/null @@ -1,84 +0,0 @@ -from typecheck import _TC_NestedError, _TC_TypeError, check_type, Or -from typecheck import register_type, _TC_Exception - -class _TC_IterationError(_TC_NestedError): - def __init__(self, iteration, value, inner_exception): - _TC_NestedError.__init__(self, inner_exception) - - self.iteration = iteration - self.value = value - - def error_message(self): - return ("at iteration %d (value: %s)" % (self.iteration, repr(self.value))) + _TC_NestedError.error_message(self) - -### This is the shadow class behind UnorderedIteratorMixin. -### Again, it tries to pretend it doesn't exist by mimicing -### the class of as much as possible. 
-### -### This mixin provides typechecking for iterator classes -### where you don't care about the order of the types (ie, -### you simply Or() the types together, as opposed to patterned -### lists, which would be ordered mixins) -class _UnorderedIteratorMixin(object): - def __init__(self, class_name, obj): - vals = [o for o in obj] - - self.type = self - self._type = Or(*vals) - self.__cls = obj.__class__ - self.__vals = vals - # This is necessary because it's a huge pain in the ass - # to get the "raw" name of the class once it's created - self.__cls_name = class_name - - def __typecheck__(self, func, to_check): - if not isinstance(to_check, self.__cls): - raise _TC_TypeError(to_check, self) - - for i, item in enumerate(to_check): - try: - check_type(self._type, func, item) - except _TC_Exception, e: - raise _TC_IterationError(i, item, e) - - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, cls): - return obj - - def __str__(self): - return "%s(%s)" % (self.__cls_name, str(self._type)) - - __repr__ = __str__ - -### This is included in a class's parent-class section like so: -### class MyClass(UnorderedIteratorMixin("MyClass")): -### blah blah blah -### -### This serves as a class factory, whose produced classes -### attempt to mask the fact they exist. Their purpose -### is to redirect __typesig__ calls to appropriate -### instances of _UnorderedIteratorMixin -def UnorderedIteratorMixin(class_name): - class UIM(object): - @classmethod - def __typesig__(cls, obj): - if isinstance(obj, cls): - return _UnorderedIteratorMixin(class_name, obj) - - def __repr__(self): - return "%s%s" % (class_name, str(tuple(e for e in self))) - - # We register each produced class anew - # If someone needs to unregister these classes, they should - # save a copy of it before including it in the class-definition: - # - # my_UIM = UnorderedIteratorMixin("FooClass") - # class FooClass(my_UIM): - # ... 
- # - # Alternatively, you could just look in FooClass.__bases__ later; whatever - register_type(UIM) - return UIM - -register_type(_UnorderedIteratorMixin) diff --git a/tablib/packages/typecheck/sets.py b/tablib/packages/typecheck/sets.py deleted file mode 100644 index 299738c..0000000 --- a/tablib/packages/typecheck/sets.py +++ /dev/null @@ -1,62 +0,0 @@ -from typecheck import CheckType, _TC_TypeError, check_type, Type -from typecheck import register_type, Or, _TC_Exception, _TC_KeyError -from typecheck import _TC_LengthError - -### Provide typechecking for the built-in set() class -### -### XXX: Investigate rewriting this in terms of -### UnorderedIteratorMixin or Or() -class Set(CheckType): - def __init__(self, set_list): - self.type = set(set_list) - self._types = [Type(t) for t in self.type] - - # self._type is used to build _TC_TypeError - if len(self._types) > 1: - self._type = Or(*self.type) - elif len(self._types) == 1: - # XXX Is there an easier way to get this? - t = self.type.pop() - self._type = t - self.type.add(t) - - def __str__(self): - return "Set(" + str([e for e in self.type]) + ")" - - __repr__ = __str__ - - def __typecheck__(self, func, to_check): - if not isinstance(to_check, set): - raise _TC_TypeError(to_check, self.type) - - if len(self._types) == 0 and len(to_check) > 0: - raise _TC_LengthError(len(to_check), 0) - - for obj in to_check: - error = False - for type in self._types: - try: - check_type(type, func, obj) - except _TC_Exception: - error = True - continue - else: - error = False - break - if error: - raise _TC_KeyError(obj, _TC_TypeError(obj, self._type)) - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return False - return self.type == other.type - - def __hash__(self): - return hash(str(hash(self.__class__)) + str(hash(frozenset(self.type)))) - - @classmethod - def __typesig__(self, obj): - if isinstance(obj, set): - return Set(obj) - -register_type(Set) diff --git 
a/tablib/packages/typecheck/typeclasses.py b/tablib/packages/typecheck/typeclasses.py deleted file mode 100644 index 1be57a0..0000000 --- a/tablib/packages/typecheck/typeclasses.py +++ /dev/null @@ -1,35 +0,0 @@ -from typecheck import Typeclass - -### Number -#################################################### - -_numbers = [int, float, complex, long, bool] -try: - from decimal import Decimal - _numbers.append(Decimal) - del Decimal -except ImportError: - pass - -Number = Typeclass(*_numbers) -del _numbers - -### String -- subinstance of ImSequence -#################################################### - -String = Typeclass(str, unicode) - -### ImSequence -- immutable sequences -#################################################### - -ImSequence = Typeclass(tuple, xrange, String) - -### MSequence -- mutable sequences -#################################################### - -MSequence = Typeclass(list) - -### Mapping -#################################################### - -Mapping = Typeclass(dict) From 6c37412c76fcb26f0179cc727d72916a8c1efca6 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 03:45:26 -0400 Subject: [PATCH 20/23] Updated readme, commenting out unimplimented features. --- README.rst | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/README.rst b/README.rst index 7ac287f..324abac 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,7 @@ Tabbed: format-agnostic tabular dataset library / /_ / /_/ / _ /_/ /_ /_/ // __// /_/ / \__/ \__,_/ /_.___/ /_.___/ \___/ \__,_/ -*Tabbed is under active documentation-driven development.* +.. *Tabbed is under active documentation-driven development.* Tabbed is a format-agnostic tabular dataset library, written in Python. @@ -22,7 +22,9 @@ Formats supported: - YAML - Excel - CSV -- HTML +.. - HTML + +At this time, Tabbed supports the export of it's powerful Dataset object instances into any of the above formats. Import is underway. 
Please note that tabbed *purposefully* excludes XML support. It always will. @@ -30,18 +32,18 @@ Please note that tabbed *purposefully* excludes XML support. It always will. Features -------- -Convert datafile formats via API: :: - - tablib.source(filename='data.csv').export('data.json') +.. Convert datafile formats via API: :: +.. +.. tablib.source(filename='data.csv').export('data.json') -Convert datafile formats via CLI: :: - - $ tabbed data.csv data.json +.. Convert datafile formats via CLI: :: +.. +.. $ tabbed data.csv data.json -Convert data formats via CLI pipe interface: :: - - $ curl http://domain.dev/dataset.json | tabbed --to excel | gist -p +.. Convert data formats via CLI pipe interface: :: +.. +.. $ curl http://domain.dev/dataset.json | tabbed --to excel | gist -p Populate fresh data files: :: @@ -62,8 +64,7 @@ Populate fresh data files: :: Intelligently add new rows: :: - data.add_row('Bob', 'Dylan') - # >>> Warning: Existing column count is 3 + data.adppend('Bob', 'Dylan', 3.2) print data.headers # >>> ('first_name', 'last_name', 'gpa') @@ -87,11 +88,11 @@ Manipulate rows by index: :: print data[0:1] # >>> [('George', 'Washington', 2.6), ('Henry', 'Ford', 2.3)] - # Update saved file - data.save() + .. # Update saved file + .. data.save() -Export to various formats: :: - - # Save copy as CSV - data.export('backup.csv') \ No newline at end of file +.. Export to various formats: :: +.. +.. # Save copy as CSV +.. data.export('backup.csv') \ No newline at end of file From 5c70fe0a0793ac0e5d9491f50d109fd5819cf14e Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 03:45:35 -0400 Subject: [PATCH 21/23] Cleanup. 
--- tablib/core.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tablib/core.py b/tablib/core.py index a3441d8..37db6ab 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -167,12 +167,12 @@ class Dataset(object): for j, col in enumerate(row): ws.write(i, j, col) -# wb.save('elllo') doc = xlwt.CompoundDoc.XlsDoc() doc.save(stream, wb.get_biff_data()) - return stream.getvalue() + return stream.getvalue() + def append(self, row, index=None): # todo: impliment index self._validate(row) @@ -186,7 +186,7 @@ class Dataset(object): def save(self, filename=None, format=None): - + """Saves dataset""" if not format: # set format from filename # format = filename @@ -199,6 +199,10 @@ class Dataset(object): # note export format # open file, save the bitch + + def export(self): + """Exports Dataset to given filename or file-object.""" + class InvalidDimensions(Exception): "Invalid size" @@ -209,7 +213,7 @@ class UnsupportedFormat(NotImplementedError): -def source(io_string=None, filename=None): +def source(src=None, file=None, filename=None): """docstring for import""" #open by filename pass \ No newline at end of file From e8bf07da1a817f890066c6445d63a624c7535a96 Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 03:45:49 -0400 Subject: [PATCH 22/23] Test tests. 
--- tablib/tests/tests.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tablib/tests/tests.py b/tablib/tests/tests.py index 4e17065..187fb27 100644 --- a/tablib/tests/tests.py +++ b/tablib/tests/tests.py @@ -16,8 +16,11 @@ data.append(['kenneth' ,'reitz', 4.3]) #print data.digest() -print data.json +#print data.yaml +#print data.json data.headers = None - -print data.json \ No newline at end of file +#print data.csv +print data.xls +#print data.yaml +#print data.json \ No newline at end of file From 0862b359056c43610789663b8813b1c7ef36302c Mon Sep 17 00:00:00 2001 From: Kenneth Reitz Date: Mon, 30 Aug 2010 03:46:30 -0400 Subject: [PATCH 23/23] Readme update. --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 324abac..db7ef1d 100644 --- a/README.rst +++ b/README.rst @@ -24,7 +24,7 @@ Formats supported: - CSV .. - HTML -At this time, Tabbed supports the export of it's powerful Dataset object instances into any of the above formats. Import is underway. +At this time, Tabbed supports the **export** of it's powerful Dataset object instances into any of the above formats. Import is underway. Please note that tabbed *purposefully* excludes XML support. It always will.