not messing around

Signed-off-by: Kenneth Reitz <me@kennethreitz.org>
This commit is contained in:
2017-09-14 20:39:29 -04:00
parent aac8c34136
commit ebb5111cd9
85 changed files with 5939 additions and 10 deletions
+1
View File
@@ -1,5 +1,6 @@
7.1.0:
- Inline TOML tables for things like requests[security]!
- Attempt to preserve comments in Pipfiles.
7.0.6:
- NO_SPIN is now automatic when CI is set.
- Additionally, vendor pip (a patched version) for doing advanced dependency resolution.
+21 -10
View File
@@ -7,7 +7,7 @@ import base64
import hashlib
import pipfile
import toml
import contoml
import delegator
from requests.compat import OrderedDict
@@ -174,7 +174,7 @@ class Project(object):
@property
def parsed_pipfile(self):
with open(self.pipfile_location) as f:
return toml.load(f, _dict=OrderedDict)
return contoml.loads(f.read())
@property
def _pipfile(self):
@@ -294,16 +294,27 @@ class Project(object):
if path is None:
path = self.pipfile_location
for section in ('packages', 'dev-packages'):
for package in data[section]:
# Convert things to inline tables — fancy :)
if hasattr(data[section][package], 'keys'):
_data = data[section][package]
data[section][package] = toml._get_empty_inline_table(dict)
data[section][package].update(_data)
formatted_data = format_toml(toml.dumps(data, preserve=True))
try:
formatted_data = contoml.dumps(data)
except RuntimeError:
import toml
for section in ('packages', 'dev-packages'):
for package in data[section]:
# Convert things to inline tables — fancy :)
if hasattr(data[section][package], 'keys'):
_data = data[section][package]
data[section][package] = toml._get_empty_inline_table(dict)
data[section][package].update(_data)
formatted_data = toml.dumps(data)
else:
pass
finally:
pass
with open(path, 'w') as f:
f.write(formatted_data)
+48
View File
@@ -0,0 +1,48 @@
from ._version import VERSION
# Public package version, re-exported from the private ``_version`` module.
__version__ = VERSION
def loads(text):
    """
    Parse TOML source text and wrap the result in a dict-like TOMLFile.

    :param text: the TOML document as a string.
    :returns: a TOMLFile built from the parsed elements.
    """
    # Imports are deferred to call time.
    from prettytoml.parser import parse_tokens
    from prettytoml.lexer import tokenize
    from .file import TOMLFile

    # The token stream is materialised before it is handed to the parser.
    token_seq = tuple(tokenize(text, is_top_level=True))
    return TOMLFile(parse_tokens(token_seq))
def load(file_path):
    """
    Parse the TOML file at ``file_path`` into a dict-like object and return it.

    :param file_path: path of the TOML file to read.
    :returns: whatever ``loads()`` returns for the file's contents.
    :raises IOError/OSError: if the file cannot be opened or read.
    """
    # Read through a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(file_path) as fp:
        text = fp.read()
    return loads(text)
def dumps(value):
    """
    Serialize a TOMLFile back to TOML source code.

    Only TOMLFile instances are accepted; any other value is rejected.

    :param value: the TOMLFile to serialize.
    :returns: the result of ``value.dumps()``.
    :raises RuntimeError: if ``value`` is not a TOMLFile.
    """
    from contoml.file.file import TOMLFile

    if isinstance(value, TOMLFile):
        return value.dumps()
    raise RuntimeError("Can only dump a TOMLFile instance loaded by load() or loads()")
def dump(obj, file_path, prettify=False):
    """
    Serialize ``obj`` with ``dumps()`` and write the text to ``file_path``.

    :param obj: the object to serialize; passed unchanged to ``dumps()``.
    :param file_path: destination path, opened in text write mode.
    :param prettify: accepted but not used by this implementation.
    """
    # The file is opened (and therefore truncated) before serialization runs.
    with open(file_path, 'w') as out:
        serialized = dumps(obj)
        out.write(serialized)
+1
View File
@@ -0,0 +1 @@
# Version identifier; set to the literal 'master' rather than a numbered release.
VERSION = 'master'
+3
View File
@@ -0,0 +1,3 @@
from .file import TOMLFile
+40
View File
@@ -0,0 +1,40 @@
from prettytoml.elements.table import TableElement
from prettytoml.errors import InvalidValueError
from contoml.file.freshtable import FreshTable
from prettytoml import util
class ArrayOfTables(list):
    """
    A list of tables that belongs to a named array-of-tables of a TOML file.

    ``toml_file`` is the owning file object (may be falsy, in which case
    freshly appended tables are only stored in this list) and ``name`` is the
    name of the array inside that file.
    """

    def __init__(self, toml_file, name, iterable=None):
        if iterable:
            list.__init__(self, iterable)
        self._name = name
        self._toml_file = toml_file

    def append(self, value):
        """
        Appends a new table, populated from the dict ``value``, to the array.

        Raises InvalidValueError if ``value`` is not a dict.
        """
        if not isinstance(value, dict):
            raise InvalidValueError('Can only append a dict to an array of tables')

        # Persist an empty table first, then fill it key by key through the file.
        table = FreshTable(parent=self, name=self._name, is_array=True)
        table._append_to_parent()

        index = len(self._toml_file[self._name]) - 1
        # A distinct loop variable keeps the ``value`` parameter intact.
        for key_seq, child_value in util.flatten_nested(value).items():
            self._toml_file._array_setitem_with_key_seq(self._name, index, key_seq, child_value)

    def __getitem__(self, item):
        """
        Behaves like list indexing, except that the index one past the end
        returns a FreshTable bound to this array instead of raising IndexError.
        """
        try:
            return list.__getitem__(self, item)
        except IndexError:
            if item == len(self):
                return FreshTable(parent=self, name=self._name, is_array=True)
            else:
                raise

    def append_fresh_table(self, fresh_table):
        """
        Stores ``fresh_table`` in this list and, when there is an owning file,
        forwards it to the file's ``append_fresh_table()`` as well.
        """
        list.append(self, fresh_table)
        if self._toml_file:
            self._toml_file.append_fresh_table(fresh_table)
+56
View File
@@ -0,0 +1,56 @@
import operator
from functools import reduce
from contoml.file import raw
class CascadeDict:
    """
    A dict-like object made up of one or more other dict-like objects where querying for an item cascade-gets
    it from all the internal dicts in order of their listing, and setting an item sets it on the first dict listed.
    """

    def __init__(self, *internal_dicts):
        assert internal_dicts, 'internal_dicts cannot be empty'
        self._internal_dicts = tuple(internal_dicts)

    def cascaded_with(self, one_more_dict):
        """
        Returns another instance with one more dict cascaded at the end.
        """
        # The existing dicts must be unpacked; passing the tuple itself would
        # make it a single (non-dict) layer of the new instance.
        return CascadeDict(*(self._internal_dicts + (one_more_dict,)))

    def __getitem__(self, item):
        # The first layer that knows the key wins.
        for d in self._internal_dicts:
            try:
                return d[item]
            except KeyError:
                pass
        raise KeyError(item)

    def __setitem__(self, key, value):
        # Writes always go to the first layer.
        self._internal_dicts[0][key] = value

    def keys(self):
        """
        Returns the set of keys present in any of the internal dicts.
        """
        all_keys = set()
        for d in self._internal_dicts:
            all_keys.update(d.keys())
        return all_keys

    def items(self):
        """
        Returns a tuple of (key, value) pairs, one per distinct key, where the
        value comes from the first internal dict that has the key.
        """
        merged = {}
        # Later layers are applied first so that earlier layers override them.
        for d in reversed(self._internal_dicts):
            merged.update(d.items())
        return tuple(merged.items())

    def __contains__(self, item):
        for d in self._internal_dicts:
            if item in d:
                return True
        return False

    @property
    def neutralized(self):
        """
        A plain dict of this object's items with every value passed through raw.to_raw().
        """
        return {k: raw.to_raw(v) for k, v in self.items()}

    @property
    def primitive_value(self):
        return self.neutralized

    def __repr__(self):
        return repr(self.primitive_value)
+293
View File
@@ -0,0 +1,293 @@
from prettytoml.errors import NoArrayFoundError, DuplicateKeysError, DuplicateTablesError
from contoml.file import structurer, toplevels, raw
from contoml.file.array import ArrayOfTables
from contoml.file.freshtable import FreshTable
import prettytoml.elements.factory as element_factory
import prettytoml.util as util
class TOMLFile:
    """
    A TOMLFile object that tries its best to preserve formatting and order of mappings of the input source.

    The file is kept as a flat list of elements (``_elements``) alongside a navigable structure
    (``_navigable``) that is rebuilt from those elements after every change.

    Raises InvalidTOMLFileError on invalid input elements.

    Raises DuplicateKeysError, DuplicateTableError when appropriate.
    """

    def __init__(self, _elements):
        self._elements = []
        self._navigable = {}
        self.append_elements(_elements)

    def __getitem__(self, item):
        """
        Returns the entry called ``item``: an ArrayOfTables when the stored value is a list or tuple,
        the stored value otherwise, or a FreshTable bound to this file when there is no such entry.
        """
        try:
            value = self._navigable[item]
            if isinstance(value, (list, tuple)):
                return ArrayOfTables(toml_file=self, name=item, iterable=value)
            else:
                return value
        except KeyError:
            return FreshTable(parent=self, name=item, is_array=False)

    def get(self, item, default=None):
        """
        Returns self[item] when ``item`` is one of keys(), otherwise ``default``.
        """
        if item not in self:
            return default
        else:
            return self.__getitem__(item)

    def __contains__(self, item):
        return item in self.keys()

    def _setitem_with_key_seq(self, key_seq, value):
        """
        Sets the value in the TOML file located by the given key sequence.

        Example:
        self._setitem(('key1', 'key2', 'key3'), 'text_value')
        is equivalent to doing
        self['key1']['key2']['key3'] = 'text_value'
        """
        table = self
        key_so_far = tuple()
        for key in key_seq[:-1]:
            key_so_far += (key,)
            self._make_sure_table_exists(key_so_far)
            table = table[key]
        table[key_seq[-1]] = value

    def _array_setitem_with_key_seq(self, array_name, index, key_seq, value):
        """
        Sets the array value in the TOML file located by the given key sequence.

        Example:
        self._array_setitem(array_name, index, ('key1', 'key2', 'key3'), 'text_value')
        is equivalent to doing
        self.array(array_name)[index]['key1']['key2']['key3'] = 'text_value'
        """
        table = self.array(array_name)[index]
        key_so_far = tuple()
        for key in key_seq[:-1]:
            key_so_far += (key,)
            new_table = self._array_make_sure_table_exists(array_name, index, key_so_far)
            # A table that was just created is used directly; otherwise descend into the existing one.
            if new_table is not None:
                table = new_table
            else:
                table = table[key]
        table[key_seq[-1]] = value

    def _make_sure_table_exists(self, name_seq):
        """
        Makes sure the table with the full name comprising of name_seq exists.
        """
        t = self
        for key in name_seq[:-1]:
            t = t[key]
        name = name_seq[-1]
        if name not in t:
            self.append_elements([element_factory.create_table_header_element(name_seq),
                                  element_factory.create_table({})])

    def _array_make_sure_table_exists(self, array_name, index, name_seq):
        """
        Makes sure the table with the full name comprising of name_seq exists.

        Returns the newly created table, or None when the table already existed.
        """
        t = self[array_name][index]
        for key in name_seq[:-1]:
            t = t[key]
        name = name_seq[-1]
        if name not in t:
            new_table = element_factory.create_table({})
            self.append_elements([element_factory.create_table_header_element((array_name,) + name_seq), new_table])
            return new_table

    def __delitem__(self, key):
        # The element is replaced by an empty table rather than removed from the list.
        table_element_index = self._elements.index(self._navigable[key])
        self._elements[table_element_index] = element_factory.create_table({})
        self._on_element_change()

    def __setitem__(self, key, value):

        # Setting an array-of-tables
        if key and isinstance(value, (tuple, list)) and value and all(isinstance(v, dict) for v in value):
            for table in value:
                self.array(key).append(table)

        # Or setting a whole single table
        elif isinstance(value, dict):

            if key and key in self:
                del self[key]

            for key_seq, child_value in util.flatten_nested({key: value}).items():
                self._setitem_with_key_seq(key_seq, child_value)

        # Or updating the anonymous section table
        else:
            # Anything else is written as a key of the anonymous table
            self[''][key] = value

        self._on_element_change()

    def _detect_toplevels(self):
        """
        Returns a sequence of TopLevel instances for the current state of this table.
        """
        return tuple(e for e in toplevels.identify(self.elements) if isinstance(e, toplevels.Table))

    def _update_table_fallbacks(self, table_toplevels):
        """
        Updates the fallbacks on all the table elements to make relative table access possible.

        Raises DuplicateKeysError if appropriate.
        """

        if len(self.elements) <= 1:
            return

        def parent_of(toplevel):
            # Returns a TopLevel parent of the given entry, or None.
            for parent_toplevel in table_toplevels:
                if toplevel.name.sub_names[:-1] == parent_toplevel.name.sub_names:
                    return parent_toplevel

        for entry in table_toplevels:
            if entry.name.is_qualified:
                parent = parent_of(entry)
                if parent:
                    child_name = entry.name.without_prefix(parent.name)
                    parent.table_element.set_fallback({child_name.sub_names[0]: entry.table_element})

    def _recreate_navigable(self):
        if self._elements:
            self._navigable = structurer.structure(toplevels.identify(self._elements))

    def array(self, name):
        """
        Returns the array of tables with the given name.

        Raises NoArrayFoundError if the name exists but is not an array of tables.
        """
        if name in self._navigable:
            if isinstance(self._navigable[name], (list, tuple)):
                return self[name]
            else:
                raise NoArrayFoundError
        else:
            return ArrayOfTables(toml_file=self, name=name)

    def _on_element_change(self):
        self._recreate_navigable()

        table_toplevels = self._detect_toplevels()
        self._update_table_fallbacks(table_toplevels)

    def append_elements(self, elements):
        """
        Appends more elements to the contained internal elements.
        """
        self._elements = self._elements + list(elements)
        self._on_element_change()

    def prepend_elements(self, elements):
        """
        Prepends more elements to the contained internal elements.
        """
        self._elements = list(elements) + self._elements
        self._on_element_change()

    def dumps(self):
        """
        Returns the TOML file serialized back to str.
        """
        return ''.join(element.serialized() for element in self._elements)

    def dump(self, file_path):
        """
        Writes the serialized TOML file to ``file_path``.
        """
        with open(file_path, mode='w') as fp:
            fp.write(self.dumps())

    def keys(self):
        # The anonymous table ('') is always reported as a key.
        return set(self._navigable.keys()) | {''}

    def values(self):
        return self._navigable.values()

    def items(self):
        """
        Returns a list of (name, value) pairs that always includes an entry for the anonymous table ('').
        """
        # Materialised as a list: on Python 3 dict.items() is a view that cannot be
        # concatenated with a list.
        items = list(self._navigable.items())

        if any(key == '' for (key, _) in items):
            return items
        return items + [('', self[''])]

    @property
    def primitive(self):
        """
        Returns a primitive object representation for this container (which is a dict).

        WARNING: The returned container does not contain any markup or formatting metadata.
        """
        raw_container = raw.to_raw(self._navigable)

        # Collapsing the anonymous table onto the top-level container if present
        if '' in raw_container:
            raw_container.update(raw_container[''])
            del raw_container['']

        return raw_container

    def append_fresh_table(self, fresh_table):
        """
        Gets called by FreshTable instances when they get written to.
        """
        if fresh_table.name:
            elements = []
            if fresh_table.is_array:
                elements += [element_factory.create_array_of_tables_header_element(fresh_table.name)]
            else:
                elements += [element_factory.create_table_header_element(fresh_table.name)]

            elements += [fresh_table, element_factory.create_newline_element()]
            self.append_elements(elements)

        else:
            # It's an anonymous table
            self.prepend_elements([fresh_table, element_factory.create_newline_element()])

    @property
    def elements(self):
        return self._elements

    def __str__(self):

        is_empty = (not self['']) and (not tuple(k for k in self.keys() if k))

        def key_name(key):
            return '[ANONYMOUS]' if not key else key

        def pair(key, value):
            return '%s = %s' % (key_name(key), str(value))

        content_text = '' if is_empty else \
            '\n\t' + ',\n\t'.join(pair(k, v) for (k, v) in self.items() if v) + '\n'

        return "TOMLFile{%s}" % content_text

    def __repr__(self):
        return str(self)
+45
View File
@@ -0,0 +1,45 @@
from prettytoml.elements.table import TableElement
class FreshTable(TableElement):
    """
    An initially empty TableElement that registers itself with its parent, at most once, the first
    time it is written to.

    parent is an object providing an append_fresh_table(TableElement) method, or None.
    """

    def __init__(self, parent, name, is_array=False):
        TableElement.__init__(self, sub_elements=[])

        self._parent = parent
        self._name = name
        self._is_array = is_array

        # Flips to True once the table has been handed to its parent, so that later
        # writes do not register it again.
        self.__appended = False

    @property
    def name(self):
        return self._name

    @property
    def is_array(self):
        return self._is_array

    def _append_to_parent(self):
        """
        Persists this ephemeral table on its parent; a no-op after the first call.
        """
        if not self.__appended:
            if self._parent is not None:
                self._parent.append_fresh_table(self)
            self.__appended = True

    def __setitem__(self, key, value):
        # Store the value first, then make sure the table itself is persisted.
        TableElement.__setitem__(self, key, value)
        self._append_to_parent()
+30
View File
@@ -0,0 +1,30 @@
import itertools
class PeekableIterator:
    """
    Wraps an iterator and adds a peek() method that looks at the next item without consuming it.
    """

    # Returned by peek() when the iterator is exhausted. Truthiness is False.
    Nothing = tuple()

    def __init__(self, iter):
        self._iter = iter
        # Holds at most one item that was fetched by peek() but not yet consumed.
        self._peeked = []

    def __next__(self):
        if self._peeked:
            return self._peeked.pop()
        return next(self._iter)

    def next(self):
        # Python 2 spelling of the iterator protocol.
        return self.__next__()

    def __iter__(self):
        return self

    def peek(self):
        """
        Returns the next item without consuming it.

        Returns PeekableIterator.Nothing when the iterator is exhausted.
        """
        # The looked-ahead item is buffered, so repeated peeks do not stack up
        # wrappers around the underlying iterator.
        if not self._peeked:
            try:
                self._peeked.append(next(self._iter))
            except StopIteration:
                return PeekableIterator.Nothing
        return self._peeked[0]
+16
View File
@@ -0,0 +1,16 @@
from prettytoml.elements.abstracttable import AbstractTable
def to_raw(x):
    """
    Recursively converts ``x`` into plain Python containers and values.

    Tables yield their ``primitive_value``, CascadeDicts their ``neutralized`` form, lists and
    tuples become lists, dicts become dicts with converted values; anything else is returned as is.
    """
    # Imported at call time rather than at module import time.
    from contoml.file.cascadedict import CascadeDict

    if isinstance(x, AbstractTable):
        return x.primitive_value
    if isinstance(x, CascadeDict):
        return x.neutralized
    if isinstance(x, (list, tuple)):
        return [to_raw(member) for member in x]
    if isinstance(x, dict):
        return {key: to_raw(val) for (key, val) in x.items()}
    return x
+116
View File
@@ -0,0 +1,116 @@
from contoml.file import toplevels
from contoml.file.cascadedict import CascadeDict
class NamedDict(dict):
    """
    A dict whose items can also be addressed with Name instances (paths of sub-names).
    """

    def __init__(self, other_dict=None):
        dict.__init__(self)
        if other_dict:
            # Routed through __setitem__ so that Name keys are expanded.
            for entry_key, entry_value in other_dict.items():
                self[entry_key] = entry_value

    def __setitem__(self, key, value):
        """
        Stores value under key, where key may be a Name instance.

        For a Name path, the missing parents of the value are created as NamedDict instances.
        When the first sub-name already exists, its current value is replaced by a CascadeDict
        of the old value and the newly given (possibly nested) value.
        """
        if not isinstance(key, toplevels.Name):
            return dict.__setitem__(self, key, value)

        sub_names = key.sub_names
        if not sub_names:
            # An empty Name addresses nothing.
            return

        head = sub_names[0]
        if len(sub_names) == 1:
            self[head] = CascadeDict(self[head], value) if head in self else value
            return

        tail = key.drop(1)
        if head in self:
            overlay = NamedDict()
            overlay[tail] = value
            self[head] = CascadeDict(self[head], overlay)
        else:
            self[head] = NamedDict()
            self[head][tail] = value

    def __contains__(self, item):
        # Membership means that item lookup succeeds, so Name paths are supported too.
        try:
            self[item]
        except KeyError:
            return False
        return True

    def append(self, key, value):
        """
        Appends value to the list stored under key, creating a one-item list if key is absent.
        """
        if key not in self:
            self[key] = [value]
        else:
            self[key].append(value)

    def __getitem__(self, item):
        if not isinstance(item, toplevels.Name):
            return dict.__getitem__(self, item)

        # Walk the path one sub-name at a time.
        current = self
        for sub_name in item.sub_names:
            current = current[sub_name]
        return current
def structure(table_toplevels):
    """
    Accepts an ordered sequence of TopLevel instances and returns a navigable object structure representation of the
    TOML file.

    The result is a NamedDict: the anonymous table is stored under the '' key, regular tables under
    their name, and arrays of tables as lists under their name. Tables and arrays whose name is
    prefixed with the most recently recorded array-of-tables name are attached to the last item of
    that array instead of to the top level.
    """

    table_toplevels = tuple(table_toplevels)
    obj = NamedDict()

    last_array_of_tables = None         # The Name of the last array-of-tables header

    for toplevel in table_toplevels:

        if isinstance(toplevel, toplevels.AnonymousTable):
            obj[''] = toplevel.table_element

        elif isinstance(toplevel, toplevels.Table):
            if last_array_of_tables and toplevel.name.is_prefixed_with(last_array_of_tables):
                # A sub-table of the latest array item: cascade it onto that item.
                seq = obj[last_array_of_tables]
                unprefixed_name = toplevel.name.without_prefix(last_array_of_tables)

                seq[-1] = CascadeDict(seq[-1], NamedDict({unprefixed_name: toplevel.table_element}))
            else:
                obj[toplevel.name] = toplevel.table_element
        else:    # It's an ArrayOfTables

            if last_array_of_tables and toplevel.name != last_array_of_tables and \
                    toplevel.name.is_prefixed_with(last_array_of_tables):

                # A nested array of tables: it lives inside the latest item of the enclosing array.
                seq = obj[last_array_of_tables]
                unprefixed_name = toplevel.name.without_prefix(last_array_of_tables)

                if unprefixed_name in seq[-1]:
                    seq[-1][unprefixed_name].append(toplevel.table_element)
                else:
                    cascaded_with = NamedDict({unprefixed_name: [toplevel.table_element]})
                    seq[-1] = CascadeDict(seq[-1], cascaded_with)

            else:
                # Only arrays appended at this level update the remembered array name.
                obj.append(toplevel.name, toplevel.table_element)
                last_array_of_tables = toplevel.name

    return obj
+25
View File
@@ -0,0 +1,25 @@
from contoml.file.cascadedict import CascadeDict
def test_cascadedict():
    """Reads cascade through the dicts in order; writes land on the first dict."""
    first = {'a': 1, 'b': 2, 'c': 3}
    second = {'b': 12, 'e': 4, 'f': 5}
    cascade = CascadeDict(first, second)

    # Test querying
    for key, expected in (('a', 1), ('b', 2), ('c', 3), ('e', 4)):
        assert cascade[key] == expected
    assert cascade.keys() == {'a', 'b', 'c', 'e', 'f'}
    assert set(cascade.items()) == {('a', 1), ('b', 2), ('c', 3), ('e', 4), ('f', 5)}

    # Test mutating
    cascade['a'] = 11
    cascade['f'] = 'fff'
    cascade['super'] = 'man'
    for key, expected in (('a', 11), ('super', 'man'), ('f', 'fff')):
        assert first[key] == expected
+20
View File
@@ -0,0 +1,20 @@
from prettytoml import parser, lexer
from contoml.file import toplevels
def test_entry_extraction():
    """identify() finds every top-level entry of sample.toml, starting with the anonymous table."""
    # Read through a context manager so the sample file handle is not leaked.
    with open('sample.toml') as sample:
        text = sample.read()
    elements = parser.parse_tokens(lexer.tokenize(text))

    e = tuple(toplevels.identify(elements))

    assert len(e) == 13
    assert isinstance(e[0], toplevels.AnonymousTable)
def test_entry_names():
    """A Name recognises a shorter Name as its prefix and can strip it."""
    prefix = toplevels.Name(('super', 'sub1'))
    full_name = toplevels.Name(('super', 'sub1', 'sub2', 'sub3'))

    assert full_name.is_prefixed_with(prefix)
    remainder = full_name.without_prefix(prefix)
    assert remainder.sub_names == ('sub2', 'sub3')
+12
View File
@@ -0,0 +1,12 @@
from contoml.file.peekableit import PeekableIterator
def test_peekable_iterator():
    """Peeking repeatedly never consumes an item."""
    peekable = PeekableIterator(i for i in (1, 2, 3, 4))

    for _ in range(3):
        assert peekable.peek() == 1

    consumed = [next(peekable) for _ in range(4)]
    assert consumed == [1, 2, 3, 4]
+41
View File
@@ -0,0 +1,41 @@
from prettytoml import lexer, parser
from contoml.file import toplevels
from prettytoml.parser import elementsanitizer
from contoml.file.structurer import NamedDict, structure
from prettytoml.parser.tokenstream import TokenStream
def test_NamedDict():
    """Values stored under a Name path are reachable through longer Name paths."""
    d = NamedDict()

    table_path = ('super', 'sub1', 'sub2')
    d[toplevels.Name(table_path)] = {'sub3': 12}
    d[toplevels.Name(table_path)]['sub4'] = 42

    assert d[toplevels.Name(table_path + ('sub3',))] == 12
    assert d[toplevels.Name(table_path + ('sub4',))] == 42
def test_structure():
    """structure() exposes the tables, nested tables and arrays of tables of sample.toml."""
    # Read through a context manager so the sample file handle is not leaked.
    with open('sample.toml') as sample:
        text = sample.read()
    tokens = lexer.tokenize(text)
    elements = elementsanitizer.sanitize(parser.parse_tokens(tokens))
    entries_ = tuple(toplevels.identify(elements))

    s = structure(entries_)

    assert s['']['title'] == 'TOML Example'
    assert s['owner']['name'] == 'Tom Preston-Werner'
    assert s['database']['ports'][1] == 8001
    assert s['servers']['alpha']['dc'] == 'eqdc10'
    assert s['clients']['data'][1][0] == 1
    assert s['clients']['key3'] == 'The quick brown fox jumps over the lazy dog.'

    assert s['fruit'][0]['name'] == 'apple'
    assert s['fruit'][0]['physical']['color'] == 'red'
    assert s['fruit'][0]['physical']['shape'] == 'round'
    assert s['fruit'][0]['variety'][0]['name'] == 'red delicious'
    assert s['fruit'][0]['variety'][1]['name'] == 'granny smith'

    assert s['fruit'][1]['name'] == 'banana'
    assert s['fruit'][1]['variety'][0]['name'] == 'plantain'
    assert s['fruit'][1]['variety'][0]['points'][2]['y'] == 4
+142
View File
@@ -0,0 +1,142 @@
"""
Top-level entries in a TOML file.
"""
from prettytoml import elements
from prettytoml.elements import TableElement, TableHeaderElement
from .peekableit import PeekableIterator
class TopLevel:
    """
    Base class of the top-level entries: a name paired with the table element it refers to.
    """

    def __init__(self, names, table_element):
        # The given sub-names are wrapped in a Name instance.
        self._names = Name(names)
        self._table_element = table_element

    @property
    def table_element(self):
        return self._table_element

    @property
    def name(self):
        """
        The name of this entry, as a Name instance.
        """
        return self._names
class Name:
    """
    A possibly qualified name made of an ordered sequence of sub-names.

    Two names compare (and hash) by their dotted string form.
    """

    def __init__(self, names):
        self._names = names

    @property
    def sub_names(self):
        return self._names

    def drop(self, n=0):
        """
        Returns the name after dropping the first n entries of it.
        """
        return Name(names=self._names[n:])

    def is_prefixed_with(self, names):
        """
        Returns True if this name starts with the given sub-names (a Name or a sequence).

        A candidate prefix longer than this name is never a prefix.
        """
        if isinstance(names, Name):
            return self.is_prefixed_with(names.sub_names)

        names = tuple(names)
        if len(names) > len(self._names):
            return False

        for i, name in enumerate(names):
            if self._names[i] != name:
                return False
        return True

    def without_prefix(self, names):
        """
        Returns this name with the leading sub-names it shares with ``names`` removed.

        If ``names`` stops matching part-way, everything from the first mismatch on is kept.
        """
        if isinstance(names, Name):
            return self.without_prefix(names.sub_names)

        names = tuple(names)
        # zip() stops at the shorter sequence, so an over-long prefix cannot index past the end.
        for i, (own, other) in enumerate(zip(self._names, names)):
            if own != other:
                return Name(self._names[i:])
        return Name(names=self.sub_names[len(names):])

    @property
    def is_qualified(self):
        return len(self._names) > 1

    def __str__(self):
        return '.'.join(self.sub_names)

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        return not self.__eq__(other)
class AnonymousTable(TopLevel):
    """
    The top-level entry for the table that has no header; its name is the single empty sub-name.
    """

    def __init__(self, table_element):
        TopLevel.__init__(self, names=('',), table_element=table_element)
class Table(TopLevel):
    """
    A top-level entry for a regular named table.
    """

    def __init__(self, names, table_element):
        TopLevel.__init__(self, names, table_element)
class ArrayOfTables(TopLevel):
    """
    A top-level entry for one table that belongs to an array of tables.
    """

    def __init__(self, names, table_element):
        TopLevel.__init__(self, names, table_element)
def _validate_file_elements(file_elements):
pass
def identify(file_elements):
    """
    Outputs an ordered sequence of instances of TopLevel types.

    Elements start with an optional TableElement, followed by zero or more pairs of (TableHeaderElement, TableElement).

    Metadata elements are ignored. Nothing is yielded for empty input, for input made of metadata
    elements only, or for a trailing header that has no element after it.
    """

    if not file_elements:
        return

    _validate_file_elements(file_elements)

    # An iterator over enumerate(the non-metadata) elements
    iterator = PeekableIterator((element_i, element) for (element_i, element) in enumerate(file_elements)
                                if element.type != elements.TYPE_METADATA)

    try:
        peeked = iterator.peek()
        if peeked == PeekableIterator.Nothing:
            # peek() signals exhaustion with a sentinel rather than an exception;
            # unpacking the sentinel would raise ValueError.
            return
        _, first_element = peeked
        if isinstance(first_element, TableElement):
            iterator.next()
            yield AnonymousTable(first_element)
    except KeyError:
        pass
    except StopIteration:
        return

    for element_i, element in iterator:

        if not isinstance(element, TableHeaderElement):
            continue

        is_array_of_tables = element.is_array_of_tables

        # The table that belongs to a header is the element right after it. A StopIteration must
        # not escape from a generator body (PEP 479), so a dangling header ends the sequence.
        try:
            table_element_i, table_element = next(iterator)
        except StopIteration:
            return

        # If TableHeader of a regular table, return Table following it
        if not is_array_of_tables:
            yield Table(names=element.names, table_element=table_element)

        # If TableHeader of an array of tables, do your thing
        else:
            yield ArrayOfTables(names=element.names, table_element=table_element)
+1
View File
@@ -0,0 +1 @@
from .iso8601 import *
+214
View File
@@ -0,0 +1,214 @@
"""ISO 8601 date time string parsing
Basic usage:
>>> import iso8601
>>> iso8601.parse_date("2007-01-25T12:00:00Z")
datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.Utc ...>)
>>>
"""
import datetime
from decimal import Decimal
import sys
import re
# Names exported by ``from iso8601 import *``.
__all__ = ["parse_date", "ParseError", "UTC",
           "FixedOffset"]

# Base string type used for isinstance() checks: ``str`` on Python 3, ``basestring`` before that.
if sys.version_info >= (3, 0, 0):
    _basestring = str
else:
    _basestring = basestring
# Adapted from http://delete.me.uk/2005/03/iso8601.html
#
# Compiled with re.VERBOSE, so whitespace and ``#`` comments inside the pattern are ignored.
# Named groups: year, monthdash / month, daydash / day, separator, hour, minute, second,
# second_fraction, timezone, tz_sign, tz_hour, tz_minute. Everything after the year is optional,
# and the pattern ends with ``$`` so trailing text is rejected.
ISO8601_REGEX = re.compile(
r"""
(?P<year>[0-9]{4})
(
(
(-(?P<monthdash>[0-9]{1,2}))
|
(?P<month>[0-9]{2})
(?!$) # Don't allow YYYYMM
)
(
(
(-(?P<daydash>[0-9]{1,2}))
|
(?P<day>[0-9]{2})
)
(
(
(?P<separator>[ T])
(?P<hour>[0-9]{2})
(:{0,1}(?P<minute>[0-9]{2})){0,1}
(
:{0,1}(?P<second>[0-9]{1,2})
([.,](?P<second_fraction>[0-9]+)){0,1}
){0,1}
(?P<timezone>
Z
|
(
(?P<tz_sign>[-+])
(?P<tz_hour>[0-9]{2})
:{0,1}
(?P<tz_minute>[0-9]{2}){0,1}
)
){0,1}
){0,1}
)
){0,1} # YYYY-MM
){0,1} # YYYY only
$
""",
re.VERBOSE
)
class ParseError(Exception):
    """Raised when there is a problem parsing a date string.

    This is a direct subclass of Exception.
    """
# Two interchangeable providers of ``UTC`` and ``FixedOffset``: hand-written tzinfo classes for
# interpreters older than 3.2, and thin wrappers around datetime.timezone everywhere else.
if sys.version_info < (3, 2, 0):
    # Yoinked from python docs
    ZERO = datetime.timedelta(0)

    class Utc(datetime.tzinfo):
        """UTC Timezone

        """
        def utcoffset(self, dt):
            return ZERO

        def tzname(self, dt):
            return "UTC"

        def dst(self, dt):
            return ZERO

        def __repr__(self):
            return "<iso8601.Utc>"

    UTC = Utc()

    class FixedOffset(datetime.tzinfo):
        """Fixed offset in hours and minutes from UTC

        """
        def __init__(self, offset_hours, offset_minutes, name):
            self.__offset_hours = offset_hours  # Keep for later __getinitargs__
            self.__offset_minutes = offset_minutes  # Keep for later __getinitargs__
            self.__offset = datetime.timedelta(
                hours=offset_hours, minutes=offset_minutes)
            self.__name = name

        def __eq__(self, other):
            # Only another FixedOffset can be compared; defer to the other operand otherwise.
            if not isinstance(other, FixedOffset):
                return NotImplemented
            return (
                (other.__offset == self.__offset)
                and
                (other.__name == self.__name)
            )

        def __getinitargs__(self):
            return (self.__offset_hours, self.__offset_minutes, self.__name)

        def utcoffset(self, dt):
            return self.__offset

        def tzname(self, dt):
            return self.__name

        def dst(self, dt):
            return ZERO

        def __repr__(self):
            return "<FixedOffset %r %r>" % (self.__name, self.__offset)
else:
    UTC = datetime.timezone.utc

    def FixedOffset(offset_hours, offset_minutes, name):
        """Build a datetime.timezone with the given offset from UTC and name."""
        delta = datetime.timedelta(hours=offset_hours, minutes=offset_minutes)
        return datetime.timezone(delta, name)
def to_int(d, key, default_to_zero=False, default=None, required=True):
    """Pull a value from the dict and convert to int

    :param d: The dict to read from
    :param key: The key to look up in the dict
    :param default_to_zero: If the value is None or empty, treat it as zero
    :param default: If the value is missing in the dict use this default
    :param required: If true, a value that is still None raises ParseError;
                     otherwise None is returned
    """
    # A falsy stored value (None, "") is replaced by the default as well.
    value = d.get(key) or default
    if default_to_zero and value in ["", None]:
        return 0
    if value is not None:
        return int(value)
    if required:
        raise ParseError("Unable to read %s from %s" % (key, d))
    return None
def parse_timezone(matches, default_timezone=UTC):
    """Parses ISO 8601 time zone specs into tzinfo offsets

    :param matches: A dict with the "timezone", "tz_sign", "tz_hour" and
                    "tz_minute" entries of a matched date string
    :param default_timezone: Returned when the "timezone" entry is None
    :returns: UTC for "Z", the default when no timezone was given, otherwise
              a FixedOffset described as "<sign>HH:MM"
    """
    tz_text = matches["timezone"]

    if tz_text == "Z":
        return UTC
    # This isn't strictly correct, but it's common to encounter dates without
    # timezones so I'll assume the default (which defaults to UTC).
    # Addresses issue 4.
    if tz_text is None:
        return default_timezone

    sign = matches["tz_sign"]
    hours = to_int(matches, "tz_hour")
    minutes = to_int(matches, "tz_minute", default_to_zero=True)
    # The description is built from the unsigned values; the sign is applied afterwards.
    description = "%s%02d:%02d" % (sign, hours, minutes)
    if sign == "-":
        hours, minutes = -hours, -minutes
    return FixedOffset(hours, minutes, description)
def parse_date(datestring, default_timezone=UTC):
    """Parses ISO 8601 dates into datetime objects

    The timezone is parsed from the date string. However it is quite common to
    have dates without a timezone (not strictly correct). In this case the
    default timezone specified in default_timezone is used. This is UTC by
    default.

    :param datestring: The date to parse as a string
    :param default_timezone: A datetime tzinfo instance to use when no timezone
                             is specified in the datestring. If this is set to
                             None then a naive datetime object is returned.
    :returns: A datetime.datetime instance
    :raises: ParseError when there is a problem parsing the date or
             constructing the datetime instance.

    """
    if not isinstance(datestring, _basestring):
        # The value is wrapped in a 1-tuple so that a tuple argument is reported
        # through ParseError instead of breaking the %-formatting itself.
        raise ParseError("Expecting a string %r" % (datestring,))
    m = ISO8601_REGEX.match(datestring)
    if not m:
        raise ParseError("Unable to parse date string %r" % (datestring,))
    groups = m.groupdict()

    tz = parse_timezone(groups, default_timezone=default_timezone)

    # Turn the fractional-second digits into whole microseconds (truncating).
    groups["second_fraction"] = int(Decimal("0.%s" % (groups["second_fraction"] or 0)) * Decimal("1000000.0"))

    try:
        return datetime.datetime(
            year=to_int(groups, "year"),
            # The dashed and undashed spellings are alternatives; a missing month or day means 1.
            month=to_int(groups, "month", default=to_int(groups, "monthdash", required=False, default=1)),
            day=to_int(groups, "day", default=to_int(groups, "daydash", required=False, default=1)),
            hour=to_int(groups, "hour", default_to_zero=True),
            minute=to_int(groups, "minute", default_to_zero=True),
            second=to_int(groups, "second", default_to_zero=True),
            microsecond=groups["second_fraction"],
            tzinfo=tz,
        )
    except Exception as e:
        # Any failure to build the datetime (e.g. out-of-range fields) is reported as ParseError.
        raise ParseError(e)
+102
View File
@@ -0,0 +1,102 @@
# coding=UTF-8
from __future__ import absolute_import
import copy
import datetime
import pickle
import pytest
from iso8601 import iso8601
def test_iso8601_regex():
    """The module-level pattern accepts a full UTC timestamp."""
    match = iso8601.ISO8601_REGEX.match("2006-10-11T00:14:33Z")
    assert match
def test_fixedoffset_eq():
    """Comparing a plain tzinfo with a FixedOffset must not raise."""
    # See https://bitbucket.org/micktwomey/pyiso8601/issues/19
    # Only the evaluation matters here; the result is deliberately discarded.
    _ = datetime.tzinfo() == iso8601.FixedOffset(2, 0, '+2:00')
def test_parse_no_timezone_different_default():
    """A date string without a timezone gets the supplied default timezone."""
    tz = iso8601.FixedOffset(2, 0, "test offset")
    expected = datetime.datetime(2007, 1, 1, 8, 0, 0, 0, tz)

    d = iso8601.parse_date("2007-01-01T08:00:00", default_timezone=tz)

    assert d == expected
    assert d.tzinfo == tz
def test_parse_utc_different_default():
    """Z should mean 'UTC', not 'default'.

    """
    tz = iso8601.FixedOffset(2, 0, "test offset")
    expected = datetime.datetime(2007, 1, 1, 8, 0, 0, 0, iso8601.UTC)

    d = iso8601.parse_date("2007-01-01T08:00:00Z", default_timezone=tz)

    assert d == expected
@pytest.mark.parametrize("invalid_date, error_string", [
    ("2013-10-", "Unable to parse date string"),
    ("2013-", "Unable to parse date string"),
    ("", "Unable to parse date string"),
    (None, "Expecting a string"),
    ("wibble", "Unable to parse date string"),
    ("23", "Unable to parse date string"),
    ("131015T142533Z", "Unable to parse date string"),
    ("131015", "Unable to parse date string"),
    ("20141", "Unable to parse date string"),
    ("201402", "Unable to parse date string"),
    ("2007-06-23X06:40:34.00Z", "Unable to parse date string"),  # https://code.google.com/p/pyiso8601/issues/detail?id=14
    ("2007-06-23 06:40:34.00Zrubbish", "Unable to parse date string"),  # https://code.google.com/p/pyiso8601/issues/detail?id=14
    ("20114-01-03T01:45:49", "Unable to parse date string"),
])
def test_parse_invalid_date(invalid_date, error_string):
    """Each invalid input raises ParseError with a message starting with the expected text."""
    # Sanity check of the parametrize table itself.
    assert invalid_date is None or isinstance(invalid_date, str)

    with pytest.raises(iso8601.ParseError) as excinfo:
        iso8601.parse_date(invalid_date)

    assert excinfo.errisinstance(iso8601.ParseError)
    message = str(excinfo.value)
    assert message.startswith(error_string)
@pytest.mark.parametrize("valid_date,expected_datetime,isoformat", [
("2007-06-23 06:40:34.00Z", datetime.datetime(2007, 6, 23, 6, 40, 34, 0, iso8601.UTC), "2007-06-23T06:40:34+00:00"), # Handle a separator other than T
("1997-07-16T19:20+01:00", datetime.datetime(1997, 7, 16, 19, 20, 0, 0, iso8601.FixedOffset(1, 0, "+01:00")), "1997-07-16T19:20:00+01:00"), # Parse with no seconds
("2007-01-01T08:00:00", datetime.datetime(2007, 1, 1, 8, 0, 0, 0, iso8601.UTC), "2007-01-01T08:00:00+00:00"), # Handle timezone-less dates. Assumes UTC. http://code.google.com/p/pyiso8601/issues/detail?id=4
("2006-10-20T15:34:56.123+02:30", datetime.datetime(2006, 10, 20, 15, 34, 56, 123000, iso8601.FixedOffset(2, 30, "+02:30")), None),
("2006-10-20T15:34:56Z", datetime.datetime(2006, 10, 20, 15, 34, 56, 0, iso8601.UTC), "2006-10-20T15:34:56+00:00"),
("2007-5-7T11:43:55.328Z", datetime.datetime(2007, 5, 7, 11, 43, 55, 328000, iso8601.UTC), "2007-05-07T11:43:55.328000+00:00"), # http://code.google.com/p/pyiso8601/issues/detail?id=6
("2006-10-20T15:34:56.123Z", datetime.datetime(2006, 10, 20, 15, 34, 56, 123000, iso8601.UTC), "2006-10-20T15:34:56.123000+00:00"),
("2013-10-15T18:30Z", datetime.datetime(2013, 10, 15, 18, 30, 0, 0, iso8601.UTC), "2013-10-15T18:30:00+00:00"),
("2013-10-15T22:30+04", datetime.datetime(2013, 10, 15, 22, 30, 0, 0, iso8601.FixedOffset(4, 0, "+04:00")), "2013-10-15T22:30:00+04:00"), # <time>±hh:mm
("2013-10-15T1130-0700", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(-7, 0, "-07:00")), "2013-10-15T11:30:00-07:00"), # <time>±hhmm
("2013-10-15T1130+0700", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(+7, 0, "+07:00")), "2013-10-15T11:30:00+07:00"), # <time>±hhmm
("2013-10-15T1130+07", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(+7, 0, "+07:00")), "2013-10-15T11:30:00+07:00"), # <time>±hh
("2013-10-15T1130-07", datetime.datetime(2013, 10, 15, 11, 30, 0, 0, iso8601.FixedOffset(-7, 0, "-07:00")), "2013-10-15T11:30:00-07:00"), # <time>±hh
("2013-10-15T15:00-03:30", datetime.datetime(2013, 10, 15, 15, 0, 0, 0, iso8601.FixedOffset(-3, -30, "-03:30")), "2013-10-15T15:00:00-03:30"),
("2013-10-15T183123Z", datetime.datetime(2013, 10, 15, 18, 31, 23, 0, iso8601.UTC), "2013-10-15T18:31:23+00:00"), # hhmmss
("2013-10-15T1831Z", datetime.datetime(2013, 10, 15, 18, 31, 0, 0, iso8601.UTC), "2013-10-15T18:31:00+00:00"), # hhmm
("2013-10-15T18Z", datetime.datetime(2013, 10, 15, 18, 0, 0, 0, iso8601.UTC), "2013-10-15T18:00:00+00:00"), # hh
("2013-10-15", datetime.datetime(2013, 10, 15, 0, 0, 0, 0, iso8601.UTC), "2013-10-15T00:00:00+00:00"), # YYYY-MM-DD
("20131015T18:30Z", datetime.datetime(2013, 10, 15, 18, 30, 0, 0, iso8601.UTC), "2013-10-15T18:30:00+00:00"), # YYYYMMDD
("2012-12-19T23:21:28.512400+00:00", datetime.datetime(2012, 12, 19, 23, 21, 28, 512400, iso8601.FixedOffset(0, 0, "+00:00")), "2012-12-19T23:21:28.512400+00:00"), # https://code.google.com/p/pyiso8601/issues/detail?id=21
("2006-10-20T15:34:56.123+0230", datetime.datetime(2006, 10, 20, 15, 34, 56, 123000, iso8601.FixedOffset(2, 30, "+02:30")), "2006-10-20T15:34:56.123000+02:30"), # https://code.google.com/p/pyiso8601/issues/detail?id=18
("19950204", datetime.datetime(1995, 2, 4, tzinfo=iso8601.UTC), "1995-02-04T00:00:00+00:00"), # https://code.google.com/p/pyiso8601/issues/detail?id=1
("2010-07-20 15:25:52.520701+00:00", datetime.datetime(2010, 7, 20, 15, 25, 52, 520701, iso8601.FixedOffset(0, 0, "+00:00")), "2010-07-20T15:25:52.520701+00:00"), # https://code.google.com/p/pyiso8601/issues/detail?id=17
("2010-06-12", datetime.datetime(2010, 6, 12, tzinfo=iso8601.UTC), "2010-06-12T00:00:00+00:00"), # https://code.google.com/p/pyiso8601/issues/detail?id=16
("1985-04-12T23:20:50.52-05:30", datetime.datetime(1985, 4, 12, 23, 20, 50, 520000, iso8601.FixedOffset(-5, -30, "-05:30")), "1985-04-12T23:20:50.520000-05:30"), # https://bitbucket.org/micktwomey/pyiso8601/issue/8/015-parses-negative-timezones-incorrectly
("1997-08-29T06:14:00.000123Z", datetime.datetime(1997, 8, 29, 6, 14, 0, 123, iso8601.UTC), "1997-08-29T06:14:00.000123+00:00"), # https://bitbucket.org/micktwomey/pyiso8601/issue/9/regression-parsing-microseconds
("2014-02", datetime.datetime(2014, 2, 1, 0, 0, 0, 0, iso8601.UTC), "2014-02-01T00:00:00+00:00"), # https://bitbucket.org/micktwomey/pyiso8601/issue/14/regression-yyyy-mm-no-longer-parses
("2014", datetime.datetime(2014, 1, 1, 0, 0, 0, 0, iso8601.UTC), "2014-01-01T00:00:00+00:00"), # YYYY
("1997-08-29T06:14:00,000123Z", datetime.datetime(1997, 8, 29, 6, 14, 0, 123, iso8601.UTC), "1997-08-29T06:14:00.000123+00:00"), # Use , as decimal separator
])
def test_parse_valid_date(valid_date, expected_datetime, isoformat):
# Parses one row of the parametrized table and compares the result with
# expected_datetime, first field by field and then as a whole.
parsed = iso8601.parse_date(valid_date)
# Field-by-field checks come first so a failure names the component that differs.
assert parsed.year == expected_datetime.year
assert parsed.month == expected_datetime.month
assert parsed.day == expected_datetime.day
assert parsed.hour == expected_datetime.hour
assert parsed.minute == expected_datetime.minute
assert parsed.second == expected_datetime.second
assert parsed.microsecond == expected_datetime.microsecond
assert parsed.tzinfo == expected_datetime.tzinfo
assert parsed == expected_datetime
assert parsed.isoformat() == expected_datetime.isoformat()
copy.deepcopy(parsed) # ensure it's deep copy-able
pickle.dumps(parsed) # ensure it pickles
# Rows whose isoformat column is None skip the exact-string comparison.
if isoformat:
assert parsed.isoformat() == isoformat
assert iso8601.parse_date(parsed.isoformat()) == parsed # Test round trip
+25
View File
@@ -0,0 +1,25 @@
from ._version import VERSION
__version__ = VERSION
def prettify(toml_text):
    """
    Returns a prettified rendering of the given TOML text.

    The text is tokenized, parsed into elements, passed through the element
    prettifier, and the resulting elements are serialized into a single string.
    """
    # Imported lazily, inside the function, exactly as the package has always done.
    from .parser import parse_tokens
    from .lexer import tokenize
    from .prettifier import prettify as element_prettify

    token_stream = tokenize(toml_text, is_top_level=True)
    parsed_elements = parse_tokens(token_stream)
    pretty_elements = element_prettify(parsed_elements)

    pieces = [element.serialized() for element in pretty_elements]
    return ''.join(pieces)
def prettify_from_file(file_path):
    """
    Reads the TOML file at file_path and returns its prettified content.
    """
    with open(file_path, 'r') as source_file:
        raw_text = source_file.read()
    return prettify(raw_text)
+1
View File
@@ -0,0 +1 @@
# Version identifier for this package.
# NOTE(review): 'master' looks like a development placeholder rather than a release number - confirm.
VERSION = 'master'
+13
View File
@@ -0,0 +1,13 @@
"""
TOML file elements (a higher abstraction layer than individual lexical tokens).
"""
from .traversal import TraversalMixin
from .errors import InvalidElementError
from .table import TableElement
from .tableheader import TableHeaderElement
from .common import TYPE_METADATA, TYPE_ATOMIC, TYPE_CONTAINER, TYPE_MARKUP
from . import traversal
from . import factory
+81
View File
@@ -0,0 +1,81 @@
from prettytoml.elements.common import ContainerElement
from prettytoml.elements import traversal
class AbstractTable(ContainerElement, traversal.TraversalMixin):
    """
    Common code for handling tables as key-value pairs with metadata elements sprinkled all over.

    Assumes input sub_elements are correct.
    """

    def __init__(self, sub_elements):
        ContainerElement.__init__(self, sub_elements)
        # Optional dict-like consulted after this table's own entries; see set_fallback().
        self._fallback = None

    def _enumerate_items(self):
        """
        Yields ((key_index, key_element), (value_index, value_element)) for all the element key-value pairs.

        The non-metadata sub-elements are consumed two at a time: a key, then its value.
        """
        non_metadata = self._enumerate_non_metadata_sub_elements()
        while True:
            # next() must not leak StopIteration out of a generator: since PEP 479
            # (Python 3.7+) that surfaces as RuntimeError instead of ending iteration.
            try:
                key_entry = next(non_metadata)
                value_entry = next(non_metadata)
            except StopIteration:
                return
            yield key_entry, value_entry

    def items(self):
        """
        Yields (key, value) pairs of this table, followed by those of the fallback if one is set.
        """
        for (key_i, key), (value_i, value) in self._enumerate_items():
            yield key.value, value.value
        if self._fallback:
            for key, value in self._fallback.items():
                yield key, value

    def keys(self):
        """
        Returns a tuple of all keys produced by items().
        """
        return tuple(key for (key, _) in self.items())

    def values(self):
        """
        Returns a tuple of all values produced by items().
        """
        return tuple(value for (_, value) in self.items())

    def __len__(self):
        # Counts this table's own pairs only; fallback entries are not included.
        return len(tuple(self._enumerate_items()))

    def __contains__(self, item):
        return item in self.keys()

    def _find_key_and_value(self, key):
        """
        Returns (key_i, value_i) corresponding to the given key value.

        Raises KeyError if no matching key found.
        """
        for (key_i, key_element), (value_i, value_element) in self._enumerate_items():
            if key_element.value == key:
                return key_i, value_i
        raise KeyError(key)

    def __getitem__(self, item):
        """
        Returns the value stored under the given key, looking through items() in order.

        Raises KeyError if no matching key found.
        """
        for key, value in self.items():
            if key == item:
                return value
        raise KeyError(item)

    def get(self, key, default=None):
        """
        Returns the value stored under key, or default when the key is missing.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def set_fallback(self, fallback):
        """
        Sets a fallback dict-like instance to be used to look up values after they are not found
        in this instance.
        """
        self._fallback = fallback

    @property
    def primitive_value(self):
        """
        Returns a primitive Python value without any formatting or markup metadata.
        """
        return {
            key:
                value.primitive_value if hasattr(value, 'primitive_value') else value for key, value in self.items()
        }
+136
View File
@@ -0,0 +1,136 @@
from prettytoml.elements import common, factory, traversal
from prettytoml.elements.common import Element, ContainerElement
from prettytoml.elements.factory import create_element
from prettytoml.elements.metadata import NewlineElement
from prettytoml.elements.errors import InvalidElementError
class ArrayElement(ContainerElement, traversal.TraversalMixin):
"""
A sequence-like container element containing other atomic elements or other containers.
Implements list-like interface.
Assumes input sub_elements are correct for an array element.
Raises an InvalidElementError if contains heterogeneous values.
"""
def __init__(self, sub_elements):
common.ContainerElement.__init__(self, sub_elements)
# Validate eagerly so a mixed-type array is rejected at construction time.
self._check_homogeneity()
def _check_homogeneity(self):
# Homogeneity is judged on the Python types of the entries in primitive_value.
if len(set(type(v) for v in self.primitive_value)) > 1:
raise InvalidElementError('Array should be homogeneous')
def __len__(self):
# The length is the number of entries yielded by _enumerate_non_metadata_sub_elements().
return len(tuple(self._enumerate_non_metadata_sub_elements()))
def __getitem__(self, i):
"""
Returns the ith entry, which can be a primitive value, a seq-like, or a dict-like object.
"""
return self._find_value(i)[1].value
def __setitem__(self, i, value):
# Replaces the ith entry in place; surrounding sub-elements are left untouched.
value_i, _ = self._find_value(i)
# Ready-made Element instances are used as they are; anything else goes through the factory.
new_element = value if isinstance(value, Element) else factory.create_element(value)
self._sub_elements = self.sub_elements[:value_i] + [new_element] + self.sub_elements[value_i+1:]
@property
def value(self):
return self # self is a sequence-like value
@property
def primitive_value(self):
"""
Returns a primitive Python value without any formatting or markup metadata.
"""
return list(
self[i].primitive_value if hasattr(self[i], 'primitive_value')
else self[i]
for i in range(len(self)))
def __str__(self):
return "Array{}".format(self.primitive_value)
def append(self, v):
# Appends v as a new last entry, inserted just before the closing square bracket.
new_entry = [create_element(v)]
if self: # If not empty, we need a comma and whitespace prefix!
new_entry = [
factory.create_operator_element(','),
factory.create_whitespace_element(),
] + new_entry
insertion_index = self._find_closing_square_bracket()
self._sub_elements = self._sub_elements[:insertion_index] + new_entry + \
self._sub_elements[insertion_index:]
def _find_value(self, i):
"""
Returns (value_index, value) of ith value in this sequence.
Raises IndexError if not found.
"""
# Plain tuple indexing, so negative indices are accepted as well.
return tuple(self._enumerate_non_metadata_sub_elements())[i]
def __delitem__(self, i):
# Removes the ith entry together with one adjacent comma so the array stays well formed.
value_i, value = self._find_value(i)
begin, end = value_i, value_i+1
# Rules:
# 1. begin should be index to the preceding comma to the value
# 2. end should be index to the following comma, or the closing bracket
# 3. If no preceding comma found but following comma found then end should be the index of the following value
preceding_comma = self._find_preceding_comma(value_i)
found_preceding_comma = preceding_comma >= 0
if found_preceding_comma:
begin = preceding_comma
following_comma = self._find_following_comma(value_i)
if following_comma >= 0:
if not found_preceding_comma:
end = self._find_following_non_metadata(following_comma)
else:
end = following_comma
else:
end = self._find_following_closing_square_bracket()
self._sub_elements = self.sub_elements[:begin] + self._sub_elements[end:]
@property
def is_multiline(self):
# True as soon as any direct sub-element is a NewlineElement.
return any(isinstance(e, (NewlineElement)) for e in self.elements)
def turn_into_multiline(self):
"""
Turns this array into a multi-line array with each element lying on its own line.
"""
if self.is_multiline:
return
i = self._find_following_comma(-1)
# The helpers below read i when they are called, so each one always
# searches from the current position as the loop advances.
def next_entry_i():
return self._find_following_non_metadata(i)
def next_newline_i():
return self._find_following_newline(i)
def next_closing_bracket_i():
return self._find_following_closing_square_bracket(i)
def next_comma_i():
return self._find_following_comma(i)
while i < len(self.elements)-1:
if next_newline_i() < next_entry_i():
self.elements.insert(i+1, factory.create_newline_element())
# NOTE(review): presumably -inf is the traversal helpers' "not found" value - confirm.
if float('-inf') < next_comma_i() < next_closing_bracket_i():
i = next_comma_i()
else:
i = next_closing_bracket_i()
+52
View File
@@ -0,0 +1,52 @@
from ..tokens import py2toml, toml2py
from . import common
from prettytoml.util import is_dict_like, is_sequence_like
from .errors import InvalidElementError
class AtomicElement(common.TokenElement):
    """
    An element wrapping a sequence of tokens that together hold one atomic value, updatable in place.

    Raises:
        InvalidElementError: when passed an invalid sequence of tokens.
    """

    def __init__(self, _tokens):
        common.TokenElement.__init__(self, _tokens, common.TYPE_ATOMIC)

    def _validate_tokens(self, _tokens):
        """
        Checks that exactly one of the given tokens is a non-metadata token.
        """
        value_token_count = len([candidate for candidate in _tokens if not candidate.type.is_metadata])
        if value_token_count != 1:
            raise InvalidElementError('Tokens making up an AtomicElement must contain only one non-metadata token')

    def serialized(self):
        """
        Returns the TOML source of this element: the source text of all its tokens, joined.
        """
        pieces = [token.source_substring for token in self.tokens]
        return ''.join(pieces)

    def _value_token_index(self):
        """
        Finds the index of the token where the value is stored.
        """
        # TODO: memoize this value
        position = next(
            (index for index, token in enumerate(self.tokens) if not token.type.is_metadata),
            None)
        if position is None:
            raise RuntimeError('could not find a value token')
        return position

    @property
    def value(self):
        """
        Returns the Python value contained in this atomic element.
        """
        value_token = self._tokens[self._value_token_index()]
        return toml2py.deserialize(value_token)

    @property
    def primitive_value(self):
        # An atomic value is already primitive.
        return self.value

    def set(self, value):
        """
        Sets the contained value to the given one.
        """
        assert not (is_sequence_like(value) or is_dict_like(value)), 'the value must be an atomic primitive'
        position = self._value_token_index()
        replacement = py2toml.create_primitive_token(value)
        self._tokens[position] = replacement
+101
View File
@@ -0,0 +1,101 @@
from abc import abstractmethod
# Element category tags. Every Element stores one of these and exposes it as ``type``.
TYPE_METADATA = 'element-metadata'
TYPE_ATOMIC = 'element-atomic'
TYPE_CONTAINER = 'element-container'
TYPE_MARKUP = 'element-markup'
class Element:
    """
    Base of all TOML elements.

    An Element:
        - is built from one or more Token instances, or from one or more other Element instances. Not both.
        - knows how to serialize its value back to valid TOML code.

    A non-metadata Element additionally:
        - knows how to deserialize its content into a usable Python primitive, seq-like, or dict-like value.
        - knows how to update its content from a Python primitive, seq-like, or dict-like value
          while maintaining its formatting.
    """

    def __init__(self, _type):
        # The category tag handed in by the concrete element class.
        self._type = _type

    @property
    def type(self):
        """
        The category tag this element was created with.
        """
        return self._type

    @abstractmethod
    def serialized(self):
        """
        TOML serialization of this element as str. Concrete elements override this.
        """
        raise NotImplementedError
class TokenElement(Element):
    """
    An Element whose content is a sequence of tokens.
    """

    def __init__(self, _tokens, _type):
        Element.__init__(self, _type)
        # Validation runs on the tokens as given, before they are copied into a list.
        self._validate_tokens(_tokens)
        self._tokens = list(_tokens)

    @property
    def tokens(self):
        """
        The list of tokens making up this element.
        """
        return self._tokens

    @property
    def first_token(self):
        """
        The first token of this element.
        """
        return self._tokens[0]

    @abstractmethod
    def _validate_tokens(self, _tokens):
        """
        Hook for subclasses to check the tokens handed to the constructor.
        """
        raise NotImplementedError

    def serialized(self):
        """
        The source text of all tokens, joined in order.
        """
        pieces = [token.source_substring for token in self._tokens]
        return ''.join(pieces)

    def __repr__(self):
        return repr(self.tokens)

    @property
    def primitive_value(self):
        """
        Returns a primitive Python value without any formatting or markup metadata.
        """
        raise NotImplementedError
class ContainerElement(Element):
    """
    An Element whose content is exclusively other elements.
    """

    def __init__(self, sub_elements):
        Element.__init__(self, TYPE_CONTAINER)
        # Keep a private list copy of whatever iterable was handed in.
        self._sub_elements = list(sub_elements)

    @property
    def sub_elements(self):
        """
        The list of elements contained in this element.
        """
        return self._sub_elements

    @property
    def elements(self):
        """
        Alias of sub_elements.
        """
        return self.sub_elements

    def serialized(self):
        """
        The serializations of all sub-elements, joined in order.
        """
        pieces = [child.serialized() for child in self.sub_elements]
        return ''.join(pieces)

    def __repr__(self):
        return repr(self.primitive_value)

    @property
    def primitive_value(self):
        """
        Returns a primitive Python value without any formatting or markup metadata.
        """
        raise NotImplementedError
+13
View File
@@ -0,0 +1,13 @@
class InvalidElementError(Exception):
    """
    Raised by Element factories when the given sequence of tokens or sub-elements are invalid for the
    specific type of Element being created.
    """

    def __init__(self, message):
        # Initialise the base class explicitly so that ``args`` and ``str(error)`` carry
        # the message on every interpreter; Python 2 only fills ``args`` in __init__.
        Exception.__init__(self, message)
        self.message = message

    def __repr__(self):
        return "InvalidElementError: {}".format(self.message)
+152
View File
@@ -0,0 +1,152 @@
import datetime
import functools
import six
from prettytoml import tokens
from prettytoml.tokens import py2toml
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.metadata import PunctuationElement, WhitespaceElement, NewlineElement
from prettytoml.elements.tableheader import TableHeaderElement
from prettytoml.util import join_with, is_sequence_like
def create_element(value, multiline_strings_allowed=True):
    """
    Creates and returns the appropriate elements.Element instance from the given Python primitive, sequence-like,
    or dict-like value.

    multiline_strings_allowed is forwarded to the token factory for primitive values and is
    propagated to the entries of lists/tuples and to nested dicts.

    Raises RuntimeError when the value is of a type that is not accounted for.
    """
    # Imported here rather than at module level, as the original code does.
    from prettytoml.elements.array import ArrayElement

    if isinstance(value, (int, float, bool, datetime.datetime, datetime.date) + six.string_types) or value is None:
        primitive_token = py2toml.create_primitive_token(value, multiline_strings_allowed=multiline_strings_allowed)
        return AtomicElement((primitive_token,))

    elif isinstance(value, (list, tuple)):
        preamble = [create_operator_element('[')]
        postable = [create_operator_element(']')]
        # Pass the flag down so strings nested in an array obey the same restriction
        # as the array's container (it used to be silently reset to the default here).
        stuffing_elements = [
            create_element(v, multiline_strings_allowed=multiline_strings_allowed) for v in value
        ]
        spaced_stuffing = join_with(stuffing_elements,
                                    separator=[create_operator_element(','), create_whitespace_element()])

        return ArrayElement(preamble + spaced_stuffing + postable)

    elif isinstance(value, dict):
        return create_inline_table(value, multiline_table=False, multiline_strings_allowed=multiline_strings_allowed)

    else:
        raise RuntimeError('Value type unaccounted for: {} of type {}'.format(value, type(value)))
def create_inline_table(from_dict, multiline_table=False, multiline_strings_allowed=True):
    """
    Creates an InlineTable element from the given dict instance.

    With multiline_table set, the key-value pairs are separated by a comma and a newline
    instead of a comma and a single space.

    NOTE(review): multiline_strings_allowed is accepted but never consulted; values are
    always created with multi-line strings disabled. Confirm whether that is intended.
    """
    from prettytoml.elements.inlinetable import InlineTableElement

    def pair_elements(key, value):
        # The elements spelling out a single ``key = value`` entry.
        return (
            create_string_element(key, bare_allowed=True),
            create_whitespace_element(),
            create_operator_element('='),
            create_whitespace_element(),
            create_element(value, multiline_strings_allowed=False)
        )

    opening = [create_operator_element('{')]
    closing = [create_operator_element('}')]

    pairs = (pair_elements(k, v) for (k, v) in from_dict.items())

    comma = create_operator_element(',')
    if multiline_table:
        gap = create_newline_element()
    else:
        gap = create_whitespace_element()
    pair_separator = [comma, gap]

    spaced_elements = join_with(pairs, separator=pair_separator)

    return InlineTableElement(opening + spaced_elements + closing)
def create_string_element(value, bare_allowed=False):
    """
    Creates and returns an AtomicElement wrapping a single string token built from value.

    bare_allowed is handed to the token factory as its second positional argument.
    """
    string_token = py2toml.create_string_token(value, bare_allowed)
    return AtomicElement((string_token,))
def create_operator_element(operator):
    """
    Creates a PunctuationElement instance containing an operator token of the specified type. The operator
    should be a TOML source str.

    Raises KeyError when the operator is not one of the supported source strings.
    """
    token_type_by_source = {
        ',': tokens.TYPE_OP_COMMA,
        '=': tokens.TYPE_OP_ASSIGNMENT,
        '[': tokens.TYPE_OP_SQUARE_LEFT_BRACKET,
        ']': tokens.TYPE_OP_SQUARE_RIGHT_BRACKET,
        '[[': tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET,
        ']]': tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET,
        '{': tokens.TYPE_OP_CURLY_LEFT_BRACKET,
        '}': tokens.TYPE_OP_CURLY_RIGHT_BRACKET,
    }

    token_type = token_type_by_source[operator]
    operator_token = tokens.Token(token_type, operator)
    return PunctuationElement((operator_token,))
def create_newline_element():
    """
    Creates and returns a NewlineElement holding a single '\\n' newline token.
    """
    newline_token = tokens.Token(tokens.TYPE_NEWLINE, '\n')
    return NewlineElement((newline_token,))
def create_whitespace_element(length=1, char=' '):
    """
    Creates and returns a WhitespaceElement made of ``length`` whitespace tokens, each holding ``char``.
    """
    single = tokens.Token(tokens.TYPE_WHITESPACE, char)
    # The same token object is repeated ``length`` times.
    return WhitespaceElement((single,) * length)
def create_table_header_element(names):
    """
    Creates a TableHeaderElement for a single-bracket table header.

    names is either one string, or a sequence of strings that are joined with dot tokens.
    """
    def name_token(name):
        return py2toml.create_string_token(name, bare_string_allowed=True)

    if isinstance(names, six.string_types):
        header_tokens = [name_token(names)]
    else:
        header_tokens = []
        for position, name in enumerate(names):
            header_tokens.append(name_token(name))
            # A dot goes after every name except the last one.
            is_last = position >= (len(names)-1)
            if not is_last:
                header_tokens.append(py2toml.operator_token(tokens.TYPE_OPT_DOT))

    opening = [py2toml.operator_token(tokens.TYPE_OP_SQUARE_LEFT_BRACKET)]
    closing = [
        py2toml.operator_token(tokens.TYPE_OP_SQUARE_RIGHT_BRACKET),
        py2toml.operator_token(tokens.TYPE_NEWLINE),
    ]
    return TableHeaderElement(opening + header_tokens + closing)
def create_array_of_tables_header_element(name):
    """
    Creates a TableHeaderElement for a double-bracket (array of tables) header with the given name.
    """
    opening = py2toml.operator_token(tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET)
    name_token = py2toml.create_string_token(name, bare_string_allowed=True)
    closing = py2toml.operator_token(tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET)
    newline = py2toml.operator_token(tokens.TYPE_NEWLINE)
    return TableHeaderElement((opening, name_token, closing, newline))
def create_table(dict_value):
    """
    Creates a TableElement out of a dict instance.

    The table starts out holding a single newline element; every key of the dict is then
    assigned an element created from its value.

    Raises ValueError when the input is not a dict instance.
    """
    from prettytoml.elements.table import TableElement

    if not isinstance(dict_value, dict):
        raise ValueError('input must be a dict instance.')

    new_table = TableElement([create_newline_element()])
    for key, raw_value in dict_value.items():
        new_table[key] = create_element(raw_value)

    return new_table
def create_multiline_string(text, maximum_line_length):
    """
    Creates an AtomicElement wrapping the multi-line string token built from text and maximum_line_length.
    """
    multiline_token = py2toml.create_multiline_string(text, maximum_line_length)
    return AtomicElement(_tokens=[multiline_token])
+78
View File
@@ -0,0 +1,78 @@
from prettytoml.elements import factory, abstracttable
from prettytoml.elements.common import Element
class InlineTableElement(abstracttable.AbstractTable):
    """
    An Element containing key-value pairs, representing an inline table.

    Implements dict-like interface.

    Assumes input sub_elements are correct for an inline table element.
    """

    def __init__(self, sub_elements):
        abstracttable.AbstractTable.__init__(self, sub_elements)

    def __setitem__(self, key, value):
        """
        Replaces the value stored under key, or adds a new ``key = value`` entry before the closing curly bracket.
        """
        if isinstance(value, Element):
            new_element = value
        else:
            new_element = factory.create_element(value)

        try:
            _, value_i = self._find_key_and_value(key)
        except KeyError:  # Key does not exist, adding anew!
            addition = [
                factory.create_string_element(key, bare_allowed=True),
                factory.create_whitespace_element(),
                factory.create_operator_element('='),
                factory.create_whitespace_element(),
                new_element,
            ]

            if self:  # If not empty, a comma and a space must come first
                separator = [
                    factory.create_operator_element(','),
                    factory.create_whitespace_element(),
                ]
                addition = separator + addition

            insertion_index = self._find_closing_curly_bracket()
            head = self.sub_elements[:insertion_index]
            tail = self.sub_elements[insertion_index:]
            self._sub_elements = head + addition + tail
        else:
            # Found, then replace the value element with a new one
            head = self.sub_elements[:value_i]
            tail = self.sub_elements[value_i+1:]
            self._sub_elements = head + [new_element] + tail

    def __delitem__(self, key):
        """
        Removes the entry stored under key, together with one adjacent comma.

        Raises KeyError if no matching key found.
        """
        key_i, value_i = self._find_key_and_value(key)

        # Rules:
        #   1. begin should be index to the preceding comma to the key
        #   2. end should be index to the following comma, or the closing bracket
        #   3. If no preceding comma found but following comma found then end should be the index of the following key

        preceding_comma = self._find_preceding_comma(key_i)
        has_preceding_comma = preceding_comma >= 0
        begin = preceding_comma if has_preceding_comma else key_i

        following_comma = self._find_following_comma(value_i)
        if following_comma < 0:
            end = self._find_closing_curly_bracket()
        elif has_preceding_comma:
            end = following_comma
        else:
            end = self._find_following_non_metadata(following_comma)

        self._sub_elements = self.sub_elements[:begin] + self.sub_elements[end:]

    def multiline_equivalent(self):
        """
        Returns a new inline table built from this one's primitive value, laid out over multiple lines.
        """
        return factory.create_inline_table(self.primitive_value, multiline_table=True, multiline_strings_allowed=True)

    @property
    def value(self):
        return self  # self is a dict-like value that is perfectly usable
+80
View File
@@ -0,0 +1,80 @@
from prettytoml import tokens
from prettytoml.elements import common
from .errors import InvalidElementError
class WhitespaceElement(common.TokenElement):
    """
    An element made up solely of whitespace tokens.

    Raises:
        InvalidElementError: when any of the given tokens is not a whitespace token.
    """

    def __init__(self, _tokens):
        common.TokenElement.__init__(self, _tokens, common.TYPE_METADATA)

    def _validate_tokens(self, _tokens):
        if any(token.type != tokens.TYPE_WHITESPACE for token in _tokens):
            raise InvalidElementError('Tokens making up a WhitespaceElement must all be whitespace')

    @property
    def length(self):
        """
        The whitespace length of this element, i.e. its number of tokens.
        """
        return len(self.tokens)
class NewlineElement(common.TokenElement):
    """
    An element made up solely of newline tokens.

    Raises:
        InvalidElementError: when passed an invalid sequence of tokens.
    """

    def __init__(self, _tokens):
        common.TokenElement.__init__(self, _tokens, common.TYPE_METADATA)

    def _validate_tokens(self, _tokens):
        if any(token.type != tokens.TYPE_NEWLINE for token in _tokens):
            raise InvalidElementError('Tokens making a NewlineElement must all be newlines')
class CommentElement(common.TokenElement):
    """
    An element holding exactly one comment token followed by one newline token.

    Raises:
        InvalidElementError: when passed an invalid sequence of tokens.
    """

    def __init__(self, _tokens):
        common.TokenElement.__init__(self, _tokens, common.TYPE_METADATA)

    def _validate_tokens(self, _tokens):
        complaint = 'CommentElement needs one comment token followed by one newline token'
        # Check the count first so the two positional lookups below are always valid.
        if len(_tokens) != 2:
            raise InvalidElementError(complaint)
        if _tokens[0].type != tokens.TYPE_COMMENT or _tokens[1].type != tokens.TYPE_NEWLINE:
            raise InvalidElementError(complaint)
class PunctuationElement(common.TokenElement):
    """
    An element wrapping a punctuation (operator) token.

    Raises:
        InvalidElementError: when passed an invalid sequence of tokens.
    """

    def __init__(self, _tokens):
        common.TokenElement.__init__(self, _tokens, common.TYPE_METADATA)

    @property
    def token(self):
        """
        Returns the (first) token contained in this Element.
        """
        return self.tokens[0]

    def _validate_tokens(self, _tokens):
        # Only the first token is inspected: it must exist and be an operator.
        starts_with_operator = bool(_tokens) and tokens.is_operator(_tokens[0])
        if not starts_with_operator:
            raise InvalidElementError('PunctuationElement must be made of only a single operator token')
+122
View File
@@ -0,0 +1,122 @@
from prettytoml.elements import abstracttable, factory
from prettytoml.elements.errors import InvalidElementError
from prettytoml.elements.common import Element
from prettytoml.elements.metadata import CommentElement, NewlineElement, WhitespaceElement
from . import common
class TableElement(abstracttable.AbstractTable):
    """
    An Element holding an unnamed top-level table.

    Implements dict-like interface.

    Assumes input sub_elements are correct.

    Raises InvalidElementError on duplicate keys.
    """

    def __init__(self, sub_elements):
        abstracttable.AbstractTable.__init__(self, sub_elements)
        self._check_for_duplicate_keys()

    def _check_for_duplicate_keys(self):
        """
        Raises InvalidElementError when any key occurs more than once.
        """
        all_keys = self.keys()
        if len(set(all_keys)) < len(all_keys):
            raise InvalidElementError('Duplicate keys found')

    def __setitem__(self, key, value):
        # Existing keys are updated in place; unknown keys are inserted as a new line.
        handler = self._update if key in self else self._insert
        handler(key, value)

    def _update(self, key, value):
        """
        Replaces the value element stored under an existing key.
        """
        _, value_i = self._find_key_and_value(key)
        replacement = value if isinstance(value, Element) else factory.create_element(value)
        self._sub_elements[value_i] = replacement

    def _find_insertion_index(self):
        """
        Returns the self.sub_elements index in which new entries should be inserted.
        """
        entries = tuple(self._enumerate_non_metadata_sub_elements())
        if not entries:
            return 0
        last_entry_i, _ = entries[-1]
        # New entries go right after the line terminator that follows the last entry.
        return self._find_following_line_terminator(last_entry_i) + 1

    def _detect_indentation_size(self):
        """
        Detects the level of indentation used in this table.

        This is the smallest leading-whitespace length among the lines that hold something
        other than metadata, or 0 when there is no such line.
        """

        def lines():
            # Yields the elements belonging to each line; a line ends with a comment or newline element
            line_start = 0
            for position, element in enumerate(self.elements):
                if isinstance(element, (CommentElement, NewlineElement)):
                    yield self.elements[line_start:position+1]
                    line_start = position+1

        def indentation(line):
            # Counts the number of whitespace tokens at the beginning of this line
            first_non_whitespace_i = next(
                (i for (i, e) in enumerate(line) if not isinstance(e, WhitespaceElement)),
                None)
            if first_non_whitespace_i is None:
                return 0
            return sum(space.length for space in line[:first_non_whitespace_i])

        def is_empty_line(line):
            return all(e.type == common.TYPE_METADATA for e in line)

        sizes = [indentation(line) for line in lines() if len(line) > 1 and not is_empty_line(line)]
        # No matching lines found means no indentation
        return min(sizes) if sizes else 0

    def _insert(self, key, value):
        """
        Adds a new ``key = value`` line at the insertion index, indented like the rest of the table.
        """
        value_element = value if isinstance(value, Element) else factory.create_element(value)

        indentation_size = self._detect_indentation_size()
        if indentation_size:
            indentation = [factory.create_whitespace_element(indentation_size)]
        else:
            indentation = []

        new_line = indentation + [
            factory.create_string_element(key, bare_allowed=True),
            factory.create_whitespace_element(),
            factory.create_operator_element('='),
            factory.create_whitespace_element(),
            value_element,
            factory.create_newline_element(),
        ]

        insertion_index = self._find_insertion_index()
        head = self.sub_elements[:insertion_index]
        tail = self.sub_elements[insertion_index:]
        self._sub_elements = head + new_line + tail

    def __delitem__(self, key):
        """
        Removes the entry stored under key.

        Raises KeyError if no matching key found.
        """
        begin, _ = self._find_key_and_value(key)

        # Start cutting at the newline before the key, when there is one.
        preceding_newline = self._find_preceding_newline(begin)
        if preceding_newline >= 0:
            begin = preceding_newline

        # Cut up to (not including) the next newline, or to the very end.
        end = self._find_following_newline(begin)
        if end < 0:
            end = len(self._sub_elements)

        self._sub_elements = self.sub_elements[:begin] + self.sub_elements[end:]

    def pop(self, key):
        """
        Removes the entry stored under key and returns its value.
        """
        popped = self[key]
        del self[key]
        return popped

    def value(self):
        # NOTE(review): this is a plain method, not a property - confirm callers expect that.
        return self

    def __str__(self):
        return str(self.primitive_value)
+95
View File
@@ -0,0 +1,95 @@
from prettytoml import tokens
from prettytoml.tokens import toml2py
from prettytoml.elements import common
from prettytoml.elements.common import Element, TokenElement
from prettytoml.elements.errors import InvalidElementError
# Token types accepted as the opening bracket of a table header ([ or [[).
_opening_bracket_types = (tokens.TYPE_OP_SQUARE_LEFT_BRACKET, tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET)
# Token types accepted as the closing bracket of a table header (] or ]]).
_closing_bracket_types = (tokens.TYPE_OP_SQUARE_RIGHT_BRACKET, tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET)
# Token types that count as a name inside a table header.
_name_types = (
tokens.TYPE_BARE_STRING,
tokens.TYPE_LITERAL_STRING,
tokens.TYPE_STRING,
)
class TableHeaderElement(TokenElement):
    """
    An element containing opening and closing single and double square brackets, strings and dots and ending with
    a newline.

    Raises InvalidElementError.
    """

    def __init__(self, _tokens):
        TokenElement.__init__(self, _tokens, common.TYPE_MARKUP)
        # The deserialized name tokens, in source order.
        self._names = tuple(toml2py.deserialize(token) for token in self._tokens if token.type in _name_types)

    @property
    def is_array_of_tables(self):
        """
        True when the header opens with a double square bracket.
        """
        opening_bracket = next(token for token in self._tokens if token.type in _opening_bracket_types)
        return opening_bracket.type == tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET

    @property
    def names(self):
        """
        Returns a sequence of string names making up this table header name.
        """
        return self._names

    def has_name_prefix(self, names):
        """
        Returns True if the header names is prefixed by the given sequence of names.

        A candidate prefix longer than the header name is simply not a prefix, so False is
        returned for it (indexing past the end used to raise IndexError).
        """
        prefix = tuple(names)
        return self.names[:len(prefix)] == prefix

    def serialized(self):
        return ''.join(token.source_substring for token in self._tokens)

    def is_named(self, names):
        """
        Returns True if the given name sequence matches the full name of this header.
        """
        return tuple(names) == self.names

    def _validate_tokens(self, _tokens):
        """
        Checks the token sequence: opening bracket, a name, any number of dot-plus-name pairs,
        closing bracket, and a newline as the very last token.

        Raises InvalidElementError naming the first part that is missing.
        """
        # Each search falls back to -inf when nothing matches, so "< 0" means "not found".
        not_found = float('-inf')

        opening_bracket_i = next((i for i, token in enumerate(_tokens)
                                  if token.type in _opening_bracket_types), not_found)

        if opening_bracket_i < 0:
            raise InvalidElementError('Expected an opening bracket')

        # From here on, only the tokens after the last matched part are searched.
        _tokens = _tokens[opening_bracket_i+1:]
        first_name_i = next((i for i, token in enumerate(_tokens) if token.type in _name_types), not_found)
        if first_name_i < 0:
            raise InvalidElementError('Expected a table header name')

        _tokens = _tokens[first_name_i+1:]

        while True:

            next_dot_i = next((i for i, token in enumerate(_tokens) if token.type == tokens.TYPE_OPT_DOT),
                              not_found)
            if next_dot_i < 0:
                break

            _tokens = _tokens[next_dot_i+1:]

            next_name_i = next((i for i, token in enumerate(_tokens) if token.type in _name_types), not_found)
            if next_name_i < 0:
                raise InvalidElementError('Expected a name after the dot')

            _tokens = _tokens[next_name_i+1:]

        closing_bracket_i = next((i for i, token in enumerate(_tokens) if token.type in _closing_bracket_types),
                                 not_found)

        if closing_bracket_i < 0:
            raise InvalidElementError('Expected a closing bracket')

        if _tokens[-1].type != tokens.TYPE_NEWLINE:
            raise InvalidElementError('Must end with a newline')
+67
View File
@@ -0,0 +1,67 @@
import pytest
from prettytoml import lexer
from prettytoml.elements.array import ArrayElement
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.metadata import PunctuationElement, WhitespaceElement, NewlineElement
def test_array_element():
    """
    ArrayElement exposes list-like access over its sub-elements and keeps the original
    formatting when values are replaced, appended or deleted.
    """
    tokens = tuple(lexer.tokenize('[4, 8, 42, \n 23, 15]'))
    assert len(tokens) == 17

    # One element class per token, in source order.
    element_classes = (
        PunctuationElement, AtomicElement, PunctuationElement, WhitespaceElement,
        AtomicElement, PunctuationElement, WhitespaceElement,
        AtomicElement, PunctuationElement, WhitespaceElement,
        NewlineElement, WhitespaceElement,
        AtomicElement, PunctuationElement, WhitespaceElement,
        AtomicElement, PunctuationElement,
    )
    sub_elements = tuple(
        element_class(tokens[position:position + 1])
        for position, element_class in enumerate(element_classes)
    )

    array_element = ArrayElement(sub_elements)

    # Test length
    assert len(array_element) == 5

    # Test getting a value
    for position, expected in ((0, 4), (1, 8), (2, 42), (3, 23), (-1, 15)):
        assert array_element[position] == expected

    # Test assignment with a negative index
    array_element[-1] = 12

    # Test persistence of formatting
    assert '[4, 8, 42, \n 23, 12]' == array_element.serialized()

    # Test raises IndexError on invalid index
    with pytest.raises(IndexError):
        print(array_element[5])

    # Test appending a new value
    array_element.append(77)
    assert '[4, 8, 42, \n 23, 12, 77]' == array_element.serialized()

    # Test deleting a value
    del array_element[3]
    assert '[4, 8, 42, 12, 77]' == array_element.serialized()

    # Test primitive_value
    assert [4, 8, 42, 12, 77] == array_element.primitive_value
+9
View File
@@ -0,0 +1,9 @@
from prettytoml import lexer
from prettytoml.elements.atomic import AtomicElement
def test_atomic_element():
    """
    An AtomicElement reads its value from the source tokens and keeps the surrounding
    whitespace when the value is replaced.
    """
    source_tokens = tuple(lexer.tokenize(' \t 42 '))
    element = AtomicElement(source_tokens)
    assert element.value == 42

    element.set(23)
    assert element.serialized() == ' \t 23 '
+89
View File
@@ -0,0 +1,89 @@
from prettytoml import tokens, lexer
from prettytoml.elements import traversal
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.metadata import NewlineElement, PunctuationElement, WhitespaceElement, CommentElement
from prettytoml.elements.table import TableElement
from prettytoml.elements.tableheader import TableHeaderElement
# Token types that primitive_token_to_primitive_element() wraps in an AtomicElement.
# NOTE(review): TYPE_DATE and TYPE_BOOLEAN are not listed, so such tokens fall through to the
# RuntimeError branch of that helper — confirm this is intended for the test fixtures.
atomic_token_types = (
tokens.TYPE_INTEGER,
tokens.TYPE_FLOAT,
tokens.TYPE_BARE_STRING,
tokens.TYPE_STRING,
tokens.TYPE_LITERAL_STRING,
tokens.TYPE_MULTILINE_STRING,
tokens.TYPE_MULTILINE_LITERAL_STRING,
)
# Token types that primitive_token_to_primitive_element() wraps in a PunctuationElement.
# NOTE(review): TYPE_OP_COMMA is not listed here — confirm the fixtures never need it.
punctuation_token_types = (
tokens.TYPE_OPT_DOT,
tokens.TYPE_OP_CURLY_LEFT_BRACKET,
tokens.TYPE_OP_SQUARE_LEFT_BRACKET,
tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET,
tokens.TYPE_OP_SQUARE_RIGHT_BRACKET,
tokens.TYPE_OP_CURLY_RIGHT_BRACKET,
tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET,
tokens.TYPE_OP_ASSIGNMENT,
)
def primitive_token_to_primitive_element(token):
    """
    Wraps a single token in the primitive element class that corresponds to its type.

    Returns a NewlineElement, AtomicElement, PunctuationElement, WhitespaceElement or
    CommentElement holding the one-token tuple `(token,)`.

    Raises RuntimeError when the token type has no mapped primitive element.
    """
    # The original chain tested TYPE_NEWLINE twice; the second test could never be reached
    # and has been dropped.
    if token.type == tokens.TYPE_NEWLINE:
        return NewlineElement((token,))
    if token.type in atomic_token_types:
        return AtomicElement((token,))
    if token.type in punctuation_token_types:
        return PunctuationElement((token,))
    if token.type == tokens.TYPE_WHITESPACE:
        return WhitespaceElement((token,))
    if token.type == tokens.TYPE_COMMENT:
        return CommentElement((token,))
    raise RuntimeError("{} has no mapped primitive element".format(token))
def primitive_tokens_to_primitive_elements(tokens):
    """
    Converts each token of the given sequence to its primitive element and returns them as a list.
    """
    return [primitive_token_to_primitive_element(single_token) for single_token in tokens]
# Tokenizes a small fixed TOML document and hand-assembles the list of top-level elements
# that the traversal tests run against.
def dummy_file_elements():
tokens_ = tuple(lexer.tokenize("""
name = fawzy
another_name=another_fawzy
[details]
id= 42
section =fourth
[[person]]
personname= lefawzy
dest=north
[[person]]
dest=south
personname=lafawzy
[details.extended]
number = 313
type =complex"""))
# The slice bounds are token indices into tokens_. The resulting list alternates
# TableElement (positions 0, 2, 4, 6, 8) and TableHeaderElement (positions 1, 3, 5, 7).
elements = \
[TableElement(primitive_tokens_to_primitive_elements(tokens_[:12]))] + \
[TableHeaderElement(tokens_[12:16])] + \
[TableElement(primitive_tokens_to_primitive_elements(tokens_[16:25]))] + \
[TableHeaderElement(tokens_[25:31])] + \
[TableElement(primitive_tokens_to_primitive_elements(tokens_[31:39]))] + \
[TableHeaderElement(tokens_[39:45])] + \
[TableElement(primitive_tokens_to_primitive_elements(tokens_[45:53]))] + \
[TableHeaderElement(tokens_[53:60])] + \
[TableElement(primitive_tokens_to_primitive_elements(tokens_[60:]))]
return elements
class DummyFile(traversal.TraversalMixin):
    """
    Minimal TraversalMixin host for tests; its `elements` are rebuilt from
    dummy_file_elements() on every access.
    """

    @property
    def elements(self):
        fresh_elements = dummy_file_elements()
        return fresh_elements
+22
View File
@@ -0,0 +1,22 @@
from collections import OrderedDict
from prettytoml.elements import factory
from prettytoml.elements.array import ArrayElement
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.inlinetable import InlineTableElement
def test_creating_elements():
    """
    factory.create_element maps a scalar, a list and an ordered mapping to
    AtomicElement, ArrayElement and InlineTableElement respectively.
    """
    # Scalar
    number = factory.create_element(42)
    assert isinstance(number, AtomicElement)
    assert number.value == 42

    # Sequence
    letters = factory.create_element(list('apple'))
    assert isinstance(letters, ArrayElement)
    assert letters.serialized() == '["a", "p", "p", "l", "e"]'
    assert ''.join(letters.primitive_value) == 'apple'

    # Mapping
    pairs = OrderedDict([('one', 1), ('two', 2)])
    inline_table = factory.create_element(pairs)
    assert isinstance(inline_table, InlineTableElement)
    assert inline_table.serialized() == '{one = 1, two = 2}'
+52
View File
@@ -0,0 +1,52 @@
from prettytoml import lexer
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.inlinetable import InlineTableElement
from prettytoml.elements.metadata import PunctuationElement, WhitespaceElement
def test_inline_table():
    """
    InlineTableElement behaves like a mapping and keeps the original formatting while
    entries are updated, added and removed.
    """
    tokens = tuple(lexer.tokenize('{ name= "first", id=42}'))

    # One element class per token, in source order.
    element_classes = (
        PunctuationElement, WhitespaceElement,
        AtomicElement, PunctuationElement, WhitespaceElement, AtomicElement,
        PunctuationElement, WhitespaceElement,
        AtomicElement, PunctuationElement, AtomicElement,
        PunctuationElement,
    )
    elements = tuple(
        element_class(tokens[position:position + 1])
        for position, element_class in enumerate(element_classes)
    )

    table = InlineTableElement(elements)

    # Reading
    assert table['name'] == 'first'
    assert table['id'] == 42

    # Updating an existing key and adding a new one
    table['name'] = 'fawzy'
    table['nickname'] = 'nickfawzy'
    assert set(table.items()) == {('name', 'fawzy'), ('id', 42), ('nickname', 'nickfawzy')}
    assert table.serialized() == '{ name= "fawzy", id=42, nickname = "nickfawzy"}'

    # Deleting entries one at a time, down to the empty table
    del table['name']
    assert table.serialized() == '{ id=42, nickname = "nickfawzy"}'

    del table['nickname']
    assert table.serialized() == '{ id=42}'

    del table['id']
    assert table.serialized() == '{ }'

    # Refilling the emptied table
    table['item1'] = 11
    table['item2'] = 22
    assert table.serialized() == '{ item1 = 11, item2 = 22}'
+25
View File
@@ -0,0 +1,25 @@
from prettytoml import lexer
from prettytoml.elements.metadata import WhitespaceElement, NewlineElement, CommentElement, PunctuationElement
def test_whitespace_element():
    """A WhitespaceElement serializes back to the whitespace it was built from."""
    whitespace_tokens = tuple(lexer.tokenize(' \t '))
    element = WhitespaceElement(whitespace_tokens)
    assert element.serialized() == ' \t '
def test_newline_element():
    """A NewlineElement built from several newline tokens serializes back to all of them."""
    newline_tokens = tuple(lexer.tokenize('\n\n\n'))
    element = NewlineElement(newline_tokens)
    assert element.serialized() == '\n\n\n'
def test_comment_element():
    """A CommentElement serializes back to the comment text including its trailing newline."""
    comment_tokens = tuple(lexer.tokenize('# This is my insightful remark\n'))
    element = CommentElement(comment_tokens)
    assert element.serialized() == '# This is my insightful remark\n'
def test_punctuation_element():
    """A PunctuationElement can be constructed from each of these punctuation sources."""
    for symbol in ('[', '[[', '.', ']', ']]'):
        PunctuationElement(tuple(lexer.tokenize(symbol)))
+59
View File
@@ -0,0 +1,59 @@
from prettytoml import lexer
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.metadata import WhitespaceElement, PunctuationElement, NewlineElement, CommentElement
from prettytoml.elements.table import TableElement
# Checks that TableElement behaves like a mapping over its key/value lines and that adding and
# updating entries leaves the existing formatting and comment in place.
def test_table():
initial_toml = """name = "first"
id=42 # My id
"""
tokens = tuple(lexer.tokenize(initial_toml))
# Hand-built element sequence; the slice bounds are token indices into `tokens`.
# The comment element takes two tokens (tokens[10:12]); every other element takes one.
elements = (
AtomicElement(tokens[:1]),
WhitespaceElement(tokens[1:2]),
PunctuationElement(tokens[2:3]),
WhitespaceElement(tokens[3:4]),
AtomicElement(tokens[4:5]),
NewlineElement(tokens[5:6]),
AtomicElement(tokens[6:7]),
PunctuationElement(tokens[7:8]),
AtomicElement(tokens[8:9]),
WhitespaceElement(tokens[9:10]),
CommentElement(tokens[10:12]),
NewlineElement(tokens[12:13]),
NewlineElement(tokens[13:14]),
)
table = TableElement(elements)
# Reading existing entries
assert set(table.items()) == {('name', 'first'), ('id', 42)}
assert table['name'] == 'first'
assert table['id'] == 42
# Adding a new key
table['relation'] = 'another'
assert set(table.items()) == {('name', 'first'), ('id', 42), ('relation', 'another')}
# Updating an existing key
table['name'] = 'fawzy'
assert set(table.items()) == {('name', 'fawzy'), ('id', 42), ('relation', 'another')}
# The serialized form keeps the original spacing and the "# My id" comment.
expected_toml = """name = "fawzy"
id=42 # My id
relation = "another"
"""
assert table.serialized() == expected_toml
+12
View File
@@ -0,0 +1,12 @@
from prettytoml import lexer
from prettytoml.elements.tableheader import TableHeaderElement
def test_tableheader():
    """
    A double-bracket header is an array-of-tables header and exposes its dotted name parts.
    """
    header_tokens = tuple(lexer.tokenize('\n\t [[personal. information.details]] \n'))
    element = TableHeaderElement(header_tokens)

    assert element.is_array_of_tables
    assert ('personal', 'information', 'details') == element.names
    assert element.has_name_prefix(('personal', 'information'))
+18
View File
@@ -0,0 +1,18 @@
from prettytoml.elements.test_common import DummyFile
def test_traversal():
    """
    Walks the dummy file forwards through its table headers and backwards through its tables.
    """
    dummy_file = DummyFile()

    # Table headers sit at the odd positions; nothing follows the last one.
    for start, expected in ((-1, 1), (1, 3), (3, 5), (5, 7)):
        assert dummy_file._find_following_table_header(start) == expected
    assert dummy_file._find_following_table_header(7) < 0

    # Tables sit at the even positions; nothing precedes the first one.
    for start, expected in ((30, 8), (8, 6), (6, 4), (4, 2), (2, 0)):
        assert dummy_file._find_preceding_table(start) == expected
    assert dummy_file._find_preceding_table(0) < 0
+175
View File
@@ -0,0 +1,175 @@
from prettytoml import tokens
from prettytoml.elements import common
from prettytoml.elements.metadata import PunctuationElement, NewlineElement
from prettytoml.elements.traversal import predicates
class TraversalMixin:
    """
    A mix-in that provides convenient sub-element traversal to any class with
    an `elements` member that is a sequence of Element instances.

    Every `_find_*` lookup reports "not found" as float('-inf'), so callers can test `result < 0`.
    """

    def __find_following_element(self, index, predicate):
        """
        Finds and returns the index of element in self.elements that evaluates the given predicate to True
        and whose index is higher than the given index, or returns -Infinity on failure.
        """
        return find_following(self.elements, predicate, index)

    def __find_preceding_element(self, index, predicate):
        """
        Finds and returns the index of the element in self.elements that evaluates the given predicate to True
        and whose index is lower than the given index, or returns -Infinity on failure.
        """
        i = find_previous(self.elements, predicate, index)
        # find_previous() signals failure with +Infinity; normalize to this class's -Infinity.
        if i == float('inf'):
            return float('-inf')
        return i

    def __must_find_following_element(self, predicate):
        """
        Finds and returns the index to the element in self.elements that evaluates the predicate to True,
        or raises a RuntimeError.
        """
        i = self.__find_following_element(-1, predicate)
        if i < 0:
            raise RuntimeError('Could not find non-optional element')
        return i

    def _enumerate_non_metadata_sub_elements(self):
        """
        Returns a sequence of (index, sub_element) of the non-metadata sub-elements.
        """
        return ((i, element) for i, element in enumerate(self.elements) if element.type != common.TYPE_METADATA)

    def _find_preceding_comma(self, index):
        """
        Returns the index of the preceding comma element to the given index, or -Infinity.
        """
        return self.__find_preceding_element(index, predicates.op_comma)

    def _find_following_comma(self, index):
        """
        Returns the index of the following comma element after the given index, or -Infinity.
        """
        # Same test as the previous inline predicate, shared with _find_preceding_comma().
        return self.__find_following_element(index, predicates.op_comma)

    def _find_following_newline(self, index):
        """
        Returns the index of the following newline element after the given index, or -Infinity.
        """
        # Same test as the previous inline lambda, shared with _find_preceding_newline().
        return self.__find_following_element(index, predicates.newline)

    def _find_following_comment(self, index):
        """
        Returns the index of the following comment element after the given index, or -Infinity.
        """
        return self.__find_following_element(index, predicates.comment)

    def _find_following_line_terminator(self, index):
        """
        Returns the index of the following comment or newline element after the given index, or -Infinity.

        When both exist, the one that comes first is returned.
        """
        following_comment = self._find_following_comment(index)
        following_newline = self._find_following_newline(index)

        # Both lookups report a miss as -Infinity. The newline check used to compare against
        # +Infinity, so a comment with no newline after it was reported as "not found".
        if following_comment < 0:
            return following_newline
        if following_newline < 0:
            return following_comment

        return min(following_newline, following_comment)

    def _find_preceding_newline(self, index):
        """
        Returns the index of the preceding newline element to the given index, or -Infinity.
        """
        return self.__find_preceding_element(index, predicates.newline)

    def _find_following_non_metadata(self, index):
        """
        Returns the index to the following non-metadata element after the given index, or -Infinity.
        """
        return self.__find_following_element(index, predicates.non_metadata)

    def _find_closing_square_bracket(self):
        """
        Returns the index to the closing square bracket, or raises a RuntimeError.
        """
        return self.__must_find_following_element(predicates.closing_square_bracket)

    def _find_following_opening_square_bracket(self, index):
        """
        Returns the index to the opening square bracket, or -Infinity.
        """
        return self.__find_following_element(index, predicates.opening_square_bracket)

    def _find_following_closing_square_bracket(self, index):
        """
        Returns the index to the closing square bracket, or -Infinity.
        """
        return self.__find_following_element(index, predicates.closing_square_bracket)

    def _find_following_table(self, index):
        """
        Returns the index to the next TableElement after the specified index, or -Infinity.
        """
        return self.__find_following_element(index, predicates.table)

    def _find_preceding_table(self, index):
        """
        Returns the index to the preceding TableElement to the specified index, or -Infinity.
        """
        return self.__find_preceding_element(index, predicates.table)

    def _find_closing_curly_bracket(self):
        """
        Returns the index to the closing curly bracket, or raises a RuntimeError.
        """
        def predicate(element):
            return isinstance(element, PunctuationElement) and element.token.type == tokens.TYPE_OP_CURLY_RIGHT_BRACKET
        return self.__must_find_following_element(predicate)

    def _find_following_table_header(self, index):
        """
        Returns the index to the table header after the given element index, or -Infinity.
        """
        return self.__find_following_element(index, predicates.table_header)
def find_following(element_seq, predicate, index=None):
    """
    Finds and returns the index of the next element fulfilling the specified predicate after the specified
    index, or -Infinity.

    The search is linear and starts right after `index`; with no index, or a negative one,
    it starts from the first element.
    """
    if index is None or (isinstance(index, (int, float)) and index < 0):
        start = None
    else:
        start = index + 1

    indexed = tuple(enumerate(element_seq))
    for position, candidate in indexed[start:]:
        if predicate(candidate):
            return position
    return float('-inf')
def find_previous(element_seq, predicate, index=None):
    """
    Finds and returns the index of the previous element fulfilling the specified predicate preceding to the specified
    index, or Infinity.

    With no index, or one at or beyond the end of the sequence, the search starts from the last element.
    """
    stop = index
    if isinstance(stop, (int, float)) and stop >= len(element_seq):
        stop = None

    indexed = tuple(enumerate(element_seq))[:stop]
    for position, candidate in indexed[::-1]:
        if predicate(candidate):
            return position
    return float('inf')
+48
View File
@@ -0,0 +1,48 @@
"""
The following predicates can be used in the traversal functions directly.
"""
from ..atomic import AtomicElement
from ..metadata import PunctuationElement, CommentElement, NewlineElement, WhitespaceElement
from prettytoml import tokens
from .. import common
def atomic(e):
    """True for AtomicElement instances."""
    return isinstance(e, AtomicElement)


def op_assignment(e):
    """True for the punctuation element holding an assignment operator token."""
    return isinstance(e, PunctuationElement) and e.token.type == tokens.TYPE_OP_ASSIGNMENT


def op_comma(e):
    """True for the punctuation element holding a comma token."""
    return isinstance(e, PunctuationElement) and e.token.type == tokens.TYPE_OP_COMMA


def comment(e):
    """True for CommentElement instances."""
    return isinstance(e, CommentElement)


def newline(e):
    """True for NewlineElement instances."""
    return isinstance(e, NewlineElement)


def non_metadata(e):
    """True for any element whose type is not TYPE_METADATA."""
    return e.type != common.TYPE_METADATA


def closing_square_bracket(e):
    """True for the punctuation element holding a single closing square bracket token."""
    return isinstance(e, PunctuationElement) and e.token.type == tokens.TYPE_OP_SQUARE_RIGHT_BRACKET


def opening_square_bracket(e):
    """True for the punctuation element holding a single opening square bracket token."""
    return isinstance(e, PunctuationElement) and e.token.type == tokens.TYPE_OP_SQUARE_LEFT_BRACKET
def table(e):
    """True for TableElement instances."""
    # Imported here rather than at module level, as in the original.
    from ..table import TableElement as table_element_class
    return isinstance(e, table_element_class)
def table_header(e):
    """True for TableHeaderElement instances."""
    # Imported here rather than at module level, as in the original.
    from prettytoml.elements.tableheader import TableHeaderElement as header_element_class
    return isinstance(e, header_element_class)
def whitespace(e):
    """True for WhitespaceElement instances."""
    return isinstance(e, WhitespaceElement)
+32
View File
@@ -0,0 +1,32 @@
class TOMLError(Exception):
    """
    Root of the error hierarchy: all errors raised by this module are descendants of this type.
    """


class InvalidTOMLFileError(TOMLError):
    """
    The file's elements do not form a valid TOML document structure.
    """


class NoArrayFoundError(TOMLError):
    """
    An array of tables was requested but none exist by the given name.
    """


class InvalidValueError(TOMLError):
    """
    An invalid value was encountered.
    """


class DuplicateKeysError(TOMLError):
    """
    Duplicate keys detected in the parsed file.
    """


class DuplicateTablesError(TOMLError):
    """
    Duplicate tables detected in the parsed file.
    """
+123
View File
@@ -0,0 +1,123 @@
"""
A regular expression based Lexer/tokenizer for TOML.
"""
from collections import namedtuple
import re
from prettytoml import tokens
from prettytoml.errors import TOMLError
# Pairs a token type with the compiled regular expression that recognizes it.
TokenSpec = namedtuple('TokenSpec', ('type', 're'))
# Specs of all the valid tokens.
# Every pattern is anchored with '^' so it only matches at the start of the remaining source, and
# capture group 1 is the text that becomes the token (see _next_token_candidates).
# All specs are tried on each step; when several match, _choose_from_next_token_candidates picks
# the longest match and breaks ties by sorting the candidate tokens.
# The comment pattern requires a newline after the comment but leaves it out of the captured text.
_LEXICAL_SPECS = (
TokenSpec(tokens.TYPE_COMMENT, re.compile(r'^(#.*)\n')),
TokenSpec(tokens.TYPE_STRING, re.compile(r'^("(([^"]|\\")+?[^\\]|([^"]|\\")|)")')), # Single line only
TokenSpec(tokens.TYPE_MULTILINE_STRING, re.compile(r'^(""".*?""")', re.DOTALL)),
TokenSpec(tokens.TYPE_LITERAL_STRING, re.compile(r"^('.*?')")),
TokenSpec(tokens.TYPE_MULTILINE_LITERAL_STRING, re.compile(r"^('''.*?''')", re.DOTALL)),
TokenSpec(tokens.TYPE_BARE_STRING, re.compile(r'^([A-Za-z0-9_-]+)')),
TokenSpec(tokens.TYPE_DATE, re.compile(
r'^([0-9]{4}-[0-9]{2}-[0-9]{2}(T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]*)?)?(([zZ])|((\+|-)[0-9]{2}:[0-9]{2}))?)')),
TokenSpec(tokens.TYPE_WHITESPACE, re.compile(r'^( |\t)', re.DOTALL)),
TokenSpec(tokens.TYPE_INTEGER, re.compile(r'^(((\+|-)[0-9_]+)|([0-9][0-9_]*))')),
TokenSpec(tokens.TYPE_FLOAT,
re.compile(r'^((((\+|-)[0-9_]+)|([1-9][0-9_]*))(\.[0-9_]+)?([eE](\+|-)?[0-9_]+)?)')),
TokenSpec(tokens.TYPE_BOOLEAN, re.compile(r'^(true|false)')),
TokenSpec(tokens.TYPE_OP_SQUARE_LEFT_BRACKET, re.compile(r'^(\[)')),
TokenSpec(tokens.TYPE_OP_SQUARE_RIGHT_BRACKET, re.compile(r'^(\])')),
TokenSpec(tokens.TYPE_OP_CURLY_LEFT_BRACKET, re.compile(r'^(\{)')),
TokenSpec(tokens.TYPE_OP_CURLY_RIGHT_BRACKET, re.compile(r'^(\})')),
TokenSpec(tokens.TYPE_OP_ASSIGNMENT, re.compile(r'^(=)')),
TokenSpec(tokens.TYPE_OP_COMMA, re.compile(r'^(,)')),
TokenSpec(tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET, re.compile(r'^(\[\[)')),
TokenSpec(tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET, re.compile(r'^(\]\])')),
TokenSpec(tokens.TYPE_OPT_DOT, re.compile(r'^(\.)')),
TokenSpec(tokens.TYPE_NEWLINE, re.compile('^(\n|\r\n)')),
)
def _next_token_candidates(source):
    """
    Returns a list with one Token per lexical spec whose pattern matches the given source text.
    """
    attempts = ((spec.type, spec.re.search(source)) for spec in _LEXICAL_SPECS)
    return [tokens.Token(token_type, hit.group(1)) for token_type, hit in attempts if hit]
def _choose_from_next_token_candidates(candidates):
if len(candidates) == 1:
return candidates[0]
elif len(candidates) > 1:
# Return the maximal-munch with ties broken by natural order of token type.
maximal_munch_length = max(len(token.source_substring) for token in candidates)
maximal_munches = [token for token in candidates if len(token.source_substring) == maximal_munch_length]
return sorted(maximal_munches)[0] # Return the first in sorting by priority
def _munch_a_token(source):
    """
    Recognizes a single Token at the beginning of the given source text and returns it,
    or returns None when no token type matches there.
    """
    return _choose_from_next_token_candidates(_next_token_candidates(source))
class LexerError(TOMLError):
    """
    Raised by the lexer; both str() and repr() of the error are the message it was created with.
    """

    def __init__(self, message):
        self._message = message

    def __str__(self):
        return self._message

    __repr__ = __str__
def tokenize(source, is_top_level=False):
    """
    Tokenizes the input TOML source into a stream of tokens.

    If is_top_level is set to True, will make sure that the input source has a trailing newline character
    before it is tokenized.

    Each yielded token carries the column and row at which it starts, both counted from 1.

    Raises a LexerError when it fails to recognize another token while not at the end of the source.
    """
    # Newlines are going to be normalized to UNIX newlines.
    text = source.replace('\r\n', '\n')

    if is_top_level and text and not text.endswith('\n'):
        text += '\n'

    row, col, offset = 1, 1, 0
    total = len(text)

    while offset < total:
        remainder = text[offset:]
        munched = _munch_a_token(remainder)

        if not munched:
            raise LexerError("failed to read the next token at ({}, {}): {}".format(
                row, col, remainder))

        # Rebuild the token with its starting col and row
        positioned = tokens.Token(munched.type, munched.source_substring, col, row)

        # Advance the offset, row and col past the consumed text
        consumed = positioned.source_substring
        offset += len(consumed)
        line_breaks = consumed.count('\n')
        if line_breaks:
            row += line_breaks
            # Column 1 right after the last newline, plus one per character following it.
            col = len(consumed) - consumed.rfind('\n')
        else:
            col += len(consumed)

        yield positioned
+153
View File
@@ -0,0 +1,153 @@
# -*- coding: utf-8 -*-
from prettytoml.lexer import _munch_a_token
from prettytoml.lexer import *
# A mapping from token types to a sequence of pairs of (source_text, expected_matched_text)
# Each pair is fed to _munch_a_token() by test_valid_tokenizing(): the first munched token of
# source_text must have the dictionary key as its type and expected_matched_text as its text.
valid_tokens = {
tokens.TYPE_COMMENT: (
(
'# My very insightful comment about the state of the universe\n# And now for something completely different!',
'# My very insightful comment about the state of the universe',
),
),
tokens.TYPE_STRING: (
('"a valid hug3 text" "some other string" = 42', '"a valid hug3 text"'),
(
r'"I\'m a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF." "some other string" = 42',
r'"I\'m a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF."'
),
('"ʎǝʞ" key', '"ʎǝʞ"'),
('""', '""'),
('"t"', '"t"'),
),
tokens.TYPE_MULTILINE_STRING: (
('"""\nRoses are red\nViolets are blue""" """other text"""', '"""\nRoses are red\nViolets are blue"""'),
),
tokens.TYPE_LITERAL_STRING: (
(r"'This is \ \n a \\ literal string' 'another \ literal string'", r"'This is \ \n a \\ literal string'"),
),
tokens.TYPE_MULTILINE_LITERAL_STRING: (
(
"'''\nThe first newline is\ntrimmed in raw strings.\n All other whitespace\n is preserved.\n''' '''some other\n\n\t string'''",
"'''\nThe first newline is\ntrimmed in raw strings.\n All other whitespace\n is preserved.\n'''"
),
),
tokens.TYPE_DATE: (
('1979-05-27 5345', '1979-05-27'),
('1979-05-27T07:32:00Z something', '1979-05-27T07:32:00Z'),
('1979-05-27T00:32:00-07:00 ommm', '1979-05-27T00:32:00-07:00'),
('1979-05-27T00:32:00.999999-07:00 2346', '1979-05-27T00:32:00.999999-07:00'),
),
# A whitespace token covers exactly one space or tab character.
tokens.TYPE_WHITESPACE: (
(' \t\n \r some_text', ' '),
),
tokens.TYPE_INTEGER: (
('+99 "number"', "+99"),
('42 fwfwef', "42"),
('-17 fh34g34g', "-17"),
('5_349_221 apples', "5_349_221"),
('-1_2_3_4_5 steps', '-1_2_3_4_5')
),
tokens.TYPE_FLOAT: (
('1.0 fwef', '1.0'),
('3.1415 g4g', '3.1415'),
('-0.01 433re', '-0.01'),
('5e+2_2 ersdvf', '5e+2_2'),
('1e6 ewe23', '1e6'),
('-2E-2.2 3 rf23', '-2E-2'),
('6.626e-34 +234f', '6.626e-34'),
('9_224_617.445_991_228_313 f1ewer 23f4h = nonesense', '9_224_617.445_991_228_313'),
('1e1_000 2346f,ef2!!', '1e1_000'),
),
tokens.TYPE_BOOLEAN: (
('false business = true', 'false'),
('true true', 'true'),
),
tokens.TYPE_OP_SQUARE_LEFT_BRACKET: (
('[table_name]', '['),
),
tokens.TYPE_OP_SQUARE_RIGHT_BRACKET: (
(']\nbusiness = awesome', ']'),
),
tokens.TYPE_OP_CURLY_LEFT_BRACKET: (
('{item_exists = no}', '{'),
),
tokens.TYPE_OP_CURLY_RIGHT_BRACKET: (
('} moving on', '}'),
),
tokens.TYPE_OP_COMMA: (
(',item2,item4', ','),
),
tokens.TYPE_OP_ASSIGNMENT: (
('== 42', '='),
),
tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET: (
('[[array.of.tables]]', '[['),
),
tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET: (
(']] item=3', ']]'),
),
tokens.TYPE_BARE_STRING: (
('key another', 'key'),
('bare_key 2fews', 'bare_key'),
('bare-key kfcw', 'bare-key'),
),
tokens.TYPE_OPT_DOT: (
('."another key"', '.'),
('.subname', '.'),
),
tokens.TYPE_NEWLINE: (
('\n\r \n', '\n'),
)
}
# A mapping from a token type to a sequence of (source, matched_text) pairs that shouldn't result from consuming the
# source text.
# test_invalid_tokenizing() passes for a pair when _munch_a_token(source) returns nothing, or
# returns a token that differs from the pair in type or in matched text.
invalid_tokens = {
tokens.TYPE_INTEGER: (
('_234_423', ''),
('0446234234', ''),
),
tokens.TYPE_STRING: (
('"""', '"""'),
),
tokens.TYPE_BOOLEAN: (
('True', 'True'),
('True', 'true'),
),
tokens.TYPE_FLOAT: (
('', ''),
)
}
def test_valid_tokenizing():
    """
    Every sample in valid_tokens must munch to a token of the expected type and text.
    """
    for token_type, samples in valid_tokens.items():
        for source, expected_match in samples:
            token = _munch_a_token(source)

            nothing_message = "Failed to tokenize: {}\nExpected: {}\nOut of: {}\nGot nothing!".format(
                token_type, expected_match, source)
            assert token, nothing_message

            assert token.type == token_type, \
                "Expected type: {}\nOut of: {}\nThat matched: {}\nOf type: {}".format(
                    token_type, source, token.source_substring, token.type)

            assert token.source_substring == expected_match
def test_invalid_tokenizing():
    """
    No sample in invalid_tokens may munch to a token with both the listed type and the listed text.
    """
    for token_type, samples in invalid_tokens.items():
        for source, expected_match in samples:
            token = _munch_a_token(source)
            if not token:
                continue
            same_type = token.type == token_type
            same_text = token.source_substring == expected_match
            assert not (same_type and same_text)
def test_token_type_order():
    """
    TokenType instances order by their numeric second argument, not by their name.
    """
    lowest = tokens.TokenType('b', 0, is_metadata=False)
    middle = tokens.TokenType('c', 3, is_metadata=False)
    highest = tokens.TokenType('a', 5, is_metadata=False)

    assert lowest < middle < highest
    assert highest > middle > lowest
+34
View File
@@ -0,0 +1,34 @@
"""
A parser for TOML tokens into TOML elements.
"""
from prettytoml.parser.errors import ParsingError
def parse_tokens(tokens):
    """
    Parses the given token sequence into a sequence of top-level TOML elements.

    Raises ParsingError on invalid TOML input.
    """
    from .tokenstream import TokenStream
    stream = TokenStream(tokens)
    return _parse_token_stream(stream)
def _parse_token_stream(token_stream):
    """
    Parses the given token_stream into a sequence of top-level TOML elements and returns
    them after sanitizing.

    Raises ParsingError, naming the row of the first unparsed token, when the input could
    not be consumed to its end.
    """
    from .parser import toml_file_elements
    from .elementsanitizer import sanitize

    elements, pending = toml_file_elements(token_stream)

    if pending.at_end:
        return sanitize(elements)

    raise ParsingError('Failed to parse line {}'.format(pending.head.row))
+58
View File
@@ -0,0 +1,58 @@
from prettytoml import elements
from prettytoml.elements.table import TableElement
from prettytoml.elements.tableheader import TableHeaderElement
from prettytoml.errors import InvalidTOMLFileError
from prettytoml.util import PeekableIterator
def sanitize(_elements):
    """
    Returns a list copy of the given elements in which every TableHeaderElement that is not
    followed by a TableElement before the next header gets an empty TableElement inserted
    right after it.
    """
    output = list(_elements)

    def next_of_type(element_type, after=-1):
        # Index of the first element of the given type located after `after`, or -Infinity.
        for position, element in enumerate(output):
            if position > after and isinstance(element, element_type):
                return position
        return float('-inf')

    header_i = next_of_type(TableHeaderElement)
    while header_i >= 0:
        later_header_i = next_of_type(TableHeaderElement, header_i)
        later_body_i = next_of_type(TableElement, header_i)

        body_missing = later_body_i < 0
        header_comes_first = later_header_i >= 0 and later_header_i < later_body_i
        if body_missing or header_comes_first:
            output.insert(header_i + 1, TableElement(tuple()))

        header_i = next_of_type(TableHeaderElement, header_i)

    return output
def validate_sanitized(_elements):
    """
    Checks the overall shape of a sanitized element sequence.

    Non-metadata elements must start with an optional TableElement, followed by
    zero or more (TableHeaderElement, TableElement) pairs.

    Raises InvalidTOMLFileError when the sequence has another shape.
    """
    if not _elements:
        return

    significant = PeekableIterator(
        element for element in _elements if element.type != elements.TYPE_METADATA)

    # The optional leading table
    if isinstance(significant.peek(), TableElement):
        significant.next()

    # Then strictly alternating header / table pairs
    while significant.peek():
        header_ok = isinstance(significant.peek(), TableHeaderElement)
        if not header_ok:
            raise InvalidTOMLFileError
        significant.next()

        body_ok = isinstance(significant.peek(), TableElement)
        if not body_ok:
            raise InvalidTOMLFileError
        significant.next()
+17
View File
@@ -0,0 +1,17 @@
from prettytoml.errors import TOMLError
class ParsingError(TOMLError):
    """
    Raised when the token stream cannot be parsed.

    Carries a message and, optionally, the token at which parsing failed; when both are
    set, the textual form also names that token's row and column.
    """

    def __init__(self, message='', token=None):
        self.message = message
        self.token = token

    def __repr__(self):
        if not (self.message and self.token):
            return self.message
        return "{} at row {} and col {}".format(self.message, self.token.row, self.token.col)

    def __str__(self):
        return repr(self)
+376
View File
@@ -0,0 +1,376 @@
"""
A Recursive Descent implementation of a lexical parser for TOML.
Grammar:
--------
Newline -> NEWLINE
Comment -> COMMENT Newline
LineTerminator -> Comment | Newline
Space -> WHITESPACE Space | WHITESPACE | EMPTY
TableHeader -> Space [ Space TableHeaderName Space ] Space LineTerminator |
Space [[ Space TableHeaderName Space ]] Space LineTerminator
TableHeaderName -> STRING Space '.' Space TableHeaderName | STRING
Atomic -> STRING | INTEGER | FLOAT | DATE | BOOLEAN
Array -> '[' Space ArrayInternal Space ']' | '[' Space ArrayInternal Space LineTerminator Space ']'
ArrayInternal -> LineTerminator Space ArrayInternal | Value Space ',' Space LineTerminator Space ArrayInternal |
Value Space ',' Space ArrayInternal | LineTerminator | Value | EMPTY
InlineTable -> '{' Space InlineTableInternal Space '}'
InlineTableKeyValuePair -> STRING Space '=' Space Value
InlineTableInternal -> InlineTableKeyValuePair Space ',' Space InlineTableInternal |
InlineTableKeyValuePair | EMPTY
Value -> Atomic | InlineTable | Array
KeyValuePair -> Space STRING Space '=' Space Value Space LineTerminator
TableBody -> KeyValuePair TableBody | EmptyLine TableBody | EmptyLine | KeyValuePair
EmptyLine -> Space LineTerminator
FileEntry -> TableHeader | TableBody
TOMLFileElements -> FileEntry TOMLFileElements | FileEntry | EmptyLine | EMPTY
"""
from prettytoml import tokens
from prettytoml.elements.array import ArrayElement
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.inlinetable import InlineTableElement
from prettytoml.elements.metadata import NewlineElement, CommentElement, WhitespaceElement, PunctuationElement
from prettytoml.elements.table import TableElement
from prettytoml.elements.tableheader import TableHeaderElement
from prettytoml.parser.recdesc import capture_from
from prettytoml.parser.errors import ParsingError
from prettytoml.parser.tokenstream import TokenStream
"""
Non-terminals are represented as functions which return (RESULT, pending_token_stream), or raise ParsingError.
"""
def token(token_type):
    """
    Builds a parser that accepts exactly one token of the given type.

    The returned function takes a token stream and returns (head_token, stream_tail), or
    raises ParsingError when the head token has a different type.
    """
    def match_head(ts):
        head = ts.head
        if head.type != token_type:
            raise ParsingError('Expected a token of type {}'.format(token_type))
        return head, ts.tail

    return match_head
def newline_element(token_stream):
    """
    Returns NewlineElement, pending_token_stream or raises ParsingError.
    """
    newline_matcher = token(tokens.TYPE_NEWLINE)
    capture = capture_from(token_stream).find(newline_matcher)
    element = NewlineElement(capture.value())
    return element, capture.pending_tokens
def comment_tokens(ts1):
    """
    Returns (tokens, pending_token_stream) for a comment token followed by a newline token.
    """
    capture = capture_from(ts1).find(token(tokens.TYPE_COMMENT))
    capture = capture.and_find(token(tokens.TYPE_NEWLINE))
    return capture.value(), capture.pending_tokens
def comment_element(token_stream):
    """
    Returns CommentElement, pending_token_stream or raises ParsingError.
    """
    capture = capture_from(token_stream).find(comment_tokens)
    element = CommentElement(capture.value())
    return element, capture.pending_tokens
def line_terminator_tokens(token_stream):
    """
    Returns (tokens, pending_token_stream) for either a comment with its newline or a bare newline.
    """
    capture = capture_from(token_stream).find(comment_tokens)
    capture = capture.or_find(token(tokens.TYPE_NEWLINE))
    return capture.value(), capture.pending_tokens
def line_terminator_element(token_stream):
    """
    Returns (element, pending_token_stream) where element is the first captured comment
    or newline element.
    """
    capture = capture_from(token_stream).find(comment_element).or_find(newline_element)
    found = capture.value('Expected a comment or a newline')
    return found[0], capture.pending_tokens
def zero_or_more_tokens(token_type):
    """
    Builds a finder that greedily consumes zero or more consecutive tokens of the given type.

    The finder always succeeds: it returns (tuple_of_tokens, pending_token_stream), with an empty
    tuple when the head token is of another type or the stream is exhausted.
    """
    def factory(token_stream):
        def more(ts):
            # One token of the requested type followed by whatever else matches.
            c = capture_from(ts).find(token(token_type)).and_find(zero_or_more_tokens(token_type))
            return c.value(), c.pending_tokens

        def zero(ts):
            # Nothing matched: consume nothing.
            return tuple(), ts

        # The former middle alternative hard-coded TYPE_WHITESPACE and read a non-existent
        # `pending` attribute, raising AttributeError for any non-whitespace token_type. It could
        # never succeed after `more` failed on whitespace, so it is dropped.
        captured = capture_from(token_stream).find(more).or_find(zero)
        return captured.value(), captured.pending_tokens
    return factory
def space_element(token_stream):
    """
    Matches any run of whitespace tokens (possibly empty) and wraps it in a WhitespaceElement.

    Returns (WhitespaceElement, pending_token_stream).
    """
    match = capture_from(token_stream).find(zero_or_more_tokens(tokens.TYPE_WHITESPACE))
    # Falsy entries are filtered out before building the element.
    kept = [found for found in match.value() if found]
    return WhitespaceElement(kept), match.pending_tokens
def string_token(token_stream):
    """
    Matches one string token of any supported flavour, tried in the order listed below.

    Returns (tokens, pending_token_stream) or raises ParsingError('Expected a string').
    """
    string_types = (
        tokens.TYPE_BARE_STRING,
        tokens.TYPE_STRING,
        tokens.TYPE_LITERAL_STRING,
        tokens.TYPE_MULTILINE_STRING,
        tokens.TYPE_MULTILINE_LITERAL_STRING,
    )
    match = capture_from(token_stream).find(token(string_types[0]))
    for string_type in string_types[1:]:
        match = match.or_find(token(string_type))
    return match.value('Expected a string'), match.pending_tokens
def string_element(token_stream):
    """
    Matches a string token and wraps it in an AtomicElement.

    Returns (AtomicElement, pending_token_stream).
    """
    match = capture_from(token_stream).find(string_token)
    element = AtomicElement(match.value())
    return element, match.pending_tokens
def table_header_name_tokens(token_stream):
    """
    Matches the dotted name inside a table header: one string, or a string followed by a dot
    (with optional surrounding whitespace) and a further dotted name.

    Returns (tokens, pending_token_stream).
    """
    optional_space = zero_or_more_tokens(tokens.TYPE_WHITESPACE)

    def dotted(ts):
        # STRING Space '.' Space <rest of the dotted name>
        c = capture_from(ts).find(string_token)
        c = c.and_find(optional_space)
        c = c.and_find(token(tokens.TYPE_OPT_DOT))
        c = c.and_find(zero_or_more_tokens(tokens.TYPE_WHITESPACE))
        c = c.and_find(table_header_name_tokens)
        return c.value(), c.pending_tokens

    match = capture_from(token_stream).find(dotted).or_find(string_token)
    return match.value(), match.pending_tokens
def table_header_element(token_stream):
    """
    Matches a full table header line, either `[name]` or `[[name]]`, including optional whitespace
    around the brackets and name, and the terminating comment/newline.

    Returns (TableHeaderElement, pending_token_stream).
    """
    def bracketed(opening_type, closing_type):
        # Builds a finder for: Space OPEN Space Name Space CLOSE Space LineTerminator
        def finder(ts):
            c = capture_from(ts).find(zero_or_more_tokens(tokens.TYPE_WHITESPACE))
            c = c.and_find(token(opening_type))
            c = c.and_find(zero_or_more_tokens(tokens.TYPE_WHITESPACE))
            c = c.and_find(table_header_name_tokens)
            c = c.and_find(zero_or_more_tokens(tokens.TYPE_WHITESPACE))
            c = c.and_find(token(closing_type))
            c = c.and_find(zero_or_more_tokens(tokens.TYPE_WHITESPACE))
            c = c.and_find(line_terminator_tokens)
            return c.value(), c.pending_tokens
        return finder

    single = bracketed(tokens.TYPE_OP_SQUARE_LEFT_BRACKET, tokens.TYPE_OP_SQUARE_RIGHT_BRACKET)
    double = bracketed(tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET, tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET)

    match = capture_from(token_stream).find(single).or_find(double)
    return TableHeaderElement(match.value()), match.pending_tokens
def atomic_element(token_stream):
    """
    Matches a primitive value (string, integer, float, date or boolean, tried in that order) and
    wraps it in an AtomicElement.

    Returns (AtomicElement, pending_token_stream) or raises
    ParsingError('Expected an atomic primitive value').
    """
    match = capture_from(token_stream).find(string_token)
    for primitive_type in (tokens.TYPE_INTEGER, tokens.TYPE_FLOAT, tokens.TYPE_DATE, tokens.TYPE_BOOLEAN):
        match = match.or_find(token(primitive_type))
    element = AtomicElement(match.value('Expected an atomic primitive value'))
    return element, match.pending_tokens
def punctuation_element(token_type):
    """
    Builds a finder that matches one token of the given type and wraps it in a PunctuationElement.

    The finder returns (PunctuationElement, pending_token_stream).
    """
    def match_punctuation(stream):
        match = capture_from(stream).find(token(token_type))
        found = match.value('Expected the punctuation element: {}'.format(token_type))
        return PunctuationElement(found), match.pending_tokens
    return match_punctuation
def value(token_stream):
    """
    Matches any value: a primitive, an array or an inline table (tried in that order).

    Returns (elements, pending_token_stream).
    """
    match = capture_from(token_stream).find(atomic_element)
    match = match.or_find(array_element)
    match = match.or_find(inline_table_element)
    found = match.value('Expected a primitive value, array or an inline table')
    return found, match.pending_tokens
def array_internal(ts):
    """
    Matches the contents between the brackets of an array.

    Alternatives, tried in order:
      0. LineTerminator Space ArrayInternal
      1. Value Space ',' Space LineTerminator Space ArrayInternal
      2. Value Space ',' Space ArrayInternal
      3. Space LineTerminator
      4. Value
      5. nothing at all

    Returns (elements, pending_token_stream).
    """
    def in_sequence(stream, finders):
        # Applies the finders one after the other, accumulating everything they capture.
        c = capture_from(stream)
        for finder in finders:
            c = c.and_find(finder)
        return c.value(), c.pending_tokens

    def zero(ts0):
        return in_sequence(ts0, (line_terminator_element, space_element, array_internal))

    def one(ts1):
        return in_sequence(ts1, (
            value,
            space_element,
            punctuation_element(tokens.TYPE_OP_COMMA),
            space_element,
            line_terminator_element,
            space_element,
            array_internal,
        ))

    def two(ts2):
        return in_sequence(ts2, (
            value,
            space_element,
            punctuation_element(tokens.TYPE_OP_COMMA),
            space_element,
            array_internal,
        ))

    def three(ts3):
        return in_sequence(ts3, (space_element, line_terminator_element))

    match = capture_from(ts).find(zero)
    for alternative in (one, two, three, value):
        match = match.or_find(alternative)
    match = match.or_empty()
    return match.value(), match.pending_tokens
def array_element(token_stream):
    """
    Matches a bracketed array and wraps it in an ArrayElement.

    Two layouts are accepted: `[ Space ArrayInternal Space ]`, or the same with an extra line
    terminator and whitespace before the closing bracket.

    Returns (ArrayElement, pending_token_stream).
    """
    def opened(stream):
        # Shared prefix of both layouts: '[' Space ArrayInternal Space
        c = capture_from(stream).find(punctuation_element(tokens.TYPE_OP_SQUARE_LEFT_BRACKET))
        c = c.and_find(space_element)
        c = c.and_find(array_internal)
        return c.and_find(space_element)

    def one(ts1):
        ca = opened(ts1).and_find(punctuation_element(tokens.TYPE_OP_SQUARE_RIGHT_BRACKET))
        return ca.value(), ca.pending_tokens

    def two(ts2):
        ca = opened(ts2).and_find(line_terminator_element)
        ca = ca.and_find(space_element)
        ca = ca.and_find(punctuation_element(tokens.TYPE_OP_SQUARE_RIGHT_BRACKET))
        return ca.value(), ca.pending_tokens

    match = capture_from(token_stream).find(one).or_find(two)
    return ArrayElement(match.value()), match.pending_tokens
def inline_table_element(token_stream):
    """
    Matches an inline table and wraps it in an InlineTableElement.

    Grammar:
        InlineTableElement     -> '{' Space InlineTableInternal Space '}'
        InlineTableKeyValuePair = STRING Space '=' Space Value
        InlineTableInternal    -> InlineTableKeyValuePair Space ',' Space InlineTableInternal |
                                  InlineTableKeyValuePair | Empty

    Returns (InlineTableElement, pending_token_stream).
    """
    def key_value(ts):
        pair = capture_from(ts).find(string_element)
        pair = pair.and_find(space_element)
        pair = pair.and_find(punctuation_element(tokens.TYPE_OP_ASSIGNMENT))
        pair = pair.and_find(space_element)
        pair = pair.and_find(value)
        return pair.value(), pair.pending_tokens

    def internal(ts):
        def pair_then_rest(ts1):
            c1 = capture_from(ts1).find(key_value)
            c1 = c1.and_find(space_element)
            c1 = c1.and_find(punctuation_element(tokens.TYPE_OP_COMMA))
            c1 = c1.and_find(space_element)
            c1 = c1.and_find(internal)
            return c1.value(), c1.pending_tokens

        body = capture_from(ts).find(pair_then_rest).or_find(key_value).or_empty()
        return body.value(), body.pending_tokens

    match = capture_from(token_stream).find(punctuation_element(tokens.TYPE_OP_CURLY_LEFT_BRACKET))
    match = match.and_find(space_element)
    match = match.and_find(internal)
    match = match.and_find(space_element)
    match = match.and_find(punctuation_element(tokens.TYPE_OP_CURLY_RIGHT_BRACKET))
    return InlineTableElement(match.value()), match.pending_tokens
def key_value_pair(token_stream):
    """
    Matches one `key = value` line: Space STRING Space '=' Space Value Space LineTerminator.

    Returns (elements, pending_token_stream).
    """
    match = capture_from(token_stream).find(space_element)
    match = match.and_find(string_element)
    match = match.and_find(space_element)
    match = match.and_find(punctuation_element(tokens.TYPE_OP_ASSIGNMENT))
    match = match.and_find(space_element)
    match = match.and_find(value)
    match = match.and_find(space_element)
    match = match.and_find(line_terminator_element)
    return match.value(), match.pending_tokens
def table_body_elements(token_stream):
    """
    Matches the body of a table.

    Grammar: TableBody -> KeyValuePair TableBody | EmptyLine TableBody | EmptyLine | KeyValuePair

    Returns (elements, pending_token_stream).
    """
    def pair_then_body(ts1):
        c = capture_from(ts1).find(key_value_pair).and_find(table_body_elements)
        return c.value(), c.pending_tokens

    def blank_then_body(ts2):
        c = capture_from(ts2).find(empty_line_elements).and_find(table_body_elements)
        return c.value(), c.pending_tokens

    match = capture_from(token_stream).find(pair_then_body)
    for alternative in (blank_then_body, empty_line_elements, key_value_pair):
        match = match.or_find(alternative)
    return match.value(), match.pending_tokens
def table_body_element(token_stream):
    """
    Matches a table body and wraps its elements in a TableElement.

    Returns (TableElement, pending_token_stream).
    """
    match = capture_from(token_stream).find(table_body_elements)
    table = TableElement(match.value())
    return table, match.pending_tokens
def empty_line_tokens(ts1):
    """
    Matches a line holding nothing but optional whitespace and a line terminator.

    Returns (elements, pending_token_stream).
    """
    match = capture_from(ts1).find(space_element)
    match = match.and_find(line_terminator_element)
    return match.value(), match.pending_tokens
def empty_line_elements(token_stream):
    """
    Matches one empty line by delegating to empty_line_tokens.

    Returns (elements, pending_token_stream).
    """
    match = capture_from(token_stream).find(empty_line_tokens)
    found = match.value()
    return found, match.pending_tokens
def file_entry_element(token_stream):
    """
    Matches one top-level file entry: a table header, or failing that a table body.

    Returns (elements, pending_token_stream).
    """
    match = capture_from(token_stream).find(table_header_element)
    match = match.or_find(table_body_element)
    return match.value(), match.pending_tokens
def toml_file_elements(token_stream):
    """
    Matches the sequence of file entries making up a whole TOML file.

    Tries "entry followed by more entries", then a single entry, and finally accepts nothing.

    Returns (elements, pending_token_stream).
    """
    def entry_then_rest(ts1):
        c1 = capture_from(ts1).find(file_entry_element)
        c1 = c1.and_find(toml_file_elements)
        return c1.value(), c1.pending_tokens

    match = capture_from(token_stream).find(entry_then_rest)
    match = match.or_find(file_entry_element).or_empty()
    return match.value(), match.pending_tokens
+114
View File
@@ -0,0 +1,114 @@
from prettytoml.parser.errors import ParsingError
from prettytoml.parser.tokenstream import TokenStream
class Capturer:
    """
    Recursive-descent matching DSL.

    A Capturer holds a token stream, the values captured so far and, when a finder failed, the
    "dormant" error describing that failure. Errors stay dormant until `value()` is called, which
    lets alternatives be chained with `or_find()` / `or_empty()` before anything is raised.
    """

    def __init__(self, token_stream, value=tuple(), dormant_error=None):
        self._token_stream = token_stream
        self._value = value
        self._dormant_error = dormant_error

    def find(self, finder):
        """
        Searches the token stream using the given finder.

        `finder(ts)` is a function that accepts a `TokenStream` instance and returns `(element, pending_ts)`
        where `element` is the found "something" or a sequence of "somethings", and `pending_ts` the unconsumed
        `TokenStream`.

        `finder(ts)` can raise `ParsingError` to indicate that it couldn't find anything, or
        a `TokenStream.EndOfStream` to indicate a premature end of the TokenStream.

        This method returns a Capturer instance that can be further used to find more and more "somethings". The value
        at any given moment can be retrieved via the `Capturer.value()` method.
        """
        try:
            element, pending_ts = finder(self._token_stream)

            # Normalise the result to a tuple. Lists are accepted as sequences too, so they must be
            # converted: concatenating a tuple with a list would raise TypeError.
            if isinstance(element, (tuple, list)):
                element = tuple(element)
            else:
                element = (element,)

            # Return a Capturer with accumulated findings
            return Capturer(pending_ts, value=self.value() + element)

        except (ParsingError, TokenStream.EndOfStream) as e:
            # Failed to find, or premature end of stream: store the error in the returned value
            # and leave the stream position untouched.
            return Capturer(self._token_stream, dormant_error=e)

    def value(self, parsing_expectation_msg=None):
        """
        Returns the accumulated values found as a sequence of values, or raises an encountered dormant error.

        If parsing_expectation_msg is specified and a dormant_error is a ParsingError, a new ParsingError
        carrying that message and the current head token is raised instead.
        """
        if self._dormant_error:
            if parsing_expectation_msg and isinstance(self._dormant_error, ParsingError):
                raise ParsingError(parsing_expectation_msg, token=self._token_stream.head)
            else:
                raise self._dormant_error
        return self._value

    @property
    def pending_tokens(self):
        """
        Returns a TokenStream with the pending tokens yet to be processed.
        """
        return self._token_stream

    def or_find(self, finder):
        """
        If a dormant_error is present, try this new finder instead. If not, does nothing.
        """
        if self._dormant_error:
            return Capturer(self._token_stream).find(finder)
        else:
            return self

    def or_end_of_file(self):
        """
        Discards any errors if at end of the stream.
        """
        if isinstance(self._dormant_error, TokenStream.EndOfStream):
            return Capturer(self.pending_tokens, value=self._value)
        else:
            return self

    def or_empty(self):
        """
        Discards any previously-encountered dormant error.
        """
        if self._dormant_error:
            return Capturer(self.pending_tokens, value=self._value)
        else:
            return self

    def and_find(self, finder):
        """
        Accumulate new "somethings" to the stored value using the given finder.

        A dormant error short-circuits: the finder is not run and the error is carried forward.
        """
        if self._dormant_error:
            return Capturer(self.pending_tokens, dormant_error=self._dormant_error)
        return Capturer(self.pending_tokens, self.value()).find(finder)
def capture_from(token_stream):
    """
    Entry point of the matching DSL: returns a fresh Capturer positioned at the given token stream.
    """
    capturer = Capturer(token_stream)
    return capturer
+156
View File
@@ -0,0 +1,156 @@
from prettytoml.elements.array import ArrayElement
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.metadata import CommentElement, NewlineElement, WhitespaceElement
from prettytoml.elements.tableheader import TableHeaderElement
from prettytoml.lexer import tokenize
from prettytoml.parser import parser
from prettytoml.parser.tokenstream import TokenStream
def test_line_terminator_1():
    """
    A comment line is parsed as a CommentElement consuming two tokens; the input stream is untouched.
    """
    stream = TokenStream(tokenize('# Sup\n'))
    parsed, remaining = parser.line_terminator_element(stream)

    assert isinstance(parsed, CommentElement)
    assert remaining.offset == 2
    assert stream.offset == 0
def test_line_terminator_2():
    """
    A bare newline is parsed as a NewlineElement consuming one token; the input stream is untouched.
    """
    stream = TokenStream(tokenize('\n'))
    parsed, remaining = parser.line_terminator_element(stream)

    assert isinstance(parsed, NewlineElement)
    assert remaining.offset == 1
    assert stream.offset == 0
def test_space_1():
    """
    Leading whitespace is captured into a WhitespaceElement with two tokens.
    """
    # NOTE(review): this input reads the same as the one in test_space_2, which expects a single
    # token; the original literal presumably held more whitespace that was lost - confirm.
    stream = TokenStream(tokenize(' noo'))
    whitespace, remaining = parser.space_element(stream)

    assert isinstance(whitespace, WhitespaceElement)
    assert len(whitespace.tokens) == 2
    assert remaining.offset == 2
    assert stream.offset == 0
def test_space_2():
    """
    A single leading whitespace token is captured into a WhitespaceElement.
    """
    stream = TokenStream(tokenize(' noo'))
    whitespace, remaining = parser.space_element(stream)

    assert isinstance(whitespace, WhitespaceElement)
    assert len(whitespace.tokens) == 1
    assert remaining.offset == 1
    assert stream.offset == 0
def test_space_3():
    """
    With no leading whitespace, space_element still succeeds with an empty WhitespaceElement.
    """
    stream = TokenStream(tokenize('noo'))
    whitespace, remaining = parser.space_element(stream)

    assert isinstance(whitespace, WhitespaceElement)
    assert len(whitespace.tokens) == 0
    assert remaining.offset == 0
    assert stream.offset == 0
def test_table_header():
    """
    A spaced-out dotted header is parsed as a TableHeaderElement, leaving four tokens pending.
    """
    stream = TokenStream(tokenize(" [ namez . namey . namex ] \n other things"))
    header, remaining = parser.table_header_element(stream)

    assert isinstance(header, TableHeaderElement)
    assert len(remaining) == 4
def test_atomic_element():
    """
    atomic_element parses the first primitive of the input and leaves the two remaining tokens pending.
    """
    cases = (
        ('42 not', 42),
        ('not 42', 'not'),
    )
    for text, expected_value in cases:
        parsed, remaining = parser.atomic_element(TokenStream(tokenize(text)))
        assert isinstance(parsed, AtomicElement)
        assert parsed.value == expected_value
        assert len(remaining) == 2
def test_array():
    """
    A single-line array of five integers is parsed as an ArrayElement, leaving one token pending.
    """
    stream = TokenStream(tokenize('[ 3, 4, 5,6,7] '))
    parsed, remaining = parser.array_element(stream)

    assert isinstance(parsed, ArrayElement)
    assert len(parsed) == 5
    assert len(remaining) == 1
def test_array_2():
    """
    An array spread over several lines is parsed and its items are reachable by index.
    """
    source = """[
"alpha",
"omega"
]"""
    parsed, _remaining = parser.array_element(TokenStream(tokenize(source)))

    assert parsed[0] == 'alpha'
    assert parsed[1] == 'omega'
def test_empty_array():
    """
    An empty array is parsed as an ArrayElement and consumes the whole input.
    """
    stream = TokenStream(tokenize('[]'))
    parsed, remaining = parser.array_element(stream)

    assert isinstance(parsed, ArrayElement)
    assert remaining.at_end
def test_inline_table():
    """
    An inline table exposes its keys and values; the two trailing tokens stay pending.
    """
    stream = TokenStream(tokenize('{ "id"= 42,test = name} vroom'))
    table, remaining = parser.inline_table_element(stream)

    assert set(table.keys()) == {'id', 'test'}
    assert len(remaining) == 2
    assert table['id'] == 42
    assert table['test'] == 'name'
def test_table_body():
    """
    A table body with two entries (one carrying a trailing comment) exposes both keys and values.
    """
    stream = TokenStream(tokenize(' name= "test" # No way man!\nid =42\n vvv'))
    body, remaining = parser.table_body_element(stream)

    assert set(body.keys()) == {'name', 'id'}
    assert len(remaining) == 2
    assert body['name'] == 'test'
    assert body['id'] == 42
def test_key_value_pair():
    """
    A key/value line whose value is a multi-line array yields the key at index 1 and the array at index 5.
    """
    source = """hosts = [
"alpha",
"omega"
]
"""
    elements, _remaining = parser.key_value_pair(TokenStream(tokenize(source)))

    assert isinstance(elements[1], AtomicElement)
    assert isinstance(elements[5], ArrayElement)
def test_table_body_2():
    """
    A table body mixing nested arrays, a comment line and a multi-line array is consumed completely.
    """
    source = """
data = [ ["gamma", "delta"], [1, 2] ]
# Line breaks are OK when inside arrays
hosts = [
"alpha",
"omega"
]
str_multiline = wohoo
"""
    _body, remaining = parser.table_body_element(TokenStream(tokenize(source)))

    assert len(remaining) == 0
+39
View File
@@ -0,0 +1,39 @@
class TokenStream:
    """
    An immutable subset of a token sequence.

    The underlying tokens are stored once as a tuple; a stream is that tuple plus the offset of its
    head, so taking `tail` never copies the tokens.
    """

    class EndOfStream(Exception):
        # Raised by `head` when the stream has no tokens left.
        pass

    Nothing = tuple()

    def __init__(self, _tokens, offset=0):
        # Reuse the tuple as-is when given one; materialise any other iterable exactly once.
        if isinstance(_tokens, tuple):
            self._tokens = _tokens
        else:
            self._tokens = tuple(_tokens)
        self._head_index = offset

    def __len__(self):
        # Number of tokens still pending. Clamped at zero: taking `tail` of an exhausted stream
        # moves the offset past the end, and a negative __len__ would make len() raise ValueError.
        return max(0, len(self._tokens) - self.offset)

    @property
    def head(self):
        """
        Returns the first pending token, or raises TokenStream.EndOfStream when none is left.
        """
        try:
            return self._tokens[self._head_index]
        except IndexError:
            raise TokenStream.EndOfStream

    @property
    def tail(self):
        """
        Returns a new TokenStream over the same tokens, positioned one token further.
        """
        return TokenStream(self._tokens, offset=self._head_index+1)

    @property
    def offset(self):
        """
        Returns the index of the head token within the underlying token sequence.
        """
        return self._head_index

    @property
    def at_end(self):
        """
        Returns True when no pending tokens are left.
        """
        return self.offset >= len(self._tokens)
+39
View File
@@ -0,0 +1,39 @@
from . import deindentanonymoustable, tableindent, tableassignment
from prettytoml.prettifier import tablesep, commentspace, linelength, tableentrysort
"""
TOMLFile prettifiers
Each prettifier is a function that accepts a sequence of Element instances that make up a
TOML file and it is allowed to modify it as it pleases.
"""
# Public names for the individual prettifier functions, one per formatting rule.
UNIFORM_TABLE_INDENTATION = tableindent.table_entries_should_be_uniformly_indented
UNIFORM_TABLE_ASSIGNMENT_SPACING = tableassignment.table_assignment_spacing
ANONYMOUS_TABLE_INDENTATION = deindentanonymoustable.deindent_anonymous_table
COMMENT_SPACING = commentspace.comment_space
TABLE_SPACING = tablesep.table_separation
LINE_LENGTH_ENFORCERS = linelength.line_length_limiter
TABLE_ENTRY_SORTING = tableentrysort.sort_table_entries
# Default rule set used by prettify(); the rules are applied in the order listed here.
ALL = (
TABLE_SPACING, # Must be before COMMENT_SPACING
COMMENT_SPACING, # Must be after TABLE_SPACING
UNIFORM_TABLE_INDENTATION,
UNIFORM_TABLE_ASSIGNMENT_SPACING,
ANONYMOUS_TABLE_INDENTATION,
LINE_LENGTH_ENFORCERS,
TABLE_ENTRY_SORTING,
)
def prettify(toml_file_elements, prettifiers=ALL):
    """
    Runs the given prettifiers, in order, over a copy of the sequence of element instances.

    Each prettifier receives the result of the previous one; the final result is returned.
    """
    result = toml_file_elements[:]
    for apply_rule in prettifiers:
        result = apply_rule(result)
    return result
+35
View File
@@ -0,0 +1,35 @@
from prettytoml.elements import traversal as t, factory as element_factory
from prettytoml.elements.table import TableElement
def comment_space(toml_file_elements):
    """
    Rule: Line-terminating comments should always be prefixed by a single tab character whitespace only.

    Returns a shallow copy of the element sequence; the sub-elements of each TableElement are
    adjusted in place.
    """
    copied = toml_file_elements[:]
    tables = (candidate for candidate in copied if isinstance(candidate, TableElement))
    for table in tables:
        _do_table(table.sub_elements)
    return copied
def _do_table(table_elements):
    """
    Rewrites, in place, the whitespace before every line-terminating comment of a table into one tab.
    """
    # Position after which the search for the next comment resumes
    cursor = float('-inf')

    def comment_at():
        return t.find_following(table_elements, t.predicates.comment, cursor)

    def newline_at():
        return t.find_following(table_elements, t.predicates.newline, cursor)

    def anchor_at():
        # Last non-metadata element located before the next comment
        return t.find_previous(table_elements, t.predicates.non_metadata, comment_at())

    while comment_at() >= 0:
        # Only comments that share their line with a value are touched; indices are recomputed
        # after every mutation because the list shrinks and grows.
        if cursor < anchor_at() < comment_at() < newline_at():
            gap_start = anchor_at() + 1
            gap_end = comment_at()
            del table_elements[gap_start:gap_end]
            slot = comment_at()
            table_elements.insert(slot, element_factory.create_whitespace_element(char='\t', length=1))
        cursor = newline_at()
+54
View File
@@ -0,0 +1,54 @@
from itertools import *
from prettytoml.elements.common import TokenElement
from prettytoml.elements.metadata import NewlineElement
def text_to_elements(toml_text):
    """
    Tokenizes and parses the given TOML text, returning what parse_tokens produces.
    """
    # Imported here rather than at module level, as in the rest of this helper module.
    from ..lexer import tokenize
    from ..parser import parse_tokens
    token_sequence = tokenize(toml_text)
    return parse_tokens(token_sequence)
def elements_to_text(toml_elements):
    """
    Serializes every element and concatenates the results into a single string.
    """
    pieces = [element.serialized() for element in toml_elements]
    return ''.join(pieces)
def assert_prettifier_works(source_text, expected_text, prettifier_func):
    """
    Asserts that parsing source_text, applying prettifier_func and serializing gives expected_text.
    """
    parsed = text_to_elements(source_text)
    prettified = prettifier_func(parsed)
    actual_text = elements_to_text(prettified)
    assert expected_text == actual_text
def lines(elements):
    """
    Splits a sequence of elements into a sub-sequence of each line.

    A line is defined as a sequence of elements terminated by a NewlineElement; the terminator is
    part of the line. Each line is yielded as a tuple.

    Trailing elements that are not followed by a NewlineElement are yielded as a final,
    unterminated line instead of raising IndexError.
    """
    current = []
    # Snapshot the input so any iterable is accepted and later mutation by the caller is harmless.
    for element in tuple(elements):
        current.append(element)
        if isinstance(element, NewlineElement):
            yield tuple(current)
            current = []
    if current:
        yield tuple(current)
def non_empty_elements(elements):
    """
    Filters out TokenElement instances with zero tokens; every other element is kept.
    """
    def is_kept(element):
        # Anything that is not a TokenElement is kept, as is any TokenElement holding tokens.
        return not isinstance(element, TokenElement) or element.tokens

    return filter(is_kept, elements)
def index(predicate, seq):
    """
    Returns the index of the first element satisfying the given predicate, or None.
    """
    for position, item in enumerate(seq):
        if predicate(item):
            return position
    return None
+43
View File
@@ -0,0 +1,43 @@
import operator
from prettytoml.elements import traversal as t, traversal
from itertools import *
from functools import *
from prettytoml.elements.metadata import WhitespaceElement
from prettytoml.elements.table import TableElement
from prettytoml.prettifier import common
def deindent_anonymous_table(toml_file_elements):
    """
    Rule: Anonymous table should never be indented.

    Returns the input unchanged when there is no anonymous table; otherwise returns a new sequence
    in which the anonymous table is replaced by an unindented copy.
    """
    position = _find_anonymous_table(toml_file_elements)
    if position is None:
        return toml_file_elements

    before = toml_file_elements[:position]
    unindented = _unindent_table(toml_file_elements[position])
    after = toml_file_elements[position+1:]
    return before + [unindented] + after
def _unindent_table(table_element):
    """
    Returns a new TableElement built from the given one with the leading whitespace elements of
    every line removed.
    """
    def without_indent(line):
        return tuple(dropwhile(lambda e: isinstance(e, WhitespaceElement), line))

    original_lines = tuple(common.lines(table_element.sub_elements))
    stripped_lines = tuple(without_indent(line) for line in original_lines)
    return TableElement(reduce(operator.concat, stripped_lines))
def _find_anonymous_table(toml_file_elements):
    """
    Finds and returns the index of the TableElement comprising the anonymous table or None.

    The anonymous table is the first table of the file, provided it comes before the first table
    header (or the file has no header at all).
    """
    first_table_index = common.index(t.predicates.table, toml_file_elements)
    first_table_header_index = common.index(t.predicates.table_header, toml_file_elements)

    # No table at all: nothing to report. Checked explicitly because comparing None with an
    # integer raises TypeError on Python 3.
    if first_table_index is None:
        return None

    if first_table_header_index is None or first_table_index < first_table_header_index:
        return first_table_index

    # The first table follows a header, so it is a named table.
    return None
+62
View File
@@ -0,0 +1,62 @@
import operator
from prettytoml import tokens
from prettytoml.prettifier import common
from prettytoml.elements import traversal as t, factory as element_factory
from prettytoml.elements.array import ArrayElement
from prettytoml.elements.atomic import AtomicElement
from prettytoml.elements.inlinetable import InlineTableElement
from prettytoml.elements.table import TableElement
from functools import *
# Lines whose serialized length exceeds this many characters get their value broken onto multiple lines.
MAXIMUM_LINE_LENGTH = 120
def line_length_limiter(toml_file_elements):
    """
    Rule: Lines whose lengths exceed 120 characters whose values are strings, arrays should have the array or
    string value broken onto multiple lines

    Returns a tuple in which every TableElement is replaced by a fixed copy; other elements are kept.
    """
    def fixed(element):
        if isinstance(element, TableElement):
            return _fixed_table(element)
        return element

    return tuple(fixed(element) for element in toml_file_elements)
def _fixed_table(table_element):
    """
    Returns a new TableElement in which every line longer than MAXIMUM_LINE_LENGTH has been fixed.
    """
    assert isinstance(table_element, TableElement)

    def checked(line):
        # Short-enough lines are passed through untouched.
        if _line_length(line) > MAXIMUM_LINE_LENGTH:
            return _fixed_line(line)
        return line

    table_lines = tuple(common.lines(table_element.sub_elements))
    checked_lines = tuple(checked(line) for line in table_lines)
    return TableElement(sub_elements=tuple(reduce(operator.concat, checked_lines)))
def _line_length(line_elements):
"""
Returns the character length of the serialized elements of the given line.
"""
return sum(len(e.serialized()) for e in line_elements)
def _fixed_line(line_elements):
    """
    Returns the line as a tuple of elements with its value element replaced by a multi-line
    equivalent. Strings are re-created as multi-line strings, arrays are turned multi-line in place;
    any other value is kept. The line is returned unchanged when no value element is located.
    """
    def multiline_equivalent(element):
        if isinstance(element, AtomicElement) and tokens.is_string(element.first_token):
            return element_factory.create_multiline_string(element.value, MAXIMUM_LINE_LENGTH)
        if isinstance(element, ArrayElement):
            element.turn_into_multiline()
        return element

    line_elements = tuple(line_elements)

    # The value is the second non-metadata element of the line; the first one is the key.
    key_index = t.find_following(line_elements, t.predicates.non_metadata)
    value_index = t.find_following(line_elements, t.predicates.non_metadata, key_index)

    if not value_index >= 0:
        return line_elements

    replacement = (multiline_equivalent(line_elements[value_index]),)
    return line_elements[:value_index] + replacement + line_elements[value_index+1:]
+40
View File
@@ -0,0 +1,40 @@
from prettytoml.elements import traversal as t, factory as element_factory
def table_assignment_spacing(toml_file_elements):
    """
    Rule: Every key and value pair in any table should be separated the triplet
    (single space character, an assignment character =, single space character)

    Returns a shallow copy of the element sequence; the tables themselves are adjusted in place.
    """
    copied = toml_file_elements[:]
    for candidate in copied:
        if t.predicates.table(candidate):
            _do_table(candidate)
    return copied
def _do_table(table_element):
    """
    Normalises, in place, the spacing around the assignment operator of the table's entries.
    """
    entries = table_element.sub_elements

    # Position after which the search for the next key resumes
    cursor = float('-inf')

    def key_at():
        return t.find_following(entries, t.predicates.non_metadata, cursor)

    def assignment_at():
        return t.find_following(entries, t.predicates.op_assignment, key_at())

    def value_at():
        return t.find_following(entries, t.predicates.non_metadata, assignment_at())

    while key_at() >= 0:
        # Drop whatever sits between key and '=', then between '=' and value. Indices are
        # recomputed after each mutation since the list changes underneath.
        gap_start = key_at() + 1
        gap_end = assignment_at()
        del entries[gap_start:gap_end]

        gap_start = assignment_at() + 1
        gap_end = value_at()
        del entries[gap_start:gap_end]

        # Put exactly one space back on either side of the assignment.
        slot = assignment_at()
        entries.insert(slot, element_factory.create_whitespace_element(1))
        slot = value_at()
        entries.insert(slot, element_factory.create_whitespace_element(1))

        cursor = t.find_following(entries, t.predicates.newline, cursor)
+38
View File
@@ -0,0 +1,38 @@
import operator
from prettytoml import tokens
from prettytoml.elements.common import TokenElement
from prettytoml.elements.table import TableElement
from prettytoml.prettifier import common
from functools import *
def sort_table_entries(toml_file_elements):
    """
    Rule: Entries within a single table should be ordered lexicographically by key

    Returns a list in which every TableElement is replaced by a sorted copy; other elements are kept.
    """
    result = []
    for element in toml_file_elements:
        if isinstance(element, TableElement):
            result.append(_sorted_table(element))
        else:
            result.append(element)
    return result
def _line_key(line_elements):
    """
    Given a sequence of elements comprising a single line, returns an orderable value to use in ordering lines.

    The key is the primitive value of the first string-like TokenElement of the line.
    """
    string_values = (
        element.primitive_value
        for element in line_elements
        if isinstance(element, TokenElement) and tokens.is_string(element.first_token)
    )
    # Metadata lines should be at the end
    return next(string_values, 'z' * 10)
def _sorted_table(table):
    """
    Returns another TableElement where the table entries are sorted lexicographically by key.
    """
    assert isinstance(table, TableElement)

    # TokenElements with no tokens in them are discarded before splitting into lines
    meaningful = common.non_empty_elements(table.sub_elements)
    table_lines = tuple(common.lines(meaningful))

    ordered_lines = list(table_lines)
    ordered_lines.sort(key=_line_key)

    return TableElement(reduce(operator.concat, ordered_lines))
+49
View File
@@ -0,0 +1,49 @@
from prettytoml import tokens
from prettytoml.elements import traversal as t, factory as element_factory
from prettytoml.tokens import py2toml
def table_entries_should_be_uniformly_indented(toml_file_elements):
    """
    Rule: Nth-level table sections should be indented by (N-1)*2 spaces

    Returns a shallow copy of the element sequence; headers and tables are adjusted in place.
    """
    copied = toml_file_elements[:]
    for position, candidate in enumerate(copied):
        if not t.predicates.table_header(candidate):
            continue
        # NOTE(review): presumably a table always follows each header - confirm what
        # find_following returns when none does.
        body = copied[t.find_following(copied, t.predicates.table, position)]
        _do_table_header(candidate)
        _do_table(body, len(candidate.names))
    return copied
def _do_table_header(table_header):
    """
    Replaces, in place, the header's leading whitespace tokens by a single whitespace token of
    (number_of_names - 1) * 2 spaces.
    """
    header_tokens = table_header.tokens
    first_meaningful = next(
        position for (position, candidate) in enumerate(header_tokens)
        if candidate.type != tokens.TYPE_WHITESPACE
    )
    del table_header.tokens[0:first_meaningful]

    indent_width = (len(table_header.names)-1) * 2
    table_header.tokens.insert(0, py2toml.create_whitespace(' ' * indent_width))
def _do_table(table_element, table_level):
    """
    Re-indents, in place, every entry of the table with (table_level - 1) * 2 spaces.
    """
    entries = table_element.sub_elements

    # Position after which the search for the next entry resumes
    cursor = float('-inf')

    def indent_at():
        return t.find_following(entries, t.predicates.whitespace, cursor)

    def entry_at():
        return t.find_following(entries, t.predicates.non_metadata, cursor)

    def newline_at():
        return t.find_following(entries, t.predicates.newline, entry_at())

    # NOTE(review): the new indent is inserted for every entry, whether or not an old indent was
    # removed - the nesting of these statements should be confirmed against upstream.
    while entry_at() >= 0:
        if indent_at() >= 0:
            old_start = indent_at()
            old_end = entry_at()
            del entries[old_start:old_end]
        slot = entry_at()
        entries.insert(slot, element_factory.create_whitespace_element((table_level-1)*2))
        cursor = newline_at()
+31
View File
@@ -0,0 +1,31 @@
from prettytoml.elements import traversal as t, factory as element_factory
from prettytoml.elements.metadata import WhitespaceElement, NewlineElement
from prettytoml.elements.table import TableElement
def table_separation(toml_file_elements):
    """
    Rule: Tables should always be separated by an empty line.

    Returns a shallow copy of the element sequence; the sub-elements of each TableElement are
    adjusted in place.
    """
    copied = toml_file_elements[:]
    tables = (candidate for candidate in copied if isinstance(candidate, TableElement))
    for table in tables:
        _do_table(table.sub_elements)
    return copied
def _do_table(table_elements):
    """
    Normalises, in place, the end of a table so that it finishes with an empty line.
    """
    # Strip trailing whitespace elements
    while table_elements:
        if not isinstance(table_elements[-1], WhitespaceElement):
            break
        del table_elements[-1]

    if not table_elements:
        return

    # A trailing newline means there may be several blank lines: cut everything after the last
    # non-metadata element before re-adding the terminators.
    if isinstance(table_elements[-1], NewlineElement):
        cut_from = t.find_previous(table_elements, t.predicates.non_metadata) + 1
        del table_elements[cut_from:]

    # NOTE(review): both newlines are taken to be appended unconditionally - the nesting of these
    # two statements should be confirmed against upstream.
    for _ in range(2):
        table_elements.append(element_factory.create_newline_element())
+28
View File
@@ -0,0 +1,28 @@
from .common import assert_prettifier_works
from .commentspace import comment_space
def test_comment_space():
    """
    The whitespace before a line-terminating comment becomes a single tab; own-line comments are kept.
    """
    source = """
my_key = string
id = 12 # My special ID
[section.name]
headerk = false
# Own-line comment should stay the same
other_key = "value"
"""
    expected = """
my_key = string
id = 12\t# My special ID
[section.name]
headerk = false
# Own-line comment should stay the same
other_key = "value"
"""
    assert_prettifier_works(source, expected, comment_space)
+22
View File
@@ -0,0 +1,22 @@
"""
This testing module depends on all the other modules.
"""
from .deindentanonymoustable import deindent_anonymous_table
from .common import assert_prettifier_works
def test_anon_table_indent():
    """
    Entries of the anonymous table come out without indentation.
    """
    # NOTE(review): source and expected read identically here; the indentation of the source
    # fixture was presumably lost - confirm against upstream.
    source = """
key=value
another_key =44
noname = me
"""
    expected = """
key=value
another_key =44
noname = me
"""
    assert_prettifier_works(source, expected, deindent_anonymous_table)
+39
View File
@@ -0,0 +1,39 @@
from .linelength import line_length_limiter
from .common import assert_prettifier_works, elements_to_text, text_to_elements
import pytoml
def test_splitting_string():
    """
    An over-long single-line string value is rewritten as a multi-line string with line continuations.
    """
    source = """
k = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. In et lectus nec erat condimentum scelerisque gravida sed ipsum. Mauris non orci tincidunt, viverra enim eget, tincidunt orci. Sed placerat nibh vitae ante maximus egestas maximus eu quam. Praesent vehicula mauris vestibulum, mattis turpis sollicitudin, aliquam felis. Pellentesque volutpat pharetra purus vel finibus. Vestibulum sed tempus dui. Maecenas auctor sit amet diam et porta. Morbi id libero at elit ultricies porta vel vitae nullam. "
"""
    expected = """
k = \"\"\"
Lorem ipsum dolor sit amet, consectetur adipiscing elit. In et lectus nec erat condimentum scelerisque gravida sed \\
ipsum. Mauris non orci tincidunt, viverra enim eget, tincidunt orci. Sed placerat nibh vitae ante maximus egestas \\
maximus eu quam. Praesent vehicula mauris vestibulum, mattis turpis sollicitudin, aliquam felis. Pellentesque volutpat \\
pharetra purus vel finibus. Vestibulum sed tempus dui. Maecenas auctor sit amet diam et porta. Morbi id libero at elit \\
ultricies porta vel vitae nullam. \"\"\"
"""
    assert_prettifier_works(source, expected, line_length_limiter)
def test_splitting_array():
    """
    An over-long array is broken onto several lines: the data is unchanged and every resulting
    line is shorter than 120 characters.
    """
    source = """
somethingweird = false
[section]
k = [4, 8, 15, 16, 23, 42, 4, 8, 15, 16, 23, 42, 4, 8, 15, 16, 23, 42, 4, 8, 15, 16, 23, 42, 4, 8, 15, 16, 23, 42, 4, 8, 15, 16, 23, 42, 4, 8, 15, 16, 23, 42]
[data]
id = 12
"""
    limited = line_length_limiter(text_to_elements(source))
    output = elements_to_text(limited)

    assert pytoml.loads(output) == pytoml.loads(source)
    assert all(len(output_line) < 120 for output_line in output.split('\n'))
+29
View File
@@ -0,0 +1,29 @@
from .tableassignment import table_assignment_spacing
from .common import assert_prettifier_works
def test_table_assignment_spacing():
    """
    Every assignment ends up with exactly one space on either side of the '=' sign.
    """
    source = """
key1= "my value"
key2 =42
keys = [4, 5,1]
[section]
key1= "my value"
key2 =42
keys = [4, 5,1]
"""
    expected = """
key1 = "my value"
key2 = 42
keys = [4, 5,1]
[section]
key1 = "my value"
key2 = 42
keys = [4, 5,1]
"""
    assert_prettifier_works(source, expected, table_assignment_spacing)
+45
View File
@@ -0,0 +1,45 @@
from .tableentrysort import sort_table_entries
from .common import assert_prettifier_works
def test_table_sorting():
# Input: an anonymous table whose keys are in arbitrary order, with a multi-line array value
# and a comment line sitting directly above the 'id' entry.
toml_text = """description = ""
firstname = "adnan"
lastname = "fatayerji"
git_aydo = ""
groups = ["sales", "dubai", "mgmt"]
skype = ""
emails = ["adnan@incubaid.com",
"fatayera@incubaid.com",
"adnan.fatayerji@incubaid.com",
"adnan@greenitglobe.com",
"fatayera@greenitglobe.com",
"adnan.fatayerji@greenitglobe.com"]
# I really like this table
id = "fatayera"
git_github = ""
telegram = "971507192009"
mobiles = ["971507192009"]
"""
# Expected: the same entries ordered alphabetically by key; the multi-line array stays intact
# and the comment is still directly above 'id'.
prettified = """description = ""
emails = ["adnan@incubaid.com",
"fatayera@incubaid.com",
"adnan.fatayerji@incubaid.com",
"adnan@greenitglobe.com",
"fatayera@greenitglobe.com",
"adnan.fatayerji@greenitglobe.com"]
firstname = "adnan"
git_aydo = ""
git_github = ""
groups = ["sales", "dubai", "mgmt"]
# I really like this table
id = "fatayera"
lastname = "fatayerji"
mobiles = ["971507192009"]
skype = ""
telegram = "971507192009"
"""
assert_prettifier_works(toml_text, prettified, sort_table_entries)
+25
View File
@@ -0,0 +1,25 @@
from .tableindent import table_entries_should_be_uniformly_indented
from .common import assert_prettifier_works
def test_table_entries_should_be_uniformly_indented():
# Passes a two-level table document through table_entries_should_be_uniformly_indented.
# NOTE(review): in this view the input and expected literals look identical; the indentation
# differences this test presumably relies on are not visible here -- confirm against the real file.
toml_text = """
[firstlevel]
hello = "my name"
my_id = 12
[firstlevel.secondlevel]
my_truth = False
"""
expected_toml_text = """
[firstlevel]
hello = "my name"
my_id = 12
[firstlevel.secondlevel]
my_truth = False
"""
assert_prettifier_works(toml_text, expected_toml_text, table_entries_should_be_uniformly_indented)
+34
View File
@@ -0,0 +1,34 @@
from .tablesep import table_separation
from .common import assert_prettifier_works
def test_table_separation():
# Passes a document with an anonymous table and two named tables through table_separation.
# NOTE(review): in this view the input and expected literals look identical; the blank-line
# differences this test presumably relies on are not visible here -- confirm against the real file.
toml_text = """key1 = "value1"
key2 = 22
[section]
k = false
m= "true"
[another.section]
l = "t"
creativity = "on vacation"
"""
expected_toml_text = """key1 = "value1"
key2 = 22
[section]
k = false
m= "true"
[another.section]
l = "t"
creativity = "on vacation"
"""
assert_prettifier_works(toml_text, expected_toml_text, table_separation)
+12
View File
@@ -0,0 +1,12 @@
from .prettifier import prettify
from .prettifier.common import assert_prettifier_works
import pytoml
def test_prettifying_against_humanly_verified_sample():
    """
    Prettifies sample.toml and compares the outcome with the hand-checked sample-prettified.toml.

    Both files are read relative to the current working directory. The second assertion makes sure
    the two samples parse to the same data.
    """
    # Context managers close the file handles deterministically instead of leaving that to the GC.
    with open('sample.toml') as source_file:
        toml_source = source_file.read()
    with open('sample-prettified.toml') as expected_file:
        expected = expected_file.read()

    assert_prettifier_works(toml_source, expected, prettify)
    assert pytoml.loads(toml_source) == pytoml.loads(expected)
+22
View File
@@ -0,0 +1,22 @@
from prettytoml.util import is_sequence_like, is_dict_like, chunkate_string
def test_is_sequence_like():
    """A list counts as sequence-like, a plain integer does not."""
    a_list = [1, 3, 4]
    an_integer = 42
    assert is_sequence_like(a_list)
    assert not is_sequence_like(an_integer)
def test_is_dict_like():
    """A dict counts as dict-like; an integer and a list do not."""
    assert is_dict_like({'name': False})
    for not_a_dict in (42, [4, 8, 15]):
        assert not is_dict_like(not_a_dict)
def test_chunkate_string():
    """
    Chunks joined together must give back the text, and no chunk may exceed the requested length.
    """
    text = """Lorem ipsum dolor sit amet, consectetur adipiscing elit. In et lectus nec erat condimentum scelerisque gravida sed ipsum. Mauris non orci tincidunt, viverra enim eget, tincidunt orci. Sed placerat nibh vitae ante maximus egestas maximus eu quam. Praesent vehicula mauris vestibulum, mattis turpis sollicitudin, aliquam felis. Pellentesque volutpat pharetra purus vel finibus. Vestibulum sed tempus dui. Maecenas auctor sit amet diam et porta. Morbi id libero at elit ultricies porta vel vitae nullam. """
    # chunkate_string is a generator: materialise it once, otherwise the join below exhausts it
    # and the length check would iterate over nothing and pass vacuously.
    chunks = list(chunkate_string(text, 50))
    assert ''.join(chunks) == text
    assert all(len(chunk) <= 50 for chunk in chunks)
+136
View File
@@ -0,0 +1,136 @@
"""
TOML lexical tokens.
"""
class TokenType:
    """
    The concrete type of a source token: a name, a priority and a metadata flag.

    The priority breaks ties in the lexer: when several token types can be recognised as the next
    token and they all cover the same amount of source, the type with the lowest priority value is
    favoured. TokenType instances order themselves by that priority.
    """

    def __init__(self, name, priority, is_metadata):
        self._name = name
        self._priority = priority
        self._is_metadata = is_metadata

    @property
    def priority(self):
        return self._priority

    @property
    def is_metadata(self):
        return self._is_metadata

    def __lt__(self, other):
        # Anything that is not a TokenType never compares as greater than this instance.
        if not isinstance(other, TokenType):
            return False
        return self._priority < other.priority

    def __repr__(self):
        return "{}-{}".format(self.priority, self._name)
# Possible types of tokens.
# Arguments are (name, priority, is_metadata). Per the TokenType docstring, a lower priority value
# wins when several token types match the same length of source.

# Priority 0: booleans, integers and the bracket/comma/assignment operators.
TYPE_BOOLEAN = TokenType('boolean', 0, is_metadata=False)
TYPE_INTEGER = TokenType('integer', 0, is_metadata=False)
TYPE_OP_COMMA = TokenType('comma', 0, is_metadata=True)
TYPE_OP_SQUARE_LEFT_BRACKET = TokenType('square_left_bracket', 0, is_metadata=True)
TYPE_OP_SQUARE_RIGHT_BRACKET = TokenType('square_right_bracket', 0, is_metadata=True)
TYPE_OP_CURLY_LEFT_BRACKET = TokenType('curly_left_bracket', 0, is_metadata=True)
TYPE_OP_CURLY_RIGHT_BRACKET = TokenType('curly_right_bracket', 0, is_metadata=True)
TYPE_OP_ASSIGNMENT = TokenType('assignment', 0, is_metadata=True)
TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET = TokenType('double_square_left_bracket', 0, is_metadata=True)
TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET = TokenType('double_square_right_bracket', 0, is_metadata=True)
# Floats rank just below integers.
TYPE_FLOAT = TokenType('float', 1, is_metadata=False)
# Dates and the dot operator share priority 40.
TYPE_DATE = TokenType('date', 40, is_metadata=False)
TYPE_OPT_DOT = TokenType('dot', 40, is_metadata=True)
TYPE_BARE_STRING = TokenType('bare_string', 50, is_metadata=False)
# All quoted string flavours share priority 90.
TYPE_STRING = TokenType('string', 90, is_metadata=False)
TYPE_MULTILINE_STRING = TokenType('multiline_string', 90, is_metadata=False)
TYPE_LITERAL_STRING = TokenType('literal_string', 90, is_metadata=False)
TYPE_MULTILINE_LITERAL_STRING = TokenType('multiline_literal_string', 90, is_metadata=False)
# Newlines, whitespace and comments carry the highest priority values, so they lose every tie.
TYPE_NEWLINE = TokenType('newline', 91, is_metadata=True)
TYPE_WHITESPACE = TokenType('whitespace', 93, is_metadata=True)
TYPE_COMMENT = TokenType('comment', 95, is_metadata=True)
def is_operator(token):
    """
    Tells whether the type of the given token is one of the operator token types
    (comma, brackets of every flavour, assignment, dot).
    """
    operator_types = (
        TYPE_OP_COMMA,
        TYPE_OP_SQUARE_LEFT_BRACKET,
        TYPE_OP_SQUARE_RIGHT_BRACKET,
        TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET,
        TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET,
        TYPE_OP_CURLY_LEFT_BRACKET,
        TYPE_OP_CURLY_RIGHT_BRACKET,
        TYPE_OP_ASSIGNMENT,
        TYPE_OPT_DOT,
    )
    return token.type in operator_types
def is_string(token):
    """
    Tells whether the type of the given token is one of the string token types
    (basic, literal, their multi-line variants, or bare).
    """
    string_types = (
        TYPE_STRING,
        TYPE_MULTILINE_STRING,
        TYPE_LITERAL_STRING,
        TYPE_BARE_STRING,
        TYPE_MULTILINE_LITERAL_STRING,
    )
    return token.type in string_types
class Token:
    """
    A token/lexeme in a TOML source file.

    Two tokens are equal when both their type and their source substring are equal; the position
    (row/col) takes no part in the comparison. A Token instance is naturally ordered by its type.
    """

    def __init__(self, _type, source_substring, col=None, row=None):
        self._source_substring = source_substring
        self._type = _type
        self._col = col
        self._row = row

    def __eq__(self, other):
        if not isinstance(other, Token):
            return False
        return self.source_substring == other.source_substring and self.type == other.type

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this, != would compare identities.
        return not self.__eq__(other)

    def __hash__(self):
        # Kept consistent with __eq__ (same fields) so that equal tokens hash alike; defining
        # __eq__ alone would make instances unhashable on Python 3.
        return hash((self._source_substring, self._type))

    @property
    def col(self):
        """
        Column number (1-indexed).
        """
        return self._col

    @property
    def row(self):
        """
        Row number (1-indexed).
        """
        return self._row

    @property
    def type(self):
        """
        One of the TYPE_* constants.
        """
        return self._type

    @property
    def source_substring(self):
        """
        The substring of the initial source file containing this token.
        """
        return self._source_substring

    def __lt__(self, other):
        return isinstance(other, Token) and self.type < other.type

    def __repr__(self):
        return "{}: {}".format(self.type, self.source_substring)
+13
View File
@@ -0,0 +1,13 @@
from prettytoml.errors import TOMLError
class DeserializationError(TOMLError):
    """Base error for failures while turning a token into a Python value."""
class BadEscapeCharacter(DeserializationError):
    """
    Raised for a string that contains an unsupported backslash escape.

    Derives from DeserializationError (itself a TOMLError) so that callers guarding a
    deserialization with ``except DeserializationError`` also see bad escapes; handlers written
    against TOMLError or BadEscapeCharacter keep working unchanged.
    """
class MalformedDateError(DeserializationError):
    """Deserialization error for a date token whose text is not in the accepted format."""
+134
View File
@@ -0,0 +1,134 @@
"""
A converter of python values to TOML Token instances.
"""
import codecs
import datetime
import six
import strict_rfc3339
import timestamp
from prettytoml import tokens
import re
from prettytoml.elements.metadata import NewlineElement
from prettytoml.errors import TOMLError
from prettytoml.tokens import Token
from prettytoml.util import chunkate_string
class NotPrimitiveError(TOMLError):
    """Raised when a value that is not a primitive atomic value is asked to become a single token."""
# Ready-made tokens for the fixed-text token types, keyed by token type; served by operator_token().
# NOTE(review): the curly-bracket and assignment operator types have no entry here, so looking them
# up raises KeyError -- confirm that no caller needs them.
_operator_tokens_by_type = {
tokens.TYPE_OP_SQUARE_LEFT_BRACKET: tokens.Token(tokens.TYPE_OP_SQUARE_LEFT_BRACKET, u'['),
tokens.TYPE_OP_SQUARE_RIGHT_BRACKET: tokens.Token(tokens.TYPE_OP_SQUARE_RIGHT_BRACKET, u']'),
tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET: tokens.Token(tokens.TYPE_OP_DOUBLE_SQUARE_LEFT_BRACKET, u'[['),
tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET: tokens.Token(tokens.TYPE_OP_DOUBLE_SQUARE_RIGHT_BRACKET, u']]'),
tokens.TYPE_OP_COMMA: tokens.Token(tokens.TYPE_OP_COMMA, u','),
tokens.TYPE_NEWLINE: tokens.Token(tokens.TYPE_NEWLINE, u'\n'),
tokens.TYPE_OPT_DOT: tokens.Token(tokens.TYPE_OPT_DOT, u'.'),
}
def operator_token(token_type):
    """
    Returns the shared, pre-built token for the given token type.

    Raises KeyError when the type has no entry in _operator_tokens_by_type.
    """
    prebuilt_token = _operator_tokens_by_type[token_type]
    return prebuilt_token
def create_primitive_token(value, multiline_strings_allowed=True):
    """
    Creates and returns a single token for the given primitive atomic value.

    None is serialized as an empty string token. Booleans are tested before integers because bool
    is a subclass of int. multiline_strings_allowed is only consulted for string values.

    Raises NotPrimitiveError when the given value is not a primitive atomic value
    """
    if value is None:
        return create_primitive_token('')
    elif isinstance(value, bool):
        return tokens.Token(tokens.TYPE_BOOLEAN, u'true' if value else u'false')
    elif isinstance(value, six.integer_types):
        # six.integer_types also covers Python 2's long, which a bare int check rejects
        return tokens.Token(tokens.TYPE_INTEGER, u'{}'.format(value))
    elif isinstance(value, float):
        return tokens.Token(tokens.TYPE_FLOAT, u'{}'.format(value))
    elif isinstance(value, (datetime.datetime, datetime.date, datetime.time)):
        # NOTE(review): relies on the imported `timestamp` module being callable and returning
        # milliseconds -- confirm against that package.
        ts = timestamp(value) // 1000
        return tokens.Token(tokens.TYPE_DATE, strict_rfc3339.timestamp_to_rfc3339_utcoffset(ts))
    elif isinstance(value, six.string_types):
        return create_string_token(value, multiline_strings_allowed=multiline_strings_allowed)

    raise NotPrimitiveError("{} of type {}".format(value, type(value)))
# \Z anchors at the very end of the text; '$' would also match just before a trailing newline and
# let a value such as 'abc\n' through as a bare string.
_bare_string_regex = re.compile(r'^[a-zA-Z0-9_-]*\Z')


def create_string_token(text, bare_string_allowed=False, multiline_strings_allowed=True):
    """
    Creates and returns a single string token.

    The empty string always becomes the quoted token '""'. When bare_string_allowed is set and the
    text consists of ASCII letters, digits, '_' and '-' only, a bare string token is produced.
    When multiline_strings_allowed is set, text with two or more newlines or longer than 80
    characters uses the multi-line string format. Everything else becomes an escaped, quoted
    single-line string.

    Raises ValueError on non-string input.
    """
    if not isinstance(text, six.string_types):
        raise ValueError('Given value must be a string')

    if text == '':
        return tokens.Token(tokens.TYPE_STRING, '""')
    elif bare_string_allowed and _bare_string_regex.match(text):
        return tokens.Token(tokens.TYPE_BARE_STRING, text)
    elif multiline_strings_allowed and (text.count('\n') >= 2 or len(text) > 80):
        # If containing two or more newlines or is longer than 80 characters we'll use the multiline string format
        return _create_multiline_string_token(text)
    else:
        return tokens.Token(tokens.TYPE_STRING, '"{}"'.format(_escape_single_line_quoted_string(text)))
def _escape_single_line_quoted_string(text):
    """
    Returns text escaped for use between the double quotes of a single-line string token.

    The text goes through the 'unicode-escape' codec and double quotes get a backslash in front.
    On Python 2 the text additionally goes through 'string-escape' and escaped single quotes are
    turned back into plain ones.
    """
    if not six.PY2:
        unicode_escaped = codecs.encode(text, 'unicode-escape').decode()
        return unicode_escaped.replace('"', '\\"')

    string_escaped = text.encode('unicode-escape').encode('string-escape')
    with_quotes_escaped = string_escaped.replace('"', '\\"')
    return with_quotes_escaped.replace("\\'", "'")
def _create_multiline_string_token(text):
    """
    Creates a multi-line basic string token for the given text.

    Triple double-quotes inside the text are backslash-escaped so that they cannot terminate the
    string early. Text longer than 50 characters (after escaping) is broken into backslash-terminated
    lines by _break_long_text and placed on its own lines between the delimiters.
    """
    # The replacement must contain real backslashes: u'\"\"\"' is the very same string as u'"""'.
    escaped = text.replace(u'"""', u'\\"\\"\\"')
    if len(escaped) > 50:
        return tokens.Token(tokens.TYPE_MULTILINE_STRING, u'"""\n{}\\\n"""'.format(_break_long_text(escaped)))
    else:
        return tokens.Token(tokens.TYPE_MULTILINE_STRING, u'"""{}"""'.format(escaped))
def _break_long_text(text, maximum_length=75):
"""
Breaks into lines of 75 character maximum length that are terminated by a backslash.
"""
def next_line(remaining_text):
# Returns a line and the remaining text
if '\n' in remaining_text and remaining_text.index('\n') < maximum_length:
i = remaining_text.index('\n')
return remaining_text[:i+1], remaining_text[i+2:]
elif len(remaining_text) > maximum_length and ' ' in remaining_text:
i = remaining_text[:maximum_length].rfind(' ')
return remaining_text[:i+1] + '\\\n', remaining_text[i+2:]
else:
return remaining_text, ''
remaining_text = text
lines = []
while remaining_text:
line, remaining_text = next_line(remaining_text)
lines += [line]
return ''.join(lines)
def create_whitespace(source_substring):
    """Wraps the given source text in a whitespace token."""
    whitespace_token = Token(tokens.TYPE_WHITESPACE, source_substring)
    return whitespace_token
def create_multiline_string(text, maximum_line_length=120):
    """
    Builds a multi-line string token whose content is split into backslash-terminated lines.

    Triple double-quotes in the text are backslash-escaped first; the escaped text is then cut by
    chunkate_string using maximum_line_length and the chunks are joined with backslash-newline.
    """
    escaped_text = text.replace(u'"""', six.u(r'\"\"\"'))
    chunks = chunkate_string(escaped_text, maximum_line_length)
    joined_chunks = u'\\\n'.join(chunks)
    return Token(tokens.TYPE_MULTILINE_STRING, u'"""\n{}"""'.format(joined_chunks))
+69
View File
@@ -0,0 +1,69 @@
import datetime
import strict_rfc3339
from prettytoml import tokens
from prettytoml.tokens import py2toml
def test_string():
    """A bare-allowed identifier stays bare; a sentence gets quoted with its double quotes escaped."""
    bare_token = py2toml.create_string_token('fawzy', bare_string_allowed=True)
    assert bare_token == tokens.Token(tokens.TYPE_BARE_STRING, 'fawzy')

    quoted_token = py2toml.create_primitive_token('I am a "cr\'azy" sentence.')
    expected_quoted = tokens.Token(tokens.TYPE_STRING, '"I am a \\"cr\'azy\\" sentence."')
    assert quoted_token == expected_quoted
def test_multiline_string():
# A text with more than two newlines must come out in the multi-line string format.
text = 'The\nSuper\nT"""OML"""\n\nIs coming'
primitive_token = py2toml.create_primitive_token(text)
# NOTE(review): the expected literal below is not a raw string, so each \" in it is a plain
# double quote; the expectation therefore contains unescaped """ inside the multi-line string.
# Confirm whether the embedded quotes are meant to be backslash-escaped in the token.
assert primitive_token.source_substring == '"""The\nSuper\nT\"\"\"OML\"\"\"\n\nIs coming"""'
def test_long_string():
# A long single-paragraph text must come out as a multi-line string broken into
# backslash-terminated lines.
text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse faucibus nibh id urna euismod, " \
"vitae blandit nisi blandit. Nam eu odio ex. Praesent iaculis sapien justo. Proin vehicula orci rhoncus " \
"risus mattis cursus. Sed quis commodo diam. Morbi dictum fermentum ex. Ut augue lorem, facilisis eu " \
"posuere ut, ullamcorper et quam. Donec porta neque eget erat lacinia, in convallis elit scelerisque. " \
"Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Praesent " \
"felis metus, venenatis eu aliquam vel, fringilla in turpis. Praesent interdum pulvinar enim, et mattis " \
"urna dapibus et. Sed ut egestas mauris. Etiam eleifend dui."
primitive_token = py2toml.create_primitive_token(text)
# NOTE(review): every continuation line of the expected text below lacks the first letter of its
# first word ("aucibus" for "faucibus", "raesent" for "Praesent", ...), i.e. the expectation
# pins the loss of one character at each line break. Confirm whether that is intended.
assert primitive_token.source_substring[3:-3] == r"""
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse \
aucibus nibh id urna euismod, vitae blandit nisi blandit. Nam eu odio ex. \
raesent iaculis sapien justo. Proin vehicula orci rhoncus risus mattis \
ursus. Sed quis commodo diam. Morbi dictum fermentum ex. Ut augue lorem, \
acilisis eu posuere ut, ullamcorper et quam. Donec porta neque eget erat \
acinia, in convallis elit scelerisque. Class aptent taciti sociosqu ad \
itora torquent per conubia nostra, per inceptos himenaeos. Praesent felis \
etus, venenatis eu aliquam vel, fringilla in turpis. Praesent interdum \
ulvinar enim, et mattis urna dapibus et. Sed ut egestas mauris. Etiam \
leifend dui.\
"""
def test_int():
    """An integer is serialized as an integer token holding its decimal text."""
    expected_token = tokens.Token(tokens.TYPE_INTEGER, '42')
    assert py2toml.create_primitive_token(42) == expected_token
def test_float():
    """A float is serialized as a float token holding its text form."""
    expected_token = tokens.Token(tokens.TYPE_FLOAT, '4.2')
    assert py2toml.create_primitive_token(4.2) == expected_token
def test_bool():
    """Booleans are serialized as the lowercase words 'false' and 'true'."""
    false_token = py2toml.create_primitive_token(False)
    assert false_token == tokens.Token(tokens.TYPE_BOOLEAN, 'false')

    true_token = py2toml.create_primitive_token(True)
    assert true_token == tokens.Token(tokens.TYPE_BOOLEAN, 'true')
def test_date():
    """A datetime built from an RFC 3339 instant is serialized as that instant in UTC ('Z') form."""
    seconds = strict_rfc3339.rfc3339_to_timestamp('1979-05-27T00:32:00-07:00')
    moment = datetime.datetime.fromtimestamp(seconds)
    date_token = py2toml.create_primitive_token(moment)
    assert date_token == tokens.Token(tokens.TYPE_DATE, '1979-05-27T07:32:00Z')
def test_none():
    """None is serialized as an empty quoted string token."""
    none_token = py2toml.create_primitive_token(None)
    assert none_token.type == tokens.TYPE_STRING and none_token.source_substring == '""'
+86
View File
@@ -0,0 +1,86 @@
from datetime import datetime
import pytz
from prettytoml import tokens
from prettytoml.tokens import toml2py
from prettytoml.tokens.errors import BadEscapeCharacter, DeserializationError
def test_integer():
    """Integer tokens deserialize to ints; underscores in the source are ignored."""
    plain = tokens.Token(tokens.TYPE_INTEGER, '42')
    with_underscores = tokens.Token(tokens.TYPE_INTEGER, '1_001_2')

    for token, expected in ((plain, 42), (with_underscores, 10012)):
        assert toml2py.deserialize(token) == expected
def test_float():
    """Float tokens (plain, exponent, underscored) deserialize to the matching Python floats."""
    expected_by_source = (
        ('4.2', 4.2),
        ('12e2', 12e2),
        ('1_000e2', 1e5),
        ('314.1e-2', 3.141),
    )

    for source, expected in expected_by_source:
        float_token = tokens.Token(tokens.TYPE_FLOAT, source)
        assert toml2py.deserialize(float_token) == expected
def test_string():
# Bare strings come back verbatim.
t0 = tokens.Token(tokens.TYPE_BARE_STRING, 'fawzy')
assert toml2py.deserialize(t0) == 'fawzy'
# Basic string: backslash escapes (\", \t, \n, \uXXXX) are resolved.
t1 = tokens.Token(tokens.TYPE_STRING, '"I\'m a string. \\"You can quote me\\". Name\\tJos\\u00E9\\nLocation\\tSF."')
assert toml2py.deserialize(t1) == u'I\'m a string. "You can quote me". Name\tJos\xe9\nLocation\tSF.'
# Multi-line basic string: the newline right after the opening delimiter is dropped.
t2 = tokens.Token(tokens.TYPE_MULTILINE_STRING, '"""\nRoses are red\nViolets are blue"""')
assert toml2py.deserialize(t2) == 'Roses are red\nViolets are blue'
# A backslash at the end of a line swallows the newline and the whitespace that follows it.
t3_str = '"""\nThe quick brown \\\n\n\n fox jumps over \\\n the lazy dog."""'
t3 = tokens.Token(tokens.TYPE_MULTILINE_STRING, t3_str)
assert toml2py.deserialize(t3) == 'The quick brown fox jumps over the lazy dog.'
t4_str = '"""\\\n The quick brown \\\n fox jumps over \\\n the lazy dog.\\\n """'
t4 = tokens.Token(tokens.TYPE_MULTILINE_STRING, t4_str)
assert toml2py.deserialize(t4) == 'The quick brown fox jumps over the lazy dog.'
# Literal string: backslashes are kept as they are.
t5 = tokens.Token(tokens.TYPE_LITERAL_STRING, r"'C:\Users\nodejs\templates'")
assert toml2py.deserialize(t5) == r'C:\Users\nodejs\templates'
# Multi-line literal string: only the first newline is trimmed.
t6_str = "'''\nThe first newline is\ntrimmed in raw strings.\n All other whitespace\n is preserved.\n'''"
t6 = tokens.Token(tokens.TYPE_MULTILINE_LITERAL_STRING, t6_str)
assert toml2py.deserialize(t6) == 'The first newline is\ntrimmed in raw strings.\n All' \
' other whitespace\n is preserved.\n'
def test_date():
    """
    Date tokens with a 'Z' or a numeric offset deserialize to the same UTC instant; a date
    without any offset is rejected with DeserializationError.
    """
    expected_instant = datetime(1979, 5, 27, 7, 32, tzinfo=pytz.utc)

    t0 = tokens.Token(tokens.TYPE_DATE, '1979-05-27T07:32:00Z')
    assert toml2py.deserialize(t0) == expected_instant

    t1 = tokens.Token(tokens.TYPE_DATE, '1979-05-27T00:32:00-07:00')
    assert toml2py.deserialize(t1) == expected_instant

    t3 = tokens.Token(tokens.TYPE_DATE, '1987-07-05T17:45:00')
    try:
        toml2py.deserialize(t3)
    except DeserializationError:
        pass
    else:
        assert False, 'Should detect malformed date'
def test_unescaping_a_string():
    """Strings holding an unsupported backslash escape must be rejected with BadEscapeCharacter."""
    bad_escapes = (
        r"This string has a bad \a escape character.",
        r'\x33',
    )

    for source in bad_escapes:
        # Should complain about bad escape jobs
        try:
            toml2py._unescape_str(source)
        except BadEscapeCharacter:
            continue
        assert False, "Should have thrown an exception for: " + source
+131
View File
@@ -0,0 +1,131 @@
import re
import string
import iso8601
from prettytoml import tokens
from prettytoml.tokens import TYPE_BOOLEAN, TYPE_INTEGER, TYPE_FLOAT, TYPE_DATE, \
TYPE_MULTILINE_STRING, TYPE_BARE_STRING, TYPE_MULTILINE_LITERAL_STRING, TYPE_LITERAL_STRING, \
TYPE_STRING
import codecs
import six
from prettytoml.tokens.errors import MalformedDateError
from .errors import BadEscapeCharacter
import functools
import operator
def deserialize(token):
    """
    Converts a single tokens.Token instance into the Python value it stands for, picking the
    conversion by the token's type.

    Raises DeserializationError when appropriate.
    """
    string_types = (TYPE_STRING, TYPE_MULTILINE_STRING, TYPE_BARE_STRING,
                    TYPE_LITERAL_STRING, TYPE_MULTILINE_LITERAL_STRING)

    if token.type == TYPE_BOOLEAN:
        return _to_boolean(token)
    if token.type == TYPE_INTEGER:
        return _to_int(token)
    if token.type == TYPE_FLOAT:
        return _to_float(token)
    if token.type == TYPE_DATE:
        return _to_date(token)
    if token.type in string_types:
        return _to_string(token)

    raise Exception('This should never happen!')
def _unescape_str(text):
    """
    Resolves the backslash escapes of a string according the TOML spec.

    Raises BadEscapeCharacter when a backslash (one that is not itself preceded by a backslash) is
    followed by a character other than b, t, n, f, r, a double quote, a backslash, u or U.
    """
    # Detect bad escape jobs
    if re.search(r'([^\\]|^)\\[^btnfr"\\uU]', text):
        raise BadEscapeCharacter

    # Do the unescaping
    unicode_escaped = _unicode_escaped_string(text)
    if six.PY2:
        return unicode_escaped.decode('string-escape').decode('unicode-escape')
    return codecs.decode(unicode_escaped, 'unicode-escape')
def _unicode_escaped_string(text):
"""
Escapes all unicode characters in the given string
"""
# On Python 2 work on a unicode object so that iteration below yields characters.
if six.PY2:
text = unicode(text)
# "Unicode" here means: the lowercased character is not an ASCII letter, digit, punctuation
# or whitespace character.
def is_unicode(c):
return c.lower() not in string.ascii_letters + string.whitespace + string.punctuation + string.digits
# Returns the 'unicode-escape' encoded (byte) form of a single character.
def escape_unicode_char(x):
if six.PY2:
return x.encode('unicode-escape')
else:
return codecs.encode(x, 'unicode-escape')
# Only rebuild the text when at least one character needs escaping.
if any(is_unicode(c) for c in text):
# Every character becomes bytes (escaped or plainly encoded) so the pieces can be added up
# and decoded back to text in one go.
homogeneous_chars = tuple(escape_unicode_char(c) if is_unicode(c) else c.encode() for c in text)
homogeneous_bytes = functools.reduce(operator.add, homogeneous_chars)
return homogeneous_bytes.decode()
else:
return text
def _to_string(token):
    """
    Returns the Python string denoted by a string token of any flavour.

    Bare strings are returned verbatim. Basic strings lose their quotes and have their escapes
    resolved. Multi-line basic strings additionally drop a newline directly after the opening
    delimiter and every backslash-newline together with the whitespace following it. Literal
    strings lose their quotes only; multi-line literal strings also drop a leading newline.

    Raises BadEscapeCharacter (via _unescape_str) for bad escapes in basic strings.
    """
    if token.type == tokens.TYPE_BARE_STRING:
        return token.source_substring

    elif token.type == tokens.TYPE_STRING:
        escaped = token.source_substring[1:-1]
        return _unescape_str(escaped)

    elif token.type == tokens.TYPE_MULTILINE_STRING:
        escaped = token.source_substring[3:-3]

        # Drop the first newline if existed
        if escaped and escaped[0] == '\n':
            escaped = escaped[1:]

        # Remove all occurrences of a slash-newline-zero-or-more-whitespace patterns
        escaped = re.sub(r'\\\n\s*', repl='', string=escaped, flags=re.DOTALL)
        return _unescape_str(escaped)

    elif token.type == tokens.TYPE_LITERAL_STRING:
        return token.source_substring[1:-1]

    elif token.type == tokens.TYPE_MULTILINE_LITERAL_STRING:
        text = token.source_substring[3:-3]
        # startswith() is safe on the empty string, where indexing text[0] raises IndexError
        if text.startswith('\n'):
            text = text[1:]
        return text

    raise RuntimeError('Control should never reach here.')
def _to_int(token):
return int(token.source_substring.replace('_', ''))
def _to_float(token):
    """Parses the source text of a float token as a float, ignoring underscore separators."""
    assert token.type == tokens.TYPE_FLOAT
    without_underscores = token.source_substring.replace('_', '')
    return float(without_underscores)
def _to_boolean(token):
return token.source_substring == 'true'
# Date and time down to the second, followed by either 'Z' or a numeric +hh:mm / -hh:mm offset.
_correct_date_format = re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(Z|(\+|-)\d{2}:\d{2})')


def _to_date(token):
    """
    Parses the token's source text as a date.

    Raises MalformedDateError when the text does not start with the accepted format.
    """
    source = token.source_substring
    if _correct_date_format.match(source) is None:
        raise MalformedDateError
    return iso8601.parse_date(source)
+141
View File
@@ -0,0 +1,141 @@
import math
import itertools
def is_sequence_like(x):
    """
    Tells whether x offers a sequence-like interface, i.e. has both __len__ and __getitem__.
    """
    for required_attr in ('__len__', '__getitem__'):
        if not hasattr(x, required_attr):
            return False
    return True
def is_dict_like(x):
    """
    Tells whether x offers a dict-like interface, i.e. has __len__, __getitem__, keys and values.
    """
    has_container_protocol = hasattr(x, '__len__') and hasattr(x, '__getitem__')
    has_mapping_views = hasattr(x, 'keys') and hasattr(x, 'values')
    return has_container_protocol and has_mapping_views
def join_with(iterable, separator):
    """
    Returns a list holding the elements of iterable with separator spliced in between them.

    Elements that are lists, tuples or sets are flattened one level into the result. The separator
    is appended with list extension, so it must be an iterable that can be added to a list.
    """
    elements = list(iterable)
    last_position = len(elements) - 1
    joined = []
    for position, element in enumerate(elements):
        if isinstance(element, (list, tuple, set)):
            joined.extend(tuple(element))
        else:
            joined.append(element)
        if position < last_position:
            joined.extend(separator)
    return joined
def chunkate_string(text, length):
    """
    Yields consecutive chunks of text; joined together the chunks give back the text.

    A chunk ends right after the last space or tab found within `length` characters of its start,
    or just before the next newline, whichever comes first. A word is never broken, so a chunk can
    be longer than `length` when no space or tab is available within that window.
    """
    text_length = len(text)
    position = 0

    while position < text_length:
        # First newline strictly after the current position (the chunk stops just before it)
        newline_at = text.find('\n', position + 1)
        if newline_at == -1:
            newline_at = text_length

        # Last space or tab inside [position, position + length); str.rfind scans in C instead
        # of rebuilding an enumerated copy of the whole text for every chunk.
        window_end = position + max(length, 0)
        breaker_at = max(text.rfind(' ', position, window_end), text.rfind('\t', position, window_end))
        if breaker_at == -1:
            breaker_at = text_length

        next_chunk = text[position:min(newline_at, breaker_at + 1)]
        position += len(next_chunk)
        yield next_chunk
def flatten_nested(nested_dicts):
    """
    Flattens nested dicts, lists and tuples into a single dict keyed by tuples of nested keys.

    Dict keys and sequence indices are accumulated into the key tuple leading to every leaf value.
    Empty nested collections contribute nothing to the result.

    Example

    >>> flatten_nested({'dict1': {'name': 'Jon', 'id': 42}, 'seq1': [{'one': 1}]}) == {
    ...     ('dict1', 'name'): 'Jon', ('dict1', 'id'): 42, ('seq1', 0, 'one'): 1}
    True
    """
    assert isinstance(nested_dicts, (dict, list, tuple)), 'Only works with a collection parameter'

    collection_types = (dict, list, tuple)

    def walk(path, collection):
        # Yields (key tuple, leaf value) pairs, depth first, in iteration order
        if isinstance(collection, dict):
            pairs = collection.items()
        elif isinstance(collection, (list, tuple)):
            pairs = enumerate(collection)
        else:
            raise RuntimeError('c must be a collection')

        for key, value in pairs:
            if isinstance(value, collection_types):
                for leaf in walk(path + (key,), value):
                    yield leaf
            else:
                yield path + (key,), value

    return dict(walk((), nested_dicts))
class PeekableIterator:
    """
    Wraps an iterator and adds peek(), which looks at the next value without consuming it.
    """

    # Returned by peek() when the iterator is exhausted. Truthiness is False.
    Nothing = tuple()

    def __init__(self, iter):
        self._iter = iter
        # Holds at most one value that was fetched by peek() and not yet handed out by next().
        # Buffering it keeps the wrapped iterator as is, instead of wrapping it in one more
        # itertools.chain layer on every peek.
        self._peeked = []

    def __next__(self):
        if self._peeked:
            return self._peeked.pop()
        return next(self._iter)

    def next(self):
        return self.__next__()

    def __iter__(self):
        return self

    def peek(self):
        """
        Returns the value the next call to next() will return, without consuming it.

        Returns PeekableIterator.Nothing when the iterator is exhausted.
        """
        if not self._peeked:
            try:
                self._peeked.append(next(self._iter))
            except StopIteration:
                return PeekableIterator.Nothing
        return self._peeked[0]
+3
View File
@@ -0,0 +1,3 @@
from .core import TomlError
from .parser import load, loads
from .writer import dump, dumps
Vendored Executable
+13
View File
@@ -0,0 +1,13 @@
class TomlError(RuntimeError):
    """
    Error carrying a message together with the line, column and file name it refers to.
    """

    def __init__(self, message, line, col, filename):
        super(TomlError, self).__init__(message, line, col, filename)
        self.filename = filename
        self.line = line
        self.col = col
        self.message = message

    def __repr__(self):
        fields = (self.message, self.line, self.col, self.filename)
        return 'TomlError({!r}, {!r}, {!r}, {!r})'.format(*fields)

    def __str__(self):
        return '{}({}, {}): {}'.format(self.filename, self.line, self.col, self.message)
+374
View File
@@ -0,0 +1,374 @@
import string, re, sys, datetime
from .core import TomlError
if sys.version_info[0] == 2:
_chr = unichr
else:
_chr = chr
def load(fin, translate=lambda t, x, v: v):
    """
    Reads all of the file-like object fin and parses it with loads().

    The object's `name` attribute is used as the file name in errors; repr(fin) stands in when
    there is no such attribute.
    """
    filename = getattr(fin, 'name', repr(fin))
    return loads(fin.read(), translate=translate, filename=filename)
# Parses TOML text (str, or bytes decoded as UTF-8) and returns the document as nested dicts.
# `translate(kind, text, value)` is called for every parsed value and its result is what gets
# stored; `filename` only appears in raised TomlError instances.
def loads(s, filename='<string>', translate=lambda t, x, v: v):
if isinstance(s, bytes):
s = s.decode('utf-8')
# Normalise Windows line ends before parsing.
s = s.replace('\r\n', '\n')
# root collects the key/values that come before any table header.
root = {}
# tables maps a table name to a (scope, subtables) pair, or to a list of such pairs for an
# array of tables; a scope of None marks a table that was only implied by a dotted child name.
tables = {}
scope = root
src = _Source(s, filename=filename)
ast = _p_toml(src)
# Raises TomlError at the position of the statement currently being processed: `pos` here is
# the loop variable of the statement loop below.
def error(msg):
raise TomlError(msg, pos[0], pos[1], filename)
# Turns a parsed (kind, text, value, pos) tuple into the final value, recursing into arrays and
# inline tables, and passes it through translate().
def process_value(v):
kind, text, value, pos = v
# NOTE(review): this strips a leading newline from every 'str' value, not only from
# multi-line strings -- confirm that single-line strings starting with an escaped newline
# are not meant to keep it.
if kind == 'str' and value.startswith('\n'):
value = value[1:]
if kind == 'array':
# All items must share the kind of the first item.
if value and any(k != value[0][0] for k, t, v, p in value[1:]):
error('array-type-mismatch')
value = [process_value(item) for item in value]
elif kind == 'table':
value = dict([(k, process_value(value[k])) for k in value])
return translate(kind, text, value)
for kind, value, pos in ast:
if kind == 'kv':
# Key/value statement: store it in the scope of the most recent table header.
k, v = value
if k in scope:
error('duplicate_keys. Key "{0}" was used more than once.'.format(k))
scope[k] = process_value(v)
else:
# Table or array-of-tables header: walk (and create) the parent tables, then register a
# fresh scope under the last name component.
is_table_array = (kind == 'table_array')
cur = tables
for name in value[:-1]:
# For an array of tables, descend into its most recent element.
if isinstance(cur.get(name), list):
d, cur = cur[name][-1]
else:
d, cur = cur.setdefault(name, (None, {}))
scope = {}
name = value[-1]
if name not in cur:
if is_table_array:
cur[name] = [(scope, {})]
else:
cur[name] = (scope, {})
elif isinstance(cur[name], list):
# Name already holds an array of tables: only another [[name]] may extend it.
if not is_table_array:
error('table_type_mismatch')
cur[name].append((scope, {}))
else:
if is_table_array:
error('table_type_mismatch')
old_scope, next_table = cur[name]
# A table that was merely implied so far (scope None) may still be defined once.
if old_scope is not None:
error('duplicate_tables')
cur[name] = (scope, next_table)
# Folds the table registry into the scopes: every sub-table becomes a key of its parent scope.
def merge_tables(scope, tables):
if scope is None:
scope = {}
for k in tables:
if k in scope:
error('key_table_conflict')
v = tables[k]
if isinstance(v, list):
scope[k] = [merge_tables(sc, tbl) for sc, tbl in v]
else:
scope[k] = merge_tables(v[0], v[1])
return scope
return merge_tables(root, tables)
class _Source:
def __init__(self, s, filename=None):
self.s = s
self._pos = (1, 1)
self._last = None
self._filename = filename
self.backtrack_stack = []
def last(self):
return self._last
def pos(self):
return self._pos
def fail(self):
return self._expect(None)
def consume_dot(self):
if self.s:
self._last = self.s[0]
self.s = self[1:]
self._advance(self._last)
return self._last
return None
def expect_dot(self):
return self._expect(self.consume_dot())
def consume_eof(self):
if not self.s:
self._last = ''
return True
return False
def expect_eof(self):
return self._expect(self.consume_eof())
def consume(self, s):
if self.s.startswith(s):
self.s = self.s[len(s):]
self._last = s
self._advance(s)
return True
return False
def expect(self, s):
return self._expect(self.consume(s))
def consume_re(self, re):
m = re.match(self.s)
if m:
self.s = self.s[len(m.group(0)):]
self._last = m
self._advance(m.group(0))
return m
return None
def expect_re(self, re):
return self._expect(self.consume_re(re))
def __enter__(self):
self.backtrack_stack.append((self.s, self._pos))
def __exit__(self, type, value, traceback):
if type is None:
self.backtrack_stack.pop()
else:
self.s, self._pos = self.backtrack_stack.pop()
return type == TomlError
def commit(self):
self.backtrack_stack[-1] = (self.s, self._pos)
def _expect(self, r):
if not r:
raise TomlError('msg', self._pos[0], self._pos[1], self._filename)
return r
def _advance(self, s):
suffix_pos = s.rfind('\n')
if suffix_pos == -1:
self._pos = (self._pos[0], self._pos[1] + len(s))
else:
self._pos = (self._pos[0] + s.count('\n'), len(s) - suffix_pos)
# Any run of spaces, tabs, newlines and '#' comments (a comment may end at a newline or at the
# end of the text). The pattern also matches the empty string.
_ews_re = re.compile(r'(?:[ \t]|#[^\n]*\n|#[^\n]*\Z|\n)*')


def _p_ews(s):
    """Skips whitespace, newlines and comments at the cursor of the source s."""
    s.expect_re(_ews_re)
# Any run of spaces and tabs; the pattern also matches the empty string.
_ws_re = re.compile(r'[ \t]*')


def _p_ws(s):
    """Skips spaces and tabs (but no newlines) at the cursor of the source s."""
    s.expect_re(_ws_re)
# Character following a backslash -> the character it stands for.
_escapes = { 'b': '\b', 'n': '\n', 'r': '\r', 't': '\t', '"': '"', '\'': '\'',
'\\': '\\', '/': '/', 'f': '\f' }
# A run of characters that are neither a double quote, a backslash nor a control character.
_basicstr_re = re.compile(r'[^"\\\000-\037]*')
# \uXXXX and \UXXXXXXXX escapes (the backslash itself is consumed separately).
_short_uni_re = re.compile(r'u([0-9a-fA-F]{4})')
_long_uni_re = re.compile(r'U([0-9a-fA-F]{8})')
_escapes_re = re.compile('[bnrt"\'\\\\/f]')
# A newline plus any following spaces, tabs and newlines (used after a line-ending backslash).
_newline_esc_re = re.compile('\n[ \t\n]*')
# Parses the content of a basic string up to (not including) the closing quote and returns it
# with escapes resolved. `content` is the pattern for a run of ordinary characters.
def _p_basicstr_content(s, content=_basicstr_re):
res = []
while True:
res.append(s.expect_re(content).group(0))
# No backslash after the ordinary run: the content ends here.
if not s.consume('\\'):
break
# Backslash at the end of a line: drop the newline and the whitespace after it.
if s.consume_re(_newline_esc_re):
pass
elif s.consume_re(_short_uni_re) or s.consume_re(_long_uni_re):
res.append(_chr(int(s.last().group(1), 16)))
else:
# Anything else must be one of the single-character escapes, otherwise TomlError is raised.
s.expect_re(_escapes_re)
res.append(_escapes[s.last().group(0)])
return ''.join(res)
# Bare key: ASCII letters, digits, '-' and '_'.
_key_re = re.compile(r'[0-9a-zA-Z-_]+')
# Parses a key at the cursor and returns it as a string. Tried in order: a double-quoted key,
# a single-quoted (literal) key, a bare key.
def _p_key(s):
# Inside `with s`, a TomlError from a failed expect() rolls the source back and is suppressed,
# so parsing falls through to the alternatives below.
with s:
s.expect('"')
r = _p_basicstr_content(s, _basicstr_re)
s.expect('"')
return r
if s.consume('\''):
# Two more quotes right after the first one open a ''' multi-line literal.
if s.consume('\'\''):
r = s.expect_re(_litstr_ml_re).group(0)
s.expect('\'\'\'')
else:
r = s.expect_re(_litstr_re).group(0)
s.expect('\'')
return r
return s.expect_re(_key_re).group(0)
# Integer or float literal: optional sign, digits with optional '_' separators, optional fraction
# and optional exponent.
_float_re = re.compile(r'[+-]?(?:0|[1-9](?:_?\d)*)(?:\.\d(?:_?\d)*)?(?:[eE][+-]?(?:\d(?:_?\d)*))?')
# Date-time with optional fractional seconds, followed by 'Z' or a +hh:mm / -hh:mm offset.
_datetime_re = re.compile(r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(\.\d+)?(?:Z|([+-]\d{2}):(\d{2}))')
# Multi-line string bodies: one or two quote characters are allowed as long as an ordinary
# character follows; newlines (\012) are allowed, other control characters are not.
_basicstr_ml_re = re.compile(r'(?:(?:|"|"")[^"\\\000-\011\013-\037])*')
_litstr_re = re.compile(r"[^'\000-\037]*")
_litstr_ml_re = re.compile(r"(?:(?:|'|'')(?:[^'\000-\011\013-\037]))*")
# Parses one value at the cursor and returns a (kind, source text, value, position) tuple.
# Raises TomlError when no value can be parsed.
def _p_value(s):
pos = s.pos()
if s.consume('true'):
return 'bool', s.last(), True, pos
if s.consume('false'):
return 'bool', s.last(), False, pos
if s.consume('"'):
# Two more quotes right after the first one open a """ multi-line string.
if s.consume('""'):
r = _p_basicstr_content(s, _basicstr_ml_re)
s.expect('"""')
else:
r = _p_basicstr_content(s, _basicstr_re)
s.expect('"')
return 'str', r, r, pos
if s.consume('\''):
if s.consume('\'\''):
r = s.expect_re(_litstr_ml_re).group(0)
s.expect('\'\'\'')
else:
r = s.expect_re(_litstr_re).group(0)
s.expect('\'')
return 'str', r, r, pos
# Date-times are tried before numbers because they start with digits as well.
if s.consume_re(_datetime_re):
m = s.last()
s0 = m.group(0)
r = map(int, m.groups()[:6])
if m.group(7):
micro = float(m.group(7))
else:
micro = 0
if m.group(8):
# NOTE(review): the minutes are added to the signed hours, so an offset such as -07:30
# comes out as -6h30 instead of -7h30 -- confirm and fix the sign handling.
g = int(m.group(8), 10) * 60 + int(m.group(9), 10)
tz = _TimeZone(datetime.timedelta(0, g * 60))
else:
tz = _TimeZone(datetime.timedelta(0, 0))
y, m, d, H, M, S = r
dt = datetime.datetime(y, m, d, H, M, S, int(micro * 1000000), tz)
return 'datetime', s0, dt, pos
if s.consume_re(_float_re):
m = s.last().group(0)
r = m.replace('_','')
# A fraction or an exponent makes it a float, otherwise it is an integer.
if '.' in m or 'e' in m or 'E' in m:
return 'float', m, float(r), pos
else:
return 'int', m, int(r, 10), pos
if s.consume('['):
items = []
# The loop ends through a TomlError (no further value, or no comma), which `with s`
# suppresses after rolling back to the latest commit().
with s:
while True:
_p_ews(s)
items.append(_p_value(s))
s.commit()
_p_ews(s)
s.expect(',')
s.commit()
_p_ews(s)
s.expect(']')
return 'array', None, items, pos
if s.consume('{'):
# Inline table: key = value pairs separated by commas; only spaces/tabs are skipped.
_p_ws(s)
items = {}
if not s.consume('}'):
k = _p_key(s)
_p_ws(s)
s.expect('=')
_p_ws(s)
items[k] = _p_value(s)
_p_ws(s)
while s.consume(','):
_p_ws(s)
k = _p_key(s)
_p_ws(s)
s.expect('=')
_p_ws(s)
items[k] = _p_value(s)
_p_ws(s)
s.expect('}')
return 'table', None, items, pos
s.fail()
# Parses one statement at the cursor and returns a (kind, payload, position) tuple:
# ('table' | 'table_array', [key, ...], pos) for a header, ('kv', (key, value), pos) otherwise.
def _p_stmt(s):
pos = s.pos()
if s.consume( '['):
# A second '[' makes it an array-of-tables header.
is_array = s.consume('[')
_p_ws(s)
keys = [_p_key(s)]
_p_ws(s)
# Dotted names: collect every component.
while s.consume('.'):
_p_ws(s)
keys.append(_p_key(s))
_p_ws(s)
s.expect(']')
if is_array:
s.expect(']')
return 'table_array' if is_array else 'table', keys, pos
key = _p_key(s)
_p_ws(s)
s.expect('=')
_p_ws(s)
value = _p_value(s)
return 'kv', (key, value), pos
# Statement separator: at least one line end (optionally preceded by spaces/tabs and a comment),
# followed by optional indentation.
_stmtsep_re = re.compile(r'(?:[ \t]*(?:#[^\n]*)?\n)+[ \t]*')
# Parses a whole document and returns the list of statements produced by _p_stmt.
# Raises TomlError when anything but whitespace and comments is left after the last statement.
def _p_toml(s):
stmts = []
_p_ews(s)
# The loop ends through a TomlError (no separator or no further statement), which `with s`
# suppresses after rolling back to the latest commit().
with s:
stmts.append(_p_stmt(s))
while True:
s.commit()
s.expect_re(_stmtsep_re)
stmts.append(_p_stmt(s))
_p_ews(s)
s.expect_eof()
return stmts
class _TimeZone(datetime.tzinfo):
    """Fixed-offset ``tzinfo``.

    ``offset`` is a ``datetime.timedelta``; it is stored as given and handed
    back unchanged by ``utcoffset``.
    """

    def __init__(self, offset):
        self._offset = offset

    def utcoffset(self, dt):
        """Return the fixed offset; ``dt`` is ignored."""
        return self._offset

    def dst(self, dt):
        """Return ``None``; no daylight-saving information is kept."""
        return None

    def tzname(self, dt):
        """Return the offset as a signed, zero-padded ``+HHMM`` / ``-HHMM``.

        The offset is floored to whole minutes; ``dt`` is ignored.
        """
        # Integer arithmetic on purpose: the values must be ints so that the
        # ``02d`` fields below zero-pad. Formatting floats with a bare
        # ``.02`` spec sets a precision instead (5.0 renders as '5.0').
        total_minutes = self._offset.days * 24 * 60 + self._offset.seconds // 60
        sign = '+'
        if total_minutes < 0:
            sign = '-'
            total_minutes = -total_minutes
        hours, minutes = divmod(total_minutes, 60)
        return '{0}{1:02d}{2:02d}'.format(sign, hours, minutes)
+127
View File
@@ -0,0 +1,127 @@
from __future__ import unicode_literals
import io, datetime, math, sys
if sys.version_info[0] == 3:
long = int
unicode = str
def dumps(obj, sort_keys=False):
    """Serialize ``obj`` to TOML and return the resulting text.

    Runs ``dump`` against an in-memory text buffer; ``sort_keys`` is
    forwarded to it unchanged.
    """
    sink = io.StringIO()
    dump(obj, sink, sort_keys=sort_keys)
    text = sink.getvalue()
    return text
# Characters that have a short backslash escape, mapped to the letter that
# follows the backslash.
_escapes = {'\n': 'n', '\r': 'r', '\\': '\\', '\t': 't', '\b': 'b', '\f': 'f', '"': '"'}

# The only characters a bare (unquoted) TOML key may contain: A-Z a-z 0-9 _ -
_BARE_KEY_CHARS = frozenset(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789-_')


def _escape_string(s):
    """Return ``s`` as a double-quoted string with special characters escaped.

    Characters listed in ``_escapes`` get their short form (``\\n``,
    ``\\"`` ...); any other character below U+0020 is written as ``\\uXXXX``.
    Everything else is copied through unchanged.
    """
    pieces = ['"']
    for ch in s:
        if ch in _escapes:
            pieces.append('\\' + _escapes[ch])
        elif ord(ch) < 0x20:
            pieces.append('\\u%04x' % ord(ch))
        else:
            pieces.append(ch)
    pieces.append('"')
    return ''.join(pieces)


def _escape_id(s):
    """Return key ``s`` in a form that is valid on the left of ``=`` or in a
    table header.

    ``s`` is returned untouched when it is a valid bare key, i.e. non-empty
    and made only of ASCII letters, ASCII digits, ``_`` and ``-``. Otherwise
    it is quoted with ``_escape_string``.
    """
    # ``str.isalnum`` is deliberately not used: it is true for non-ASCII
    # letters and digits, which are not allowed in bare keys. The explicit
    # emptiness check matters too: ``all()`` over an empty key is vacuously
    # true, and an empty key must be written as "".
    if s and all(ch in _BARE_KEY_CHARS for ch in s):
        return s
    return _escape_string(s)
def _format_list(v):
    """Render the items of ``v`` as a TOML array: ``[a, b, c]``.

    Every item is formatted with ``_format_value``; an empty ``v`` gives
    ``[]``.
    """
    rendered = [_format_value(item) for item in v]
    return '[' + ', '.join(rendered) + ']'
# Same formula as the one documented for timedelta.total_seconds():
# https://docs.python.org/2/library/datetime.html#datetime.timedelta.total_seconds
# It is spelled out here for py26; once py26 support is dropped this helper
# can be replaced by td.total_seconds().
def _total_seconds(td):
    """Return the duration of timedelta ``td`` in seconds, as a float."""
    whole_seconds = td.seconds + td.days * 24 * 3600
    total_microseconds = td.microseconds + whole_seconds * 10**6
    return total_microseconds / 10.0**6
def _format_value(v):
    """Render one non-table Python value as TOML text.

    Supported types: ``bool``, integers, finite ``float``, text, ``bytes``,
    ``datetime.datetime`` and ``list`` (via ``_format_list``).

    Datetimes are written as ``YYYY-MM-DDTHH:MM:SS[.ffffff]`` followed by
    ``Z`` when the UTC offset is zero or absent (naive datetimes), otherwise
    by a zero-padded ``+HH:MM`` / ``-HH:MM`` offset.

    Raises:
        ValueError: ``v`` is a NaN or infinite float.
        RuntimeError: ``v`` has an unsupported type (``v`` is the argument).
    """
    if isinstance(v, bool):
        # Must come before the integer check: bool is a subclass of int.
        return 'true' if v else 'false'

    if isinstance(v, datetime.datetime):
        offset = v.utcoffset()
        # Offset floored to whole minutes, using integer arithmetic so the
        # ``02d`` fields below really zero-pad. Formatting floats with a bare
        # ``.02`` spec sets a precision instead (5.0 renders as '5.0').
        if offset is None:
            minutes = 0
        else:
            minutes = offset.days * 24 * 60 + offset.seconds // 60
        if minutes == 0:
            suffix = 'Z'
        else:
            sign = '+' if minutes > 0 else '-'
            hours, mins = divmod(abs(minutes), 60)
            suffix = '{0}{1:02d}:{2:02d}'.format(sign, hours, mins)
        # The fractional part is only written when it is non-zero.
        if v.microsecond:
            return v.strftime('%Y-%m-%dT%H:%M:%S.%f') + suffix
        return v.strftime('%Y-%m-%dT%H:%M:%S') + suffix

    if isinstance(v, int) or isinstance(v, long):
        return unicode(v)

    if isinstance(v, float):
        if math.isnan(v) or math.isinf(v):
            raise ValueError("{0} is not a valid TOML value".format(v))
        return repr(v)

    if isinstance(v, unicode) or isinstance(v, bytes):
        if not isinstance(v, unicode):
            # Byte strings are assumed to hold UTF-8 text. Iterating raw
            # bytes on Python 3 yields ints, which _escape_string cannot
            # handle.
            v = v.decode('utf-8')
        return _escape_string(v)

    if isinstance(v, list):
        return _format_list(v)

    raise RuntimeError(v)
def dump(obj, fout, sort_keys=False):
    """Write mapping ``obj`` to the text stream ``fout`` as TOML.

    Each table is written as its header (omitted for the top-level table)
    followed by its plain ``key = value`` lines. Values that are dicts become
    ``[sub.tables]`` and non-empty lists made only of dicts become
    ``[[arrays.of.tables]]``; both are written after the pairs of the table
    that contains them. A ``None`` value is written as a commented-out
    ``key = null`` line. With ``sort_keys`` true the keys of every table are
    visited in sorted order.
    """
    # Stack of (key path, table, is-array-of-tables element) still to write.
    pending = [((), obj, False)]
    while pending:
        path, table, in_array = pending.pop()

        if path:
            header = '.'.join(_escape_id(part) for part in path)
            template = '[[{0}]]\n' if in_array else '[{0}]\n'
            fout.write(template.format(header))

        keys = sorted(table.keys()) if sort_keys else table.keys()

        children = []
        wrote_pair = False
        for key in keys:
            value = table[key]
            if isinstance(value, dict):
                children.append((path + (key,), value, False))
                continue
            if isinstance(value, list) and value and all(isinstance(item, dict) for item in value):
                children.extend((path + (key,), item, True) for item in value)
                continue
            if value is None:
                # based on mojombo's comment: https://github.com/toml-lang/toml/issues/146#issuecomment-25019344
                line = '#{} = null # To use: uncomment and replace null with value\n'.format(_escape_id(key))
            else:
                line = '{0} = {1}\n'.format(_escape_id(key), _format_value(value))
            fout.write(line)
            wrote_pair = True

        # Pushed in reverse so that pop() hands the children back in key
        # order.
        pending.extend(reversed(children))

        # Blank line before the next table, unless nothing was written for
        # this one or nothing follows.
        if (path or wrote_pair) and pending:
            fout.write('\n')
+202
View File
@@ -0,0 +1,202 @@
# Copyright 2012 (C) Daniel Richman, Adam Greig
#
# This file is part of strict_rfc3339.
#
# strict_rfc3339 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# strict_rfc3339 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with strict_rfc3339. If not, see <http://www.gnu.org/licenses/>.
"""
Super simple lightweight RFC3339 functions
"""
import re
import time
import calendar
# Public API: the names a star-import of this module exposes.
__all__ = ["validate_rfc3339",
           "InvalidRFC3339Error",
           "rfc3339_to_timestamp",
           "timestamp_to_rfc3339_utcoffset",
           "timestamp_to_rfc3339_localoffset",
           "now_to_rfc3339_utcoffset",
           "now_to_rfc3339_localoffset"]
# Groups: 1-6 year, month, day, hour, minute, second; 7 optional fraction
# (with its leading dot); 8 "Z" or the whole offset; 9-11 offset sign, hours
# and minutes (None when group 8 is "Z").
#
# ``[0-9]`` is used instead of ``\d`` because ``\d`` also matches non-ASCII
# decimal digits in a Python 3 str pattern, and ``\Z`` instead of ``$``
# because ``$`` also matches just before a trailing newline.
rfc3339_regex = re.compile(
    r"^([0-9]{4})\-([0-9]{2})\-([0-9]{2})T"
    r"([0-9]{2}):([0-9]{2}):([0-9]{2})(\.[0-9]+)?"
    r"(Z|([+\-])([0-9]{2}):([0-9]{2}))\Z")


def validate_rfc3339(datestring):
    """Return True if ``datestring`` is an acceptable RFC 3339 date-time.

    The string must match ``rfc3339_regex`` in full and pass these range
    checks: year 1-9999, month 1-12, a day that exists in that month, hour
    0-23, minute 0-59, second 0-59 (leap seconds are rejected), and for a
    numeric offset hours 0-23 and minutes 0-59.
    """
    match = rfc3339_regex.match(datestring)
    if match is None:
        return False

    groups = match.groups()
    year, month, day, hour, minute, second = [int(part) for part in groups[:6]]

    if not 1 <= year <= 9999:
        # Have to reject this, unfortunately (despite it being OK by rfc3339):
        # calendar.timegm/calendar.monthrange can't cope (since datetime can't)
        return False

    if not 1 <= month <= 12:
        return False

    max_day = calendar.monthrange(year, month)[1]
    if not 1 <= day <= max_day:
        return False

    if not (0 <= hour <= 23 and 0 <= minute <= 59 and 0 <= second <= 59):
        # forbid leap seconds :-(. See README
        return False

    if groups[7] != "Z":
        offset_hours, offset_mins = int(groups[9]), int(groups[10])
        if not (0 <= offset_hours <= 23 and 0 <= offset_mins <= 59):
            return False

    # all OK
    return True
class InvalidRFC3339Error(ValueError):
    """Raised by rfc3339_to_timestamp for input that fails validation.

    Being a ValueError subclass, it is also caught by ``except ValueError``.
    """
def rfc3339_to_timestamp(datestring):
    """Convert an RFC 3339 date-time string to a UTC UNIX timestamp.

    The result is an int when the string has no fractional seconds and a
    float otherwise.

    Raises:
        InvalidRFC3339Error: ``datestring`` does not pass validate_rfc3339.
    """
    if not validate_rfc3339(datestring):
        raise InvalidRFC3339Error

    fields = rfc3339_regex.match(datestring).groups()

    # Date and time fields are first read as if they were already UTC.
    result = calendar.timegm([int(field) for field in fields[:6]])

    fraction = fields[6]
    if fraction is not None:
        # ``fraction`` still carries its leading dot: ".25" -> "0.25".
        result += float("0" + fraction)

    if fields[7] == "Z":
        return result

    sign, hours, minutes = fields[8:]
    shift = int(hours) * 3600 + int(minutes) * 60
    if sign == '-':
        shift = -shift
    # Local time = UTC + offset, so the offset is subtracted to get to UTC.
    result -= shift
    return result
def _seconds_and_microseconds(timestamp):
    """Split ``timestamp`` into ``(whole seconds, microseconds)``.

    An ``int`` is returned as ``(timestamp, 0)``. Anything else is scaled to
    microseconds, rounded to the nearest integer and split with ``divmod``,
    so the microseconds part is always in ``0 <= us < 1000000``, also for
    negative timestamps.
    """
    if not isinstance(timestamp, int):
        total_microseconds = int(round(timestamp * 1e6))
        return divmod(total_microseconds, 1000000)
    return (timestamp, 0)
def _make_datestring_start(time_tuple, microseconds):
    """Build the ``YYYY-MM-DDTHH:MM:SS[.fraction]`` part of a date string.

    The first six items of ``time_tuple`` supply year to second. The
    fraction is ``microseconds`` as six digits with trailing zeros removed;
    it is left out, dot included, when ``microseconds`` is 0. No offset is
    appended.
    """
    stamp = "{0:04d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}".format(*time_tuple)

    digits = "{0:06d}".format(microseconds)
    # There used to be a bug here where it could be 1000000
    assert len(digits) == 6 and digits[0] != '-'

    fraction = digits.rstrip("0")
    if fraction == "":
        return stamp
    return stamp + "." + fraction
def timestamp_to_rfc3339_utcoffset(timestamp):
    """Convert a UTC UNIX timestamp to RFC 3339 text with a ``Z`` offset."""
    whole, micros = _seconds_and_microseconds(timestamp)
    utc_tuple = time.gmtime(whole)
    result = _make_datestring_start(utc_tuple, micros) + "Z"
    # Round-trip self-check: parsing the result must give the input back to
    # within a microsecond.
    assert abs(rfc3339_to_timestamp(result) - timestamp) < 0.000001
    return result
def timestamp_to_rfc3339_localoffset(timestamp):
    """Convert a UTC UNIX timestamp to RFC 3339 text in local time.

    The date and time fields come from ``time.localtime``. The offset that
    follows them is the difference between the ``localtime`` and ``gmtime``
    views of the same instant.

    Raises:
        ValueError: the local offset is not a whole number of minutes.
    """
    whole, micros = _seconds_and_microseconds(timestamp)

    local_tuple = time.localtime(whole)
    result = _make_datestring_start(local_tuple, micros)

    utc_tuple = time.gmtime(whole)
    # Both tuples are run through timegm, so their difference is the local
    # offset in seconds.
    offset = calendar.timegm(local_tuple) - calendar.timegm(utc_tuple)

    magnitude = abs(offset)
    if magnitude % 60 != 0:
        raise ValueError("Your local offset is not a whole minute")

    hours, minutes = divmod(magnitude // 60, 60)
    sign = "-" if offset < 0 else "+"
    result += sign
    result += "{0:02d}:{1:02d}".format(hours, minutes)

    # Round-trip self-check, to within a microsecond.
    assert abs(rfc3339_to_timestamp(result) - timestamp) < 0.000001
    return result
def now_to_rfc3339_utcoffset(integer=True):
    """Return the current time as RFC 3339 text with a ``Z`` offset.

    With ``integer`` true (the default) the current time is truncated to
    whole seconds first, so the result has no fractional part.
    """
    now = time.time()
    return timestamp_to_rfc3339_utcoffset(int(now) if integer else now)
def now_to_rfc3339_localoffset(integer=True):
    """Return the current time as RFC 3339 text with the local offset.

    With ``integer`` true (the default) the current time is truncated to
    whole seconds first, so the result has no fractional part.
    """
    now = time.time()
    return timestamp_to_rfc3339_localoffset(int(now) if integer else now)
+9
View File
@@ -0,0 +1,9 @@
import sys
def timestamp(d=None):
    """Return a UNIX timestamp in whole milliseconds.

    With no argument (or a falsy ``d``) the current time is used. Otherwise
    ``d`` must provide ``timetuple()``; that tuple is passed to
    ``time.mktime``, which treats it as local time. A time tuple has no
    sub-second field, so the result for a given ``d`` is always a multiple
    of 1000.
    """
    # Kept as a function-scope import, as in the original. The ``datetime``
    # import that used to sit next to it was never used and has been dropped.
    import time

    if d:
        return int(time.mktime(d.timetuple()) * 1000)
    return int(time.time() * 1000)
sys.modules[__name__] = timestamp