diff --git a/examples/customserializer.py b/examples/customserializer.py index 12f5d31..0efab92 100644 --- a/examples/customserializer.py +++ b/examples/customserializer.py @@ -2,7 +2,7 @@ import pickle import json import time -def custom_encoder(python_object): +def to_json(python_object): if isinstance(python_object, time.struct_time): return {'__class__': 'time.asctime', '__value__': time.asctime(python_object)} @@ -11,7 +11,7 @@ def custom_encoder(python_object): '__value__': list(python_object)} raise TypeError(repr(python_object) + ' is not JSON serializable') -def custom_decoder(json_object): +def from_json(json_object): if '__class__' in json_object: if json_object['__class__'] == 'time.asctime': return time.strptime(json_object['__value__']) diff --git a/serializing.html b/serializing.html index 9f4f3d6..e3b0203 100644 --- a/serializing.html +++ b/serializing.html @@ -22,8 +22,297 @@ body{counter-reset:h1 13}
FIXME +
+>>> shell = 1+ +
FIXME + +
+>>> shell = 2+
⁂ +
FIXME - introduction to pickle module, concepts, what datatypes can be pickled w/o additional work + +
The pickle module works with data structures. Let’s build one.
+
+
+>>> shell
+1
+>>> entry = {}
+>>> entry['title'] = 'Dive into history, 2009 edition'
+>>> entry['article_link'] = 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition'
+>>> entry['comments_link'] = None
+>>> entry['internal_id'] = b'\xde\xd5\xb4\xf8'
+>>> entry['tags'] = ('diveintopython', 'docbook', 'html')
+>>> entry['published'] = True
+>>> import time
+>>> entry['published_date'] = time.strptime('Fri Mar 27 22:20:42 2009')
+>>> entry['published_date']
+time.struct_time(tm_year=2009, tm_mon=3, tm_mday=27, tm_hour=22, tm_min=20, tm_sec=42, tm_wday=4, tm_yday=86, tm_isdst=-1)
+
+>>> shell
+1
+>>> import pickle
+>>> with open('entry.pickle', 'wb') as f:
+... pickle.dump(entry, f)
+...
++you@localhost:~/diveintopython3/examples$ ls -l entry.pickle +-rw-r--r-- 1 you you 324 Aug 3 13:34 entry.pickle +you@localhost:~/diveintopython3/examples$ cat entry.pickle +comments_linkqNXtagsqXdiveintopythonqXdocbookqXhtmlq?qX publishedq? +XlinkXJhttp://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition +q Xpublished_dateq +ctime +struct_time +?qRqXtitleqXDive into history, 2009 editionqu.+
FIXME now switch to your second Python Shell + +
+>>> shell
+2
+>>> entry
+Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+NameError: name 'entry' is not defined
+>>> import pickle
+>>> with open('entry.pickle', 'rb') as f:
+... entry = pickle.load(f)
+...
+>>> entry
+FIXME
+FIXME + +
+>>> shell
+1
+>>> with open('entry.pickle', 'rb') as f:
+... entry2 = pickle.load(f)
+...
+>>> entry2 == entry
+True
+>>> entry2['tags']
+('diveintopython', 'docbook', 'html')
+>>> entry2['internal_id']
+b'\xde\xd5\xb4\xf8'
+FIXME + +
FIXME - discussion of pickle protocol versions, backward incompatibility of protocol version 3 due to bytes/strings separation in Python 3, link to http://docs.python.org/3.1/library/pickle.html#data-stream-format + +
⁂ + +
FIXME - discussion of pickling class instances, stateful objects, __getstate__ and __setstate__, links to http://docs.python.org/3.1/library/pickle.html#pickle-inst and http://docs.python.org/3.1/library/pickle.html#pickle-state + +
FIXME - pickled objects can be modified in memory, in transit, or on disk; no checksums; no built-in guarantee that the pickle you're loading is the pickle you dumped; never unpickle untrusted input; xref to "eval() is evil" discussion in advanced-iterators chapter + +
The data format used by the pickle module is Python-specific. It makes no attempt to be compatible with other programming languages. If cross-language compatibility is one of your requirements, you need to look at other serialization formats.
+
+
One format that is designed to be used by multiple programming languages is JSON. + +
FIXME - pickle format is python-specific; JSON format is designed to be cross-language (in fact, it was originally designed for JavaScript, hence the name); differences with pickle format (table or list); json module implements dumping and loading JSON-formatted data structures; JSON format is string-based (and always encoded as UTF-8 where bytes are required); compact vs. pretty-printing; JSONEncoder; JSONDecoder; iterencode + +
+[source: help(json)] + ++---------------+-------------------+ +| JSON | Python | ++===============+===================+ +| object | dict | ++---------------+-------------------+ +| array | list | ++---------------+-------------------+ +| string | unicode | ++---------------+-------------------+ +| number (int) | int, long | ++---------------+-------------------+ +| number (real) | float | ++---------------+-------------------+ +| true | True | ++---------------+-------------------+ +| false | False | ++---------------+-------------------+ +| null | None | ++---------------+-------------------+ ++ +
+>>> shell
+1
+>>> entry
+FIXME
+>>> import json
+>>> with open('entry.json', 'w', encoding='utf-8') as f:
+... json.dump(entry, f)
+...
+FIXME
+FIXME + +
# customserializer.py
+def to_json(python_object):
+ if isinstance(python_object, bytes):
+ return {'__class__': 'bytes',
+ '__value__': list(python_object)}
+ raise TypeError(repr(python_object) + ' is not JSON serializable')
+FIXME + +
+>>> shell
+1
+>>> import customserializer
+>>> with open('entry.json', 'w', encoding='utf-8') as f:
+... json.dump(entry, f, default = customserializer.to_json)
+...
+FIXME
+# customserializer.py
+def to_json(python_object):
+ if isinstance(python_object, time.struct_time):
+ return {'__class__': 'time.asctime',
+ '__value__': time.asctime(python_object)}
+ if isinstance(python_object, bytes):
+ return {'__class__': 'bytes',
+ '__value__': list(python_object)}
+ raise TypeError(repr(python_object) + ' is not JSON serializable')
+FIXME + +
+>>> shell
+1
+>>> with open('entry.json', 'w', encoding='utf-8') as f:
+... json.dump(entry, f, default = customserializer.to_json)
+...
+FIXME + +
+you@localhost:~/diveintopython3/examples$ ls -l entry.json +FIXME +you@localhost:~/diveintopython3/examples$ cat entry.json +FIXME+
FIXME + +
+>>> shell
+2
+>>> del entry
+>>> entry
+Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+NameError: name 'entry' is not defined
+>>> import json
+>>> with open('entry.json', 'r', encoding='utf-8') as f:
+... entry = json.load(f)
+...
+FIXME
+FIXME + +
# customserializer.py
+def from_json(json_object):
+ if '__class__' in json_object:
+ if json_object['__class__'] == 'time.asctime':
+ return time.strptime(json_object['__value__'])
+ if json_object['__class__'] == 'bytes':
+ return bytes(json_object['__value__'])
+ return json_object
+
+
+>>> shell
+2
+>>> import customserializer
+>>> with open('entry.json', 'r', encoding='utf-8') as f:
+... entry = json.load(f, object_hook = customserializer.from_json)
+...
+>>> entry
+FIXME
+FIXME + +
+>>> shell
+1
+>>> import customserializer
+>>> with open('entry.json', 'r', encoding='utf-8') as f:
+... entry2 = json.load(f, object_hook = customserializer.from_json)
+...
+>>> entry2 == entry
+False
+>>> entry['tags']
+('diveintopython', 'docbook', 'html')
+>>> entry2['tags']
+['diveintopython', 'docbook', 'html']
+FIXME +
@@ -31,10 +320,12 @@ body{counter-reset:h1 13}