From 46bfe5225cf60504947b2124125048a9b4ee8997 Mon Sep 17 00:00:00 2001 From: Mark Pilgrim Date: Mon, 3 Aug 2009 12:04:21 -0700 Subject: [PATCH] some rough notes --- examples/customserializer.py | 4 +- serializing.html | 293 ++++++++++++++++++++++++++++++++++- 2 files changed, 294 insertions(+), 3 deletions(-) diff --git a/examples/customserializer.py b/examples/customserializer.py index 12f5d31..0efab92 100644 --- a/examples/customserializer.py +++ b/examples/customserializer.py @@ -2,7 +2,7 @@ import pickle import json import time -def custom_encoder(python_object): +def to_json(python_object): if isinstance(python_object, time.struct_time): return {'__class__': 'time.asctime', '__value__': time.asctime(python_object)} @@ -11,7 +11,7 @@ def custom_encoder(python_object): '__value__': list(python_object)} raise TypeError(repr(python_object) + ' is not JSON serializable') -def custom_decoder(json_object): +def from_json(json_object): if '__class__' in json_object: if json_object['__class__'] == 'time.asctime': return time.strptime(json_object['__value__']) diff --git a/serializing.html b/serializing.html index 9f4f3d6..e3b0203 100644 --- a/serializing.html +++ b/serializing.html @@ -22,8 +22,297 @@ body{counter-reset:h1 13}

Diving In

FIXME +

+>>> shell = 1
+ +

FIXME + +

+>>> shell = 2
+

⁂ +

Serializing Simple Python Objects

+ +

FIXME - introduction to pickle module, concepts, what datatypes can be pickled w/o additional work + +

Saving to (and Loading from) a File

+ +

The pickle module works with data structures. Let’s build one. + +

+>>> shell
+1
+>>> entry = {}
+>>> entry['title'] = 'Dive into history, 2009 edition'
+>>> entry['article_link'] = 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition'
+>>> entry['comments_link'] = None
+>>> entry['internal_id'] = b'\xde\xd5\xb4\xf8'
+>>> entry['tags'] = ('diveintopython', 'docbook', 'html')
+>>> entry['published'] = True
+>>> import time
+>>> entry['published_date'] = time.strptime('Fri Mar 27 22:20:42 2009')
+>>> entry['published_date']
+time.struct_time(tm_year=2009, tm_mon=3, tm_mday=27, tm_hour=22, tm_min=20, tm_sec=42, tm_wday=4, tm_yday=86, tm_isdst=-1)
+
    +
  1. FIXME +
+ +
+>>> shell
+1
+>>> import pickle
+>>> with open('entry.pickle', 'wb') as f:
+...     pickle.dump(entry, f)
+... 
+
    +
  1. FIXME +
+ +
+you@localhost:~/diveintopython3/examples$ ls -l entry.pickle
+-rw-r--r-- 1 you  you  324 Aug  3 13:34 entry.pickle
+you@localhost:~/diveintopython3/examples$ cat entry.pickle
+comments_linkqNXtagsqXdiveintopythonqXdocbookqXhtmlq?qX publishedq?
+XlinkXJhttp://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition
+q   Xpublished_dateq
+ctime
+struct_time
+?qRqXtitleqXDive into history, 2009 editionqu.
+
    +
  1. FIXME +
+ +

FIXME now switch to your second Python Shell + +

+>>> shell
+2
+>>> entry
+Traceback (most recent call last):
+  File "<stdin>", line 1, in <module>
+NameError: name 'entry' is not defined
+>>> import pickle
+>>> with open('entry.pickle', 'rb') as f:
+...     entry = pickle.load(f)
+... 
+>>> entry
+FIXME
+
    +
  1. FIXME +
+ +

FIXME + +

+>>> shell
+1
+>>> with open('entry.pickle', 'rb') as f:
+...     entry2 = pickle.load(f)
+... 
+>>> entry2 == entry
+True
+>>> entry2['tags']
+('diveintopython', 'docbook', 'html')
+>>> entry2['internal_id']
+b'\xde\xd5\xb4\xf8'
+
    +
  1. FIXME +
+ +

Saving to (and Loading from) an Object in Memory

+ +

FIXME + +

Bytes and Strings Rear Their Ugly Heads (Again!)

+ +

FIXME - discussion of pickle protocol versions, backward incompatibility of protocol version 3 due to bytes/strings separation in Python 3, link to http://docs.python.org/3.1/library/pickle.html#data-stream-format + +

⁂ + +

Serializing Complex Python Objects

+ +

FIXME - discussion of pickling class instances, stateful objects, __getstate__ and __setstate__, links to http://docs.python.org/3.1/library/pickle.html#pickle-inst and http://docs.python.org/3.1/library/pickle.html#pickle-state + +

Security Concerns with Pickled Objects

+ +

FIXME - pickled objects can be modified in memory, in transit, or on disk; no checksums; no built-in guarantee that the pickle you're loading is the pickle you dumped; never unpickle untrusted input; xref to "eval() is evil" discussion in advanced-iterators chapter + +

Serializing Python Objects to be Read by Other Languages

+ +

The data format used by the pickle module is Python-specific. It makes no attempt to be compatible with other programming languages. If cross-language compatibility is one of your requirements, you need to look at other serialization formats. + +

One format that is designed to be used by multiple programming languages is JSON. + +

FIXME - pickle format is python-specific; JSON format is designed to be cross-language (in fact, it was originally designed for JavaScript, hence the name); differences with pickle format (table or list); json module implements dumping and loading JSON-formatted data structures; JSON format is string-based (and always encoded as UTF-8 where bytes are required); compact vs. pretty-printing; JSONEncoder; JSONDecoder; iterencode + +

Mapping of Python Datatypes to JSON

+ +
+[source: help(json)]
+
++---------------+-------------------+
+| JSON          | Python            |
++===============+===================+
+| object        | dict              |
++---------------+-------------------+
+| array         | list              |
++---------------+-------------------+
+| string        | str               |
++---------------+-------------------+
+| number (int)  | int               |
++---------------+-------------------+
+| number (real) | float             |
++---------------+-------------------+
+| true          | True              |
++---------------+-------------------+
+| false         | False             |
++---------------+-------------------+
+| null          | None              |
++---------------+-------------------+
+
+ +

Serializing Datatypes Unsupported by JSON

+ +
+>>> shell
+1
+>>> entry
+FIXME
+>>> import json
+>>> with open('entry.json', 'w', encoding='utf-8') as f:
+...     json.dump(entry, f)
+... 
+FIXME
+
    +
  1. FIXME +
+ +

FIXME + +

# customserializer.py
+def to_json(python_object):
+    if isinstance(python_object, bytes):
+        return {'__class__': 'bytes',
+                '__value__': list(python_object)}
+    raise TypeError(repr(python_object) + ' is not JSON serializable')
+
    +
  1. FIXME +
+ +

FIXME + +

+>>> shell
+1
+>>> import customserializer
+>>> with open('entry.json', 'w', encoding='utf-8') as f:
+...     json.dump(entry, f, default = customserializer.to_json)
+... 
+FIXME
+
    +
  1. FIXME +
+ +
# customserializer.py
+def to_json(python_object):
+    if isinstance(python_object, time.struct_time):
+        return {'__class__': 'time.asctime',
+                '__value__': time.asctime(python_object)}
+    if isinstance(python_object, bytes):
+        return {'__class__': 'bytes',
+                '__value__': list(python_object)}
+    raise TypeError(repr(python_object) + ' is not JSON serializable')
+
    +
  1. FIXME +
+ +

FIXME + +

+>>> shell
+1
+>>> with open('entry.json', 'w', encoding='utf-8') as f:
+...     json.dump(entry, f, default = customserializer.to_json)
+... 
+
    +
  1. FIXME +
+ +

FIXME + +

+you@localhost:~/diveintopython3/examples$ ls -l entry.json
+FIXME
+you@localhost:~/diveintopython3/examples$ cat entry.json
+FIXME
+
    +
  1. FIXME +
+ +

FIXME + +

+>>> shell
+2
+>>> del entry
+>>> entry
+Traceback (most recent call last):
+  File "<stdin>", line 1, in <module>
+NameError: name 'entry' is not defined
+>>> import json
+>>> with open('entry.json', 'r', encoding='utf-8') as f:
+...     entry = json.load(f)
+... 
+FIXME
+
    +
  1. FIXME +
+ +

FIXME + +

# customserializer.py
+def from_json(json_object):
+    if '__class__' in json_object:
+        if json_object['__class__'] == 'time.asctime':
+            return time.strptime(json_object['__value__'])
+        if json_object['__class__'] == 'bytes':
+            return bytes(json_object['__value__'])
+    return json_object
+ +
+>>> shell
+2
+>>> import customserializer
+>>> with open('entry.json', 'r', encoding='utf-8') as f:
+...     entry = json.load(f, object_hook = customserializer.from_json)
+... 
+>>> entry
+FIXME
+
    +
  1. FIXME +
+ +

FIXME + +

+>>> shell
+1
+>>> import customserializer
+>>> with open('entry.json', 'r', encoding='utf-8') as f:
+...     entry2 = json.load(f, object_hook = customserializer.from_json)
+... 
+>>> entry2 == entry
+False
+>>> entry['tags']
+('diveintopython', 'docbook', 'html')
+>>> entry2['tags']
+['diveintopython', 'docbook', 'html']
+
    +
  1. FIXME +
+ +

FIXME +

Further Reading

@@ -31,10 +320,12 @@ body{counter-reset:h1 13}