diff --git a/examples/customserializer.py b/examples/customserializer.py index 842f3f3..dfb8c5d 100644 --- a/examples/customserializer.py +++ b/examples/customserializer.py @@ -9,7 +9,6 @@ def to_json(python_object): if isinstance(python_object, bytes): return {'__class__': 'bytes', '__value__': list(python_object)} - raise TypeError(repr(python_object) + ' is not JSON serializable') def from_json(json_object): if '__class__' in json_object: @@ -24,7 +23,7 @@ if __name__ == '__main__': entry['title'] = 'Dive into history, 2009 edition' entry['article_link'] = 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition' entry['comments_link'] = None - entry['internal_id'] = b'\xde\xd5\xb4\xf8' + entry['internal_id'] = b'\xDE\xD5\xB4\xF8' entry['tags'] = ('diveintopython', 'docbook', 'html') entry['published'] = True entry['published_date'] = time.strptime('Fri Mar 27 22:20:42 2009') diff --git a/serializing.html b/serializing.html index 99e4687..b5ba159 100644 --- a/serializing.html +++ b/serializing.html @@ -16,7 +16,7 @@ body{counter-reset:h1 13}
Difficulty level: ♦♦♦♦♢
-❝ FIXME ❞
— FIXME +❝ Every Saturday since we’ve lived in this apartment, I have awakened at 6:15, poured myself a bowl of cereal, added
a quarter-cup of 2% milk, sat on this end of this couch, turned on BBC America, and watched Doctor Who. ❞
— Sheldon, The Big Bang Theory
entry.pickle file.
@@ -348,7 +348,7 @@ def protocol_version(file_object):
>>> shell
1
>>> with open('basic-pretty.json', mode='w', encoding='utf-8') as f:
-... json.dump(basic_entry, f, indent=2) ①
+... json.dump(basic_entry, f, indent=2) ①
json.dump() function, it will make the resulting JSON file more readable, at the expense of larger file size. The indent parameter is an integer. 0 means “put each value on its own line.” A number greater than 0 means “put each value on its own line, and indent that many spaces.”
null
None
-Did you notice what was missing? Tuples & bytes! JSON has an array type, which the json module maps to a Python list, but it does not have a separate type for “frozen arrays” (tuples). And while JSON supports strings quite nicely, it has no support for bytes objects or byte arrays.
@@ -411,13 +411,19 @@ def protocol_version(file_object):
Even if JSON has no built-in support for bytes, that doesn’t mean you can’t serialize bytes objects. The json module provides extensibility hooks for encoding and decoding unknown datatypes. (By “unknown,” I mean “not defined in JSON.” Obviously the json module knows about byte arrays, but it’s constrained by the limitations of the JSON specification.) If you want to encode bytes or other datatypes that JSON doesn’t support natively, you need to provide custom encoders and decoders for those types.
->>> shell ① +>>> shell 1 ->>> entry -FIXME +>>> entry ① +{'comments_link': None, + 'internal_id': b'\xDE\xD5\xB4\xF8', + 'title': 'Dive into history, 2009 edition', + 'tags': ('diveintopython', 'docbook', 'html'), + 'article_link': 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition', + 'published_date': time.struct_time(tm_year=2009, tm_mon=3, tm_mday=27, tm_hour=22, tm_min=20, tm_sec=42, tm_wday=4, tm_yday=86, tm_isdst=-1), + 'published': True} >>> import json >>> with open('entry.json', 'w', encoding='utf-8') as f: ② -... json.dump(entry, f) +... json.dump(entry, f) ③ ... Traceback (most recent call last): File "<stdin>", line 5, in <module> @@ -431,32 +437,35 @@ def protocol_version(file_object): o = _default(o) File "C:\Python31\lib\json\encoder.py", line 170, in default raise TypeError(repr(o) + " is not JSON serializable") -TypeError: b'\xde\xd5\xb4\xf8' is not JSON serializable+TypeError: b'\xDE\xD5\xB4\xF8' is not JSON serializable
None value, a string, a tuple of strings, a bytes object, and a time structure.
+FIXME +
Here’s what happened: the json.dump() function tried to serialize the bytes object b'\xDE\xD5\xB4\xF8', but it failed, because JSON has no support for bytes objects. However, if storing bytes is important to you, you can define your own “mini-serialization format.”
-
# customserializer.py
-def to_json(python_object):
- if isinstance(python_object, bytes):
- return {'__class__': 'bytes',
- '__value__': list(python_object)}
- raise TypeError(repr(python_object) + ' is not JSON serializable')
+[download customserializer.py]
+
+def to_json(python_object): ①
+ if isinstance(python_object, bytes): ②
+ return {'__class__': 'bytes',
+ '__value__': list(python_object)} ③
json.dump() function is unable to serialize by itself — in this case, the bytes object b'\xDE\xD5\xB4\xF8'.
+json.dump() function passed to it. This is not strictly necessary if your function only serializes one datatype, but it makes it crystal clear what case your function is covering, and it makes it easier to extend if you need to add serializations for more datatypes later.
+bytes object into a dictionary. The __class__ key will hold the original datatype (as a string, 'bytes'), and the __value__ key will hold the actual value. Of course this can’t be a bytes object; the entire point is to convert it into something that can be serialized in JSON! A bytes object is just a sequence of integers; each integer is somewhere in the range 0–255. We can use the list() function to convert the bytes object into a list of integers. So b'\xDE\xD5\xB4\xF8' becomes [222, 213, 180, 248]. (Do the math! It works! The byte \xDE in hexadecimal is 222 in decimal, \xD5 is 213, and so on.)
FIXME +
That’s it; you don’t need to do anything else. In particular, this custom serialization function returns a Python dictionary, not a string. You’re not doing the entire serializing-to-JSON yourself; you’re only doing the converting-to-a-supported-datatype part. The json.dump() function will do the rest.
>>> shell
1
->>> import customserializer
->>> with open('entry.json', 'w', encoding='utf-8') as f:
-... json.dump(entry, default = customserializer.to_json)
+>>> import customserializer ①
+>>> with open('entry.json', 'w', encoding='utf-8') as f: ②
+... json.dump(entry, f, default=customserializer.to_json) ③
...
Traceback (most recent call last):
File "<stdin>", line 9, in <module>
@@ -470,34 +479,39 @@ def to_json(python_object):
File "C:\Python31\lib\json\encoder.py", line 416, in _iterencode
o = _default(o)
File "/Users/pilgrim/diveintopython3/examples/customserializer.py", line 12, in to_json
- raise TypeError(repr(python_object) + ' is not JSON serializable')
+ raise TypeError(repr(python_object) + ' is not JSON serializable') ④
TypeError: time.struct_time(tm_year=2009, tm_mon=3, tm_mday=27, tm_hour=22, tm_min=20, tm_sec=42, tm_wday=4, tm_yday=86, tm_isdst=-1) is not JSON serializable
customserializer module is where you just defined the to_json() function in the previous example.
+json.dump() function, pass your function into the json.dump() function in the default parameter. (Hooray, everything in Python is an object!)
+json.dump() function is no longer complaining about being unable to serialize the bytes object. Now it’s complaining about a completely different object: the time.struct_time object.
FIXME +
While getting a different exception might not seem like progress, it really is! It’ll just take one more tweak to get past this. + +
+import time
-# customserializer.py
def to_json(python_object):
- if isinstance(python_object, time.struct_time):
- return {'__class__': 'time.asctime',
- '__value__': time.asctime(python_object)}
+ if isinstance(python_object, time.struct_time): ①
+ return {'__class__': 'time.asctime',
+ '__value__': time.asctime(python_object)} ②
if isinstance(python_object, bytes):
return {'__class__': 'bytes',
- '__value__': list(python_object)}
- raise TypeError(repr(python_object) + ' is not JSON serializable')
+ '__value__': list(python_object)}
customserializer.to_json() function, we need to check whether the Python object (that the json.dump() function is having trouble with) is a time.struct_time.
+bytes object: convert the time.struct_time object to a dictionary that only contains JSON-serializable values. In this case, the easiest way to convert a time value into a JSON-serializable value is to convert it to a string with the time.asctime() function. The time.asctime() function will convert that nasty-looking time.struct_time into the string 'Fri Mar 27 22:20:42 2009'.
FIXME +
>>> shell
1
>>> with open('entry.json', 'w', encoding='utf-8') as f:
-... json.dump(entry, default = customserializer.to_json)
+... json.dump(entry, f, default=customserializer.to_json)
...