diff --git a/examples/customserializer.py b/examples/customserializer.py index 842f3f3..dfb8c5d 100644 --- a/examples/customserializer.py +++ b/examples/customserializer.py @@ -9,7 +9,6 @@ def to_json(python_object): if isinstance(python_object, bytes): return {'__class__': 'bytes', '__value__': list(python_object)} - raise TypeError(repr(python_object) + ' is not JSON serializable') def from_json(json_object): if '__class__' in json_object: @@ -24,7 +23,7 @@ if __name__ == '__main__': entry['title'] = 'Dive into history, 2009 edition' entry['article_link'] = 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition' entry['comments_link'] = None - entry['internal_id'] = b'\xde\xd5\xb4\xf8' + entry['internal_id'] = b'\xDE\xD5\xB4\xF8' entry['tags'] = ('diveintopython', 'docbook', 'html') entry['published'] = True entry['published_date'] = time.strptime('Fri Mar 27 22:20:42 2009') diff --git a/serializing.html b/serializing.html index 99e4687..b5ba159 100644 --- a/serializing.html +++ b/serializing.html @@ -16,7 +16,7 @@ body{counter-reset:h1 13}

Difficulty level: ♦♦♦♦♢

Serializing Python Objects

-

FIXME
— FIXME +

Every Saturday since we’ve lived in this apartment, I have awakened at 6:15, poured myself a bowl of cereal, added
a quarter-cup of 2% milk, sat on this end of this couch, turned on BBC America, and watched Doctor Who.
— Sheldon, The Big Bang Theory

 

Diving In

@@ -64,7 +64,7 @@ body{counter-reset:h1 13} >>> entry['title'] = 'Dive into history, 2009 edition' >>> entry['article_link'] = 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition' >>> entry['comments_link'] = None ->>> entry['internal_id'] = b'\xde\xd5\xb4\xf8' +>>> entry['internal_id'] = b'\xDE\xD5\xB4\xF8' >>> entry['tags'] = ('diveintopython', 'docbook', 'html') >>> entry['published'] = True >>> import time @@ -121,7 +121,7 @@ NameError: name 'entry' is not defined ... >>> entry {'comments_link': None, - 'internal_id': b'\xde\xd5\xb4\xf8', + 'internal_id': b'\xDE\xD5\xB4\xF8', 'title': 'Dive into history, 2009 edition', 'tags': ('diveintopython', 'docbook', 'html'), 'article_link': @@ -149,7 +149,7 @@ NameError: name 'entry' is not defined >>> entry2['tags'] ('diveintopython', 'docbook', 'html') >>> entry2['internal_id'] -b'\xde\xd5\xb4\xf8' +b'\xDE\xD5\xB4\xF8'
  1. Switch back to Python Shell #1.
  2. Open the entry.pickle file. @@ -348,7 +348,7 @@ def protocol_version(file_object): >>> shell 1 >>> with open('basic-pretty.json', mode='w', encoding='utf-8') as f: -... json.dump(basic_entry, f, indent=2) +... json.dump(basic_entry, f, indent=2)
    1. If you pass an indent parameter to the json.dump() function, it will make the resulting JSON file more readable, at the expense of larger file size. The indent parameter is an integer. 0 means “put each value on its own line.” A number greater than 0 means “put each value on its own line, and indent that many spaces.”
    @@ -401,7 +401,7 @@ def protocol_version(file_object): * null None -* Remember that JSON values are case-sensitive. +* All JSON values are case-sensitive.

    Did you notice what was missing? Tuples & bytes! JSON has an array type, which the json module maps to a Python list, but it does not have a separate type for “frozen arrays” (tuples). And while JSON supports strings quite nicely, it has no support for bytes objects or byte arrays. @@ -411,13 +411,19 @@ def protocol_version(file_object):

    Even if JSON has no built-in support for bytes, that doesn’t mean you can’t serialize bytes objects. The json module provides extensibility hooks for encoding and decoding unknown datatypes. (By “unknown,” I mean “not defined in JSON.” Obviously the json module knows about byte arrays, but it’s constrained by the limitations of the JSON specification.) If you want to encode bytes or other datatypes that JSON doesn’t support natively, you need to provide custom encoders and decoders for those types.

    ->>> shell                                                 
    +>>> shell
     1
    ->>> entry
    -FIXME
    +>>> entry                                                 
    +{'comments_link': None,
    + 'internal_id': b'\xDE\xD5\xB4\xF8',
    + 'title': 'Dive into history, 2009 edition',
    + 'tags': ('diveintopython', 'docbook', 'html'),
    + 'article_link': 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition',
    + 'published_date': time.struct_time(tm_year=2009, tm_mon=3, tm_mday=27, tm_hour=22, tm_min=20, tm_sec=42, tm_wday=4, tm_yday=86, tm_isdst=-1),
    + 'published': True}
     >>> import json
     >>> with open('entry.json', 'w', encoding='utf-8') as f:  
    -...     json.dump(entry, f)
    +...     json.dump(entry, f)                               
     ... 
     Traceback (most recent call last):
       File "<stdin>", line 5, in <module>
    @@ -431,32 +437,35 @@ def protocol_version(file_object):
         o = _default(o)
       File "C:\Python31\lib\json\encoder.py", line 170, in default
         raise TypeError(repr(o) + " is not JSON serializable")
    -TypeError: b'\xde\xd5\xb4\xf8' is not JSON serializable
    +TypeError: b'\xDE\xD5\xB4\xF8' is not JSON serializable
      -
    1. FIXME -
    2. FIXME +
    3. OK, it’s time to revisit the entry data structure. This has it all: a boolean value, a None value, a string, a tuple of strings, a bytes object, and a time structure. +
    4. I know I’ve said it before, but it’s worth repeating: JSON is a text-based format. Always open JSON files in text mode with a UTF-8 character encoding. +
    5. Well that’s not good. What happened?
    -

    FIXME +

    Here’s what happened: the json.dump() function tried to serialize the bytes object b'\xDE\xD5\xB4\xF8', but it failed, because JSON has no support for bytes objects. However, if storing bytes is important to you, you can define your own “mini-serialization format.” -

    # customserializer.py
    -def to_json(python_object):
    -    if isinstance(python_object, bytes):
    -        return {'__class__': 'bytes',
    -                '__value__': list(python_object)}
    -    raise TypeError(repr(python_object) + ' is not JSON serializable')
    +

    [download customserializer.py] +

    
    +def to_json(python_object):                                             
    +    if isinstance(python_object, bytes):                                
    +        return {'__class__': 'bytes',
    +                '__value__': list(python_object)}                       
      -
    1. FIXME +
    2. To define your own “mini-serialization format” for a datatype that JSON doesn’t support natively, just define a function that takes a Python object as a parameter. This Python object will be the actual object that the json.dump() function is unable to serialize by itself — in this case, the bytes object b'\xDE\xD5\xB4\xF8'. +
    3. Your custom serialization function should check the type of the Python object that the json.dump() function passed to it. This is not strictly necessary if your function only serializes one datatype, but it makes it crystal clear what case your function is covering, and it makes it easier to extend if you need to add serializations for more datatypes later. +
    4. In this case, I’ve chosen to convert a bytes object into a dictionary. The __class__ key will hold the original datatype (as a string, 'bytes'), and the __value__ key will hold the actual value. Of course this can’t be a bytes object; the entire point is to convert it into something that can be serialized in JSON! A bytes object is just a sequence of integers; each integer is somewhere in the range 0–255. We can use the list() function to convert the bytes object into a list of integers. So b'\xDE\xD5\xB4\xF8' becomes [222, 213, 180, 248]. (Do the math! It works! The byte \xDE in hexadecimal is 222 in decimal, \xD5 is 213, and so on.)
    -

    FIXME +

    That’s it; you don’t need to do anything else. In particular, this custom serialization function returns a Python dictionary, not a string. You’re not doing the entire serializing-to-JSON yourself; you’re only doing the converting-to-a-supported-datatype part. The json.dump() function will do the rest.

     >>> shell
     1
    ->>> import customserializer
    ->>> with open('entry.json', 'w', encoding='utf-8') as f:
    -...     json.dump(entry, default = customserializer.to_json)
    +>>> import customserializer                                                             
    +>>> with open('entry.json', 'w', encoding='utf-8') as f:                                
    +...     json.dump(entry, f, default=customserializer.to_json)                           
     ... 
     Traceback (most recent call last):
       File "<stdin>", line 9, in <module>
    @@ -470,34 +479,39 @@ def to_json(python_object):
       File "C:\Python31\lib\json\encoder.py", line 416, in _iterencode
         o = _default(o)
       File "/Users/pilgrim/diveintopython3/examples/customserializer.py", line 12, in to_json
    -    raise TypeError(repr(python_object) + ' is not JSON serializable')
    +    raise TypeError(repr(python_object) + ' is not JSON serializable')                     
     TypeError: time.struct_time(tm_year=2009, tm_mon=3, tm_mday=27, tm_hour=22, tm_min=20, tm_sec=42, tm_wday=4, tm_yday=86, tm_isdst=-1) is not JSON serializable
      -
    1. FIXME +
    2. The customserializer module is where you just defined the to_json() function in the previous example. +
    3. Text mode, UTF-8 encoding, yadda yadda. (You’ll forget! I forget sometimes! And everything will work right up until the moment that it fails, and then it will fail most spectacularly.) +
    4. This is the important bit: to hook your custom conversion function into the json.dump() function, pass your function into the json.dump() function in the default parameter. (Hooray, everything in Python is an object!) +
    5. OK, so it didn’t actually work. But take a look at the exception. The json.dump() function is no longer complaining about being unable to serialize the bytes object. Now it’s complaining about a completely different object: the time.struct_time object.
    -

    FIXME +

    While getting a different exception might not seem like progress, it really is! It’ll just take one more tweak to get past this. + +

    
    +import time
     
    -
    # customserializer.py
     def to_json(python_object):
    -    if isinstance(python_object, time.struct_time):
    -        return {'__class__': 'time.asctime',
    -                '__value__': time.asctime(python_object)}
    +    if isinstance(python_object, time.struct_time):          
    +        return {'__class__': 'time.asctime',
    +                '__value__': time.asctime(python_object)}    
         if isinstance(python_object, bytes):
             return {'__class__': 'bytes',
    -                '__value__': list(python_object)}
    -    raise TypeError(repr(python_object) + ' is not JSON serializable')
    +                '__value__': list(python_object)}
      -
    1. FIXME +
    2. Adding to our existing customserializer.to_json() function, we need to check whether the Python object (that the json.dump() function is having trouble with) is a time.struct_time. +
    3. If so, we’ll do something similar to the conversion we did with the bytes object: convert the time.struct_time object to a dictionary that only contains JSON-serializable values. In this case, the easiest way to convert a time.struct_time into a JSON-serializable value is to convert it to a string with the time.asctime() function. The time.asctime() function will convert that nasty-looking time.struct_time into the string 'Fri Mar 27 22:20:42 2009'.
    -

    FIXME +

     >>> shell
     1
     >>> with open('entry.json', 'w', encoding='utf-8') as f:
    -...     json.dump(entry, default = customserializer.to_json)
    +...     json.dump(entry, f, default=customserializer.to_json)
     ... 
    1. FIXME @@ -564,7 +578,7 @@ def from_json(json_object): ... >>> entry {'comments_link': None, - 'internal_id': b'\xde\xd5\xb4\xf8', + 'internal_id': b'\xDE\xD5\xB4\xF8', 'title': 'Dive into history, 2009 edition', 'tags': ['diveintopython', 'docbook', 'html'], 'article_link': 'http://diveintomark.org/archives/2009/03/27/dive-into-history-2009-edition',