From 8a6b1c73bf685f2105682ad032c9c0d3b5f59ad6 Mon Sep 17 00:00:00 2001 From: Harish Kesava Rao Date: Sun, 16 Dec 2018 22:35:35 -0600 Subject: [PATCH 1/4] issue-938: added sections in serialization for simple file, csv, yaml, json --- docs/scenarios/serialization.rst | 142 ++++++++++++++++++++++++++++++- 1 file changed, 141 insertions(+), 1 deletion(-) diff --git a/docs/scenarios/serialization.rst b/docs/scenarios/serialization.rst index 559ad36..ef4a193 100644 --- a/docs/scenarios/serialization.rst +++ b/docs/scenarios/serialization.rst @@ -12,10 +12,150 @@ What is data serialization? Data serialization is the concept of converting structured data into a format that allows it to be shared or stored in such a way that its original -structure to be recovered. In some cases, the secondary intention of data +structure can be recovered or reconstructed. In some cases, the secondary intention of data serialization is to minimize the size of the serialized data which then minimizes disk space or bandwidth requirements. +******************** +Flat vs. Nested data +******************** + +Before beginning to serialize data, it is important to identify or decide how the +data needs to be structured during data serialization - flat or nested. +The differences in the two styles are shown in the below examples. + +Flat style: + +.. code-block:: python + + { "Type" : "A", "field1": "value1", "field2": "value2", "field3": "value3" } + + +Nested style: + +.. code-block:: python + + {"A" + { "field1": "value1", "field2": "value2", "field3": "value3" } } + + +For more reading on the two styles, please see the discussion on +`Python mailing list `__, +`IETF mailing list `__ and +`here `__. + +**************** +Serializing Text +**************** + +======================= +Simple file (flat data) +======================= + +If the data to be serialized is located in a file and contains flat data, Python offers two methods to serialize data. 
+ +repr +---- + +The repr method in Python takes a single object parameter and returns a printable representation of the input + +.. code-block:: python + + # input as flat text + a = { "Type" : "A", "field1": "value1", "field2": "value2", "field3": "value3" } + + # the same input can also be read from a file + a = + + # returns a printable representation of the input; + # the output can be written to a file as well + print(repr(a)) + + # write content to files using repr + with open('/tmp/file.py') as f:f.write(repr(a)) + +ast.literal_eval +________________ + +The literal_eval method safely parses and evaluates an expression for a Python datatype. +Supported data types are: strings, numbers, tuples, lists, dicts, booleans and None. + +.. code-block:: python + + with open('/tmp/file.py', 'r') as f: inp = ast.literal_eval(f.read()) + +==================== +CSV file (flat data) +==================== + +The CSV module in Python implements classes to read and write tabular +data in CSV format. + +Simple example for reading: + +.. code-block:: python + + import csv + with open('/tmp/file.csv', newline='') as f: + reader = csv.reader(f) + for row in reader: + print(row) + +Simple example for writing: + +.. code-block:: python + + import csv + with open('/temp/file.csv', 'w', newline='') as f: + writer = csv.writer(f) + writer.writerows(iterable) + + +The module's contents, functions and examples can be found +`here `__. + +================== +YAML (nested data) +================== + +There are many third party modules to parse and read/write YAML file +structures in Python. One such example is below. + +.. code-block:: python + + import yaml + with open('/tmp/file.yaml', 'r', newline='') as f: + try: + print(yaml.load(f)) + except yaml.YAMLError as ymlexcp: + print(ymlexcp) + +Documentation on the third party module can be found +`here `__. 
+ +======================= +JSON file (nested data) +======================= + +Python's JSON module can be used to read and write JSON files. +Example code is below. + +Reading: + +.. code-block:: python + + import json + with open('/tmp/file.json', 'r') as f: + data = json.dump(f) + +Writing: + +.. code-block:: python + + import json + with open('/tmp/file.json', 'w') as f: + json.dump(data, f, sort_keys=True) + ****** Pickle From fd2a8f3e8ff91f5a3098458ea022fcbbaf4527d8 Mon Sep 17 00:00:00 2001 From: Harish Kesava Rao Date: Mon, 17 Dec 2018 21:02:21 -0600 Subject: [PATCH 2/4] Final commit --- docs/scenarios/serialization.rst | 58 +++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 5 deletions(-) diff --git a/docs/scenarios/serialization.rst b/docs/scenarios/serialization.rst index ef4a193..5ba84fa 100644 --- a/docs/scenarios/serialization.rst +++ b/docs/scenarios/serialization.rst @@ -74,8 +74,9 @@ The repr method in Python takes a single object parameter and returns a printabl # write content to files using repr with open('/tmp/file.py') as f:f.write(repr(a)) + ast.literal_eval -________________ +---------------- The literal_eval method safely parses and evaluates an expression for a Python datatype. Supported data types are: strings, numbers, tuples, lists, dicts, booleans and None. @@ -95,6 +96,7 @@ Simple example for reading: .. code-block:: python + # Reading CSV content from a file import csv with open('/tmp/file.csv', newline='') as f: reader = csv.reader(f) @@ -105,6 +107,7 @@ Simple example for writing: .. code-block:: python + # Writing CSV content to a file import csv with open('/temp/file.csv', 'w', newline='') as f: writer = csv.writer(f) @@ -123,6 +126,7 @@ structures in Python. One such example is below. .. code-block:: python + # Reading YAML content from a file using the load method import yaml with open('/tmp/file.yaml', 'r', newline='') as f: try: @@ -144,22 +148,66 @@ Reading: .. 
code-block:: python + # Reading JSON content from a file import json with open('/tmp/file.json', 'r') as f: - data = json.dump(f) + data = json.load(f) Writing: .. code-block:: python + # writing JSON content to a file using the dump method import json with open('/tmp/file.json', 'w') as f: json.dump(data, f, sort_keys=True) +================= +XML (nested data) +================= -****** -Pickle -****** +XML parsing in Python is possible using the `xml` package. + +Example: + +.. code-block:: python + + # reading XML content from a file + import xml.etree.ElementTree as ET + tree = ET.parse('country_data.xml') + root = tree.getroot() + +More documentation on using the `xml.dom` and `xml.sax` packages can be found +`here `__. + + +******* +Binary +******* + +======================= +Numpy Array (flat data) +======================= + +Python's Numpy array can be used to serialize and deserialize data to and from byte representation. + +Example: + +.. code-block:: python + + import numpy as np + + # Converting Numpy array to byte format + byte_output = np.array([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ]).tobytes() + + # Converting byte format back to Numpy array + array_format = np.frombuffer(byte_output) + + + +==================== +Pickle (nested data) +==================== The native data serialization module for Python is called `Pickle `_. From 3cf750bea1f12cb80f879e73982ee364ebcbcacb Mon Sep 17 00:00:00 2001 From: Harish Kesava Rao Date: Tue, 18 Dec 2018 07:01:38 -0600 Subject: [PATCH 3/4] Incorporated changes from style suggestions --- docs/scenarios/serialization.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/scenarios/serialization.rst b/docs/scenarios/serialization.rst index 5ba84fa..408b59f 100644 --- a/docs/scenarios/serialization.rst +++ b/docs/scenarios/serialization.rst @@ -21,7 +21,7 @@ Flat vs. 
Nested data ******************** Before beginning to serialize data, it is important to identify or decide how the -data needs to be structured during data serialization - flat or nested. +data should to be structured during data serialization - flat or nested. The differences in the two styles are shown in the below examples. Flat style: @@ -42,7 +42,7 @@ Nested style: For more reading on the two styles, please see the discussion on `Python mailing list `__, `IETF mailing list `__ and -`here `__. +`in stackexchange `__. **************** Serializing Text @@ -57,7 +57,7 @@ If the data to be serialized is located in a file and contains flat data, Python repr ---- -The repr method in Python takes a single object parameter and returns a printable representation of the input +The repr method in Python takes a single object parameter and returns a printable representation of the input: .. code-block:: python @@ -79,7 +79,7 @@ ast.literal_eval ---------------- The literal_eval method safely parses and evaluates an expression for a Python datatype. -Supported data types are: strings, numbers, tuples, lists, dicts, booleans and None. +Supported data types are: strings, numbers, tuples, lists, dicts, booleans, and None. .. code-block:: python @@ -114,8 +114,8 @@ Simple example for writing: writer.writerows(iterable) -The module's contents, functions and examples can be found -`here `__. +The module's contents, functions, and examples can be found +`in the Python documentation `__. ================== YAML (nested data) @@ -178,7 +178,7 @@ Example: root = tree.getroot() More documentation on using the `xml.dom` and `xml.sax` packages can be found -`here `__. +`in the Python XML library documentation `__. ******* @@ -186,21 +186,21 @@ Binary ******* ======================= -Numpy Array (flat data) +NumPy Array (flat data) ======================= -Python's Numpy array can be used to serialize and deserialize data to and from byte representation. 
+Python's NumPy array can be used to serialize and deserialize data to and from byte representation. Example: .. code-block:: python - import numpy as np + import numpy as np - # Converting Numpy array to byte format + # Converting NumPy array to byte format byte_output = np.array([ [1, 2, 3], [4, 5, 6], [7, 8, 9] ]).tobytes() - # Converting byte format back to Numpy array + # Converting byte format back to NumPy array array_format = np.frombuffer(byte_output) From 4e09d8aae016ed84abd36196563ef629022a8836 Mon Sep 17 00:00:00 2001 From: Harish Kesava Rao Date: Thu, 20 Dec 2018 20:05:01 -0600 Subject: [PATCH 4/4] Implemented feedback and suggestions from code review --- docs/scenarios/serialization.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/scenarios/serialization.rst b/docs/scenarios/serialization.rst index 408b59f..312327b 100644 --- a/docs/scenarios/serialization.rst +++ b/docs/scenarios/serialization.rst @@ -10,9 +10,9 @@ Data Serialization What is data serialization? *************************** -Data serialization is the concept of converting structured data into a format +Data serialization is the process of converting structured data into a format that allows it to be shared or stored in such a way that its original -structure can be recovered or reconstructed. In some cases, the secondary intention of data +structure can be recovered or reconstructed. In some cases, the secondary intention of data serialization is to minimize the size of the serialized data which then minimizes disk space or bandwidth requirements. @@ -21,7 +21,7 @@ Flat vs. Nested data ******************** Before beginning to serialize data, it is important to identify or decide how the -data should to be structured during data serialization - flat or nested. +data should be structured during data serialization - flat or nested. The differences in the two styles are shown in the below examples. 
Flat style: @@ -65,7 +65,7 @@ The repr method in Python takes a single object parameter and returns a printabl a = { "Type" : "A", "field1": "value1", "field2": "value2", "field3": "value3" } # the same input can also be read from a file - a = + with open('/tmp/file.py', 'r') as f: a = f.read() # returns a printable representation of the input; # the output can be written to a file as well @@ -135,7 +135,7 @@ structures in Python. One such example is below. print(ymlexcp) Documentation on the third party module can be found -`here `__. +`in the PyYAML Documentation `__. ======================= JSON file (nested data) ======================= @@ -157,7 +157,7 @@ Writing: .. code-block:: python - # writing JSON content to a file using the dump method + # Writing JSON content to a file using the dump method import json with open('/tmp/file.json', 'w') as f: json.dump(data, f, sort_keys=True)