From d8666e190631b5330c2851bd354d07831afba114 Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Sun, 31 Dec 2017 14:46:15 -0600 Subject: [PATCH 01/10] Reduce overall memory usage of Requests module by removing cgi module dependency in utils.py. Instead wrote a nested function to parse header and return content type and params. --- requests/utils.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/requests/utils.py b/requests/utils.py index f9565287..a1a3a7cb 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -8,7 +8,6 @@ This module provides utility functions that are used within Requests that are also useful for external consumption. """ -import cgi import codecs import collections import contextlib @@ -453,13 +452,28 @@ def get_encoding_from_headers(headers): :param headers: dictionary to extract encoding from. :rtype: str """ + def parse_header(content_type): + #Inner function to parse header + content_type_and_params_delimiter = ';' + + #append delimiter on end to ensure atleast two elements when split by ';' + content_type += content_type_and_params_delimiter + + tokens = content_type.split(content_type_and_params_delimiter) + content_type_index = 0 + params_index = 1 + + content_type = tokens[content_type_index] + params = tokens[params_index] + params_dict = dict(param.split('=') for param in params.split()) + return content_type,params_dict content_type = headers.get('content-type') if not content_type: return None - content_type, params = cgi.parse_header(content_type) + content_type, params = parse_header(content_type) if 'charset' in params: return params['charset'].strip("'\"") From cef08304197b8b8747015d94a1700716202355ee Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Sun, 31 Dec 2017 15:02:39 -0600 Subject: [PATCH 02/10] clean --- requests/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requests/utils.py b/requests/utils.py index a1a3a7cb..37e3e27d 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -454,9 +454,8 @@ def get_encoding_from_headers(headers): """ def parse_header(content_type): #Inner function to parse header - content_type_and_params_delimiter = ';' - #append delimiter on end to ensure atleast two elements when split by ';' + content_type_and_params_delimiter = ';' content_type += content_type_and_params_delimiter tokens = content_type.split(content_type_and_params_delimiter) From 19cfec28a8ee8f2044a883bc25406f7865fffeac Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Sun, 31 Dec 2017 22:18:19 -0600 Subject: [PATCH 03/10] CI --- .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/requests_open_source.iml | 11 + .idea/vcs.xml | 6 + .idea/workspace.xml | 398 +++++++++++++++++++++++++++++++++ .travis.yml | 4 +- 6 files changed, 429 insertions(+), 2 deletions(-) create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/requests_open_source.iml create mode 100644 .idea/vcs.xml create mode 100644 .idea/workspace.xml diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 00000000..96eb542b --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..85d7cf7a --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/requests_open_source.iml b/.idea/requests_open_source.iml new file mode 100644 index 00000000..67116063 --- /dev/null +++ b/.idea/requests_open_source.iml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..94a25f7f --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/workspace.xml b/.idea/workspace.xml new file mode 100644 index 00000000..f41d18e5 --- /dev/null +++ b/.idea/workspace.xml @@ -0,0 +1,398 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + header + encoding + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1514747970257 - - - 1514753175234 - - - 1514754159918 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 1ee87011..b7693f63 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,8 +3,8 @@ language: python python: # - "2.6" - "2.7" -# - "3.4" -# - "3.5" + - "3.4" + - "3.5" - "3.6" # - "3.7-dev" # - "pypy" -- appears to hang From 1988d9cf72c3a3a6da87968e04ad57fb32df01cb Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Mon, 1 Jan 2018 14:20:55 -0600 Subject: [PATCH 05/10] Move nested function up to module level and rename. Add more tests for function. --- requests/utils.py | 46 ++++++++++++++++++++++++++++++--------------- tests/test_utils.py | 37 +++++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/requests/utils.py b/requests/utils.py index 37e3e27d..118e7e1b 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -446,33 +446,49 @@ def get_encodings_from_content(content): xml_re.findall(content)) +def _parse_content_type_header(header): + """Returns content type and parameters from given header + + :param header: string + :return: tuple containing content type and dictionary of + parameters + """ + if not header: + return None + # append delimiter on end to ensure at least two elements when split by ';' + header += ';' + # split content type's main value from params + tokens = header.split(';', 1) + content_type_index = 0 + params_index = 1 + + content_type = tokens[content_type_index].strip() + params = tokens[params_index] + params_dict = dict() + + for param in params.split(';'): + if param and not param.isspace(): + param = param.strip() + key, value = param, True + if '=' in param: + param_tokens = [x.strip('\'" ') for x in param.split('=', 1)] + key, value = param_tokens[0], param_tokens[1] + params_dict[key] = value + return content_type, params_dict + def get_encoding_from_headers(headers): """Returns encodings from given HTTP Header Dict. :param headers: dictionary to extract encoding from. :rtype: str """ - def parse_header(content_type): - #Inner function to parse header - #append delimiter on end to ensure atleast two elements when split by ';' - content_type_and_params_delimiter = ';' - content_type += content_type_and_params_delimiter - - tokens = content_type.split(content_type_and_params_delimiter) - content_type_index = 0 - params_index = 1 - - content_type = tokens[content_type_index] - params = tokens[params_index] - params_dict = dict(param.split('=') for param in params.split()) - return content_type,params_dict content_type = headers.get('content-type') if not content_type: return None - content_type, params = parse_header(content_type) + content_type, params = _parse_content_type_header(content_type) if 'charset' in params: return params['charset'].strip("'\"") diff --git a/tests/test_utils.py b/tests/test_utils.py index 2dd16923..e734b8f8 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,7 +13,7 @@ from requests.cookies import RequestsCookieJar from requests.structures import CaseInsensitiveDict from requests.utils import ( address_in_network, dotted_netmask, extract_zipped_paths, - get_auth_from_url, get_encoding_from_headers, + get_auth_from_url, _parse_content_type_header, get_encoding_from_headers, get_encodings_from_content, get_environ_proxies, guess_filename, guess_json_utf, is_ipv4_address, is_valid_cidr, iter_slices, parse_dict_header, @@ -470,6 +470,41 @@ def test_parse_dict_header(value, expected): assert parse_dict_header(value) == expected +@pytest.mark.parametrize( + 'value, expected', ( + ( + None, + None + ), +( + '', + None + ), + ( + 'application/xml', + ('application/xml', dict()) + ), + ( + 'application/json ; charset=utf-8', + ('application/json', {'charset': 'utf-8'}) + ), + ( + 'text/plain', + ('text/plain', dict()) + ), + ( + 'multipart/form-data; boundary = something ; \'boundary2=something_else\' ; no_equals ', + ('multipart/form-data', {'boundary': 'something', 'boundary2': 'something_else', 'no_equals': True}) + ), + ( + 'application/json ;; ; ', + ('application/json', dict()) + ) + )) +def test__parse_content_type_header(value, expected): + assert _parse_content_type_header(value) == expected + + @pytest.mark.parametrize( 'value, expected', ( ( From 071796d83f1bfb79793170945fdb4f623a1f344a Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Wed, 3 Jan 2018 23:40:08 -0600 Subject: [PATCH 06/10] implement changes after code review --- requests/utils.py | 17 +++++------------ tests/test_utils.py | 8 -------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/requests/utils.py b/requests/utils.py index 118e7e1b..44b3e016 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -453,20 +453,12 @@ def _parse_content_type_header(header): :return: tuple containing content type and dictionary of parameters """ - if not header: - return None - # append delimiter on end to ensure at least two elements when split by ';' - header += ';' - # split content type's main value from params - tokens = header.split(';', 1) - content_type_index = 0 - params_index = 1 - content_type = tokens[content_type_index].strip() - params = tokens[params_index] - params_dict = dict() + tokens = header.split(';') + content_type, params = tokens[0].strip(), tokens[1:] + params_dict = {} # Using dict is actually slower than a dictionary literal. Weird but tru - for param in params.split(';'): + for param in params: if param and not param.isspace(): param = param.strip() key, value = param, True @@ -476,6 +468,7 @@ def _parse_content_type_header(header): params_dict[key] = value return content_type, params_dict + def get_encoding_from_headers(headers): """Returns encodings from given HTTP Header Dict. diff --git a/tests/test_utils.py b/tests/test_utils.py index e734b8f8..f89d15aa 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -472,14 +472,6 @@ def test_parse_dict_header(value, expected): @pytest.mark.parametrize( 'value, expected', ( - ( - None, - None - ), -( - '', - None - ), ( 'application/xml', ('application/xml', dict()) From 80a790443e693d982296db93ceebc9135b6efb9c Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Wed, 3 Jan 2018 23:41:41 -0600 Subject: [PATCH 07/10] implement changes after code review --- requests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requests/utils.py b/requests/utils.py index 44b3e016..958f694d 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -456,7 +456,7 @@ def _parse_content_type_header(header): tokens = header.split(';') content_type, params = tokens[0].strip(), tokens[1:] - params_dict = {} # Using dict is actually slower than a dictionary literal. Weird but tru + params_dict = {} for param in params: if param and not param.isspace(): From cb0914407b6bb8153c8be5d52bc497e1d10b04ac Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Thu, 4 Jan 2018 10:30:50 -0600 Subject: [PATCH 08/10] Continue to refactor, remove list comprehension, add double quotes test case. --- AUTHORS.rst | 1 + requests/utils.py | 16 ++++++++++------ tests/test_utils.py | 12 ++++++++---- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 8379f65c..481ac6c7 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -181,3 +181,4 @@ Patches and Suggestions - Taylor Hoff (`@PrimordialHelios `_) - Arthur Vigil (`@ahvigil `_) - Nehal J Wani (`@nehaljwani `_) +- Demetrios Bairaktaris (`@DemetriosBairaktaris `_) diff --git a/requests/utils.py b/requests/utils.py index 958f694d..6c2bf5f5 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -456,15 +456,19 @@ def _parse_content_type_header(header): tokens = header.split(';') content_type, params = tokens[0].strip(), tokens[1:] - params_dict = {} + params_dict = {} + items_to_strip = "\"' " for param in params: - if param and not param.isspace(): - param = param.strip() + param = param.strip() + if param: key, value = param, True - if '=' in param: - param_tokens = [x.strip('\'" ') for x in param.split('=', 1)] - key, value = param_tokens[0], param_tokens[1] + index_of_equals = param.find("=") + if index_of_equals != -1: + before_equals = slice(0, index_of_equals) + after_equals = slice(index_of_equals + 1, len(param)) + key = param[before_equals].strip(items_to_strip) + value = param[after_equals].strip(items_to_strip) params_dict[key] = value return content_type, params_dict diff --git a/tests/test_utils.py b/tests/test_utils.py index f89d15aa..53d27a26 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -474,7 +474,7 @@ def test_parse_dict_header(value, expected): 'value, expected', ( ( 'application/xml', - ('application/xml', dict()) + ('application/xml', {}) ), ( 'application/json ; charset=utf-8', @@ -482,15 +482,19 @@ def test_parse_dict_header(value, expected): ), ( 'text/plain', - ('text/plain', dict()) + ('text/plain', {}) ), ( 'multipart/form-data; boundary = something ; \'boundary2=something_else\' ; no_equals ', ('multipart/form-data', {'boundary': 'something', 'boundary2': 'something_else', 'no_equals': True}) ), ( - 'application/json ;; ; ', - ('application/json', dict()) + 'multipart/form-data; boundary = something ; \"boundary2=something_else\" ; no_equals ', + ('multipart/form-data', {'boundary': 'something', 'boundary2': 'something_else', 'no_equals': True}) + ), + ( + 'application/json ; ; ', + ('application/json', {}) ) )) def test__parse_content_type_header(value, expected): From 7deee699ada6e5ec0d41c7561d9b5fa4cd80e535 Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Thu, 4 Jan 2018 10:48:17 -0600 Subject: [PATCH 09/10] slice function removed --- requests/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/requests/utils.py b/requests/utils.py index 6c2bf5f5..8c1b9bec 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -465,10 +465,8 @@ def _parse_content_type_header(header): key, value = param, True index_of_equals = param.find("=") if index_of_equals != -1: - before_equals = slice(0, index_of_equals) - after_equals = slice(index_of_equals + 1, len(param)) - key = param[before_equals].strip(items_to_strip) - value = param[after_equals].strip(items_to_strip) + key = param[:index_of_equals].strip(items_to_strip) + value = param[index_of_equals + 1:].strip(items_to_strip) params_dict[key] = value return content_type, params_dict From e0ab287317fcde8fa4631bc7bee5aa1749bc4ac5 Mon Sep 17 00:00:00 2001 From: dbairaktaris1 Date: Thu, 4 Jan 2018 10:59:47 -0600 Subject: [PATCH 10/10] added more to test scenarios --- tests/test_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 53d27a26..01cabe23 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -484,12 +484,20 @@ def test_parse_dict_header(value, expected): 'text/plain', ('text/plain', {}) ), + ( + 'multipart/form-data; boundary = something ; boundary2=\'something_else\' ; no_equals ', + ('multipart/form-data', {'boundary': 'something', 'boundary2': 'something_else', 'no_equals': True}) + ), + ( + 'multipart/form-data; boundary = something ; boundary2="something_else" ; no_equals ', + ('multipart/form-data', {'boundary': 'something', 'boundary2': 'something_else', 'no_equals': True}) + ), ( 'multipart/form-data; boundary = something ; \'boundary2=something_else\' ; no_equals ', ('multipart/form-data', {'boundary': 'something', 'boundary2': 'something_else', 'no_equals': True}) ), ( - 'multipart/form-data; boundary = something ; \"boundary2=something_else\" ; no_equals ', + 'multipart/form-data; boundary = something ; "boundary2=something_else" ; no_equals ', ('multipart/form-data', {'boundary': 'something', 'boundary2': 'something_else', 'no_equals': True}) ), (