Files
requests3/requests/utils.py
T
2011-08-20 19:46:50 -04:00

140 lines
3.2 KiB
Python

# -*- coding: utf-8 -*-
"""
requests.utils
~~~~~~~~~~~~~~
This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""
import cgi
import cookielib
import re
import zlib
def dict_from_cookiejar(cookiejar):
    """Return a ``{name: value}`` dictionary built from a CookieJar.

    :param cookiejar: the jar to read; it is iterated to visit its cookies.
    :return: dict mapping each cookie's ``name`` to its ``value``. When
        several cookies share a name, the one visited last wins.
    """
    cookie_dict = {}
    # Iterating a CookieJar yields every cookie it holds, so there is no
    # need to walk the private ``_cookies`` mapping level by level.
    for cookie in cookiejar:
        cookie_dict[cookie.name] = cookie.value
    return cookie_dict
def cookiejar_from_dict(cookie_dict):
    """Build a CookieJar out of a key/value dictionary.

    An argument that already is a ``cookielib.CookieJar`` is handed back
    untouched; otherwise a fresh jar is created and populated through
    ``add_dict_to_cookiejar``.
    """
    already_a_jar = isinstance(cookie_dict, cookielib.CookieJar)
    if not already_a_jar:
        # Start from an empty jar and let the helper fill it in.
        return add_dict_to_cookiejar(cookielib.CookieJar(), cookie_dict)
    return cookie_dict
def add_dict_to_cookiejar(cj, cookie_dict):
    """Insert each key/value pair of ``cookie_dict`` into ``cj`` as a cookie.

    :param cj: the CookieJar to add cookies to (modified in place).
    :param cookie_dict: mapping of cookie names to cookie values.
    :return: the same ``cj`` object that was passed in.
    """
    # Attributes common to every cookie created here; only the name and
    # value differ from one cookie to the next.
    shared_attrs = dict(
        version=0,
        port=None,
        port_specified=False,
        domain='',
        domain_specified=False,
        domain_initial_dot=False,
        path='/',
        path_specified=True,
        secure=False,
        expires=None,
        discard=True,
        comment=None,
        comment_url=None,
        rest={'HttpOnly': None},
        rfc2109=False
    )
    for name, value in cookie_dict.items():
        cj.set_cookie(cookielib.Cookie(name=name, value=value, **shared_attrs))
    return cj
def get_encodings_from_content(content):
    """Collect the charsets declared in ``<meta>`` tags of ``content``.

    :param content: the string to search.
    :return: list of charset names, one per case-insensitive match, in
        order of appearance; empty when nothing matches.
    """
    meta_charset_pattern = r'<meta.*?charset=["\']*(.+?)["\'>]'
    return re.findall(meta_charset_pattern, content, flags=re.I)
def get_encoding_from_headers(headers):
    """Return the charset named in the ``content-type`` header, if any.

    :param headers: dict-like object of HTTP headers; the value is looked
        up with ``headers.get('content-type')``.
    :return: the ``charset`` parameter with surrounding quotes stripped, or
        ``None`` when the header is missing or empty, or carries no charset.
    """
    content_type = headers.get('content-type')
    # cgi.parse_header() cannot cope with None, so a response without a
    # content-type header must be handled before parsing.
    if not content_type:
        return None

    _, params = cgi.parse_header(content_type)
    if 'charset' in params:
        return params['charset'].strip("'\"")
    return None
def get_unicode_from_response(r):
    """Return the content of response ``r`` decoded to unicode.

    Decoding is attempted in this order:

    1. the charset from the ``content-type`` header;
    2. every charset found in ``<meta ... charset=XXX>`` tags of the content;
    3. the last charset attempted, with undecodable bytes replaced.

    :param r: response object; ``r.headers`` and ``r.content`` are read.
    :return: the decoded unicode string, or ``r.content`` unchanged when
        even the fallback cannot be applied (no charset was found, or the
        charset name is not a codec Python knows).
    """
    tried_encodings = []

    # Try charset from content-type.
    encoding = get_encoding_from_headers(r.headers)
    if encoding:
        try:
            return unicode(r.content, encoding)
        except (UnicodeError, LookupError):
            # LookupError: the header named a codec Python does not know.
            tried_encodings.append(encoding)

    # Try every encoding from <meta ... charset=XXX>.
    for encoding in get_encodings_from_content(r.content):
        if encoding in tried_encodings:
            continue
        try:
            return unicode(r.content, encoding)
        except (UnicodeError, LookupError, TypeError):
            tried_encodings.append(encoding)

    # Fall back: reuse the last encoding seen and replace undecodable bytes.
    # ``encoding`` may be None here (TypeError) or an unknown codec name
    # (LookupError); in both cases hand the raw content back.
    try:
        return unicode(r.content, encoding, errors='replace')
    except (TypeError, LookupError):
        return r.content
def decode_gzip(content):
    """Decompress a gzip-encoded string and return the result."""
    # Adding 16 to the window size makes zlib expect a gzip header and
    # trailer around the compressed data.
    gzip_wbits = zlib.MAX_WBITS + 16
    return zlib.decompress(content, gzip_wbits)