diff --git a/AUTHORS b/AUTHORS index 131c0f61..f0b66eda 100644 --- a/AUTHORS +++ b/AUTHORS @@ -7,6 +7,12 @@ Development Lead - Kenneth Reitz +Urllib3 +``````` + +- Andrey Petrov + + Patches and Suggestions ``````````````````````` @@ -49,4 +55,7 @@ Patches and Suggestions - Daniel Hengeveld - Dan Head - Bruno Renié -- David Fischer \ No newline at end of file +- David Fischer +- Joseph McCullough +- Juergen Brendel +- Juan Riaza diff --git a/HISTORY.rst b/HISTORY.rst index 36d47f7c..05ba2f49 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,21 @@ History ------- + +0.8.0 (2011-11-13) +++++++++++++++++++ + +* Keep-alive support! +* Complete removal of Urllib2 +* Complete removal of Poster +* Complete removal of CookieJars +* New ConnectionError raising +* Safe_mode for error catching +* prefetch parameter for request methods +* OPTION method +* Async pool size throttling +* File uploads send real names + 0.7.6 (2011-11-07) ++++++++++++++++++ diff --git a/README.rst b/README.rst index d5a323b2..5e8fa6db 100644 --- a/README.rst +++ b/README.rst @@ -27,7 +27,7 @@ See `the same code, without Requests `_. Requests allow you to send **HEAD**, **GET**, **POST**, **PUT**, **PATCH**, and **DELETE** HTTP requests. You can add headers, form data, multipart files, and parameters with simple Python dictionaries, and access the -response data in the same way. It's powered by urllib2, but it does +response data in the same way. It's powered by httplib, but it does all the hard work and crazy hacks for you. 
diff --git a/docs/_themes/kr/layout.html b/docs/_themes/kr/layout.html index 4a7affe2..7a9c7e0c 100644 --- a/docs/_themes/kr/layout.html +++ b/docs/_themes/kr/layout.html @@ -4,8 +4,6 @@ {% if theme_touch_icon %} {% endif %} - {% endblock %} {%- block relbar2 %}{% endblock %} diff --git a/docs/_themes/kr/static/flasky.css_t b/docs/_themes/kr/static/flasky.css_t index bb00e935..bf2a2664 100644 --- a/docs/_themes/kr/static/flasky.css_t +++ b/docs/_themes/kr/static/flasky.css_t @@ -387,7 +387,7 @@ a:hover tt { } -@media screen and (max-width: 600px) { +@media screen and (max-width: 870px) { div.sphinxsidebar { display: none; @@ -436,6 +436,96 @@ a:hover tt { display: none; } + + +} + + + +@media screen and (max-width: 875px) { + + body { + margin: 0; + padding: 20px 30px; + } + + div.documentwrapper { + float: none; + background: white; + } + + div.sphinxsidebar { + display: block; + float: none; + width: 102.5%; + margin: 50px -30px -20px -30px; + padding: 10px 20px; + background: #333; + color: white; + } + + div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, + div.sphinxsidebar h3 a { + color: white; + } + + div.sphinxsidebar a { + color: #aaa; + } + + div.sphinxsidebar p.logo { + display: none; + } + + div.document { + width: 100%; + margin: 0; + } + + div.related { + display: block; + margin: 0; + padding: 10px 0 20px 0; + } + + div.related ul, + div.related ul li { + margin: 0; + padding: 0; + } + + div.footer { + display: none; + } + + div.bodywrapper { + margin: 0; + } + + div.body { + min-height: 0; + padding: 0; + } + + .rtd_doc_footer { + display: none; + } + + .document { + width: auto; + } + + .footer { + width: auto; + } + + .footer { + width: auto; + } + + .github { + display: none; + } } diff --git a/docs/_themes/kr/static/small_flask.css b/docs/_themes/kr/static/small_flask.css deleted file mode 100644 index 8d55e95f..00000000 --- a/docs/_themes/kr/static/small_flask.css +++ /dev/null @@ -1,90 +0,0 @@ -/* - * small_flask.css_t - * 
~~~~~~~~~~~~~~~~~ - * - * :copyright: Copyright 2010 by Armin Ronacher. - * :license: Flask Design License, see LICENSE for details. - */ - -body { - margin: 0; - padding: 20px 30px; -} - -div.documentwrapper { - float: none; - background: white; -} - -div.sphinxsidebar { - display: block; - float: none; - width: 102.5%; - margin: 50px -30px -20px -30px; - padding: 10px 20px; - background: #333; - color: white; -} - -div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, -div.sphinxsidebar h3 a { - color: white; -} - -div.sphinxsidebar a { - color: #aaa; -} - -div.sphinxsidebar p.logo { - display: none; -} - -div.document { - width: 100%; - margin: 0; -} - -div.related { - display: block; - margin: 0; - padding: 10px 0 20px 0; -} - -div.related ul, -div.related ul li { - margin: 0; - padding: 0; -} - -div.footer { - display: none; -} - -div.bodywrapper { - margin: 0; -} - -div.body { - min-height: 0; - padding: 0; -} - -.rtd_doc_footer { - display: none; -} - -.document { - width: auto; -} - -.footer { - width: auto; -} - -.footer { - width: auto; -} - -.github { - display: none; -} \ No newline at end of file diff --git a/docs/api.rst b/docs/api.rst index 9b1b10f2..f77fc0ad 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -17,6 +17,15 @@ All of Request's functionality can be accessed by these 7 methods. They all return an instance of the :class:`Response ` object. .. autofunction:: request + +--------------------- + + +.. autoclass:: Response + :inherited-members: + +--------------------- + .. autofunction:: head .. autofunction:: get .. autofunction:: post @@ -25,11 +34,33 @@ They all return an instance of the :class:`Response ` object. .. autofunction:: delete ------------ +----------------- + +.. autofunction:: session -.. autoclass:: Response - :inherited-members: + +Exceptions +~~~~~~~~~~ + +.. module:: requests + +.. autoexception:: RequestException +.. autoexception:: ConnectionError +.. autoexception:: HTTPError +.. autoexception:: URLRequired +.. 
autoexception:: TooManyRedirects + + +.. _configurations: + +Configurations +-------------- + +.. automodule:: requests.defaults + + +.. _async: Async ----- @@ -56,6 +87,22 @@ Requests. .. module:: requests.utils +Status Code Lookup +~~~~~~~~~~~~~~~~~~ + +.. autofunction:: requests.codes + +:: + + >>> requests.codes['temporary_redirect'] + 307 + + >>> requests.codes.teapot + 418 + + >>> requests.codes['\o/'] + 200 + Cookies ~~~~~~~ @@ -80,26 +127,18 @@ These items are an internal component to Requests, and should never be seen by the end user (developer). This part of the API documentation exists for those who are extending the functionality of Requests. -Exceptions -~~~~~~~~~~ - -.. module:: requests - -.. autoexception:: HTTPError - -.. autoexception:: RequestException - -.. autoexception:: AuthenticationError -.. autoexception:: URLRequired -.. autoexception:: InvalidMethod -.. autoexception:: TooManyRedirects - - Classes ~~~~~~~ +.. autoclass:: requests.Response + :inherited-members: + .. autoclass:: requests.Request :inherited-members: +.. _sessionapi: + +.. autoclass:: requests.Session + :inherited-members: diff --git a/docs/community/faq.rst b/docs/community/faq.rst index dd6f80d9..bad71b88 100644 --- a/docs/community/faq.rst +++ b/docs/community/faq.rst @@ -73,7 +73,7 @@ Support for Python 3.x is coming *very* soon. Keep-alive Support? ------------------- -It's on the way. +Yep! Proxy Support? diff --git a/docs/community/out-there.rst b/docs/community/out-there.rst index 7870de9e..553c7444 100644 --- a/docs/community/out-there.rst +++ b/docs/community/out-there.rst @@ -1,7 +1,7 @@ Modules ======= -- `requests-oauth-hook `_, adds OAuth support to Requests. +- `requests-oauth `_, adds OAuth support to Requests. - `FacePy `_, a Python wrapper to the Facebook API. - `robotframework-requests `_, a Robot Framework API wrapper. - `fullerene `_, a Graphite Dashboard.
diff --git a/docs/index.rst b/docs/index.rst index 73da870c..bab640b0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,15 +33,14 @@ See `the same code, without Requests `_. Requests allow you to send **HEAD**, **GET**, **POST**, **PUT**, **PATCH**, and **DELETE** HTTP requests. You can add headers, form data, multipart files, and parameters with simple Python dictionaries, and access the -response data in the same way. It's powered by :py:class:`urllib2`, but it does -all the hard work and crazy hacks for you. +response data in the same way. It's powered by :py:class:`httplib` and :py:class:`urllib3`, and it strives to be as elegant and approachable as possible. Testimonials ------------ `The Washington Post `_, `Twitter, Inc `_, a U.S. Federal Institution, -NIH, +NIH, `Readability `_, and `Work for Pie `_ use Requests internally. diff --git a/docs/user/advanced.rst b/docs/user/advanced.rst index 90d11433..70514ef7 100644 --- a/docs/user/advanced.rst +++ b/docs/user/advanced.rst @@ -10,8 +10,8 @@ Session Objects --------------- The Session object allows you to persist certain parameters across -requests. It also establishes a CookieJar and passes it along -to any requests made from the Session instance. +requests. It also persists cookies across all requests made from the +Session instance. A session object has all the methods of the main Requests API. @@ -37,20 +37,63 @@ Sessions can also be used to provide default data to the request methods:: c.get('http://httpbin.org/headers', headers={'x-test2': 'true'}) -.. admonition:: Global Settings +Any dictionaries that you pass to a request method will be merged with the session-level values that are set. The method-level parameters override session parameters. - Certain parameters are best set in the ``config`` dictionary - (e.g. user agent header). +.. admonition:: Remove a Value From a Dict Parameter + + Sometimes you'll want to omit session-level keys from a dict parameter.
To do this, you simply set that key's value to ``None`` in the method-level parameter. It will automatically be omitted. + +All values that are contained within a session are directly available to you. See the :ref:`Session API Docs ` to learn more. + + +Body Content Workflow +---------------------- + +By default, when you make a request, the body of the response isn't downloaded immediately. The response headers are downloaded when you make a request, but the content isn't downloaded until you access the :class:`Response.content` attribute. + +Let's walk through it:: + + tarball_url = 'https://github.com/kennethreitz/requests/tarball/master' + r = requests.get(tarball_url) + +The request has been made, but the connection is still open. The response body has not been downloaded yet. :: + + r.content + +The content has been downloaded and cached. + +You can override this default behavior with the ``prefetch`` parameter:: + + r = requests.get(tarball_url, prefetch=True) + # Blocks until all of the response body has been downloaded. + + +Configuring Requests +-------------------- + +Sometimes you may want to configure a request to customize its behavior. To do +this, you can pass in a ``config`` dictionary to a request or session. See the :ref:`Configuration API Docs ` to learn more. + + +Keep-Alive +---------- + +Excellent news — thanks to urllib3, keep-alive is 100% automatic within a session! Any requests that you make within a session will automatically reuse the appropriate connection! + +If you'd like to disable keep-alive, you can simply set the ``keep_alive`` configuration to ``False``:: + + s = requests.session() + s.config['keep_alive'] = False Asynchronous Requests ---------------------- -Requests has first-class support for non-blocking i/o requests, powered +Requests has first-class support for concurrent requests, powered by gevent. This allows you to send a bunch of HTTP requests at the same First, let's import the async module.
Heads up — if you don't have -`gevent `_ this will fail:: +`gevent `_ this will fail:: from requests import async @@ -77,6 +120,12 @@ will also guarantee execution of the ``response`` hook, described below. :: >>> async.map(rs) [, , , ] +.. admonition:: Throttling + + The ``map`` function also takes a ``size`` parameter that specifies the number of connections to make at a time:: + + async.map(rs, size=5) + Event Hooks ----------- @@ -199,7 +248,7 @@ by your application, you can turn on verbose logging. To do so, just configure Requests with a stream to write to:: - >>> requests.settings.verbose = sys.stderr - >>> requests.get('http://httpbin.org/headers') + >>> my_config = {'verbose': sys.stderr} + >>> requests.get('http://httpbin.org/headers', config=my_config) 2011-08-17T03:04:23.380175 GET http://httpbin.org/headers diff --git a/docs/user/quickstart.rst b/docs/user/quickstart.rst index 17e503a2..4ead8755 100644 --- a/docs/user/quickstart.rst +++ b/docs/user/quickstart.rst @@ -39,6 +39,98 @@ We can read the content of the server's response:: >>> r.content '[{"repository":{"open_issues":0,"url":"https://github.com/... +Requests does its best to decode content from the server. Most unicode charsets, ``gzip``, and ``deflate`` encodings are all seamlessly decoded. + + +Make a POST Request +------------------- + +POST requests are equally simple:: + + r = requests.post("http://httpbin.org/post") + + +Typically, you want to send some form-encoded data — much like an HTML form. +To do this, simply pass a dictionary to the `data` argument.
Your dictionary of data will automatically be form-encoded when the request is made:: + + >>> payload = {'key1': 'value1', 'key2': 'value2'} + >>> r = requests.post("http://httpbin.org/post", data=payload) + >>> print r.content + { + "origin": "179.13.100.4", + "files": {}, + "form": { + "key2": "value2", + "key1": "value1" + }, + "url": "http://httpbin.org/post", + "args": {}, + "headers": { + "Content-Length": "23", + "Accept-Encoding": "identity, deflate, compress, gzip", + "Accept": "*/*", + "User-Agent": "python-requests/0.8.0", + "Host": "127.0.0.1:7077", + "Content-Type": "application/x-www-form-urlencoded" + }, + "data": "" + } + +There are many times that you want to send data that is not form-encoded. If you pass in a ``string`` instead of a ``dict``, that data will be posted directly. + +For example, the GitHub API v3 accepts JSON-Encoded POST/PATCH data:: + + url = 'https://api.github.com/some/endpoint' + payload = {'some': 'data'} + + r = requests.post(url, data=json.dumps(payload)) + + +Custom Headers +-------------- + +If you'd like to add HTTP headers to a request, simply pass in a ``dict`` to the +``headers`` parameter. 
+ +For example, we didn't specify our content-type in the previous example:: + + url = 'https://api.github.com/some/endpoint' + payload = {'some': 'data'} + headers = {'content-type': 'application/json'} + + r = requests.post(url, data=json.dumps(payload), headers=headers) + + +POST a Multipart-Encoded File +----------------------------- + +Requests makes it simple to upload Multipart-encoded files:: + + >>> url = 'http://httpbin.org/post' + >>> files = {'report.xls': open('report.xls', 'rb')} + + >>> r = requests.post(url, files=files) + >>> r.content + { + "origin": "179.13.100.4", + "files": { + "hmm": "" + }, + "form": {}, + "url": "http://httpbin.org/post", + "args": {}, + "headers": { + "Content-Length": "3196", + "Accept-Encoding": "identity, deflate, compress, gzip", + "Accept": "*/*", + "User-Agent": "python-requests/0.8.0", + "Host": "httpbin.org:80", + "Content-Type": "multipart/form-data; boundary=127.0.0.1.502.21746.1321131593.786.1" + }, + "data": "" + } + + Response Status Codes --------------------- @@ -54,7 +146,7 @@ reference:: >>> r.status_code == requests.codes.ok True -If we made a bad request, we can raise it with +If we made a bad request (non-200 response), we can raise it with :class:`Response.raise_for_status()`:: >>> _r = requests.get('http://httpbin.org/status/404') @@ -122,11 +214,6 @@ If a response contains some Cookies, you can get quick access to them:: >>> print r.cookies {'requests-is': 'awesome'} -The underlying CookieJar is also available for more advanced handling:: - - >>> r.request.cookiejar - - To send your own cookies to the server, you can use the ``cookies`` parameter:: @@ -142,9 +229,9 @@ Basic Authentication -------------------- Most web services require authentication. There many different types of -authentication, but the most common is called HTTP Basic Auth. +authentication, but the most common is HTTP Basic Auth. 
-Making requests with Basic Auth is easy, with Requests:: +Making requests with Basic Auth is extremely simple:: >>> requests.get('https://api.github.com/user', auth=('user', 'pass')) @@ -153,15 +240,82 @@ Making requests with Basic Auth is easy, with Requests:: Digest Authentication --------------------- -Another popular form of protecting web service is Digest Authentication. - -Requests supports it!:: +Another popular form of web service protection is Digest Authentication:: >>> url = 'http://httpbin.org/digest-auth/auth/user/pass' >>> requests.get(url, auth=('digest', 'user', 'pass')) +Redirection and History +----------------------- + +Requests will automatically perform location redirection while using idempotent methods. + +GitHub redirects all HTTP requests to HTTPS. Let's see what happens:: + + >>> r = requests.get('http://github.com') + >>> r.url + 'https://github.com/' + >>> r.status_code + 200 + >>> r.history + [] + +The :class:`Response.history` list contains a list of the +:class:`Request` objects that were created in order to complete the request. + +If you're using GET, HEAD, or OPTIONS, you can disable redirection +handling with the ``allow_redirects`` parameter:: + + >>> r = requests.get('http://github.com', allow_redirects=False) + >>> r.status_code + 301 + >>> r.history + [] + +If you're using POST, PUT, PATCH, *&c*, you can explicitly enable redirection as well:: + + >>> r = requests.post('http://github.com', allow_redirects=True) + >>> r.url + 'https://github.com/' + >>> r.history + [] + + +Timeouts +-------- + +You can tell requests to stop waiting for a response after a given number of seconds with the ``timeout`` parameter:: + + >>> requests.get('http://github.com', timeout=0.001) + Traceback (most recent call last): + File "", line 1, in + requests.exceptions.Timeout: Request timed out. + +.. admonition:: Note + + ``timeout`` only affects the connection process itself, not the downloading of the response body.
+ + + + +Errors and Exceptions +--------------------- + +In the event of a network problem (e.g. DNS failure, refused connection, etc.), +Requests will raise a :class:`ConnectionError` exception. + +In the event of the rare invalid HTTP response, Requests will raise +an :class:`HTTPError` exception. + +If a request times out, a :class:`Timeout` exception is raised. + +If a request exceeds the configured maximum number of redirections, a :class:`TooManyRedirects` exception is raised. + +All exceptions that Requests explicitly raises inherit from +:class:`requests.exceptions.RequestException`. + ----------------------- Ready for more? Check out the :ref:`advanced ` section. diff --git a/requests/__init__.py b/requests/__init__.py index 1140a523..e3a9d2a0 100644 --- a/requests/__init__.py +++ b/requests/__init__.py @@ -15,8 +15,8 @@ requests """ __title__ = 'requests' -__version__ = '0.7.6' -__build__ = 0x000706 +__version__ = '0.8.0' +__build__ = 0x000800 __author__ = 'Kenneth Reitz' __license__ = 'ISC' __copyright__ = 'Copyright 2011 Kenneth Reitz' @@ -24,10 +24,10 @@ __copyright__ = 'Copyright 2011 Kenneth Reitz' from . import utils from .models import Request, Response -from .api import request, get, head, post, patch, put, delete -from .sessions import session +from .api import request, get, head, post, patch, put, delete, options +from .sessions import session, Session from .status_codes import codes from .exceptions import ( - RequestException, AuthenticationError, Timeout, URLRequired, - TooManyRedirects + RequestException, Timeout, URLRequired, + TooManyRedirects, HTTPError, ConnectionError ) diff --git a/requests/api.py b/requests/api.py index a139f3d5..56702db1 100644 --- a/requests/api.py +++ b/requests/api.py @@ -13,8 +13,6 @@ This module implements the Requests API.
from .sessions import session -__all__ = ('request', 'get', 'head', 'post', 'patch', 'put', 'delete') - def request(method, url, params=None, @@ -28,6 +26,7 @@ def request(method, url, proxies=None, hooks=None, return_response=True, + prefetch=False, config=None): """Constructs and sends a :class:`Request `. Returns :class:`Response ` object. @@ -49,9 +48,21 @@ def request(method, url, s = session() return s.request( - method, url, params, data, headers, cookies, files, auth, - timeout, allow_redirects, proxies, hooks, return_response, - config + method=method, + url=url, + params=params, + data=data, + headers=headers, + cookies=cookies, + files=files, + auth=auth, + timeout=timeout, + allow_redirects=allow_redirects, + proxies=proxies, + hooks=hooks, + return_response=return_response, + config=config, + prefetch=prefetch ) @@ -64,7 +75,18 @@ def get(url, **kwargs): """ kwargs.setdefault('allow_redirects', True) - return request('GET', url, **kwargs) + return request('get', url, **kwargs) + + +def options(url, **kwargs): + """Sends a OPTIONS request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + kwargs.setdefault('allow_redirects', True) + return request('options', url, **kwargs) def head(url, **kwargs): @@ -75,10 +97,10 @@ def head(url, **kwargs): """ kwargs.setdefault('allow_redirects', True) - return request('HEAD', url, **kwargs) + return request('head', url, **kwargs) -def post(url, data='', **kwargs): +def post(url, data=None, **kwargs): """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. @@ -89,7 +111,7 @@ def post(url, data='', **kwargs): return request('post', url, data=data, **kwargs) -def put(url, data='', **kwargs): +def put(url, data=None, **kwargs): """Sends a PUT request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. 
@@ -100,7 +122,7 @@ def put(url, data='', **kwargs): return request('put', url, data=data, **kwargs) -def patch(url, data='', **kwargs): +def patch(url, data=None, **kwargs): """Sends a PATCH request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. diff --git a/requests/async.py b/requests/async.py index 92839c30..8bafb1ee 100644 --- a/requests/async.py +++ b/requests/async.py @@ -12,6 +12,7 @@ by gevent. All API methods return a ``Request`` instance (as opposed to try: import gevent from gevent import monkey as curious_george + from gevent.pool import Pool except ImportError: raise RuntimeError('Gevent is required for requests.async.') @@ -19,12 +20,11 @@ except ImportError: curious_george.patch_all(thread=False) from . import api -from .hooks import dispatch_hook __all__ = ( 'map', - 'get', 'head', 'post', 'put', 'patch', 'delete', 'request' + 'get', 'options', 'head', 'post', 'put', 'patch', 'delete', 'request' ) @@ -34,6 +34,7 @@ def patched(f): def wrapped(*args, **kwargs): kwargs['return_response'] = False + kwargs['prefetch'] = True return f(*args, **kwargs) @@ -53,6 +54,7 @@ def send(r, pools=None): # Patched requests.api functions. get = patched(api.get) +options = patched(api.options) head = patched(api.head) post = patched(api.post) put = patched(api.put) @@ -61,15 +63,21 @@ delete = patched(api.delete) request = patched(api.request) -def map(requests, prefetch=True): +def map(requests, prefetch=True, size=None): """Concurrently converts a list of Requests to Responses. :param requests: a collection of Request objects. :param prefetch: If False, the content will not be downloaded immediately. + :param size: Specifies the number of requests to make at a time. If None, no throttling occurs. 
""" - jobs = [gevent.spawn(send, r) for r in requests] - gevent.joinall(jobs) + if size: + pool = Pool(size) + pool.map(send, requests) + pool.join() + else: + jobs = [gevent.spawn(send, r) for r in requests] + gevent.joinall(jobs) if prefetch: [r.response.content for r in requests] diff --git a/requests/defaults.py b/requests/defaults.py index 770001f9..7a5a3fb8 100644 --- a/requests/defaults.py +++ b/requests/defaults.py @@ -6,15 +6,18 @@ requests.defaults This module provides the Requests configuration defaults. -settings parameters: +Configurations: -- :base_headers: - Sets default User-Agent to `python-requests.org` -- :accept_gzip: - Whether or not to accept gzip-compressed data -- :proxies: - http proxies? -- :verbose: - display verbose information? -- :timeout: - timeout time until request terminates -- :max_redirects: - maximum number of allowed redirects? -- :decode_unicode: - whether or not to accept unicode? +:base_headers: Default HTTP headers. +:verbose: Stream to write request logging to. +:timeout: Seconds until request timeout. +:max_redirects: Maximum njumber of redirects allowed within a request. +:decode_unicode: Decode unicode responses automatically? +:keep_alive: Reuse HTTP Connections? +:max_retries: The number of times a request should be retried in the event of a connection failure. +:safe_mode: If true, Requests will catch all errors. +:pool_maxsize: The maximium size of an HTTP connection pool. +:pool_connections: The number of active HTTP connection pools to use. 
""" @@ -29,11 +32,11 @@ defaults['base_headers'] = { 'Accept': '*/*' } -defaults['proxies'] = {} defaults['verbose'] = None -defaults['timeout'] = None defaults['max_redirects'] = 30 defaults['decode_unicode'] = True -defaults['timeout_fallback'] = True -# defaults['keep_alive'] = True -# defaults['max_connections'] = 10 \ No newline at end of file +defaults['pool_connections'] = 10 +defaults['pool_maxsize'] = 10 +defaults['max_retries'] = 0 +defaults['safe_mode'] = False +defaults['keep_alive'] = True diff --git a/requests/exceptions.py b/requests/exceptions.py index 101d1ce6..d20a95cd 100644 --- a/requests/exceptions.py +++ b/requests/exceptions.py @@ -12,8 +12,11 @@ class RequestException(Exception): """There was an ambiguous exception that occurred while handling your request.""" -class AuthenticationError(RequestException): - """The authentication credentials provided were invalid.""" +class HTTPError(RequestException): + """An HTTP error occured.""" + +class ConnectionError(RequestException): + """A Connection error occured.""" class Timeout(RequestException): """The request timed out.""" diff --git a/requests/models.py b/requests/models.py index f9fc002e..34de8005 100644 --- a/requests/models.py +++ b/requests/models.py @@ -1,32 +1,33 @@ - # -*- coding: utf-8 -*- """ requests.models ~~~~~~~~~~~~~~~ +This module contains the primary objects that power Requests. 
""" import urllib -import urllib2 -import socket import zlib +from Cookie import SimpleCookie from urlparse import urlparse, urlunparse, urljoin from datetime import datetime from .auth import dispatch as auth_dispatch from .hooks import dispatch_hook from .structures import CaseInsensitiveDict -from .packages.poster.encode import multipart_encode -from .packages.poster.streaminghttp import register_openers, get_handlers from .status_codes import codes -from .exceptions import Timeout, URLRequired, TooManyRedirects -from .monkeys import Request as _Request -from .monkeys import HTTPRedirectHandler +from .packages.urllib3.exceptions import MaxRetryError +from .packages.urllib3.exceptions import SSLError as _SSLError +from .packages.urllib3.exceptions import HTTPError as _HTTPError +from .packages.urllib3 import connectionpool, poolmanager +from .packages.urllib3.filepost import encode_multipart_formdata +from .exceptions import ( + Timeout, URLRequired, TooManyRedirects, HTTPError, ConnectionError) from .utils import ( - dict_from_cookiejar, get_unicode_from_response, - stream_decode_response_unicode, decode_gzip, stream_decode_gzip) + get_unicode_from_response, stream_decode_response_unicode, + decode_gzip, stream_decode_gzip, guess_filename) REDIRECT_STATI = (codes.moved, codes.found, codes.other, codes.temporary_moved) @@ -52,7 +53,8 @@ class Request(object): allow_redirects=False, proxies=None, hooks=None, - config=None): + config=None, + _poolmanager=None): #: Float describes the timeout of the request. # (Use socket.setdefaulttimeout() as fallback) @@ -62,7 +64,7 @@ class Request(object): self.url = url #: Dictionary of HTTP Headers to attach to the :class:`Request `. - self.headers = headers + self.headers = dict(headers or []) #: Dictionary of files to multipart upload (``{filename: content}``). self.files = files @@ -77,6 +79,7 @@ class Request(object): #: Dictionary or byte of querystring data to attach to the #: :class:`Request `. 
self.params = None + self.params = dict(params or []) #: True if :class:`Request ` is part of a redirect chain (disables history #: and HTTPError storage). @@ -86,7 +89,7 @@ class Request(object): self.allow_redirects = allow_redirects # Dictionary mapping protocol to the URL of the proxy (e.g. {'http': 'foo.bar:3128'}) - self.proxies = proxies + self.proxies = dict(proxies or []) self.data, self._enc_data = self._encode_params(data) self.params, self._enc_params = self._encode_params(params) @@ -100,10 +103,10 @@ class Request(object): self.auth = auth_dispatch(auth) #: CookieJar to attach to :class:`Request `. - self.cookies = cookies + self.cookies = dict(cookies or []) #: Dictionary of configurations for this request. - self.config = config + self.config = dict(config or []) #: True if Request has been sent. self.sent = False @@ -111,6 +114,9 @@ class Request(object): #: Event-handling hooks. self.hooks = hooks + #: Session. + self.session = None + if headers: headers = CaseInsensitiveDict(self.headers) else: @@ -121,6 +127,7 @@ class Request(object): headers[k] = v self.headers = headers + self._poolmanager = _poolmanager # Pre-request hook. 
r = dispatch_hook('pre_request', hooks, self) @@ -131,39 +138,6 @@ class Request(object): return '' % (self.method) - def _get_opener(self): - """Creates appropriate opener object for urllib2.""" - - _handlers = [] - - if self.cookies is not None: - _handlers.append(urllib2.HTTPCookieProcessor(self.cookies)) - - if self.proxies: - _handlers.append(urllib2.ProxyHandler(self.proxies)) - - _handlers.append(HTTPRedirectHandler) - - if not _handlers: - return urllib2.urlopen - - if self.data or self.files: - _handlers.extend(get_handlers()) - - opener = urllib2.build_opener(*_handlers) - - if self.headers: - # Allow default headers in the opener to be overloaded - normal_keys = [k.capitalize() for k in self.headers] - for key, val in opener.addheaders[:]: - if key not in normal_keys: - continue - # Remove it, we have a value to take its place - opener.addheaders.remove((key, val)) - - return opener.open - - def _build_response(self, resp, is_error=False): """Build internal :class:`Response ` object from given response. @@ -173,24 +147,41 @@ class Request(object): def build(resp): response = Response() + + # Pass settings over. response.config = self.config - response.status_code = getattr(resp, 'code', None) - try: - response.headers = CaseInsensitiveDict(getattr(resp.info(), 'dict', None)) - response.raw = resp + if resp: - if self.cookies: - response.cookies = dict_from_cookiejar(self.cookies) + # Fallback to None if there's no staus_code, for whatever reason. + response.status_code = getattr(resp, 'status', None) + # Make headers case-insensitive. + response.headers = CaseInsensitiveDict(getattr(resp, 'headers', None)) - except AttributeError: - pass + # Start off with our local cookies. + cookies = self.cookies or dict() + + # Add new cookies from the server. 
+ if 'set-cookie' in response.headers: + cookie_header = response.headers['set-cookie'] + + c = SimpleCookie() + c.load(cookie_header) + + for k,v in c.items(): + cookies.update({k: v.value}) + + # Save cookies in Response. + response.cookies = cookies + + # Save original resopnse for later. + response.raw = resp if is_error: response.error = resp - response.url = getattr(resp, 'url', None) + response.url = self._build_url() return response @@ -198,6 +189,8 @@ class Request(object): history = [] r = build(resp) + cookies = self.cookies + self.cookies.update(r.cookies) if r.status_code in REDIRECT_STATI and not self.redirect: @@ -206,8 +199,6 @@ class Request(object): ((r.status_code is codes.see_other) or (self.allow_redirects)) ): - r.raw.close() - if not len(history) < self.config.get('max_redirects'): raise TooManyRedirects() @@ -231,25 +222,36 @@ class Request(object): else: method = self.method + # Remove the cookie headers that were sent. + headers = self.headers + try: + del headers['Cookie'] + except KeyError: + pass + request = Request( url=url, - headers=self.headers, + headers=headers, files=self.files, method=method, - # data=self.data, - # params=self.params, + params=self.session.params, auth=self._auth, - cookies=self.cookies, + cookies=cookies, redirect=True, - config=self.config + config=self.config, + _poolmanager=self._poolmanager ) + request.send() + cookies.update(request.response.cookies) r = request.response + self.cookies.update(r.cookies) r.history = history self.response = r self.response.request = self + self.response.cookies.update(self.cookies) @staticmethod @@ -264,6 +266,9 @@ class Request(object): returns it twice. """ + if hasattr(data, '__iter__'): + data = dict(data) + if hasattr(data, 'items'): result = [] for k, vs in data.items(): @@ -278,11 +283,20 @@ class Request(object): def _build_url(self): """Build the actual URL to use.""" + if not self.url: + raise URLRequired() + # Support for unicode domain names and paths. 
scheme, netloc, path, params, query, fragment = urlparse(self.url) + + if not scheme: + raise ValueError() + netloc = netloc.encode('idna') + if isinstance(path, unicode): path = path.encode('utf-8') + path = urllib.quote(urllib.unquote(path)) self.url = str(urlunparse([ scheme, netloc, path, params, query, fragment ])) @@ -295,7 +309,7 @@ class Request(object): return self.url - def send(self, anyway=False): + def send(self, anyway=False, prefetch=False): """Sends the request. Returns True of successful, false if not. If there was an HTTPError during transmission, self.response.status_code will contain the HTTPError code. @@ -306,10 +320,6 @@ class Request(object): already been sent. """ - # Some people... - if not self.url: - raise URLRequired - # Logging if self.config.get('verbose'): self.config.get('verbose').write('%s %s %s\n' % ( @@ -319,86 +329,120 @@ class Request(object): # Build the URL url = self._build_url() - # Attach uploaded files. + # Nottin' on you. + body = None + content_type = None + + # Multi-part file uploads. if self.files: - register_openers() + if not isinstance(self.data, basestring): - # Add form-data to the multipart. - if self.data: - self.files.update(self.data) + try: + fields = self.data.copy() + except AttributeError: + fields = dict(self.data) - data, headers = multipart_encode(self.files) + for (k, v) in self.files.items(): + fields.update({k: (guess_filename(k) or k, v.read())}) + (body, content_type) = encode_multipart_formdata(fields) + else: + pass + # TODO: Conflict? else: - data = self._enc_data - headers = {} + if self.data: + + body = self._enc_data + if isinstance(self.data, basestring): + content_type = None + else: + content_type = 'application/x-www-form-urlencoded' + + # Add content-type if it wasn't explicitly provided. + if (content_type) and (not 'content-type' in self.headers): + self.headers['Content-Type'] = content_type + if self.auth: auth_func, auth_args = self.auth + # Allow auth to make its changes. 
r = auth_func(self, *auth_args) + # Update self to reflect the auth changes. self.__dict__.update(r.__dict__) - # Build the Urllib2 Request. - req = _Request(url, data=data, headers=headers, method=self.method) + _p = urlparse(url) + proxy = self.proxies.get(_p.scheme) - # Add the headers to the request. - if self.headers: - for k,v in self.headers.iteritems(): - req.add_header(k, v) + if proxy: + conn = poolmanager.proxy_from_url(url) + else: + # Check to see if keep_alive is allowed. + if self.config.get('keep_alive'): + conn = self._poolmanager.connection_from_url(url) + else: + conn = connectionpool.connection_from_url(url) if not self.sent or anyway: + if self.cookies: + + # Skip if 'cookie' header is explicitly set. + if 'cookie' not in self.headers: + + # Simple cookie with our dict. + c = SimpleCookie() + for (k, v) in self.cookies.items(): + c[k] = v + + # Turn it into a header. + cookie_header = c.output(header='').strip() + + # Attach Cookie header to request. + self.headers['Cookie'] = cookie_header + try: - opener = self._get_opener() - try: + # Send the request. - resp = opener(req, timeout=self.timeout) - - except TypeError, err: - # timeout argument is new since Python v2.6 - if not 'timeout' in str(err): - raise - - if self.config.get('timeout_fallback'): - # fall-back and use global socket timeout (This is not thread-safe!) 
- old_timeout = socket.getdefaulttimeout() - socket.setdefaulttimeout(self.timeout) - - resp = opener(req) - - if self.config.get('timeout_fallback'): - # restore global timeout - socket.setdefaulttimeout(old_timeout) - - if self.cookies is not None: - self.cookies.extract_cookies(resp, req) - - except (urllib2.HTTPError, urllib2.URLError), why: - if hasattr(why, 'reason'): - if isinstance(why.reason, socket.timeout): - why = Timeout(why) - elif isinstance(why.reason, socket.error): - why = Timeout(why) - - self._build_response(why, is_error=True) - - else: - self._build_response(resp) - self.response.ok = True + r = conn.urlopen( + method=self.method, + url=url, + body=body, + headers=self.headers, + redirect=False, + assert_same_host=False, + preload_content=prefetch, + decode_content=False, + retries=self.config.get('max_retries', 0), + timeout=self.timeout, + ) - self.sent = self.response.ok + except MaxRetryError, e: + if not self.config.get('safe_mode', False): + raise ConnectionError(e) + else: + r = None - # Response manipulation hook. - self.response = dispatch_hook('response', self.hooks, self.response) + except (_SSLError, _HTTPError), e: + if not self.config.get('safe_mode', False): + raise Timeout('Request timed out.') - # Post-request hook. - r = dispatch_hook('post_request', self.hooks, self) - self.__dict__.update(r.__dict__) + self._build_response(r) - return self.sent + # Response manipulation hook. + self.response = dispatch_hook('response', self.hooks, self.response) + + # Post-request hook. + r = dispatch_hook('post_request', self.hooks, self) + self.__dict__.update(r.__dict__) + + # If prefetch is True, mark content as consumed. + if prefetch: + self.response._content_consumed = True + + return self.sent class Response(object): @@ -427,9 +471,6 @@ class Response(object): #: Final URL location of Response. self.url = None - #: True if no :attr:`error` occurred. - self.ok = False - #: Resulting :class:`HTTPError` of request, if one occurred. 
self.error = None @@ -442,20 +483,27 @@ class Response(object): self.request = None #: A dictionary of Cookies the server sent back. - self.cookies = None + self.cookies = {} #: Dictionary of configurations for this request. - self.config = None + self.config = {} def __repr__(self): return '' % (self.status_code) - def __nonzero__(self): """Returns true if :attr:`status_code` is 'OK'.""" + return self.ok + + @property + def ok(self): + try: + self.raise_for_status() + except HTTPError: + return False + return True - return not self.error def iter_content(self, chunk_size=10 * 1024, decode_unicode=None): """Iterates over the response data. This avoids reading the content @@ -464,8 +512,9 @@ class Response(object): length of each item returned as decoding can take place. """ if self._content_consumed: - raise RuntimeError('The content for this response was ' - 'already consumed') + raise RuntimeError( + 'The content for this response was already consumed' + ) def generate(): while 1: @@ -474,15 +523,21 @@ class Response(object): break yield chunk self._content_consumed = True + gen = generate() + if 'gzip' in self.headers.get('content-encoding', ''): gen = stream_decode_gzip(gen) + if decode_unicode is None: decode_unicode = self.config.get('decode_unicode') + if decode_unicode: gen = stream_decode_response_unicode(gen, self) + return gen + @property def content(self): """Content of the response, in bytes or unicode @@ -520,6 +575,17 @@ class Response(object): def raise_for_status(self): """Raises stored :class:`HTTPError` or :class:`URLError`, if one occurred.""" + if self.error: raise self.error + if (self.status_code >= 300) and (self.status_code < 400): + raise HTTPError('%s Redirection' % self.status_code) + + elif (self.status_code >= 400) and (self.status_code < 500): + raise HTTPError('%s Client Error' % self.status_code) + + elif (self.status_code >= 500) and (self.status_code < 600): + raise HTTPError('%s Server Error' % self.status_code) + + diff --git 
a/requests/packages/__init__.py b/requests/packages/__init__.py index ab2669e8..d62c4b71 100644 --- a/requests/packages/__init__.py +++ b/requests/packages/__init__.py @@ -1,3 +1,3 @@ from __future__ import absolute_import -from . import poster +from . import urllib3 diff --git a/requests/packages/poster/__init__.py b/requests/packages/poster/__init__.py deleted file mode 100644 index 6e216fce..00000000 --- a/requests/packages/poster/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2010 Chris AtLee -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -"""poster module - -Support for streaming HTTP uploads, and multipart/form-data encoding - -```poster.version``` is a 3-tuple of integers representing the version number. -New releases of poster will always have a version number that compares greater -than an older version of poster. -New in version 0.6.""" - -from __future__ import absolute_import - -from . import streaminghttp -from . 
import encode - -version = (0, 8, 0) # Thanks JP! diff --git a/requests/packages/poster/encode.py b/requests/packages/poster/encode.py deleted file mode 100644 index cf2298d7..00000000 --- a/requests/packages/poster/encode.py +++ /dev/null @@ -1,414 +0,0 @@ -"""multipart/form-data encoding module - -This module provides functions that faciliate encoding name/value pairs -as multipart/form-data suitable for a HTTP POST or PUT request. - -multipart/form-data is the standard way to upload files over HTTP""" - -__all__ = ['gen_boundary', 'encode_and_quote', 'MultipartParam', - 'encode_string', 'encode_file_header', 'get_body_size', 'get_headers', - 'multipart_encode'] - -try: - import uuid - def gen_boundary(): - """Returns a random string to use as the boundary for a message""" - return uuid.uuid4().hex -except ImportError: - import random, sha - def gen_boundary(): - """Returns a random string to use as the boundary for a message""" - bits = random.getrandbits(160) - return sha.new(str(bits)).hexdigest() - -import urllib, re, os, mimetypes -try: - from email.header import Header -except ImportError: - # Python 2.4 - from email.Header import Header - -def encode_and_quote(data): - """If ``data`` is unicode, return urllib.quote_plus(data.encode("utf-8")) - otherwise return urllib.quote_plus(data)""" - if data is None: - return None - - if isinstance(data, unicode): - data = data.encode("utf-8") - return urllib.quote_plus(data) - -def _strify(s): - """If s is a unicode string, encode it to UTF-8 and return the results, - otherwise return str(s), or None if s is None""" - if s is None: - return None - if isinstance(s, unicode): - return s.encode("utf-8") - return str(s) - -class MultipartParam(object): - """Represents a single parameter in a multipart/form-data request - - ``name`` is the name of this parameter. - - If ``value`` is set, it must be a string or unicode object to use as the - data for this parameter. 
- - If ``filename`` is set, it is what to say that this parameter's filename - is. Note that this does not have to be the actual filename any local file. - - If ``filetype`` is set, it is used as the Content-Type for this parameter. - If unset it defaults to "text/plain; charset=utf8" - - If ``filesize`` is set, it specifies the length of the file ``fileobj`` - - If ``fileobj`` is set, it must be a file-like object that supports - .read(). - - Both ``value`` and ``fileobj`` must not be set, doing so will - raise a ValueError assertion. - - If ``fileobj`` is set, and ``filesize`` is not specified, then - the file's size will be determined first by stat'ing ``fileobj``'s - file descriptor, and if that fails, by seeking to the end of the file, - recording the current position as the size, and then by seeking back to the - beginning of the file. - - ``cb`` is a callable which will be called from iter_encode with (self, - current, total), representing the current parameter, current amount - transferred, and the total size. 
- """ - def __init__(self, name, value=None, filename=None, filetype=None, - filesize=None, fileobj=None, cb=None): - self.name = Header(name).encode() - self.value = _strify(value) - if filename is None: - self.filename = None - else: - if isinstance(filename, unicode): - # Encode with XML entities - self.filename = filename.encode("ascii", "xmlcharrefreplace") - else: - self.filename = str(filename) - self.filename = self.filename.encode("string_escape").\ - replace('"', '\\"') - self.filetype = _strify(filetype) - - self.filesize = filesize - self.fileobj = fileobj - self.cb = cb - - if self.value is not None and self.fileobj is not None: - raise ValueError("Only one of value or fileobj may be specified") - - if fileobj is not None and filesize is None: - # Try and determine the file size - try: - self.filesize = os.fstat(fileobj.fileno()).st_size - except (OSError, AttributeError): - try: - fileobj.seek(0, 2) - self.filesize = fileobj.tell() - fileobj.seek(0) - except: - raise ValueError("Could not determine filesize") - - def __cmp__(self, other): - attrs = ['name', 'value', 'filename', 'filetype', 'filesize', 'fileobj'] - myattrs = [getattr(self, a) for a in attrs] - oattrs = [getattr(other, a) for a in attrs] - return cmp(myattrs, oattrs) - - def reset(self): - if self.fileobj is not None: - self.fileobj.seek(0) - elif self.value is None: - raise ValueError("Don't know how to reset this parameter") - - @classmethod - def from_file(cls, paramname, filename): - """Returns a new MultipartParam object constructed from the local - file at ``filename``. 
- - ``filesize`` is determined by os.path.getsize(``filename``) - - ``filetype`` is determined by mimetypes.guess_type(``filename``)[0] - - ``filename`` is set to os.path.basename(``filename``) - """ - - return cls(paramname, filename=os.path.basename(filename), - filetype=mimetypes.guess_type(filename)[0], - filesize=os.path.getsize(filename), - fileobj=open(filename, "rb")) - - @classmethod - def from_params(cls, params): - """Returns a list of MultipartParam objects from a sequence of - name, value pairs, MultipartParam instances, - or from a mapping of names to values - - The values may be strings or file objects, or MultipartParam objects. - MultipartParam object names must match the given names in the - name,value pairs or mapping, if applicable.""" - if hasattr(params, 'items'): - params = params.items() - - retval = [] - for item in params: - if isinstance(item, cls): - retval.append(item) - continue - name, value = item - if isinstance(value, cls): - assert value.name == name - retval.append(value) - continue - if hasattr(value, 'read'): - # Looks like a file object - filename = getattr(value, 'name', None) - if filename is not None: - filetype = mimetypes.guess_type(filename)[0] - else: - filetype = None - - retval.append(cls(name=name, filename=filename, - filetype=filetype, fileobj=value)) - else: - retval.append(cls(name, value)) - return retval - - def encode_hdr(self, boundary): - """Returns the header of the encoding of this parameter""" - boundary = encode_and_quote(boundary) - - headers = ["--%s" % boundary] - - if self.filename: - disposition = 'form-data; name="%s"; filename="%s"' % (self.name, - self.filename) - else: - disposition = 'form-data; name="%s"' % self.name - - headers.append("Content-Disposition: %s" % disposition) - - if self.filetype: - filetype = self.filetype - else: - filetype = "text/plain; charset=utf-8" - - headers.append("Content-Type: %s" % filetype) - - headers.append("") - headers.append("") - - return 
"\r\n".join(headers) - - def encode(self, boundary): - """Returns the string encoding of this parameter""" - if self.value is None: - value = self.fileobj.read() - else: - value = self.value - - if re.search("^--%s$" % re.escape(boundary), value, re.M): - raise ValueError("boundary found in encoded string") - - return "%s%s\r\n" % (self.encode_hdr(boundary), value) - - def iter_encode(self, boundary, blocksize=4096): - """Yields the encoding of this parameter - If self.fileobj is set, then blocks of ``blocksize`` bytes are read and - yielded.""" - total = self.get_size(boundary) - current = 0 - if self.value is not None: - block = self.encode(boundary) - current += len(block) - yield block - if self.cb: - self.cb(self, current, total) - else: - block = self.encode_hdr(boundary) - current += len(block) - yield block - if self.cb: - self.cb(self, current, total) - last_block = "" - encoded_boundary = "--%s" % encode_and_quote(boundary) - boundary_exp = re.compile("^%s$" % re.escape(encoded_boundary), - re.M) - while True: - block = self.fileobj.read(blocksize) - if not block: - current += 2 - yield "\r\n" - if self.cb: - self.cb(self, current, total) - break - last_block += block - if boundary_exp.search(last_block): - raise ValueError("boundary found in file data") - last_block = last_block[-len(encoded_boundary)-2:] - current += len(block) - yield block - if self.cb: - self.cb(self, current, total) - - def get_size(self, boundary): - """Returns the size in bytes that this param will be when encoded - with the given boundary.""" - if self.filesize is not None: - valuesize = self.filesize - else: - valuesize = len(self.value) - - return len(self.encode_hdr(boundary)) + 2 + valuesize - -def encode_string(boundary, name, value): - """Returns ``name`` and ``value`` encoded as a multipart/form-data - variable. 
``boundary`` is the boundary string used throughout - a single request to separate variables.""" - - return MultipartParam(name, value).encode(boundary) - -def encode_file_header(boundary, paramname, filesize, filename=None, - filetype=None): - """Returns the leading data for a multipart/form-data field that contains - file data. - - ``boundary`` is the boundary string used throughout a single request to - separate variables. - - ``paramname`` is the name of the variable in this request. - - ``filesize`` is the size of the file data. - - ``filename`` if specified is the filename to give to this field. This - field is only useful to the server for determining the original filename. - - ``filetype`` if specified is the MIME type of this file. - - The actual file data should be sent after this header has been sent. - """ - - return MultipartParam(paramname, filesize=filesize, filename=filename, - filetype=filetype).encode_hdr(boundary) - -def get_body_size(params, boundary): - """Returns the number of bytes that the multipart/form-data encoding - of ``params`` will be.""" - size = sum(p.get_size(boundary) for p in MultipartParam.from_params(params)) - return size + len(boundary) + 6 - -def get_headers(params, boundary): - """Returns a dictionary with Content-Type and Content-Length headers - for the multipart/form-data encoding of ``params``.""" - headers = {} - boundary = urllib.quote_plus(boundary) - headers['Content-Type'] = "multipart/form-data; boundary=%s" % boundary - headers['Content-Length'] = str(get_body_size(params, boundary)) - return headers - -class multipart_yielder: - def __init__(self, params, boundary, cb): - self.params = params - self.boundary = boundary - self.cb = cb - - self.i = 0 - self.p = None - self.param_iter = None - self.current = 0 - self.total = get_body_size(params, boundary) - - def __iter__(self): - return self - - def next(self): - """generator function to yield multipart/form-data representation - of parameters""" - if 
self.param_iter is not None: - try: - block = self.param_iter.next() - self.current += len(block) - if self.cb: - self.cb(self.p, self.current, self.total) - return block - except StopIteration: - self.p = None - self.param_iter = None - - if self.i is None: - raise StopIteration - elif self.i >= len(self.params): - self.param_iter = None - self.p = None - self.i = None - block = "--%s--\r\n" % self.boundary - self.current += len(block) - if self.cb: - self.cb(self.p, self.current, self.total) - return block - - self.p = self.params[self.i] - self.param_iter = self.p.iter_encode(self.boundary) - self.i += 1 - return self.next() - - def reset(self): - self.i = 0 - self.current = 0 - for param in self.params: - param.reset() - -def multipart_encode(params, boundary=None, cb=None): - """Encode ``params`` as multipart/form-data. - - ``params`` should be a sequence of (name, value) pairs or MultipartParam - objects, or a mapping of names to values. - Values are either strings parameter values, or file-like objects to use as - the parameter value. The file-like objects must support .read() and either - .fileno() or both .seek() and .tell(). - - If ``boundary`` is set, then it as used as the MIME boundary. Otherwise - a randomly generated boundary will be used. In either case, if the - boundary string appears in the parameter values a ValueError will be - raised. - - If ``cb`` is set, it should be a callback which will get called as blocks - of data are encoded. It will be called with (param, current, total), - indicating the current parameter being encoded, the current amount encoded, - and the total amount to encode. - - Returns a tuple of `datagen`, `headers`, where `datagen` is a - generator that will yield blocks of data that make up the encoded - parameters, and `headers` is a dictionary with the assoicated - Content-Type and Content-Length headers. 
- - Examples: - - >>> datagen, headers = multipart_encode( [("key", "value1"), ("key", "value2")] ) - >>> s = "".join(datagen) - >>> assert "value2" in s and "value1" in s - - >>> p = MultipartParam("key", "value2") - >>> datagen, headers = multipart_encode( [("key", "value1"), p] ) - >>> s = "".join(datagen) - >>> assert "value2" in s and "value1" in s - - >>> datagen, headers = multipart_encode( {"key": "value1"} ) - >>> s = "".join(datagen) - >>> assert "value2" not in s and "value1" in s - - """ - if boundary is None: - boundary = gen_boundary() - else: - boundary = urllib.quote_plus(boundary) - - headers = get_headers(params, boundary) - params = MultipartParam.from_params(params) - - return multipart_yielder(params, boundary, cb), headers diff --git a/requests/packages/poster/streaminghttp.py b/requests/packages/poster/streaminghttp.py deleted file mode 100644 index 1b591d4b..00000000 --- a/requests/packages/poster/streaminghttp.py +++ /dev/null @@ -1,199 +0,0 @@ -"""Streaming HTTP uploads module. - -This module extends the standard httplib and urllib2 objects so that -iterable objects can be used in the body of HTTP requests. - -In most cases all one should have to do is call :func:`register_openers()` -to register the new streaming http handlers which will take priority over -the default handlers, and then you can use iterable objects in the body -of HTTP requests. - -**N.B.** You must specify a Content-Length header if using an iterable object -since there is no way to determine in advance the total size that will be -yielded, and there is no way to reset an interator. - -Example usage: - ->>> from StringIO import StringIO ->>> import urllib2, poster.streaminghttp - ->>> opener = poster.streaminghttp.register_openers() - ->>> s = "Test file data" ->>> f = StringIO(s) - ->>> req = urllib2.Request("http://localhost:5000", f, -... 
{'Content-Length': str(len(s))}) -""" - -import httplib, urllib2, socket -from httplib import NotConnected - -__all__ = ['StreamingHTTPConnection', 'StreamingHTTPRedirectHandler', - 'StreamingHTTPHandler', 'register_openers'] - -if hasattr(httplib, 'HTTPS'): - __all__.extend(['StreamingHTTPSHandler', 'StreamingHTTPSConnection']) - -class _StreamingHTTPMixin: - """Mixin class for HTTP and HTTPS connections that implements a streaming - send method.""" - def send(self, value): - """Send ``value`` to the server. - - ``value`` can be a string object, a file-like object that supports - a .read() method, or an iterable object that supports a .next() - method. - """ - # Based on python 2.6's httplib.HTTPConnection.send() - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise NotConnected() - - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - # - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. 
- if self.debuglevel > 0: - print "send:", repr(value) - try: - blocksize = 8192 - if hasattr(value, 'read') : - if hasattr(value, 'seek'): - value.seek(0) - if self.debuglevel > 0: - print "sendIng a read()able" - data = value.read(blocksize) - while data: - self.sock.sendall(data) - data = value.read(blocksize) - elif hasattr(value, 'next'): - if hasattr(value, 'reset'): - value.reset() - if self.debuglevel > 0: - print "sendIng an iterable" - for data in value: - self.sock.sendall(data) - else: - self.sock.sendall(value) - except socket.error, v: - if v[0] == 32: # Broken pipe - self.close() - raise - -class StreamingHTTPConnection(_StreamingHTTPMixin, httplib.HTTPConnection): - """Subclass of `httplib.HTTPConnection` that overrides the `send()` method - to support iterable body objects""" - -class StreamingHTTPRedirectHandler(urllib2.HTTPRedirectHandler): - """Subclass of `urllib2.HTTPRedirectHandler` that overrides the - `redirect_request` method to properly handle redirected POST requests - - This class is required because python 2.5's HTTPRedirectHandler does - not remove the Content-Type or Content-Length headers when requesting - the new resource, but the body of the original request is not preserved. - """ - - handler_order = urllib2.HTTPRedirectHandler.handler_order - 1 - - # From python2.6 urllib2's HTTPRedirectHandler - def redirect_request(self, req, fp, code, msg, headers, newurl): - """Return a Request or None in response to a redirect. - - This is called by the http_error_30x methods when a - redirection response is received. If a redirection should - take place, return a new Request to allow http_error_30x to - perform the redirect. Otherwise, raise HTTPError if no-one - else should try to handle this url. Return None if you can't - but another Handler might. 
- """ - m = req.get_method() - if (code in (301, 302, 303, 307) and m in ("GET", "HEAD") - or code in (301, 302, 303) and m == "POST"): - # Strictly (according to RFC 2616), 301 or 302 in response - # to a POST MUST NOT cause a redirection without confirmation - # from the user (of urllib2, in this case). In practice, - # essentially all clients do redirect in this case, so we - # do the same. - # be conciliant with URIs containing a space - newurl = newurl.replace(' ', '%20') - newheaders = dict((k, v) for k, v in req.headers.items() - if k.lower() not in ( - "content-length", "content-type") - ) - return urllib2.Request(newurl, - headers=newheaders, - origin_req_host=req.get_origin_req_host(), - unverifiable=True) - else: - raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp) - -class StreamingHTTPHandler(urllib2.HTTPHandler): - """Subclass of `urllib2.HTTPHandler` that uses - StreamingHTTPConnection as its http connection class.""" - - handler_order = urllib2.HTTPHandler.handler_order - 1 - - def http_open(self, req): - """Open a StreamingHTTPConnection for the given request""" - return self.do_open(StreamingHTTPConnection, req) - - def http_request(self, req): - """Handle a HTTP request. 
Make sure that Content-Length is specified - if we're using an interable value""" - # Make sure that if we're using an iterable object as the request - # body, that we've also specified Content-Length - if req.has_data(): - data = req.get_data() - if hasattr(data, 'read') or hasattr(data, 'next'): - if not req.has_header('Content-length'): - raise ValueError( - "No Content-Length specified for iterable body") - return urllib2.HTTPHandler.do_request_(self, req) - -if hasattr(httplib, 'HTTPS'): - class StreamingHTTPSConnection(_StreamingHTTPMixin, - httplib.HTTPSConnection): - """Subclass of `httplib.HTTSConnection` that overrides the `send()` - method to support iterable body objects""" - - class StreamingHTTPSHandler(urllib2.HTTPSHandler): - """Subclass of `urllib2.HTTPSHandler` that uses - StreamingHTTPSConnection as its http connection class.""" - - handler_order = urllib2.HTTPSHandler.handler_order - 1 - - def https_open(self, req): - return self.do_open(StreamingHTTPSConnection, req) - - def https_request(self, req): - # Make sure that if we're using an iterable object as the request - # body, that we've also specified Content-Length - if req.has_data(): - data = req.get_data() - if hasattr(data, 'read') or hasattr(data, 'next'): - if not req.has_header('Content-length'): - raise ValueError( - "No Content-Length specified for iterable body") - return urllib2.HTTPSHandler.do_request_(self, req) - - -def get_handlers(): - handlers = [StreamingHTTPHandler, StreamingHTTPRedirectHandler] - if hasattr(httplib, "HTTPS"): - handlers.append(StreamingHTTPSHandler) - return handlers - -def register_openers(): - """Register the streaming http handlers in the global urllib2 default - opener object. 
- - Returns the created OpenerDirector object.""" - opener = urllib2.build_opener(*get_handlers()) - - urllib2.install_opener(opener) - - return opener diff --git a/requests/packages/urllib3/__init__.py b/requests/packages/urllib3/__init__.py new file mode 100644 index 00000000..20b1fb4e --- /dev/null +++ b/requests/packages/urllib3/__init__.py @@ -0,0 +1,48 @@ +# urllib3/__init__.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +urllib3 - Thread-safe connection pooling and re-using. +""" + +__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' +__license__ = 'MIT' +__version__ = '1.0.2' + + +from .connectionpool import ( + HTTPConnectionPool, + HTTPSConnectionPool, + connection_from_url, + get_host, + make_headers) + + +from .exceptions import ( + HTTPError, + MaxRetryError, + SSLError, + TimeoutError) + +from .poolmanager import PoolManager, ProxyManager, proxy_from_url +from .response import HTTPResponse +from .filepost import encode_multipart_formdata + + +# Set default logging handler to avoid "No handler found" warnings. +import logging +try: + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + +logging.getLogger(__name__).addHandler(NullHandler()) + +# ... Clean up. 
class AccessEntry(object):
    """
    One record in the access log of a :class:`RecentlyUsedContainer`.

    ``is_valid`` is cleared when a newer access to the same key supersedes
    this record (or the key is deleted), which lets stale records be skipped
    lazily instead of being searched for and removed right away.
    """
    __slots__ = ('key', 'is_valid')

    def __init__(self, key, is_valid=True):
        self.key = key
        self.is_valid = is_valid


# Sentinel that distinguishes "key is absent" from a stored falsy value.
_MISSING = object()


class RecentlyUsedContainer(dict):
    """
    Provides a dict-like that maintains up to ``maxsize`` keys while throwing
    away the least-recently-used keys beyond ``maxsize``.

    Only ``[]`` access, ``[]`` assignment, ``del`` and :meth:`get` take part
    in the usage tracking.
    """

    # If len(self.access_log) exceeds self._maxsize * CLEANUP_FACTOR, then we
    # will attempt to cleanup the invalidated entries in the access_log
    # datastructure during the next 'get' operation.
    CLEANUP_FACTOR = 10

    def __init__(self, maxsize=10):
        self._maxsize = maxsize

        # Not used by this class; kept so the attribute stays available.
        self._container = {}

        # We use a deque to store our keys ordered by the last access.
        self.access_log = deque()
        self.access_log_lock = RLock()

        # We look up the access log entry by the key to invalidate it so we can
        # insert a new authoritative entry at the head without having to dig
        # and find the old entry for removal immediately.
        self.access_lookup = {}

        # Trigger a cleanup of the access log when it gets past this size.
        self.access_log_limit = maxsize * self.CLEANUP_FACTOR

    def _invalidate_entry(self, key):
        "If exists: Invalidate old entry and return it."
        old_entry = self.access_lookup.get(key)
        if old_entry is not None:
            old_entry.is_valid = False

        return old_entry

    def _push_entry(self, key):
        "Push entry onto our access log, invalidate the old entry if exists."
        self._invalidate_entry(key)

        new_entry = AccessEntry(key)
        self.access_lookup[key] = new_entry

        # try/finally so the lock is released even if the deque call raises.
        self.access_log_lock.acquire()
        try:
            self.access_log.appendleft(new_entry)
        finally:
            self.access_log_lock.release()

    def _prune_entries(self, num):
        "Pop entries from our access log until we popped ``num`` valid ones."
        while num > 0:
            self.access_log_lock.acquire()
            try:
                # Raises IndexError on an empty log; the lock is still freed.
                p = self.access_log.pop()
            finally:
                self.access_log_lock.release()

            if not p.is_valid:
                continue  # Invalidated entry, skip

            dict.pop(self, p.key, None)
            self.access_lookup.pop(p.key, None)
            num -= 1

    def _prune_invalidated_entries(self):
        "Rebuild our access_log without the invalidated entries."
        self.access_log_lock.acquire()
        try:
            self.access_log = deque(e for e in self.access_log if e.is_valid)
        finally:
            self.access_log_lock.release()

    def _get_ordered_access_keys(self):
        "Return ordered access keys for inspection. Used for testing."
        self.access_log_lock.acquire()
        try:
            return [e.key for e in self.access_log if e.is_valid]
        finally:
            self.access_log_lock.release()

    def __getitem__(self, key):
        """
        Return the value stored under ``key`` and mark it as most recently
        used. Raises :class:`KeyError` only when the key is really absent;
        stored falsy values (``0``, ``''``, ``None``) are returned normally.
        """
        item = dict.get(self, key, _MISSING)

        if item is _MISSING:
            raise KeyError(key)

        # Insert new entry with new high priority, also implicitly invalidates
        # the old entry.
        self._push_entry(key)

        if len(self.access_log) > self.access_log_limit:
            # Access log is getting too big, drop the invalidated entries.
            self._prune_invalidated_entries()

        return item

    def __setitem__(self, key, item):
        # Add item to our container and access log
        dict.__setitem__(self, key, item)
        self._push_entry(key)

        # Discard invalid and excess entries
        self._prune_entries(len(self) - self._maxsize)

    def __delitem__(self, key):
        self._invalidate_entry(key)
        self.access_lookup.pop(key, None)
        dict.__delitem__(self, key)

    def get(self, key, default=None):
        "Like ``self[key]`` (counts as an access) but returns ``default`` if absent."
        try:
            return self[key]
        except KeyError:
            return default
class ConnectionPool(object):
    """
    Base class for all connection pools, such as
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.
    """
    pass


class HTTPConnectionPool(ConnectionPool, RequestMethods):
    """
    Thread-safe connection pool for one host.

    :param host:
        Host used for this HTTP Connection (e.g. "localhost"), passed into
        :class:`httplib.HTTPConnection`.

    :param port:
        Port used for this HTTP Connection (None is equivalent to 80), passed
        into :class:`httplib.HTTPConnection`.

    :param strict:
        Causes BadStatusLine to be raised if the status line can't be parsed
        as a valid HTTP/1.0 or 1.1 status line, passed into
        :class:`httplib.HTTPConnection`.

    :param timeout:
        Socket timeout for each individual connection, can be a float. None
        disables timeout.

    :param maxsize:
        Number of connections to save that can be reused. More than 1 is useful
        in multithreaded situations. If ``block`` is set to false, more
        connections will be created but they will not be saved once they've
        been used.

    :param block:
        If set to True, no more than ``maxsize`` connections will be used at
        a time. When no free connections are available, the call will block
        until a connection has been released. This is a useful side effect for
        particular multithreaded situations where one does not want to use more
        than maxsize connections per host to prevent flooding.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    scheme = 'http'

    def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
                 block=False, headers=None):
        self.host = host
        self.port = port
        self.strict = strict
        self.timeout = timeout
        self.pool = Queue(maxsize)
        self.block = block
        self.headers = headers or {}

        # Fill the queue up so that doing get() on it will block properly.
        # Each ``None`` is an empty slot that becomes a connection on demand.
        for _ in xrange(maxsize):
            self.pool.put(None)

        # These are mostly for testing and debugging purposes.
        self.num_connections = 0
        self.num_requests = 0

    def _new_conn(self):
        """
        Return a fresh :class:`httplib.HTTPConnection`.
        """
        self.num_connections += 1
        log.info("Starting new HTTP connection (%d): %s" %
                 (self.num_connections, self.host))
        return HTTPConnection(host=self.host, port=self.port)

    def _get_conn(self, timeout=None):
        """
        Get a connection. Will return a pooled connection if one is available.

        If no connections are available and :prop:`.block` is ``False``, then a
        fresh connection is returned.

        :param timeout:
            Seconds to wait before giving up and raising
            :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
            :prop:`.block` is ``True``.
        """
        conn = None
        try:
            conn = self.pool.get(block=self.block, timeout=timeout)

            # If this is a persistent connection, check if it got disconnected
            if conn and conn.sock and select([conn.sock], [], [], 0.0)[0]:
                # Either data is buffered (bad), or the connection is dropped.
                log.info("Resetting dropped connection: %s" % self.host)
                conn.close()

        except Empty:
            if self.block:
                raise EmptyPoolError("Pool reached maximum size and no more "
                                     "connections are allowed.")
            pass  # Oh well, we'll create a new connection then

        return conn or self._new_conn()

    def _put_conn(self, conn):
        """
        Put a connection back into the pool.

        :param conn:
            Connection object for the current host and port as returned by
            :meth:`._new_conn` or :meth:`._get_conn`.

        If the pool is already full, the connection is discarded because we
        exceeded maxsize. If connections are discarded frequently, then maxsize
        should be increased.
        """
        try:
            self.pool.put(conn, block=False)
        except Full:
            # This should never happen if self.block == True
            log.warning("HttpConnectionPool is full, discarding connection: %s"
                        % self.host)

    def _discard_conn(self, conn):
        """
        Close a connection that can no longer be trusted and give its pool
        slot back as an empty placeholder.

        Without the placeholder a ``block=True`` pool would lose one slot per
        failed request and eventually block forever in :meth:`._get_conn`.
        ``conn`` may be ``None`` when the failure happened after the slot was
        taken but before a connection object was available.
        """
        if conn is not None:
            try:
                conn.close()
            except (HTTPException, SocketError):
                # Best effort: the connection is being thrown away anyway.
                pass
        self._put_conn(None)

    def _make_request(self, conn, method, url, timeout=_Default,
                      **httplib_request_kw):
        """
        Perform a request on a given httplib connection object taken from our
        pool and return the raw httplib response.

        ``timeout`` overrides the pool's default socket timeout for this one
        request; remaining keyword arguments go to ``conn.request``.
        """
        self.num_requests += 1

        if timeout is _Default:
            timeout = self.timeout

        conn.request(method, url, **httplib_request_kw)
        conn.sock.settimeout(timeout)
        httplib_response = conn.getresponse()

        log.debug("\"%s %s %s\" %s %s" %
                  (method, url,
                   conn._http_vsn_str,  # pylint: disable-msg=W0212
                   httplib_response.status, httplib_response.length))

        return httplib_response

    def is_same_host(self, url):
        """
        Check if the given ``url`` is a member of the same host as this
        connection pool.
        """
        # TODO: Add optional support for socket.gethostbyname checking.
        return (url.startswith('/') or
                get_host(url) == (self.scheme, self.host, self.port))

    def urlopen(self, method, url, body=None, headers=None, retries=3,
                redirect=True, assert_same_host=True, timeout=_Default,
                pool_timeout=None, release_conn=None, **response_kw):
        """
        Get a connection from the pool and perform an HTTP request. This is the
        lowest level call for making a request, so you'll need to specify all
        the raw details.

        .. note::

           More commonly, it's appropriate to use a convenience method provided
           by :class:`.RequestMethods`, such as :meth:`.request`.

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param body:
            Data to send in the request body (useful for creating
            POST requests, see HTTPConnectionPool.post_url for
            more convenience).

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. If None, pool headers are used. If provided,
            these headers completely replace any pool-specific headers.

        :param retries:
            Number of retries to allow before raising a MaxRetryError exception.

        :param redirect:
            Automatically handle redirects (status codes 301, 302, 303, 307),
            each redirect counts as a retry.

        :param assert_same_host:
            If ``True``, will make sure that the host of the pool requests is
            consistent else will raise HostChangedError. When False, you can
            use the pool on an HTTP proxy and request foreign hosts.

        :param timeout:
            If specified, overrides the default timeout for this one request.

        :param pool_timeout:
            If set and the pool is set to block=True, then this method will
            block for ``pool_timeout`` seconds and raise EmptyPoolError if no
            connection is available within the time period.

        :param release_conn:
            If False, then the urlopen call will not release the connection
            back into the pool once a response is received. This is useful if
            you're not preloading the response's content immediately. You will
            need to call ``r.release_conn()`` on the response ``r`` to return
            the connection back into the pool. If None, it takes the value of
            ``response_kw.get('preload_content', True)``.

        :param response_kw:
            Additional keyword parameters are passed to
            :meth:`urllib3.response.HTTPResponse.from_httplib`

        Raises MaxRetryError, HostChangedError, TimeoutError, SSLError, or
        EmptyPoolError (from :meth:`._get_conn`) as described above.
        """
        # Function-scope import: only needed to fetch the active exception in
        # a way that is valid on every supported interpreter.
        import sys

        if headers is None:
            headers = self.headers

        if retries < 0:
            raise MaxRetryError("Max retries exceeded for url: %s" % url)

        # Options the caller set explicitly must survive a retry or redirect.
        # Only explicit values are forwarded, so subclasses overriding
        # ``urlopen`` with a narrower signature keep working for plain calls.
        follow_kw = dict(response_kw)
        if timeout is not _Default:
            follow_kw['timeout'] = timeout
        if pool_timeout is not None:
            follow_kw['pool_timeout'] = pool_timeout
        if release_conn is not None:
            follow_kw['release_conn'] = release_conn

        if release_conn is None:
            release_conn = response_kw.get('preload_content', True)

        # The timeout that really applies, used for error reporting only.
        if timeout is _Default:
            effective_timeout = self.timeout
        else:
            effective_timeout = timeout

        # Check host
        if assert_same_host and not self.is_same_host(url):
            host = "%s://%s" % (self.scheme, self.host)
            if self.port:
                host = "%s:%d" % (host, self.port)

            raise HostChangedError("Connection pool with host '%s' tried to "
                                   "open a foreign host: %s" % (host, url))

        conn = None
        error = None

        try:
            # Request a connection from the queue
            # (Could raise SocketError: Bad file descriptor)
            conn = self._get_conn(timeout=pool_timeout)

            # Make the request on the httplib connection object
            httplib_response = self._make_request(conn, method, url,
                                                  timeout=timeout,
                                                  body=body, headers=headers)

            # If we're going to release the connection in ``finally:``, then
            # the response doesn't need to know about the connection. Otherwise
            # it will also try to release it and we'll have a double-release
            # mess.
            response_conn = not release_conn and conn

            # Import httplib's response into our own wrapper object
            response = HTTPResponse.from_httplib(httplib_response,
                                                 pool=self,
                                                 connection=response_conn,
                                                 **response_kw)

            # else:
            #     The connection will be put back into the pool when
            #     ``response.release_conn()`` is called (implicitly by
            #     ``response.read()``)

        except (SocketTimeout, Empty):
            # Timed out either by socket or queue. The connection may still
            # receive the late response, so it must not be reused.
            self._discard_conn(conn)
            conn = None
            if effective_timeout is None:
                raise TimeoutError("Request timed out")
            raise TimeoutError("Request timed out after %f seconds" %
                               effective_timeout)

        except BaseSSLError:
            # SSL certificate error
            self._discard_conn(conn)
            conn = None
            raise SSLError(sys.exc_info()[1])

        except (HTTPException, SocketError):
            # Connection broken, discard. Its slot is handed back empty so the
            # next _get_conn() creates a replacement.
            error = sys.exc_info()[1]
            self._discard_conn(conn)
            conn = None

        finally:
            if conn and release_conn:
                # Put the connection back to be reused
                self._put_conn(conn)

        if error is not None:
            log.warning("Retrying (%d attempts remain) after connection "
                        "broken by '%r': %s" % (retries, error, url))
            return self.urlopen(method, url, body, headers, retries - 1,
                                redirect, assert_same_host,
                                **follow_kw)  # Try again

        # Handle redirection
        if (redirect and
                response.status in [301, 302, 303, 307] and
                'location' in response.headers):  # Redirect, retry
            log.info("Redirecting %s -> %s" %
                     (url, response.headers.get('location')))
            return self.urlopen(method, response.headers.get('location'), body,
                                headers, retries - 1, redirect,
                                assert_same_host, **follow_kw)

        return response
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, tuple, or string.
        ``True`` translates to 'gzip,deflate'.
        List or tuple will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    Returns a new dictionary holding only the requested headers.

    Example: ::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    # Function-scope import keeps this helper self-contained.
    import base64

    headers = {}
    if accept_encoding:
        if isinstance(accept_encoding, str):
            pass
        elif isinstance(accept_encoding, (list, tuple)):
            accept_encoding = ','.join(accept_encoding)
        else:
            accept_encoding = 'gzip,deflate'
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    if basic_auth:
        # b64encode never inserts line breaks. The 'base64' string codec
        # wraps its output every 76 characters, which put raw newlines into
        # the header value for long credentials.
        try:
            token = base64.b64encode(basic_auth)
        except TypeError:
            # Text string on an interpreter where b64encode wants bytes.
            token = base64.b64encode(basic_auth.encode('utf-8'))
        if not isinstance(token, str):
            token = token.decode('ascii')
        headers['authorization'] = 'Basic ' + token

    return headers


def get_host(url):
    """
    Given a url, return its scheme, host and port (None if it's not there).

    The scheme defaults to 'http' when the url does not carry one. A leading
    ``//`` (scheme-relative url) is accepted. Raises :class:`ValueError` if
    the port is present but not a number.

    For example: ::

        >>> get_host('http://google.com/mail/')
        ('http', 'google.com', None)
        >>> get_host('google.com:80')
        ('http', 'google.com', 80)
    """
    # This code is actually similar to urlparse.urlsplit, but much
    # simplified for our needs.
    port = None
    scheme = 'http'

    # Only treat '://' as the scheme separator when it comes before any path,
    # so a url embedded in the path or query is not mistaken for the scheme.
    sep = url.find('://')
    if sep > 0 and '/' not in url[:sep]:
        scheme, url = url[:sep], url[sep + 3:]
    elif url.startswith('//'):
        url = url[2:]

    # The host[:port] part ends at the first '/', '?' or '#'.
    for delimiter in '/?#':
        url = url.split(delimiter, 1)[0]

    if ':' in url:
        url, port = url.split(':', 1)
        # 'host:' with nothing after the colon means "no port given".
        port = int(port) if port else None

    return scheme, url, port
+ + Example: :: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, host, port = get_host(url) + if scheme == 'https': + return HTTPSConnectionPool(host, port=port, **kw) + else: + return HTTPConnectionPool(host, port=port, **kw) diff --git a/requests/packages/urllib3/contrib/__init__.py b/requests/packages/urllib3/contrib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/requests/packages/urllib3/contrib/ntlmpool.py b/requests/packages/urllib3/contrib/ntlmpool.py new file mode 100644 index 00000000..c5f010e1 --- /dev/null +++ b/requests/packages/urllib3/contrib/ntlmpool.py @@ -0,0 +1,117 @@ +# urllib3/contrib/ntlmpool.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +""" +NTLM authenticating pool, contributed by erikcederstran + +Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 +""" + +import httplib +from logging import getLogger +from ntlm import ntlm + +from urllib3 import HTTPSConnectionPool + + +log = getLogger(__name__) + + +class NTLMConnectionPool(HTTPSConnectionPool): + """ + Implements an NTLM authentication version of an urllib3 connection pool + """ + + scheme = 'https' + + def __init__(self, user, pw, authurl, *args, **kwargs): + """ + authurl is a random URL on the server that is protected by NTLM. + user is the Windows user, probably in the DOMAIN\username format. + pw is the password for the user. + """ + super(NTLMConnectionPool, self).__init__(*args, **kwargs) + self.authurl = authurl + self.rawuser = user + user_parts = user.split('\\', 1) + self.domain = user_parts[0].upper() + self.user = user_parts[1] + self.pw = pw + + def _new_conn(self): + # Performs the NTLM handshake that secures the connection. The socket + # must be kept open while requests are performed. 
+ self.num_connections += 1 + log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % + (self.num_connections, self.host, self.authurl)) + + headers = {} + headers['Connection'] = 'Keep-Alive' + req_header = 'Authorization' + resp_header = 'www-authenticate' + + conn = httplib.HTTPSConnection(host=self.host, port=self.port) + + # Send negotiation message + headers[req_header] = ( + 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + reshdr = dict(res.getheaders()) + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % reshdr) + log.debug('Response data: %s [...]' % res.read(100)) + + # Remove the reference to the socket, so that it can not be closed by + # the response object (we want to keep the socket open) + res.fp = None + + # Server should respond with a challenge message + auth_header_values = reshdr[resp_header].split(', ') + auth_header_value = None + for s in auth_header_values: + if s[:5] == 'NTLM ': + auth_header_value = s[5:] + if auth_header_value is None: + raise Exception('Unexpected %s response header: %s' % + (resp_header, reshdr[resp_header])) + + # Send authentication message + ServerChallenge, NegotiateFlags = \ + ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) + auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, + self.user, + self.domain, + self.pw, + NegotiateFlags) + headers[req_header] = 'NTLM %s' % auth_msg + log.debug('Request headers: %s' % headers) + conn.request('GET', self.authurl, None, headers) + res = conn.getresponse() + log.debug('Response status: %s %s' % (res.status, res.reason)) + log.debug('Response headers: %s' % dict(res.getheaders())) + log.debug('Response data: %s [...]' % res.read()[:100]) + if res.status != 200: + if res.status == 401: + raise Exception('Server rejected request: wrong ' + 'username or password') 
## Exceptions

class HTTPError(Exception):
    "Base exception used by this module."
    pass


class SSLError(HTTPError):
    "Raised when SSL certificate fails in an HTTPS connection."
    # Derives from HTTPError like every other exception here, so that
    # ``except HTTPError`` really covers all errors raised by this package.
    pass


class MaxRetryError(HTTPError):
    "Raised when the maximum number of retries is exceeded."
    pass


class TimeoutError(HTTPError):
    "Raised when a socket timeout occurs."
    pass


class HostChangedError(HTTPError):
    "Raised when an existing pool gets a request for a foreign host."
    pass


class EmptyPoolError(HTTPError):
    "Raised when a pool runs out of connections and no more are allowed."
    pass
+ """ + body = StringIO() + if boundary is None: + boundary = mimetools.choose_boundary() + + for fieldname, value in fields.iteritems(): + body.write('--%s\r\n' % (boundary)) + + if isinstance(value, tuple): + filename, data = value + writer(body).write('Content-Disposition: form-data; name="%s"; ' + 'filename="%s"\r\n' % (fieldname, filename)) + body.write('Content-Type: %s\r\n\r\n' % + (get_content_type(filename))) + else: + data = value + writer(body).write('Content-Disposition: form-data; name="%s"\r\n' + % (fieldname)) + body.write('Content-Type: text/plain\r\n\r\n') + + if isinstance(data, int): + data = str(data) # Backwards compatibility + + if isinstance(data, unicode): + writer(body).write(data) + else: + body.write(data) + + body.write('\r\n') + + body.write('--%s--\r\n' % (boundary)) + + content_type = 'multipart/form-data; boundary=%s' % boundary + + return body.getvalue(), content_type diff --git a/requests/packages/urllib3/poolmanager.py b/requests/packages/urllib3/poolmanager.py new file mode 100644 index 00000000..c08e327f --- /dev/null +++ b/requests/packages/urllib3/poolmanager.py @@ -0,0 +1,128 @@ +# urllib3/poolmanager.py +# Copyright 2008-2011 Andrey Petrov and contributors (see CONTRIBUTORS.txt) +# +# This module is part of urllib3 and is released under +# the MIT License: http://www.opensource.org/licenses/mit-license.php + +from ._collections import RecentlyUsedContainer +from .connectionpool import ( + HTTPConnectionPool, HTTPSConnectionPool, + get_host, connection_from_url, +) + + +__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] + + +from .request import RequestMethods +from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool + + +pool_classes_by_scheme = { + 'http': HTTPConnectionPool, + 'https': HTTPSConnectionPool, +} + +port_by_scheme = { + 'http': 80, + 'https': 443, +} + + +class PoolManager(RequestMethods): + """ + Allows for arbitrary requests while transparently keeping track of + necessary connection 
pools for you. + + :param num_pools: + Number of connection pools to cache before discarding the least recently + used pool. + + :param \**connection_pool_kw: + Additional parameters are used to create fresh + :class:`urllib3.connectionpool.ConnectionPool` instances. + + Example: :: + + >>> manager = PoolManager() + >>> r = manager.urlopen("http://google.com/") + >>> r = manager.urlopen("http://google.com/mail") + >>> r = manager.urlopen("http://yahoo.com/") + >>> len(r.pools) + 2 + + """ + + # TODO: Make sure there are no memory leaks here. + + def __init__(self, num_pools=10, **connection_pool_kw): + self.connection_pool_kw = connection_pool_kw + self.pools = RecentlyUsedContainer(num_pools) + + def connection_from_host(self, host, port=80, scheme='http'): + """ + Get a :class:`ConnectionPool` based on the host, port, and scheme. + + Note that an appropriate ``port`` value is required here to normalize + connection pools in our container most effectively. + """ + pool_key = (scheme, host, port) + + # If the scheme, host, or port doesn't match existing open connections, + # open a new ConnectionPool. + pool = self.pools.get(pool_key) + if pool: + return pool + + # Make a fresh ConnectionPool of the desired type + pool_cls = pool_classes_by_scheme[scheme] + pool = pool_cls(host, port, **self.connection_pool_kw) + + self.pools[pool_key] = pool + + return pool + + def connection_from_url(self, url): + """ + Similar to :func:`urllib3.connectionpool.connection_from_url` but + doesn't pass any additional parameters to the + :class:`urllib3.connectionpool.ConnectionPool` constructor. + + Additional parameters are taken from the :class:`.PoolManager` + constructor. + """ + scheme, host, port = get_host(url) + + port = port or port_by_scheme.get(scheme, 80) + + return self.connection_from_host(host, port=port, scheme=scheme) + + def urlopen(self, method, url, **kw): + """ + Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`. 
class RequestMethods(object):
    """
    Convenience mixin for classes who implement a :meth:`urlopen` method, such
    as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
    :class:`~urllib3.poolmanager.PoolManager`.

    Provides behavior for making common types of HTTP request methods and
    decides which type of request field encoding to use.

    Specifically,

    :meth:`.request_encode_url` is for sending requests whose fields are encoded
    in the URL (such as GET, HEAD, DELETE).

    :meth:`.request_encode_body` is for sending requests whose fields are
    encoded in the *body* of the request using multipart or
    x-www-form-urlencoded (such as for POST, PUT, PATCH).

    :meth:`.request` is for making any kind of request, it will look up the
    appropriate encoding format and use one of the above two methods to make
    the request.
    """

    _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])

    _encode_body_methods = set(['PATCH', 'POST', 'PUT', 'TRACE'])

    def urlopen(self, method, url, body=None, headers=None,
                encode_multipart=True, multipart_boundary=None,
                **kw):
        """
        Perform the actual request. Must be provided by the class this mixin
        is combined with; this placeholder always raises
        :class:`NotImplementedError`.
        """
        # NotImplementedError is the exception; ``NotImplemented`` is a
        # non-callable constant and would raise a confusing TypeError.
        raise NotImplementedError("Classes extending RequestMethods must "
                                  "implement their own ``urlopen`` method.")

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the appropriate encoding of
        ``fields`` based on the ``method`` used.

        This is a convenience method that requires the least amount of manual
        effort. It can be used in most situations, while still having the option
        to drop down to more specific methods when necessary, such as
        :meth:`request_encode_url`, :meth:`request_encode_body`,
        or even the lowest level :meth:`urlopen`.

        ``method`` is upper-cased before use. Methods listed in
        ``_encode_url_methods`` get their fields in the url, every other
        method gets them in the body.
        """
        method = method.upper()

        if method in self._encode_url_methods:
            return self.request_encode_url(method, url, fields=fields,
                                           headers=headers,
                                           **urlopen_kw)
        else:
            return self.request_encode_body(method, url, fields=fields,
                                            headers=headers,
                                            **urlopen_kw)

    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the url. This is useful for request methods like GET, HEAD, DELETE, etc.

        If ``url`` already contains a query string, the encoded fields are
        appended to it with ``&``.
        """
        if fields:
            # Don't produce a second '?' when the url already has a query.
            separator = '&' if '?' in url else '?'
            url += separator + urlencode(fields)
        return self.urlopen(method, url, **urlopen_kw)

    def request_encode_body(self, method, url, fields=None, headers=None,
                            encode_multipart=True, multipart_boundary=None,
                            **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the body. This is useful for request methods like POST, PUT, PATCH, etc.

        When ``encode_multipart=True`` (default), then
        :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the
        payload with the appropriate content type. Otherwise
        :meth:`urllib.urlencode` is used with the
        'application/x-www-form-urlencoded' content type.

        Multipart encoding must be used when posting files, and it's reasonably
        safe to use it in other times too. However, it may break request signing,
        such as with OAuth.

        Supports an optional ``fields`` parameter of key/value strings AND
        key/filetuple. A filetuple is a (filename, data) tuple. For example: ::

            fields = {
                'foo': 'bar',
                'fakefile': ('foofile.txt', 'contents of foofile'),
                'realfile': ('barfile.txt', open('realfile').read()),
                'nonamefile': 'contents of nonamefile field',
            }

        When uploading a file, providing a filename (the first parameter of the
        tuple) is optional but recommended to best mimic behavior of browsers.

        Note that if ``headers`` are supplied, the 'Content-Type' header will be
        overwritten because it depends on the dynamic random boundary string
        which is used to compose the body of the request. The random boundary
        string can be explicitly set with the ``multipart_boundary`` parameter.
        The supplied ``headers`` mapping itself is left untouched; a copy is
        sent.
        """
        if encode_multipart:
            body, content_type = encode_multipart_formdata(
                fields or {}, boundary=multipart_boundary)
        else:
            body, content_type = (urlencode(fields or {}),
                                  'application/x-www-form-urlencoded')

        # Work on a copy so a shared headers dict (for example a pool's
        # default headers) does not silently gain a Content-Type entry.
        headers = dict(headers or {})
        headers['Content-Type'] = content_type

        return self.urlopen(method, url, body=body, headers=headers,
                            **urlopen_kw)

    # Deprecated:

    def get_url(self, url, fields=None, **urlopen_kw):
        """
        .. deprecated:: 1.0
           Use :meth:`request` instead.
        """
        return self.request_encode_url('GET', url, fields=fields,
                                       **urlopen_kw)

    def post_url(self, url, fields=None, headers=None, **urlopen_kw):
        """
        .. deprecated:: 1.0
           Use :meth:`request` instead.
        """
        return self.request_encode_body('POST', url, fields=fields,
                                        headers=headers,
                                        **urlopen_kw)
+ + Extra parameters for behaviour not present in httplib.HTTPResponse: + + :param preload_content: + If True, the response's body will be preloaded during construction. + + :param decode_content: + If False, attempts to decode specific content-encodings based on headers + (like 'gzip' and 'deflate') will be skipped and raw data will be used + instead. + + :param original_response: + When this HTTPResponse wrapper is generated from an httplib.HTTPResponse + object, it's convenient to include the original for debug purposes. It's + otherwise unused. + """ + + CONTENT_DECODERS = { + 'gzip': decode_gzip, + 'deflate': decode_deflate, + } + + def __init__(self, body='', headers=None, status=0, version=0, reason=None, + strict=0, preload_content=True, decode_content=True, + original_response=None, pool=None, connection=None): + self.headers = headers or {} + self.status = status + self.version = version + self.reason = reason + self.strict = strict + + self._decode_content = decode_content + self._body = None + self._fp = None + self._original_response = original_response + + self._pool = pool + self._connection = connection + + if hasattr(body, 'read'): + self._fp = body + + if preload_content: + self._body = self.read(decode_content=decode_content) + + def release_conn(self): + if not self._pool or not self._connection: + return + + self._pool._put_conn(self._connection) + self._connection = None + + @property + def data(self): + # For backwards-compat with urllib3 0.4 and earlier. + if self._body: + return self._body + + if self._fp: + return self.read(decode_content=self._decode_content, + cache_content=True) + + def read(self, amt=None, decode_content=True, cache_content=False): + """ + Similar to :meth:`httplib.HTTPResponse.read`, but with two additional + parameters: ``decode_content`` and ``cache_content``. + + :param amt: + How much of the content to read. 
If specified, decoding and caching + is skipped because we can't decode partial content nor does it make + sense to cache partial content as the full response. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. (Overridden if ``amt`` is set.) + + :param cache_content: + If True, will save the returned data such that the same result is + returned despite of the state of the underlying file object. This + is useful if you want the ``.data`` property to continue working + after having ``.read()`` the file object. (Overridden if ``amt`` is + set.) + """ + content_encoding = self.headers.get('content-encoding') + decoder = self.CONTENT_DECODERS.get(content_encoding) + + data = self._fp and self._fp.read(amt) + + try: + + if amt: + return data + + if not decode_content or not decoder: + if cache_content: + self._body = data + + return data + + try: + data = decoder(data) + except IOError: + raise HTTPError("Received response with content-encoding: %s, but " + "failed to decode it." % content_encoding) + + if cache_content: + self._body = data + + return data + + finally: + + if self._original_response and self._original_response.isclosed(): + self.release_conn() + + @staticmethod + def from_httplib(r, **response_kw): + """ + Given an :class:`httplib.HTTPResponse` instance ``r``, return a + corresponding :class:`urllib3.response.HTTPResponse` object. + + Remaining parameters are passed to the HTTPResponse constructor, along + with ``original_response=r``. 
+ """ + + return HTTPResponse(body=r, + headers=dict(r.getheaders()), + status=r.status, + version=r.version, + reason=r.reason, + strict=r.strict, + original_response=r, + **response_kw) + + # Backwards-compatibility methods for httplib.HTTPResponse + def getheaders(self): + return self.headers + + def getheader(self, name, default=None): + return self.headers.get(name, default) diff --git a/requests/sessions.py b/requests/sessions.py index eaeb96e4..9610fd5e 100644 --- a/requests/sessions.py +++ b/requests/sessions.py @@ -9,12 +9,11 @@ requests (cookies, auth, proxies). """ -import cookielib - from .defaults import defaults from .models import Request from .hooks import dispatch_hook -from .utils import add_dict_to_cookiejar, cookiejar_from_dict, header_expand +from .utils import header_expand +from .packages.urllib3.poolmanager import PoolManager def merge_kwargs(local_kwarg, default_kwarg): @@ -64,8 +63,7 @@ class Session(object): proxies=None, hooks=None, params=None, - config=None, - keep_alive=True): + config=None): self.headers = headers or {} self.cookies = cookies or {} @@ -75,13 +73,21 @@ class Session(object): self.hooks = hooks or {} self.params = params or {} self.config = config or {} - self.keep_alive = keep_alive for (k, v) in defaults.items(): self.config.setdefault(k, v) + self.poolmanager = PoolManager( + num_pools=self.config.get('pool_connections'), + maxsize=self.config.get('pool_maxsize') + ) + # Set up a CookieJar to be used by default - self.cookies = cookielib.FileCookieJar() + self.cookies = {} + + # Add passed cookies in. + if cookies is not None: + self.cookies.update(cookies) def __repr__(self): return '' % (id(self)) @@ -104,7 +110,8 @@ class Session(object): proxies=None, hooks=None, return_response=True, - config=None): + config=None, + prefetch=False): """Constructs and sends a :class:`Request `. Returns :class:`Response ` object. 
@@ -122,19 +129,19 @@ class Session(object): :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. :param return_response: (optional) If False, an un-sent Request object will returned. :param config: (optional) A configuration dictionary. + :param prefetch: (optional) if ``True``, the response content will be immediately downloaded. """ method = str(method).upper() - if cookies is None: - cookies = {} + # Default empty dicts for dict params. + cookies = {} if cookies is None else cookies + data = {} if data is None else data + files = {} if files is None else files + headers = {} if headers is None else headers + params = {} if params is None else params - if isinstance(cookies, dict): - cookies = add_dict_to_cookiejar(self.cookies, cookies) - - cookies = cookiejar_from_dict(cookies) - - # Expand header values + # Expand header values. if headers: for k, v in headers.items() or {}: headers[k] = header_expand(v) @@ -152,28 +159,37 @@ class Session(object): timeout=timeout, allow_redirects=allow_redirects, proxies=proxies, - config=config + config=config, + _poolmanager=self.poolmanager ) + # Merge local kwargs with session kwargs. for attr in self.__attrs__: session_val = getattr(self, attr, None) local_val = args.get(attr) args[attr] = merge_kwargs(local_val, session_val) - # Arguments manipulation hook. args = dispatch_hook('args', args['hooks'], args) + # Create the (empty) response. r = Request(**args) + # Give the response some context. + r.session = self + # Don't send if asked nicely. if not return_response: return r # Send the HTTP Request. - r.send() + r.send(prefetch=prefetch) + # Send any cookies back up to the session. + self.cookies.update(r.response.cookies) + + # Return the response. 
return r.response @@ -185,7 +201,18 @@ class Session(object): """ kwargs.setdefault('allow_redirects', True) - return self.request('GET', url, **kwargs) + return self.request('get', url, **kwargs) + + + def options(self, url, **kwargs): + """Sends a OPTIONS request. Returns :class:`Response` object. + + :param url: URL for the new :class:`Request` object. + :param **kwargs: Optional arguments that ``request`` takes. + """ + + kwargs.setdefault('allow_redirects', True) + return self.request('options', url, **kwargs) def head(self, url, **kwargs): @@ -196,10 +223,10 @@ class Session(object): """ kwargs.setdefault('allow_redirects', True) - return self.request('HEAD', url, **kwargs) + return self.request('head', url, **kwargs) - def post(self, url, data='', **kwargs): + def post(self, url, data=None, **kwargs): """Sends a POST request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. @@ -210,7 +237,7 @@ class Session(object): return self.request('post', url, data=data, **kwargs) - def put(self, url, data='', **kwargs): + def put(self, url, data=None, **kwargs): """Sends a PUT request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. @@ -221,7 +248,7 @@ class Session(object): return self.request('put', url, data=data, **kwargs) - def patch(self, url, data='', **kwargs): + def patch(self, url, data=None, **kwargs): """Sends a PATCH request. Returns :class:`Response` object. :param url: URL for the new :class:`Request` object. @@ -229,7 +256,7 @@ class Session(object): :param **kwargs: Optional arguments that ``request`` takes. 
""" - return self.request('patch', url, data='', **kwargs) + return self.request('patch', url, data=data, **kwargs) def delete(self, url, **kwargs): @@ -242,8 +269,7 @@ class Session(object): return self.request('delete', url, **kwargs) - def session(**kwargs): """Returns a :class:`Session` for context-management.""" - return Session(**kwargs) \ No newline at end of file + return Session(**kwargs) diff --git a/requests/structures.py b/requests/structures.py index f23f0b38..35a903fd 100644 --- a/requests/structures.py +++ b/requests/structures.py @@ -8,6 +8,7 @@ Data structures that power Requests. """ + class CaseInsensitiveDict(dict): """Case-insensitive Dictionary diff --git a/requests/utils.py b/requests/utils.py index bd7dead4..0249e9d6 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -20,6 +20,12 @@ import zlib from urllib2 import parse_http_list as _parse_list_header +def guess_filename(obj): + """Tries to guess the filename of the given object.""" + name = getattr(obj, 'name', None) + if name and name[0] != '<' and name[-1] != '>': + return name + # From mitsuhiko/werkzeug (used with permission). def parse_list_header(value): """Parse lists as described by RFC 2068 Section 2. 
diff --git a/setup.py b/setup.py index 6b95c31e..17e612ba 100755 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ setup( packages= [ 'requests', 'requests.packages', - 'requests.packages.poster' + 'requests.packages.urllib3' ], install_requires=required, license='ISC', diff --git a/test_requests.py b/test_requests.py index ebff33dd..61953a37 100755 --- a/test_requests.py +++ b/test_requests.py @@ -4,13 +4,12 @@ from __future__ import with_statement import time -import cookielib import os import unittest import requests import envoy -from urllib2 import HTTPError +from requests import HTTPError try: import omnijson as json @@ -58,9 +57,10 @@ class RequestsTestSuite(unittest.TestCase): def tearDown(self): """Teardown.""" # self.httpbin.kill() + pass + def test_entry_points(self): - import requests requests.session requests.session().get @@ -235,6 +235,8 @@ class RequestsTestSuite(unittest.TestCase): for service in SERVICES: r = requests.get(service('status', '404')) + # print r.status_code + # r.raise_for_status() self.assertEqual(r.ok, False) @@ -247,26 +249,6 @@ class RequestsTestSuite(unittest.TestCase): r.raise_for_status() - def test_cookie_jar(self): - - jar = cookielib.CookieJar() - self.assertFalse(jar) - - url = httpbin('cookies', 'set', 'requests_cookie', 'awesome') - r = requests.get(url, cookies=jar) - self.assertTrue(jar) - - cookie_found = False - for cookie in jar: - if cookie.name == 'requests_cookie': - self.assertEquals(cookie.value, 'awesome') - cookie_found = True - self.assertTrue(cookie_found) - - r = requests.get(httpbin('cookies'), cookies=jar) - self.assertTrue('awesome' in r.content) - - def test_decompress_gzip(self): r = requests.get(httpbin('gzip')) @@ -324,7 +306,7 @@ class RequestsTestSuite(unittest.TestCase): rbody = json.loads(r.content) # Body wasn't valid url encoded data, so the server returns None as # "form" and the raw body as "data". 
- self.assertEquals(rbody.get('form'), None) + self.assertEquals(rbody.get('form'), {}) self.assertEquals(rbody.get('data'), 'fooaowpeuf') @@ -343,21 +325,6 @@ class RequestsTestSuite(unittest.TestCase): self.assertEquals(rbody.get('data'), '') - def test_nonurlencoded_post_querystring(self): - - for service in SERVICES: - - r = requests.post(service('post'), params='fooaowpeuf') - - self.assertEquals(r.status_code, 200) - self.assertEquals(r.headers['content-type'], 'application/json') - self.assertEquals(r.url, service('post?fooaowpeuf')) - - rbody = json.loads(r.content) - self.assertEquals(rbody.get('form'), {}) # No form supplied - self.assertEquals(rbody.get('data'), '') - - def test_urlencoded_post_query_and_data(self): for service in SERVICES: @@ -376,20 +343,18 @@ class RequestsTestSuite(unittest.TestCase): self.assertEquals(rbody.get('data'), '') - def test_nonurlencoded_post_query_and_data(self): + def test_nonurlencoded_postdata(self): for service in SERVICES: - r = requests.post(service('post'), - params='fooaowpeuf', data="foobar") + r = requests.post(service('post'), data="foobar") self.assertEquals(r.status_code, 200) self.assertEquals(r.headers['content-type'], 'application/json') - self.assertEquals(r.url, service('post?fooaowpeuf')) rbody = json.loads(r.content) - self.assertEquals(rbody.get('form'), None) + self.assertEquals(rbody.get('form'), {}) self.assertEquals(rbody.get('data'), 'foobar') @@ -501,6 +466,49 @@ class RequestsTestSuite(unittest.TestCase): self.assertEqual(r2.status_code, 200) + def test_session_persistent_cookies(self): + + s = requests.session() + + # Internally dispatched cookies are sent. + _c = {'kenneth': 'reitz', 'bessie': 'monke'} + r = s.get(httpbin('cookies'), cookies=_c) + r = s.get(httpbin('cookies')) + + # Those cookies persist transparently. + c = json.loads(r.content).get('cookies') + assert c == _c + + # Double check. 
+ r = s.get(httpbin('cookies'), cookies={}) + c = json.loads(r.content).get('cookies') + assert c == _c + + # Remove a cookie by setting its value to None. + r = s.get(httpbin('cookies'), cookies={'bessie': None}) + c = json.loads(r.content).get('cookies') + del _c['bessie'] + assert c == _c + + # Test session-level cookies. + s = requests.session(cookies=_c) + r = s.get(httpbin('cookies')) + c = json.loads(r.content).get('cookies') + assert c == _c + + # Have the server set a cookie. + r = s.get(httpbin('cookies', 'set', 'k', 'v'), allow_redirects=True) + c = json.loads(r.content).get('cookies') + + assert 'k' in c + + # And server-set cookie persistence. + r = s.get(httpbin('cookies')) + c = json.loads(r.content).get('cookies') + + assert 'k' in c + + def test_session_persistent_params(self): @@ -530,8 +538,19 @@ class RequestsTestSuite(unittest.TestCase): assert params3['c'] in r3.content def test_invalid_content(self): + # WARNING: if you're using a terrible DNS provider (comcast), + # this will fail. + try: + hah = 'http://somedomainthatclearlydoesntexistg.com' + r = requests.get(hah, allow_redirects=False) + except requests.ConnectionError: + pass # \o/ + else: + assert False - r = requests.get('http://somedomainthatclearlydoesntexistg.com') + + config = {'safe_mode': True} + r = requests.get(hah, allow_redirects=False, config=config) assert r.content == None