update internal dependencies

This commit is contained in:
Kenneth Reitz
2013-10-24 10:31:05 -04:00
parent db108a19c8
commit df272b7740
9 changed files with 247 additions and 163 deletions
+34
View File
@@ -30,3 +30,37 @@ def detect(aBuf):
u.feed(aBuf)
u.close()
return u.result
def _description_of(path):
"""Return a string describing the probable encoding of a file."""
from charade.universaldetector import UniversalDetector
u = UniversalDetector()
for line in open(path, 'rb'):
u.feed(line)
u.close()
result = u.result
if result['encoding']:
return '%s: %s with confidence %s' % (path,
result['encoding'],
result['confidence'])
else:
return '%s: no result' % path
def charade_cli():
"""
Script which takes one or more file paths and reports on their detected
encodings
Example::
% chardetect.py somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
"""
from sys import argv
for path in argv[1:]:
print(_description_of(path))
+7
View File
@@ -0,0 +1,7 @@
'''
support ';python -m charade <file1> [file2] ...' package execution syntax (2.7+)
'''
from charade import charade_cli
charade_cli()
+1 -1
View File
@@ -169,7 +169,7 @@ class JapaneseContextAnalysis:
def get_confidence(self):
# This is just one way to calculate confidence. It works well for me.
if self._mTotalRel > MINIMUM_DATA_THRESHOLD:
return (self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
return float(self._mTotalRel - self._mRelSample[0]) / self._mTotalRel
else:
return DONT_KNOW
+1 -1
View File
@@ -129,7 +129,7 @@ class Latin1Prober(CharSetProber):
if total < 0.01:
confidence = 0.0
else:
confidence = ((self._mFreqCounter[3] / total)
confidence = ((float(self._mFreqCounter[3]) / total)
- (self._mFreqCounter[1] * 20.0 / total))
if confidence < 0.0:
confidence = 0.0
@@ -74,12 +74,10 @@ class UniversalDetector:
if aBuf[:3] == codecs.BOM:
# EF BB BF UTF-8 with BOM
self.result = {'encoding': "UTF-8", 'confidence': 1.0}
elif aBuf[:4] == codecs.BOM_UTF32_LE:
elif aBuf[:4] in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
# FF FE 00 00 UTF-32, little-endian BOM
self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
elif aBuf[:4] == codecs.BOM_UTF32_BE:
# 00 00 FE FF UTF-32, big-endian BOM
self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
self.result = {'encoding': "UTF-32", 'confidence': 1.0}
elif aBuf[:4] == b'\xFE\xFF\x00\x00':
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
self.result = {
@@ -92,12 +90,10 @@ class UniversalDetector:
'encoding': "X-ISO-10646-UCS-4-2143",
'confidence': 1.0
}
elif aBuf[:2] == codecs.BOM_LE:
elif aBuf[:2] == codecs.BOM_LE or aBuf[:2] == codecs.BOM_BE:
# FF FE UTF-16, little endian BOM
self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
elif aBuf[:2] == codecs.BOM_BE:
# FE FF UTF-16, big endian BOM
self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}
self.result = {'encoding': "UTF-16", 'confidence': 1.0}
self._mGotData = True
if self.result['encoding'] and (self.result['confidence'] > 0.0):
+107
View File
@@ -0,0 +1,107 @@
# urllib3/connection.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import socket
from socket import timeout as SocketTimeout
try: # Python 3
from http.client import HTTPConnection, HTTPException
except ImportError:
from httplib import HTTPConnection, HTTPException
class DummyConnection(object):
"Used to detect a failed ConnectionCls import."
pass
try: # Compiled with SSL?
ssl = None
HTTPSConnection = DummyConnection
class BaseSSLError(BaseException):
pass
try: # Python 3
from http.client import HTTPSConnection
except ImportError:
from httplib import HTTPSConnection
import ssl
BaseSSLError = ssl.SSLError
except (ImportError, AttributeError): # Platform-specific: No SSL.
pass
from .exceptions import (
ConnectTimeoutError,
)
from .packages.ssl_match_hostname import match_hostname
from .util import (
assert_fingerprint,
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
)
class VerifiedHTTPSConnection(HTTPSConnection):
"""
Based on httplib.HTTPSConnection but wraps the socket with
SSL certification.
"""
cert_reqs = None
ca_certs = None
ssl_version = None
def set_cert(self, key_file=None, cert_file=None,
cert_reqs=None, ca_certs=None,
assert_hostname=None, assert_fingerprint=None):
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
self.ca_certs = ca_certs
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
def connect(self):
# Add certificate verification
try:
sock = socket.create_connection(
address=(self.host, self.port),
timeout=self.timeout,
)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
if self._tunnel_host:
self.sock = sock
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
cert_reqs=resolved_cert_reqs,
ca_certs=self.ca_certs,
server_hostname=self.host,
ssl_version=resolved_ssl_version)
if resolved_cert_reqs != ssl.CERT_NONE:
if self.assert_fingerprint:
assert_fingerprint(self.sock.getpeercert(binary_form=True),
self.assert_fingerprint)
elif self.assert_hostname is not False:
match_hostname(self.sock.getpeercert(),
self.assert_hostname or self.host)
if ssl:
HTTPSConnection = VerifiedHTTPSConnection
+67 -138
View File
@@ -10,13 +10,6 @@ import logging
from socket import error as SocketError, timeout as SocketTimeout
import socket
try: # Python 3
from http.client import HTTPConnection, HTTPException
from http.client import HTTP_PORT, HTTPS_PORT
except ImportError:
from httplib import HTTPConnection, HTTPException
from httplib import HTTP_PORT, HTTPS_PORT
try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
@@ -24,26 +17,6 @@ except ImportError:
import Queue as _ # Platform-specific: Windows
try: # Compiled with SSL?
HTTPSConnection = object
class BaseSSLError(BaseException):
pass
ssl = None
try: # Python 3
from http.client import HTTPSConnection
except ImportError:
from httplib import HTTPSConnection
import ssl
BaseSSLError = ssl.SSLError
except (ImportError, AttributeError): # Platform-specific: No SSL.
pass
from .exceptions import (
ClosedPoolError,
ConnectTimeoutError,
@@ -51,23 +24,27 @@ from .exceptions import (
HostChangedError,
MaxRetryError,
SSLError,
TimeoutError,
ReadTimeoutError,
ProxyError,
)
from .packages.ssl_match_hostname import CertificateError, match_hostname
from .packages.ssl_match_hostname import CertificateError
from .packages import six
from .connection import (
DummyConnection,
HTTPConnection, HTTPSConnection, VerifiedHTTPSConnection,
HTTPException, BaseSSLError,
)
from .request import RequestMethods
from .response import HTTPResponse
from .util import (
assert_fingerprint,
get_host,
is_connection_dropped,
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
Timeout,
)
xrange = six.moves.xrange
log = logging.getLogger(__name__)
@@ -75,70 +52,11 @@ log = logging.getLogger(__name__)
_Default = object()
port_by_scheme = {
'http': HTTP_PORT,
'https': HTTPS_PORT,
'http': 80,
'https': 443,
}
## Connection objects (extension of httplib)
class VerifiedHTTPSConnection(HTTPSConnection):
"""
Based on httplib.HTTPSConnection but wraps the socket with
SSL certification.
"""
cert_reqs = None
ca_certs = None
ssl_version = None
def set_cert(self, key_file=None, cert_file=None,
cert_reqs=None, ca_certs=None,
assert_hostname=None, assert_fingerprint=None):
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
self.ca_certs = ca_certs
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
def connect(self):
# Add certificate verification
try:
sock = socket.create_connection(
address=(self.host, self.port),
timeout=self.timeout)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
if self._tunnel_host:
self.sock = sock
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
cert_reqs=resolved_cert_reqs,
ca_certs=self.ca_certs,
server_hostname=self.host,
ssl_version=resolved_ssl_version)
if resolved_cert_reqs != ssl.CERT_NONE:
if self.assert_fingerprint:
assert_fingerprint(self.sock.getpeercert(binary_form=True),
self.assert_fingerprint)
elif self.assert_hostname is not False:
match_hostname(self.sock.getpeercert(),
self.assert_hostname or self.host)
## Pool objects
class ConnectionPool(object):
@@ -218,6 +136,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
scheme = 'http'
ConnectionCls = HTTPConnection
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
@@ -255,14 +174,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
self.num_connections += 1
log.info("Starting new HTTP connection (%d): %s" %
(self.num_connections, self.host))
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
return HTTPConnection(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
**extra_params)
return self.ConnectionCls(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
**extra_params)
def _get_conn(self, timeout=None):
"""
@@ -362,7 +281,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
# conn.request() calls httplib.*.request, not the method in
# request.py. It also calls makefile (recv) on the socket
# urllib3.request. It also calls makefile (recv) on the socket.
conn.request(method, url, **httplib_request_kw)
except SocketTimeout:
raise ConnectTimeoutError(
@@ -371,11 +290,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
log.debug("Setting read timeout to %s" % read_timeout)
# App Engine doesn't have a sock attr
if hasattr(conn, 'sock') and \
read_timeout is not None and \
read_timeout is not Timeout.DEFAULT_TIMEOUT:
if hasattr(conn, 'sock'):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
@@ -385,7 +302,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
conn.sock.settimeout(read_timeout)
if read_timeout is Timeout.DEFAULT_TIMEOUT:
conn.sock.settimeout(socket.getdefaulttimeout())
else: # None or a value
conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
@@ -397,6 +317,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
except BaseSSLError as e:
# Catch possible read timeouts thrown as SSL errors. If not the
# case, rethrow the original. We need to do this because of:
# http://bugs.python.org/issue10272
if 'timed out' in str(e) or \
'did not complete (read)' in str(e): # Python 2.6
raise ReadTimeoutError(self, url, "Read timed out.")
raise
except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
@@ -404,8 +334,8 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
raise
raise
# AppEngine doesn't have a version attr.
http_version = getattr(conn, '_http_vsn_str', 'HTTP/?')
@@ -559,24 +489,24 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
except Empty:
# Timed out by queue
raise ReadTimeoutError(
self, url, "Read timed out, no pool connections are available.")
except SocketTimeout:
# Timed out by socket
raise ReadTimeoutError(self, url, "Read timed out.")
raise EmptyPoolError(self, "No pool connections are available.")
except BaseSSLError as e:
# SSL certificate error
if 'timed out' in str(e) or \
'did not complete (read)' in str(e): # Platform-specific: Python 2.6
raise ReadTimeoutError(self, url, "Read timed out.")
raise SSLError(e)
except CertificateError as e:
# Name mismatch
raise SSLError(e)
except TimeoutError as e:
# Connection broken, discard.
conn = None
# Save the error off for retry logic.
err = e
if retries == 0:
raise
except (HTTPException, SocketError) as e:
if isinstance(e, SocketError) and self.proxy is not None:
raise ProxyError('Cannot connect to proxy. '
@@ -639,6 +569,7 @@ class HTTPSConnectionPool(HTTPConnectionPool):
"""
scheme = 'https'
ConnectionCls = HTTPSConnection
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
@@ -658,33 +589,33 @@ class HTTPSConnectionPool(HTTPConnectionPool):
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
def _prepare_conn(self, connection):
def _prepare_conn(self, conn):
"""
Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
and establish the tunnel if proxy is used.
"""
if isinstance(connection, VerifiedHTTPSConnection):
connection.set_cert(key_file=self.key_file,
cert_file=self.cert_file,
cert_reqs=self.cert_reqs,
ca_certs=self.ca_certs,
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint)
connection.ssl_version = self.ssl_version
if isinstance(conn, VerifiedHTTPSConnection):
conn.set_cert(key_file=self.key_file,
cert_file=self.cert_file,
cert_reqs=self.cert_reqs,
ca_certs=self.ca_certs,
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint)
conn.ssl_version = self.ssl_version
if self.proxy is not None:
# Python 2.7+
try:
set_tunnel = connection.set_tunnel
set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
set_tunnel = connection._set_tunnel
set_tunnel = conn._set_tunnel
set_tunnel(self.host, self.port, self.proxy_headers)
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
connection.connect()
conn.connect()
return connection
return conn
def _new_conn(self):
"""
@@ -694,28 +625,26 @@ class HTTPSConnectionPool(HTTPConnectionPool):
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))
if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
# Platform-specific: Python without ssl
raise SSLError("Can't connect to HTTPS URL because the SSL "
"module is not available.")
actual_host = self.host
actual_port = self.port
if self.proxy is not None:
actual_host = self.proxy.host
actual_port = self.proxy.port
if not ssl: # Platform-specific: Python compiled without +ssl
if not HTTPSConnection or HTTPSConnection is object:
raise SSLError("Can't connect to HTTPS URL because the SSL "
"module is not available.")
connection_class = HTTPSConnection
else:
connection_class = VerifiedHTTPSConnection
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
connection = connection_class(host=actual_host, port=actual_port,
timeout=self.timeout.connect_timeout,
**extra_params)
return self._prepare_conn(connection)
conn = self.ConnectionCls(host=actual_host, port=actual_port,
timeout=self.timeout.connect_timeout,
**extra_params)
return self._prepare_conn(conn)
def connection_from_url(url, **kw):
@@ -26,6 +26,7 @@ import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
from socket import _fileobject
import ssl
import select
from cStringIO import StringIO
from .. import connectionpool
@@ -336,6 +337,7 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
try:
cnx.do_handshake()
except OpenSSL.SSL.WantReadError:
select.select([sock], [], [])
continue
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad handshake', e)
+24 -15
View File
@@ -80,14 +80,13 @@ class Timeout(object):
:type read: integer, float, or None
:param total:
The maximum amount of time to wait for an HTTP request to connect and
return. This combines the connect and read timeouts into one. In the
This combines the connect and read timeouts into one; the read timeout
will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: integer, float, or None
.. note::
@@ -101,18 +100,23 @@ class Timeout(object):
`total`.
In addition, the read and total timeouts only measure the time between
read operations on the socket connecting the client and the server, not
the total amount of time for the request to return a complete response.
As an example, you may want a request to return within 7 seconds or
fail, so you set the ``total`` timeout to 7 seconds. If the server
sends one byte to you every 5 seconds, the request will **not** trigger
time out. This case is admittedly rare.
read operations on the socket connecting the client and the server,
not the total amount of time for the request to return a complete
response. For most requests, the timeout is raised because the server
has not sent the first byte in the specified time. This is not always
the case; if a server streams one byte every fifteen seconds, a timeout
of 20 seconds will not ever trigger, even though the request will
take several minutes to complete.
If your goal is to cut off any request after a set amount of wall clock
time, consider having a second "watcher" thread to cut off a slow
request.
"""
#: A sentinel object representing the default timeout value
DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
def __init__(self, connect=_Default, read=_Default, total=None):
def __init__(self, total=None, connect=_Default, read=_Default):
self._connect = self._validate_timeout(connect, 'connect')
self._read = self._validate_timeout(read, 'read')
self.total = self._validate_timeout(total, 'total')
@@ -372,7 +376,8 @@ def parse_url(url):
# Auth
if '@' in url:
auth, url = url.split('@', 1)
# Last '@' denotes end of auth part
auth, url = url.rsplit('@', 1)
# IPv6
if url and url[0] == '[':
@@ -386,10 +391,14 @@ def parse_url(url):
if not host:
host = _host
if not port.isdigit():
raise LocationParseError("Failed to parse: %s" % url)
port = int(port)
if port:
# If given, ports must be integers.
if not port.isdigit():
raise LocationParseError("Failed to parse: %s" % url)
port = int(port)
else:
# Blank ports are cool, too. (rfc3986#section-3.2.3)
port = None
elif not host and url:
host = url