mirror of
https://github.com/kennethreitz/heroku-buildpack-python.git
synced 2026-06-05 23:10:16 +00:00
Compare commits
53 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0b87419166 | |||
| d832b7425e | |||
| 2991dce623 | |||
| ffd0bb57ee | |||
| 75870dd412 | |||
| 831d73e302 | |||
| 0cf2e2849d | |||
| 6ba8d47927 | |||
| af940b5390 | |||
| 819bc2da5b | |||
| 2dad2e73f3 | |||
| e550b32ea8 | |||
| 6af8d81086 | |||
| 0ed4089cb9 | |||
| f09191ec8b | |||
| aef7b65b95 | |||
| 5438aab121 | |||
| a259143c0b | |||
| 0896d6a0c4 | |||
| 39f2efe296 | |||
| d047f26ad4 | |||
| 998f996a32 | |||
| c620980e5a | |||
| 2983fcfbea | |||
| e3511e7202 | |||
| 692f040618 | |||
| 8645a965aa | |||
| 3a483865e8 | |||
| 6ea6a0ccaf | |||
| 6bc94eaf6e | |||
| 49e409cb64 | |||
| 2693de45c7 | |||
| 88f711b6e7 | |||
| a7976fc177 | |||
| 8a837a23e7 | |||
| f37597ad9d | |||
| 928a664544 | |||
| 9fb715ef1c | |||
| 3cff2e39fe | |||
| 653a5be104 | |||
| 67ade52e95 | |||
| ab3e365ff6 | |||
| 902672e934 | |||
| 6207b3506b | |||
| 5446d448fd | |||
| 366def6c16 | |||
| a84fc7a248 | |||
| e4aa40216b | |||
| 87d03e62d7 | |||
| 92b4f5e8aa | |||
| 19cd447b83 | |||
| 570182b6b7 | |||
| 4c9de740d2 |
@@ -0,0 +1,2 @@
|
|||||||
|
sudo: false
|
||||||
|
script: exit 0
|
||||||
@@ -1,5 +1,18 @@
|
|||||||
# Python Buildpack Changelog
|
# Python Buildpack Changelog
|
||||||
|
|
||||||
|
## v82 (2016-08-22)
|
||||||
|
|
||||||
|
Update to library detection mechanisms (pip-pop).
|
||||||
|
|
||||||
|
- Updated setuptools to v25.5.0
|
||||||
|
|
||||||
|
## v81 (2016-06-28)
|
||||||
|
|
||||||
|
Updated default Python to 2.7.11.
|
||||||
|
|
||||||
|
- Updated setuptools to v23.1.0.
|
||||||
|
- Updated pip to v8.1.2.
|
||||||
|
|
||||||
## v80 (2016-04-05)
|
## v80 (2016-04-05)
|
||||||
|
|
||||||
Improved pip-pop compatibility with latest pip releases.
|
Improved pip-pop compatibility with latest pip releases.
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
|
-
|
||||||
|
|
||||||
# Heroku Buildpack: Python
|
# Heroku Buildpack: Python
|
||||||

|
|
||||||
|
|
||||||
This is the official [Heroku buildpack](https://devcenter.heroku.com/articles/buildpacks) for Python apps, powered by [pip](https://pip.pypa.io/) and other excellent software.
|
This is the official [Heroku buildpack](https://devcenter.heroku.com/articles/buildpacks) for Python apps, powered by [pip](https://pip.pypa.io/) and other excellent software.
|
||||||
|
|
||||||
@@ -20,12 +21,12 @@ Deploying a Python application couldn't be easier:
|
|||||||
$ git push heroku master
|
$ git push heroku master
|
||||||
...
|
...
|
||||||
-----> Python app detected
|
-----> Python app detected
|
||||||
-----> Installing python-2.7.11
|
-----> Installing python-2.7.12
|
||||||
$ pip install -r requirements.txt
|
$ pip install -r requirements.txt
|
||||||
Collecting requests (from -r requirements.txt (line 1))
|
Collecting requests (from -r requirements.txt (line 1))
|
||||||
Downloading requests-2.9.1-py2.py3-none-any.whl (501kB)
|
Downloading requests-2.10.0-py2.py3-none-any.whl (501kB)
|
||||||
Installing collected packages: requests
|
Installing collected packages: requests
|
||||||
Successfully installed requests-2.9.1
|
Successfully installed requests-2.10.0
|
||||||
|
|
||||||
-----> Discovering process types
|
-----> Discovering process types
|
||||||
Procfile declares types -> (none)
|
Procfile declares types -> (none)
|
||||||
@@ -43,12 +44,12 @@ Specify a Python Runtime
|
|||||||
Specific versions of the Python runtime can be specified with a `runtime.txt` file:
|
Specific versions of the Python runtime can be specified with a `runtime.txt` file:
|
||||||
|
|
||||||
$ cat runtime.txt
|
$ cat runtime.txt
|
||||||
python-3.5.1
|
python-3.5.2
|
||||||
|
|
||||||
Runtime options include:
|
Runtime options include:
|
||||||
|
|
||||||
- `python-2.7.11`
|
- `python-2.7.12`
|
||||||
- `python-3.5.1`
|
- `python-3.5.2`
|
||||||
- `pypy-5.0.1` (unsupported, experimental)
|
- `pypy-5.3.1` (unsupported, experimental)
|
||||||
|
|
||||||
Other [unsupported runtimes](https://github.com/heroku/heroku-buildpack-python/tree/master/builds/runtimes) are available as well. Use at your own risk.
|
Other [unsupported runtimes](https://github.com/heroku/heroku-buildpack-python/tree/master/builds/runtimes) are available as well. Use at your own risk.
|
||||||
+4
-4
@@ -31,11 +31,11 @@ ENV_DIR=$3
|
|||||||
VIRTUALENV_LOC=".heroku/venv"
|
VIRTUALENV_LOC=".heroku/venv"
|
||||||
LEGACY_TRIGGER="lib/python2.7"
|
LEGACY_TRIGGER="lib/python2.7"
|
||||||
|
|
||||||
DEFAULT_PYTHON_VERSION="python-2.7.11"
|
DEFAULT_PYTHON_VERSION="python-2.7.12"
|
||||||
DEFAULT_PYTHON_STACK="cedar-14"
|
DEFAULT_PYTHON_STACK="cedar-14"
|
||||||
PYTHON_EXE="/app/.heroku/python/bin/python"
|
PYTHON_EXE="/app/.heroku/python/bin/python"
|
||||||
PIP_VERSION="8.1.1"
|
PIP_VERSION="8.1.2"
|
||||||
SETUPTOOLS_VERSION="20.4"
|
SETUPTOOLS_VERSION="25.2.0"
|
||||||
|
|
||||||
# Common Problem Warnings
|
# Common Problem Warnings
|
||||||
export WARNINGS_LOG=$(mktemp)
|
export WARNINGS_LOG=$(mktemp)
|
||||||
@@ -56,7 +56,7 @@ export PATH=$PATH:$ROOT_DIR/vendor/pip-pop
|
|||||||
[ ! "$STACK" ] && STACK=$DEFAULT_PYTHON_STACK
|
[ ! "$STACK" ] && STACK=$DEFAULT_PYTHON_STACK
|
||||||
|
|
||||||
# Sanitizing environment variables.
|
# Sanitizing environment variables.
|
||||||
unset GIT_DIR PYTHONHOME PYTHONPATH LD_LIBRARY_PATH LIBRARY_PATH
|
unset GIT_DIR PYTHONHOME PYTHONPATH
|
||||||
unset RECEIVE_DATA RUN_KEY BUILD_INFO DEPLOY LOG_TOKEN DYNO
|
unset RECEIVE_DATA RUN_KEY BUILD_INFO DEPLOY LOG_TOKEN DYNO
|
||||||
unset CYTOKINE_LOG_FILE GEM_PATH
|
unset CYTOKINE_LOG_FILE GEM_PATH
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
# This script serves as the Pylibmc build step of the
|
# This script serves as the Cryptography build step of the
|
||||||
# [**Python Buildpack**](https://github.com/heroku/heroku-buildpack-python)
|
# [**Python Buildpack**](https://github.com/heroku/heroku-buildpack-python)
|
||||||
# compiler.
|
# compiler.
|
||||||
#
|
#
|
||||||
@@ -20,18 +20,16 @@ source $BIN_DIR/utils
|
|||||||
bpwatch start libffi_install
|
bpwatch start libffi_install
|
||||||
|
|
||||||
# If a package using cffi exists within requirements, use vendored libffi.
|
# If a package using cffi exists within requirements, use vendored libffi.
|
||||||
if (pip-grep -s requirements.txt bcrypt cffi cryptography django[bcrypt] Django[bcrypt] pyOpenSSL PyOpenSSL requests[security] &> /dev/null) then
|
if (pip-grep -s requirements.txt bcrypt cffi cryptography django[bcrypt] Django[bcrypt] PyNaCl pyOpenSSL PyOpenSSL requests[security] misaka &> /dev/null) then
|
||||||
|
|
||||||
if [ -d ".heroku/vendor/lib/libffi-3.1.1" ]; then
|
if [ ! -d ".heroku/vendor/lib/libffi-3.1" ]; then
|
||||||
export LIBFFI=$(pwd)/vendor
|
|
||||||
else
|
|
||||||
echo "-----> Noticed cffi. Bootstrapping libffi."
|
echo "-----> Noticed cffi. Bootstrapping libffi."
|
||||||
mkdir -p .heroku/vendor
|
mkdir -p .heroku/vendor
|
||||||
# Download and extract libffi into target vendor directory.
|
# Download and extract libffi into target vendor directory.
|
||||||
curl $VENDORED_LIBFFI -s | tar zxv -C .heroku/vendor &> /dev/null
|
curl $VENDORED_LIBFFI -s | tar zxv -C .heroku/vendor &> /dev/null
|
||||||
|
|
||||||
export LIBFFI=$(pwd)/vendor
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LIBFFI=$(pwd)/vendor
|
||||||
fi
|
fi
|
||||||
|
|
||||||
bpwatch stop libffi_install
|
bpwatch stop libffi_install
|
||||||
|
|||||||
+3
-5
@@ -22,16 +22,14 @@ bpwatch start gdal_install
|
|||||||
# If GDAL exists within requirements, use vendored gdal.
|
# If GDAL exists within requirements, use vendored gdal.
|
||||||
if (pip-grep -s requirements.txt GDAL gdal pygdal &> /dev/null) then
|
if (pip-grep -s requirements.txt GDAL gdal pygdal &> /dev/null) then
|
||||||
|
|
||||||
if [ -f ".heroku/vendor/bin/gdalserver" ]; then
|
if [ ! -f ".heroku/vendor/bin/gdalserver" ]; then
|
||||||
export GDAL=$(pwd)/vendor
|
|
||||||
else
|
|
||||||
echo "-----> Noticed GDAL. Bootstrapping gdal."
|
echo "-----> Noticed GDAL. Bootstrapping gdal."
|
||||||
mkdir -p .heroku/vendor
|
mkdir -p .heroku/vendor
|
||||||
# Download and extract cryptography into target vendor directory.
|
# Download and extract cryptography into target vendor directory.
|
||||||
curl $VENDORED_GDAL -s | tar zxv -C .heroku/vendor &> /dev/null
|
curl $VENDORED_GDAL -s | tar zxv -C .heroku/vendor &> /dev/null
|
||||||
|
|
||||||
export GDAL=$(pwd)/vendor
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export GDAL=$(pwd)/vendor
|
||||||
fi
|
fi
|
||||||
|
|
||||||
bpwatch stop gdal_install
|
bpwatch stop gdal_install
|
||||||
|
|||||||
+3
-5
@@ -24,18 +24,16 @@ bpwatch start geo_libs_install
|
|||||||
# If GDAL exists within requirements, use vendored gdal.
|
# If GDAL exists within requirements, use vendored gdal.
|
||||||
if [[ "$BUILD_WITH_GEO_LIBRARIES" ]]; then
|
if [[ "$BUILD_WITH_GEO_LIBRARIES" ]]; then
|
||||||
|
|
||||||
if [ -f ".heroku/vendor/bin/gdalserver" ]; then
|
if [ ! -f ".heroku/vendor/bin/proj" ]; then
|
||||||
export GDAL=$(pwd)/vendor
|
|
||||||
else
|
|
||||||
echo "-----> Bootstrapping gdal, geos, proj."
|
echo "-----> Bootstrapping gdal, geos, proj."
|
||||||
mkdir -p .heroku/vendor
|
mkdir -p .heroku/vendor
|
||||||
# Download and extract cryptography into target vendor directory.
|
# Download and extract cryptography into target vendor directory.
|
||||||
curl $VENDORED_GDAL -s | tar zxv -C .heroku/vendor &> /dev/null
|
curl $VENDORED_GDAL -s | tar zxv -C .heroku/vendor &> /dev/null
|
||||||
curl $VENDORED_GEOS -s | tar zxv -C .heroku/vendor &> /dev/null
|
curl $VENDORED_GEOS -s | tar zxv -C .heroku/vendor &> /dev/null
|
||||||
curl $VENDORED_PROJ -s | tar zxv -C .heroku/vendor &> /dev/null
|
curl $VENDORED_PROJ -s | tar zxv -C .heroku/vendor &> /dev/null
|
||||||
|
|
||||||
export GDAL=$(pwd)/vendor
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export GDAL=$(pwd)/vendor
|
||||||
fi
|
fi
|
||||||
|
|
||||||
bpwatch stop geo_libs_install
|
bpwatch stop geo_libs_install
|
||||||
|
|||||||
+1
-1
@@ -3,4 +3,4 @@ if (grep -Fiq "hg+" requirements.txt) then
|
|||||||
bpwatch start mercurial_install
|
bpwatch start mercurial_install
|
||||||
/app/.heroku/python/bin/pip install mercurial | cleanup | indent
|
/app/.heroku/python/bin/pip install mercurial | cleanup | indent
|
||||||
bpwatch stop mercurial_install
|
bpwatch stop mercurial_install
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -15,4 +15,4 @@ if [[ -f .heroku/python/requirements-declared.txt ]]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
bpwatch stop pip_uninstall
|
bpwatch stop pip_uninstall
|
||||||
set -e
|
set -e
|
||||||
|
|||||||
+3
-5
@@ -21,16 +21,14 @@ bpwatch start pylibmc_install
|
|||||||
# If pylibmc exists within requirements, use vendored libmemcached.
|
# If pylibmc exists within requirements, use vendored libmemcached.
|
||||||
if (pip-grep -s requirements.txt pylibmc &> /dev/null) then
|
if (pip-grep -s requirements.txt pylibmc &> /dev/null) then
|
||||||
|
|
||||||
if [ -d ".heroku/vendor/lib/sasl2" ]; then
|
if [ ! -d ".heroku/vendor/lib/sasl2" ]; then
|
||||||
export LIBMEMCACHED=$(pwd)/vendor
|
|
||||||
else
|
|
||||||
echo "-----> Noticed pylibmc. Bootstrapping libmemcached."
|
echo "-----> Noticed pylibmc. Bootstrapping libmemcached."
|
||||||
mkdir -p .heroku/vendor
|
mkdir -p .heroku/vendor
|
||||||
# Download and extract libmemcached into target vendor directory.
|
# Download and extract libmemcached into target vendor directory.
|
||||||
curl $VENDORED_MEMCACHED -s | tar zxv -C .heroku/vendor &> /dev/null
|
curl $VENDORED_MEMCACHED -s | tar zxv -C .heroku/vendor &> /dev/null
|
||||||
|
|
||||||
export LIBMEMCACHED=$(pwd)/vendor
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
export LIBMEMCACHED=$(pwd)/vendor
|
||||||
fi
|
fi
|
||||||
|
|
||||||
bpwatch stop pylibmc_install
|
bpwatch stop pylibmc_install
|
||||||
|
|||||||
Executable
+14
@@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Build Path: /app/.heroku/python/
|
||||||
|
# Build Deps: libraries/sqlite
|
||||||
|
|
||||||
|
# NOTICE: This formula only works for the cedar-14 stack, not cedar.
|
||||||
|
|
||||||
|
OUT_PREFIX=$1
|
||||||
|
|
||||||
|
echo "Building PyPy..."
|
||||||
|
SOURCE_TARBALL='https://bitbucket.org/pypy/pypy/downloads/pypy-5.1.0-linux64.tar.bz2'
|
||||||
|
curl -L $SOURCE_TARBALL | tar jx
|
||||||
|
cp -R pypy-5.1.0-linux64/* $OUT_PREFIX
|
||||||
|
|
||||||
|
ln $OUT_PREFIX/bin/pypy $OUT_PREFIX/bin/python
|
||||||
Executable
+14
@@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Build Path: /app/.heroku/python/
|
||||||
|
# Build Deps: libraries/sqlite
|
||||||
|
|
||||||
|
# NOTICE: This formula only works for the cedar-14 stack, not cedar.
|
||||||
|
|
||||||
|
OUT_PREFIX=$1
|
||||||
|
|
||||||
|
echo "Building PyPy..."
|
||||||
|
SOURCE_TARBALL='https://bitbucket.org/pypy/pypy/downloads/pypy-5.1.1-linux64.tar.bz2'
|
||||||
|
curl -L $SOURCE_TARBALL | tar jx
|
||||||
|
cp -R pypy-5.1.1-linux64/* $OUT_PREFIX
|
||||||
|
|
||||||
|
ln $OUT_PREFIX/bin/pypy $OUT_PREFIX/bin/python
|
||||||
Executable
+14
@@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Build Path: /app/.heroku/python/
|
||||||
|
# Build Deps: libraries/sqlite
|
||||||
|
|
||||||
|
# NOTICE: This formula only works for the cedar-14 stack, not cedar.
|
||||||
|
|
||||||
|
OUT_PREFIX=$1
|
||||||
|
|
||||||
|
echo "Building PyPy..."
|
||||||
|
SOURCE_TARBALL='https://bbuseruploads.s3.amazonaws.com/54220cd1-b139-4188-9455-1e13e663f1ac/downloads/265504fc-8c03-4c1d-935a-2b8fbc6da51a/pypy2-v5.3.1-linux64.tar.bz2?Signature=F%2FnnB39QrCTPldSErIJoqzAt8YA%3D&Expires=1471612628&AWSAccessKeyId=AKIAIWY5XSVPZPDQYRQQ&versionId=kJjoDTh5eNcLrGrt5hYT0_5XGjg3.gl0&response-content-disposition=attachment%3B%20filename%3D%22pypy2-v5.3.1-linux64.tar.bz2%22'
|
||||||
|
curl -L $SOURCE_TARBALL | tar jx
|
||||||
|
cp -R pypy2-v5.3.1-linux64/* $OUT_PREFIX
|
||||||
|
|
||||||
|
ln $OUT_PREFIX/bin/pypy $OUT_PREFIX/bin/python
|
||||||
@@ -5,9 +5,9 @@
|
|||||||
OUT_PREFIX=$1
|
OUT_PREFIX=$1
|
||||||
|
|
||||||
echo "Building Python..."
|
echo "Building Python..."
|
||||||
SOURCE_TARBALL='https://www.python.org/ftp/python/2.7.11/Python-2.7.11rc1.tgz'
|
SOURCE_TARBALL='https://python.org/ftp/python/2.7.12/Python-2.7.12.tgz'
|
||||||
curl -L $SOURCE_TARBALL | tar xz
|
curl -L $SOURCE_TARBALL | tar xz
|
||||||
mv Python-2.7.11rc1 src
|
mv Python-2.7.12 src
|
||||||
cd src
|
cd src
|
||||||
|
|
||||||
./configure --prefix=$OUT_PREFIX --with-ensurepip=no
|
./configure --prefix=$OUT_PREFIX --with-ensurepip=no
|
||||||
Executable
+18
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Build Path: /app/.heroku/python/
|
||||||
|
# Build Deps: libraries/sqlite
|
||||||
|
|
||||||
|
OUT_PREFIX=$1
|
||||||
|
|
||||||
|
echo "Building Python..."
|
||||||
|
SOURCE_TARBALL='https://python.org/ftp/python/3.5.2/Python-3.5.2.tgz'
|
||||||
|
curl -L $SOURCE_TARBALL | tar xz
|
||||||
|
mv Python-3.5.2 src
|
||||||
|
cd src
|
||||||
|
|
||||||
|
./configure --prefix=$OUT_PREFIX --with-ensurepip=no
|
||||||
|
make
|
||||||
|
make install
|
||||||
|
|
||||||
|
ln $OUT_PREFIX/bin/python3 $OUT_PREFIX/bin/python
|
||||||
|
|
||||||
Vendored
BIN
Binary file not shown.
Vendored
BIN
Binary file not shown.
Vendored
+10
-13
@@ -18,6 +18,7 @@ from pip._vendor.requests import session
|
|||||||
|
|
||||||
requests = session()
|
requests = session()
|
||||||
|
|
||||||
|
|
||||||
class Requirements(object):
|
class Requirements(object):
|
||||||
def __init__(self, reqfile=None):
|
def __init__(self, reqfile=None):
|
||||||
super(Requirements, self).__init__()
|
super(Requirements, self).__init__()
|
||||||
@@ -31,13 +32,15 @@ class Requirements(object):
|
|||||||
return '<Requirements \'{}\'>'.format(self.path)
|
return '<Requirements \'{}\'>'.format(self.path)
|
||||||
|
|
||||||
def load(self, reqfile):
|
def load(self, reqfile):
|
||||||
|
|
||||||
if not os.path.exists(reqfile):
|
if not os.path.exists(reqfile):
|
||||||
raise ValueError('The given requirements file does not exist.')
|
raise ValueError('The given requirements file does not exist.')
|
||||||
|
|
||||||
finder = PackageFinder([], [], session=requests)
|
finder = PackageFinder([], [], session=requests)
|
||||||
for requirement in parse_requirements(reqfile, finder=finder, session=requests):
|
for requirement in parse_requirements(reqfile, finder=finder, session=requests):
|
||||||
if requirement.req:
|
if requirement.req:
|
||||||
|
if not getattr(requirement.req, 'name', None):
|
||||||
|
# Prior to pip 8.1.2 the attribute `name` did not exist.
|
||||||
|
requirement.req.name = requirement.req.project_name
|
||||||
self.requirements.append(requirement.req)
|
self.requirements.append(requirement.req)
|
||||||
|
|
||||||
|
|
||||||
@@ -48,24 +51,24 @@ class Requirements(object):
|
|||||||
|
|
||||||
# Generate fresh packages.
|
# Generate fresh packages.
|
||||||
other_reqs = (
|
other_reqs = (
|
||||||
[r.project_name for r in r1.requirements]
|
[r.name for r in r1.requirements]
|
||||||
if ignore_versions else r1.requirements
|
if ignore_versions else r1.requirements
|
||||||
)
|
)
|
||||||
|
|
||||||
for req in r2.requirements:
|
for req in r2.requirements:
|
||||||
r = req.project_name if ignore_versions else req
|
r = req.name if ignore_versions else req
|
||||||
|
|
||||||
if r not in other_reqs and r not in excludes:
|
if r not in other_reqs and r not in excludes:
|
||||||
results['fresh'].append(req)
|
results['fresh'].append(req)
|
||||||
|
|
||||||
# Generate stale packages.
|
# Generate stale packages.
|
||||||
other_reqs = (
|
other_reqs = (
|
||||||
[r.project_name for r in r2.requirements]
|
[r.name for r in r2.requirements]
|
||||||
if ignore_versions else r2.requirements
|
if ignore_versions else r2.requirements
|
||||||
)
|
)
|
||||||
|
|
||||||
for req in r1.requirements:
|
for req in r1.requirements:
|
||||||
r = req.project_name if ignore_versions else req
|
r = req.name if ignore_versions else req
|
||||||
|
|
||||||
if r not in other_reqs and r not in excludes:
|
if r not in other_reqs and r not in excludes:
|
||||||
results['stale'].append(req)
|
results['stale'].append(req)
|
||||||
@@ -73,11 +76,7 @@ class Requirements(object):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def diff(r1, r2, include_fresh=False, include_stale=False, excludes=None):
|
def diff(r1, r2, include_fresh=False, include_stale=False, excludes=None):
|
||||||
|
|
||||||
include_versions = True if include_stale else False
|
include_versions = True if include_stale else False
|
||||||
excludes = excludes if len(excludes) else []
|
excludes = excludes if len(excludes) else []
|
||||||
|
|
||||||
@@ -92,12 +91,11 @@ def diff(r1, r2, include_fresh=False, include_stale=False, excludes=None):
|
|||||||
|
|
||||||
if include_fresh:
|
if include_fresh:
|
||||||
for line in results['fresh']:
|
for line in results['fresh']:
|
||||||
print(line.project_name if include_versions else line)
|
print(line.name if include_versions else line)
|
||||||
|
|
||||||
if include_stale:
|
if include_stale:
|
||||||
for line in results['stale']:
|
for line in results['stale']:
|
||||||
print(line.project_name if include_versions else line)
|
print(line.name if include_versions else line)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -114,6 +112,5 @@ def main():
|
|||||||
diff(**kwargs)
|
diff(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|||||||
Vendored
+12
-20
@@ -15,6 +15,7 @@ from pip._vendor.requests import session
|
|||||||
|
|
||||||
requests = session()
|
requests = session()
|
||||||
|
|
||||||
|
|
||||||
class Requirements(object):
|
class Requirements(object):
|
||||||
def __init__(self, reqfile=None):
|
def __init__(self, reqfile=None):
|
||||||
super(Requirements, self).__init__()
|
super(Requirements, self).__init__()
|
||||||
@@ -28,41 +29,34 @@ class Requirements(object):
|
|||||||
return '<Requirements \'{}\'>'.format(self.path)
|
return '<Requirements \'{}\'>'.format(self.path)
|
||||||
|
|
||||||
def load(self, reqfile):
|
def load(self, reqfile):
|
||||||
|
|
||||||
if not os.path.exists(reqfile):
|
if not os.path.exists(reqfile):
|
||||||
raise ValueError('The given requirements file does not exist.')
|
raise ValueError('The given requirements file does not exist.')
|
||||||
|
|
||||||
finder = PackageFinder([], [], session=requests)
|
finder = PackageFinder([], [], session=requests)
|
||||||
for requirement in parse_requirements(reqfile, finder=finder, session=requests):
|
for requirement in parse_requirements(reqfile, finder=finder, session=requests):
|
||||||
self.requirements.append(requirement)
|
if requirement.req:
|
||||||
|
if not getattr(requirement.req, 'name', None):
|
||||||
|
# Prior to pip 8.1.2 the attribute `name` did not exist.
|
||||||
|
requirement.req.name = requirement.req.project_name
|
||||||
|
self.requirements.append(requirement.req)
|
||||||
|
|
||||||
|
|
||||||
def grep(reqfile, packages, silent=False):
|
def grep(reqfile, packages, silent=False):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
r = Requirements(reqfile)
|
r = Requirements(reqfile)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
|
||||||
if not silent:
|
if not silent:
|
||||||
print('There was a problem loading the given requirement file.')
|
print('There was a problem loading the given requirement file.')
|
||||||
|
|
||||||
exit(os.EX_NOINPUT)
|
exit(os.EX_NOINPUT)
|
||||||
|
|
||||||
for requirement in r.requirements:
|
for req in r.requirements:
|
||||||
|
if req.name in packages:
|
||||||
if requirement.req:
|
if not silent:
|
||||||
|
print('Package {} found!'.format(req.name))
|
||||||
if requirement.req.project_name in packages:
|
exit(0)
|
||||||
|
|
||||||
if not silent:
|
|
||||||
print('Package {} found!'.format(requirement.req.project_name))
|
|
||||||
|
|
||||||
exit(0)
|
|
||||||
|
|
||||||
if not silent:
|
if not silent:
|
||||||
print('Not found.'.format(requirement.req.project_name))
|
print('Not found.')
|
||||||
|
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
@@ -72,10 +66,8 @@ def main():
|
|||||||
|
|
||||||
kwargs = {'reqfile': args['<reqfile>'], 'packages': args['<package>'], 'silent': args['-s']}
|
kwargs = {'reqfile': args['<reqfile>'], 'packages': args['<package>'], 'silent': args['-s']}
|
||||||
|
|
||||||
|
|
||||||
grep(**kwargs)
|
grep(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|||||||
Vendored
-315
@@ -1,315 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
from __future__ import absolute_import
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import optparse
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
|
|
||||||
from pip.exceptions import InstallationError, CommandError, PipError
|
|
||||||
from pip.utils import get_installed_distributions, get_prog
|
|
||||||
from pip.utils import deprecation
|
|
||||||
from pip.vcs import git, mercurial, subversion, bazaar # noqa
|
|
||||||
from pip.baseparser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
|
|
||||||
from pip.commands import get_summaries, get_similar_commands
|
|
||||||
from pip.commands import commands_dict
|
|
||||||
from pip._vendor.requests.packages.urllib3.exceptions import (
|
|
||||||
InsecureRequestWarning,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# assignment for flake8 to be happy
|
|
||||||
|
|
||||||
# This fixes a peculiarity when importing via __import__ - as we are
|
|
||||||
# initialising the pip module, "from pip import cmdoptions" is recursive
|
|
||||||
# and appears not to work properly in that situation.
|
|
||||||
import pip.cmdoptions
|
|
||||||
cmdoptions = pip.cmdoptions
|
|
||||||
|
|
||||||
# The version as used in the setup.py and the docs conf.py
|
|
||||||
__version__ = "7.1.2"
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Hide the InsecureRequestWarning from urllib3
|
|
||||||
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
|
|
||||||
|
|
||||||
|
|
||||||
def autocomplete():
|
|
||||||
"""Command and option completion for the main option parser (and options)
|
|
||||||
and its subcommands (and options).
|
|
||||||
|
|
||||||
Enable by sourcing one of the completion shell scripts (bash or zsh).
|
|
||||||
"""
|
|
||||||
# Don't complete if user hasn't sourced bash_completion file.
|
|
||||||
if 'PIP_AUTO_COMPLETE' not in os.environ:
|
|
||||||
return
|
|
||||||
cwords = os.environ['COMP_WORDS'].split()[1:]
|
|
||||||
cword = int(os.environ['COMP_CWORD'])
|
|
||||||
try:
|
|
||||||
current = cwords[cword - 1]
|
|
||||||
except IndexError:
|
|
||||||
current = ''
|
|
||||||
|
|
||||||
subcommands = [cmd for cmd, summary in get_summaries()]
|
|
||||||
options = []
|
|
||||||
# subcommand
|
|
||||||
try:
|
|
||||||
subcommand_name = [w for w in cwords if w in subcommands][0]
|
|
||||||
except IndexError:
|
|
||||||
subcommand_name = None
|
|
||||||
|
|
||||||
parser = create_main_parser()
|
|
||||||
# subcommand options
|
|
||||||
if subcommand_name:
|
|
||||||
# special case: 'help' subcommand has no options
|
|
||||||
if subcommand_name == 'help':
|
|
||||||
sys.exit(1)
|
|
||||||
# special case: list locally installed dists for uninstall command
|
|
||||||
if subcommand_name == 'uninstall' and not current.startswith('-'):
|
|
||||||
installed = []
|
|
||||||
lc = current.lower()
|
|
||||||
for dist in get_installed_distributions(local_only=True):
|
|
||||||
if dist.key.startswith(lc) and dist.key not in cwords[1:]:
|
|
||||||
installed.append(dist.key)
|
|
||||||
# if there are no dists installed, fall back to option completion
|
|
||||||
if installed:
|
|
||||||
for dist in installed:
|
|
||||||
print(dist)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
subcommand = commands_dict[subcommand_name]()
|
|
||||||
options += [(opt.get_opt_string(), opt.nargs)
|
|
||||||
for opt in subcommand.parser.option_list_all
|
|
||||||
if opt.help != optparse.SUPPRESS_HELP]
|
|
||||||
|
|
||||||
# filter out previously specified options from available options
|
|
||||||
prev_opts = [x.split('=')[0] for x in cwords[1:cword - 1]]
|
|
||||||
options = [(x, v) for (x, v) in options if x not in prev_opts]
|
|
||||||
# filter options by current input
|
|
||||||
options = [(k, v) for k, v in options if k.startswith(current)]
|
|
||||||
for option in options:
|
|
||||||
opt_label = option[0]
|
|
||||||
# append '=' to options which require args
|
|
||||||
if option[1]:
|
|
||||||
opt_label += '='
|
|
||||||
print(opt_label)
|
|
||||||
else:
|
|
||||||
# show main parser options only when necessary
|
|
||||||
if current.startswith('-') or current.startswith('--'):
|
|
||||||
opts = [i.option_list for i in parser.option_groups]
|
|
||||||
opts.append(parser.option_list)
|
|
||||||
opts = (o for it in opts for o in it)
|
|
||||||
|
|
||||||
subcommands += [i.get_opt_string() for i in opts
|
|
||||||
if i.help != optparse.SUPPRESS_HELP]
|
|
||||||
|
|
||||||
print(' '.join([x for x in subcommands if x.startswith(current)]))
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
def create_main_parser():
|
|
||||||
parser_kw = {
|
|
||||||
'usage': '\n%prog <command> [options]',
|
|
||||||
'add_help_option': False,
|
|
||||||
'formatter': UpdatingDefaultsHelpFormatter(),
|
|
||||||
'name': 'global',
|
|
||||||
'prog': get_prog(),
|
|
||||||
}
|
|
||||||
|
|
||||||
parser = ConfigOptionParser(**parser_kw)
|
|
||||||
parser.disable_interspersed_args()
|
|
||||||
|
|
||||||
pip_pkg_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
parser.version = 'pip %s from %s (python %s)' % (
|
|
||||||
__version__, pip_pkg_dir, sys.version[:3])
|
|
||||||
|
|
||||||
# add the general options
|
|
||||||
gen_opts = cmdoptions.make_option_group(cmdoptions.general_group, parser)
|
|
||||||
parser.add_option_group(gen_opts)
|
|
||||||
|
|
||||||
parser.main = True # so the help formatter knows
|
|
||||||
|
|
||||||
# create command listing for description
|
|
||||||
command_summaries = get_summaries()
|
|
||||||
description = [''] + ['%-27s %s' % (i, j) for i, j in command_summaries]
|
|
||||||
parser.description = '\n'.join(description)
|
|
||||||
|
|
||||||
return parser
|
|
||||||
|
|
||||||
|
|
||||||
def parseopts(args):
|
|
||||||
parser = create_main_parser()
|
|
||||||
|
|
||||||
# Note: parser calls disable_interspersed_args(), so the result of this
|
|
||||||
# call is to split the initial args into the general options before the
|
|
||||||
# subcommand and everything else.
|
|
||||||
# For example:
|
|
||||||
# args: ['--timeout=5', 'install', '--user', 'INITools']
|
|
||||||
# general_options: ['--timeout==5']
|
|
||||||
# args_else: ['install', '--user', 'INITools']
|
|
||||||
general_options, args_else = parser.parse_args(args)
|
|
||||||
|
|
||||||
# --version
|
|
||||||
if general_options.version:
|
|
||||||
sys.stdout.write(parser.version)
|
|
||||||
sys.stdout.write(os.linesep)
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
# pip || pip help -> print_help()
|
|
||||||
if not args_else or (args_else[0] == 'help' and len(args_else) == 1):
|
|
||||||
parser.print_help()
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
# the subcommand name
|
|
||||||
cmd_name = args_else[0]
|
|
||||||
|
|
||||||
if cmd_name not in commands_dict:
|
|
||||||
guess = get_similar_commands(cmd_name)
|
|
||||||
|
|
||||||
msg = ['unknown command "%s"' % cmd_name]
|
|
||||||
if guess:
|
|
||||||
msg.append('maybe you meant "%s"' % guess)
|
|
||||||
|
|
||||||
raise CommandError(' - '.join(msg))
|
|
||||||
|
|
||||||
# all the args without the subcommand
|
|
||||||
cmd_args = args[:]
|
|
||||||
cmd_args.remove(cmd_name)
|
|
||||||
|
|
||||||
return cmd_name, cmd_args
|
|
||||||
|
|
||||||
|
|
||||||
def check_isolated(args):
|
|
||||||
isolated = False
|
|
||||||
|
|
||||||
if "--isolated" in args:
|
|
||||||
isolated = True
|
|
||||||
|
|
||||||
return isolated
|
|
||||||
|
|
||||||
|
|
||||||
def main(args=None):
|
|
||||||
if args is None:
|
|
||||||
args = sys.argv[1:]
|
|
||||||
|
|
||||||
# Enable our Deprecation Warnings
|
|
||||||
for deprecation_warning in deprecation.DEPRECATIONS:
|
|
||||||
warnings.simplefilter("default", deprecation_warning)
|
|
||||||
|
|
||||||
# Configure our deprecation warnings to be sent through loggers
|
|
||||||
deprecation.install_warning_logger()
|
|
||||||
|
|
||||||
autocomplete()
|
|
||||||
|
|
||||||
try:
|
|
||||||
cmd_name, cmd_args = parseopts(args)
|
|
||||||
except PipError as exc:
|
|
||||||
sys.stderr.write("ERROR: %s" % exc)
|
|
||||||
sys.stderr.write(os.linesep)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
command = commands_dict[cmd_name](isolated=check_isolated(cmd_args))
|
|
||||||
return command.main(cmd_args)
|
|
||||||
|
|
||||||
|
|
||||||
# ###########################################################
|
|
||||||
# # Writing freeze files
|
|
||||||
|
|
||||||
class FrozenRequirement(object):
    """A single requirement entry as written to a freeze file.

    ``str()`` of an instance yields the comment lines (if any) followed by
    the requirement itself, prefixed with ``-e`` when it is editable, and a
    trailing newline.
    """

    def __init__(self, name, req, editable, comments=()):
        # name:     project name of the distribution
        # req:      requirement object or string to print
        # editable: whether the entry is written with a "-e" prefix
        # comments: iterable of comment lines printed before the requirement
        self.name = name
        self.req = req
        self.editable = editable
        self.comments = comments

    # Version suffixes of the form "-r<digits>" and "-20YYMMDD".
    _rev_re = re.compile(r'-r(\d+)$')
    _date_re = re.compile(r'-(20\d\d\d\d\d\d)$')

    @classmethod
    def from_dist(cls, dist, dependency_links, find_tags=False):
        """Build a FrozenRequirement describing the installed *dist*.

        :param dist: installed distribution; must provide ``location``,
            ``project_name``, ``as_requirement()`` and ``egg_name()``.
        :param dependency_links: passed to the svn backend's
            ``get_location()`` when looking up a checkout URL.
        :param find_tags: forwarded to ``get_src_requirement()``.
        :returns: a new instance of *cls*.
        :raises AssertionError: if a non-VCS distribution's requirement does
            not carry exactly one ``==``/``===`` specifier.
        """
        location = os.path.normcase(os.path.abspath(dist.location))
        comments = []
        from pip.vcs import vcs, get_src_requirement
        if vcs.get_backend_name(location):
            editable = True
            try:
                req = get_src_requirement(dist, location, find_tags)
            except InstallationError as exc:
                logger.warning(
                    "Error when trying to get requirement for VCS system %s, "
                    "falling back to uneditable format", exc
                )
                req = None
            if req is None:
                logger.warning(
                    'Could not determine repository location of %s', location
                )
                comments.append(
                    '## !! Could not determine repository location'
                )
                req = dist.as_requirement()
                editable = False
        else:
            editable = False
            req = dist.as_requirement()
            specs = req.specs
            assert len(specs) == 1 and specs[0][0] in ["==", "==="], \
                'Expected 1 spec with == or ===; specs = %r; dist = %r' % \
                (specs, dist)
            version = specs[0][1]
            ver_match = cls._rev_re.search(version)
            date_match = cls._date_re.search(version)
            if ver_match or date_match:
                # Start from "not found" so that a missing svn backend takes
                # the warning path below instead of failing on an unbound
                # local variable.
                svn_location = None
                svn_backend = vcs.get_backend('svn')
                if svn_backend:
                    svn_location = svn_backend().get_location(
                        dist,
                        dependency_links,
                    )
                if not svn_location:
                    logger.warning(
                        'Warning: cannot find svn location for %s', req)
                    comments.append(
                        '## FIXME: could not find svn URL in dependency_links '
                        'for this package:'
                    )
                else:
                    comments.append(
                        '# Installing as editable to satisfy requirement %s:' %
                        req
                    )
                    if ver_match:
                        rev = ver_match.group(1)
                    else:
                        rev = '{%s}' % date_match.group(1)
                    editable = True
                    req = '%s@%s#egg=%s' % (
                        svn_location,
                        rev,
                        cls.egg_name(dist)
                    )
        return cls(dist.project_name, req, editable, comments)

    @staticmethod
    def egg_name(dist):
        """Return ``dist.egg_name()`` without a trailing ``-pyX.Y`` tag."""
        name = dist.egg_name()
        match = re.search(r'-py\d\.\d$', name)
        if match:
            name = name[:match.start()]
        return name

    def __str__(self):
        req = self.req
        if self.editable:
            req = '-e %s' % req
        return '\n'.join(list(self.comments) + [str(req)]) + '\n'
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(main())
|
|
||||||
Vendored
-19
@@ -1,19 +0,0 @@
|
|||||||
from __future__ import absolute_import
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# If we are running from a wheel, add the wheel to sys.path
|
|
||||||
# This allows the usage python pip-*.whl/pip install pip-*.whl
|
|
||||||
if __package__ == '':
|
|
||||||
# __file__ is pip-*.whl/pip/__main__.py
|
|
||||||
# first dirname call strips off '/__main__.py', second strips off '/pip'
|
|
||||||
# Resulting path is the name of the wheel itself
|
|
||||||
# Add that to sys.path so we can import pip
|
|
||||||
path = os.path.dirname(os.path.dirname(__file__))
|
|
||||||
sys.path.insert(0, path)
|
|
||||||
|
|
||||||
import pip # noqa
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
sys.exit(pip.main())
|
|
||||||
-40
@@ -1,40 +0,0 @@
|
|||||||
Policy
|
|
||||||
======
|
|
||||||
|
|
||||||
Vendored libraries should not be modified except as required to actually
|
|
||||||
successfully vendor them.
|
|
||||||
|
|
||||||
|
|
||||||
Modifications
|
|
||||||
=============
|
|
||||||
|
|
||||||
* html5lib has been modified to import six from pip._vendor
|
|
||||||
* pkg_resources has been modified to import _markerlib from pip._vendor
|
|
||||||
* markerlib has been modified to import its API from pip._vendor
|
|
||||||
* CacheControl has been modified to import its dependencies from pip._vendor
|
|
||||||
* progress has been modified to not use unicode literals, in order to support Python 3.2
|
|
||||||
|
|
||||||
|
|
||||||
_markerlib and pkg_resources
|
|
||||||
============================
|
|
||||||
|
|
||||||
_markerlib and pkg_resources have been pulled in from setuptools 18.2
|
|
||||||
|
|
||||||
|
|
||||||
Note to Downstream Distributors
|
|
||||||
===============================
|
|
||||||
|
|
||||||
Libraries are vendored/bundled inside of this directory in order to prevent
|
|
||||||
end users from needing to manually install packages if they accidentally remove
|
|
||||||
something that pip depends on.
|
|
||||||
|
|
||||||
All bundled packages exist in the ``pip._vendor`` namespace, and the versions
|
|
||||||
(fetched from PyPI) that we use are located in ``vendor.txt``. If you wish
|
|
||||||
to debundle these you can do so by either deleting everything in
|
|
||||||
``pip/_vendor`` **except** for ``pip/_vendor/__init__.py`` or by running
|
|
||||||
``PIP_NO_VENDOR_FOR_DOWNSTREAM=1 setup.py install``. No other changes should
|
|
||||||
be required as the ``pip/_vendor/__init__.py`` file will alias the "real"
|
|
||||||
names (such as ``import six``) to the bundled names (such as
|
|
||||||
``import pip._vendor.six``) automatically. Alternatively if you delete the
|
|
||||||
entire ``pip._vendor`` you will need to adjust imports that import from those
|
|
||||||
locations.
|
|
||||||
-66
@@ -1,66 +0,0 @@
|
|||||||
"""
|
|
||||||
pip._vendor is for vendoring dependencies of pip to prevent needing pip to
|
|
||||||
depend on something external.
|
|
||||||
|
|
||||||
Files inside of pip._vendor should be considered immutable and should only be
|
|
||||||
updated to versions from upstream.
|
|
||||||
"""
|
|
||||||
from __future__ import absolute_import
|
|
||||||
|
|
||||||
import glob
|
|
||||||
import os.path
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# Downstream redistributors which have debundled our dependencies should also
|
|
||||||
# patch this value to be true. This will trigger the additional patching
|
|
||||||
# to cause things like "six" to be available as pip.
|
|
||||||
DEBUNDLED = False
|
|
||||||
|
|
||||||
# By default, look in this directory for a bunch of .whl files which we will
|
|
||||||
# add to the beginning of sys.path before attempting to import anything. This
|
|
||||||
# is done to support downstream re-distributors like Debian and Fedora who
|
|
||||||
# wish to create their own Wheels for our dependencies to aid in debundling.
|
|
||||||
WHEEL_DIR = os.path.abspath(os.path.dirname(__file__))
|
|
||||||
|
|
||||||
|
|
||||||
# Define a small helper function to alias our vendored modules to the real ones
|
|
||||||
# if the vendored ones do not exist. The idea for this was taken from
|
|
||||||
# https://github.com/kennethreitz/requests/pull/2567.
|
|
||||||
def vendored(modulename):
    """Ensure ``<this package>.<modulename>`` resolves to a module.

    The copy bundled under this package is tried first.  Only when that
    import fails is the top-level module of the same name imported and
    registered under the bundled dotted name, both in ``sys.modules`` and as
    an attribute of the parent module.
    """
    alias = "{0}.{1}".format(__name__, modulename)

    try:
        __import__(alias, globals(), locals(), level=0)
    except ImportError:
        __import__(modulename, globals(), locals(), level=0)
        real_module = sys.modules[modulename]
        sys.modules[alias] = real_module
        parent_name, attribute = alias.rsplit(".", 1)
        setattr(sys.modules[parent_name], attribute, real_module)
|
|
||||||
|
|
||||||
|
|
||||||
# If we're operating in a debundled setup, then we want to go ahead and trigger
|
|
||||||
# the aliasing of our vendored libraries as well as looking for wheels to add
|
|
||||||
# to our sys.path. This will cause all of this code to be a no-op typically
|
|
||||||
# however downstream redistributors can enable it in a consistent way across
|
|
||||||
# all platforms.
|
|
||||||
if DEBUNDLED:
    # Actually look inside of WHEEL_DIR to find .whl files and add them to the
    # front of our sys.path.
    sys.path[:0] = glob.glob(os.path.join(WHEEL_DIR, "*.whl"))

    # Actually alias all of our vendored dependencies.  The original order is
    # kept: each package is aliased before any of its submodules.
    for _modulename in (
        "cachecontrol",
        "colorama",
        "distlib",
        "html5lib",
        "lockfile",
        "six",
        "six.moves",
        "six.moves.urllib",
        "packaging",
        "packaging.version",
        "packaging.specifiers",
        "pkg_resources",
        "progress",
        "retrying",
        "requests",
    ):
        vendored(_modulename)
    # Do not leave the loop variable behind as a module attribute.
    del _modulename
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
try:
|
|
||||||
import ast
|
|
||||||
from pip._vendor._markerlib.markers import default_environment, compile, interpret
|
|
||||||
except ImportError:
|
|
||||||
if 'ast' in globals():
|
|
||||||
raise
|
|
||||||
def default_environment():
|
|
||||||
return {}
|
|
||||||
def compile(marker):
|
|
||||||
def marker_fn(environment=None, override=None):
|
|
||||||
# 'empty markers are True' heuristic won't install extra deps.
|
|
||||||
return not marker.strip()
|
|
||||||
marker_fn.__doc__ = marker
|
|
||||||
return marker_fn
|
|
||||||
def interpret(marker, environment=None, override=None):
|
|
||||||
return compile(marker)()
|
|
||||||
-119
@@ -1,119 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""Interpret PEP 345 environment markers.
|
|
||||||
|
|
||||||
EXPR [in|==|!=|not in] EXPR [or|and] ...
|
|
||||||
|
|
||||||
where EXPR belongs to any of those:
|
|
||||||
|
|
||||||
python_version = '%s.%s' % (sys.version_info[0], sys.version_info[1])
|
|
||||||
python_full_version = sys.version.split()[0]
|
|
||||||
os.name = os.name
|
|
||||||
sys.platform = sys.platform
|
|
||||||
platform.version = platform.version()
|
|
||||||
platform.machine = platform.machine()
|
|
||||||
platform.python_implementation = platform.python_implementation()
|
|
||||||
a free string, like '2.6', or 'win32'
|
|
||||||
"""
|
|
||||||
|
|
||||||
__all__ = ['default_environment', 'compile', 'interpret']
|
|
||||||
|
|
||||||
import ast
|
|
||||||
import os
|
|
||||||
import platform
|
|
||||||
import sys
|
|
||||||
import weakref
|
|
||||||
|
|
||||||
_builtin_compile = compile
|
|
||||||
|
|
||||||
try:
|
|
||||||
from platform import python_implementation
|
|
||||||
except ImportError:
|
|
||||||
if os.name == "java":
|
|
||||||
# Jython 2.5 has ast module, but not platform.python_implementation() function.
|
|
||||||
def python_implementation():
|
|
||||||
return "Jython"
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
# restricted set of variables
|
|
||||||
_VARS = {'sys.platform': sys.platform,
|
|
||||||
'python_version': '%s.%s' % sys.version_info[:2],
|
|
||||||
# FIXME parsing sys.version is not reliable, but there is no other
|
|
||||||
# way to get e.g. 2.7.2+, and the PEP is defined with sys.version
|
|
||||||
'python_full_version': sys.version.split(' ', 1)[0],
|
|
||||||
'os.name': os.name,
|
|
||||||
'platform.version': platform.version(),
|
|
||||||
'platform.machine': platform.machine(),
|
|
||||||
'platform.python_implementation': python_implementation(),
|
|
||||||
'extra': None # wheel extension
|
|
||||||
}
|
|
||||||
|
|
||||||
for var in list(_VARS.keys()):
|
|
||||||
if '.' in var:
|
|
||||||
_VARS[var.replace('.', '_')] = _VARS[var]
|
|
||||||
|
|
||||||
def default_environment():
    """Return a fresh copy of the default PEP 345 marker variables.

    The returned dict is independent of the module-level table, so callers
    may modify it freely.
    """
    return _VARS.copy()
|
|
||||||
|
|
||||||
class ASTWhitelist(ast.NodeTransformer):
    """Node transformer that admits only environment-marker syntax.

    Visiting a node whose type is not in ``ALLOWED`` raises ``SyntaxError``
    with a caret pointing at the offending column.  Attribute access such as
    ``os.name`` is flattened into a single ``Name`` node whose id is the
    dotted string.
    """

    def __init__(self, statement):
        self.statement = statement  # for error messages

    ALLOWED = (ast.Compare, ast.BoolOp, ast.Attribute, ast.Name, ast.Load)
    # String literals.  Older interpreters parse them as ast.Str; newer ones
    # parse them as ast.Constant and no longer provide ast.Str at all.  Read
    # the module dict directly so that merely looking the legacy name up does
    # not fail (or emit a DeprecationWarning) where it is gone or deprecated.
    _LEGACY_STR = vars(ast).get('Str')
    _CONSTANT = getattr(ast, 'Constant', None)
    if _LEGACY_STR is not None:
        ALLOWED += (_LEGACY_STR,)
    if _CONSTANT is not None:
        ALLOWED += (_CONSTANT,)
    # Bool operations
    ALLOWED += (ast.And, ast.Or)
    # Comparison operations
    ALLOWED += (ast.Eq, ast.Gt, ast.GtE, ast.In, ast.Is, ast.IsNot, ast.Lt,
                ast.LtE, ast.NotEq, ast.NotIn)

    def _reject(self, node):
        """Raise SyntaxError pointing at *node* within the statement."""
        raise SyntaxError('Not allowed in environment markers.\n%s\n%s' %
                          (self.statement,
                           (' ' * getattr(node, 'col_offset', 0)) + '^'))

    def visit(self, node):
        """Ensure statement only contains allowed nodes."""
        if not isinstance(node, self.ALLOWED):
            self._reject(node)
        # ast.Constant also covers numbers, bytes, None, ...; only string
        # literals are part of the marker grammar.
        if (self._CONSTANT is not None and
                isinstance(node, self._CONSTANT) and
                not isinstance(node.value, str)):
            self._reject(node)
        return ast.NodeTransformer.visit(self, node)

    def visit_Attribute(self, node):
        """Flatten one level of attribute access."""
        # Only ``name.attr`` can be flattened; deeper chains or attributes of
        # literals are reported as a syntax error like any other disallowed
        # construct, rather than failing on a missing ``id`` attribute.
        if not isinstance(node.value, ast.Name):
            self._reject(node)
        new_node = ast.Name("%s.%s" % (node.value.id, node.attr), node.ctx)
        return ast.copy_location(new_node, node)
|
|
||||||
|
|
||||||
def parse_marker(marker):
|
|
||||||
tree = ast.parse(marker, mode='eval')
|
|
||||||
new_tree = ASTWhitelist(marker).generic_visit(tree)
|
|
||||||
return new_tree
|
|
||||||
|
|
||||||
def compile_marker(parsed_marker):
|
|
||||||
return _builtin_compile(parsed_marker, '<environment marker>', 'eval',
|
|
||||||
dont_inherit=True)
|
|
||||||
|
|
||||||
# Compiled marker functions keyed by marker string.  Values are held weakly,
# so an entry disappears once nothing else references the function.
_cache = weakref.WeakValueDictionary()


def compile(marker):
    """Return compiled marker as a function accepting an environment dict.

    The returned function has the signature
    ``marker_fn(environment=None, override=None)`` and its ``__doc__`` is the
    marker string.  An empty (or whitespace-only) marker always evaluates to
    ``True``.  Otherwise the marker is evaluated against *environment*
    (``default_environment()`` when omitted) updated with *override*.
    """
    try:
        return _cache[marker]
    except KeyError:
        pass
    if not marker.strip():
        def marker_fn(environment=None, override=None):
            """"""
            return True
    else:
        compiled_marker = compile_marker(parse_marker(marker))

        def marker_fn(environment=None, override=None):
            """override updates environment"""
            if environment is None:
                environment = default_environment()
            else:
                # Evaluate against a private copy: both the override merge
                # and eval() (which inserts ``__builtins__``) would otherwise
                # modify the dict the caller handed in.
                environment = dict(environment)
            if override:
                environment.update(override)
            return eval(compiled_marker, environment)
    marker_fn.__doc__ = marker
    _cache[marker] = marker_fn
    return marker_fn


def interpret(marker, environment=None, override=None):
    """Compile *marker* and evaluate it in one step.

    *environment* and *override* are passed through to the compiled marker
    function; *override* is optional so existing two-argument calls keep
    working.
    """
    return compile(marker)(environment, override)
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
"""CacheControl import Interface.
|
|
||||||
|
|
||||||
Make it easy to import from cachecontrol without long namespaces.
|
|
||||||
"""
|
|
||||||
__author__ = 'Eric Larson'
|
|
||||||
__email__ = 'eric@ionrock.org'
|
|
||||||
__version__ = '0.11.5'
|
|
||||||
|
|
||||||
from .wrapper import CacheControl
|
|
||||||
from .adapter import CacheControlAdapter
|
|
||||||
from .controller import CacheController
|
|
||||||
-117
@@ -1,117 +0,0 @@
|
|||||||
import functools
|
|
||||||
|
|
||||||
from pip._vendor.requests.adapters import HTTPAdapter
|
|
||||||
|
|
||||||
from .controller import CacheController
|
|
||||||
from .cache import DictCache
|
|
||||||
from .filewrapper import CallbackFileWrapper
|
|
||||||
|
|
||||||
|
|
||||||
class CacheControlAdapter(HTTPAdapter):
    """HTTP adapter that consults and fills a cache around each request.

    GET requests are answered from the cache when the controller returns a
    cached response; otherwise conditional headers are added before the
    request is sent.  Successful PUT and DELETE requests remove the cached
    entry for their URL.
    """

    invalidating_methods = set(['PUT', 'DELETE'])

    def __init__(self, cache=None,
                 cache_etags=True,
                 controller_class=None,
                 serializer=None,
                 heuristic=None,
                 *args, **kw):
        super(CacheControlAdapter, self).__init__(*args, **kw)
        self.cache = cache or DictCache()
        self.heuristic = heuristic

        make_controller = controller_class or CacheController
        self.controller = make_controller(
            self.cache,
            cache_etags=cache_etags,
            serializer=serializer,
        )

    def send(self, request, **kw):
        """
        Send a request. Use the request information to see if it
        exists in the cache and cache the response if we need to and can.
        """
        if request.method == 'GET':
            cached = self.controller.cached_request(request)
            if cached:
                return self.build_response(request, cached, from_cache=True)

            # check for etags and add headers if appropriate
            conditional = self.controller.conditional_headers(request)
            request.headers.update(conditional)

        return super(CacheControlAdapter, self).send(request, **kw)

    def build_response(self, request, response, from_cache=False):
        """
        Build a response by making a request or using the cache.

        This will end up calling send and returning a potentially
        cached response
        """
        if not from_cache and request.method == 'GET':
            status = response.status

            if status == 304:
                # We must have sent an ETag request. This could mean
                # that we've been expired already or that we simply
                # have an etag. In either case, we want to try and
                # update the cache if that is the case.
                refreshed = self.controller.update_cached_response(
                    request, response
                )
                if refreshed is not response:
                    from_cache = True

                # We are done with the server response, read a
                # possible response body (compliant servers will
                # not return one, but we cannot be 100% sure) and
                # release the connection back to the pool.
                response.read(decode_content=False)
                response.release_conn()

                response = refreshed

            elif status == 301:
                # We always cache the 301 responses
                self.controller.cache_response(request, response)

            else:
                # Check for any heuristics that might update headers
                # before trying to cache.
                if self.heuristic:
                    response = self.heuristic.apply(response)

                # Wrap the response file with a wrapper that will cache the
                # response when the stream has been consumed.
                store_when_read = functools.partial(
                    self.controller.cache_response,
                    request,
                    response,
                )
                response._fp = CallbackFileWrapper(
                    response._fp, store_when_read
                )

        built = super(CacheControlAdapter, self).build_response(
            request, response
        )

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and built.ok:
            self.cache.delete(self.controller.cache_url(request.url))

        # Give the request a from_cache attr to let people use it
        built.from_cache = from_cache

        return built

    def close(self):
        """Close the cache, then the underlying adapter."""
        self.cache.close()
        super(CacheControlAdapter, self).close()
|
|
||||||
@@ -1,39 +0,0 @@
|
|||||||
"""
|
|
||||||
The cache object API for implementing caches. The default is a thread
|
|
||||||
safe in-memory dictionary.
|
|
||||||
"""
|
|
||||||
from threading import Lock
|
|
||||||
|
|
||||||
|
|
||||||
class BaseCache(object):
    """Interface that cache back-ends implement.

    ``get``, ``set`` and ``delete`` must be overridden; ``close`` is a no-op
    by default.
    """

    def get(self, key):
        # NotImplemented is a plain constant and cannot be called; the
        # exception class is NotImplementedError.
        raise NotImplementedError()

    def set(self, key, value):
        raise NotImplementedError()

    def delete(self, key):
        raise NotImplementedError()

    def close(self):
        pass


class DictCache(BaseCache):
    """Cache back-end storing entries in a dict.

    ``set`` and ``delete`` run while holding ``self.lock``.
    """

    def __init__(self, init_dict=None):
        self.lock = Lock()
        # Test against None rather than truthiness so that an empty dict
        # supplied by the caller is actually used as the backing store.
        self.data = {} if init_dict is None else init_dict

    def get(self, key):
        """Return the value stored under *key*, or None when absent."""
        return self.data.get(key, None)

    def set(self, key, value):
        """Store *value* under *key*."""
        with self.lock:
            self.data.update({key: value})

    def delete(self, key):
        """Remove *key*; a missing key is ignored."""
        with self.lock:
            self.data.pop(key, None)
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
from textwrap import dedent
|
|
||||||
|
|
||||||
try:
|
|
||||||
from .file_cache import FileCache
|
|
||||||
except ImportError:
|
|
||||||
notice = dedent('''
|
|
||||||
NOTE: In order to use the FileCache you must have
|
|
||||||
lockfile installed. You can install it via pip:
|
|
||||||
pip install lockfile
|
|
||||||
''')
|
|
||||||
print(notice)
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
import redis
|
|
||||||
from .redis_cache import RedisCache
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
@@ -1,116 +0,0 @@
|
|||||||
import hashlib
|
|
||||||
import os
|
|
||||||
|
|
||||||
from pip._vendor.lockfile import LockFile
|
|
||||||
from pip._vendor.lockfile.mkdirlockfile import MkdirLockFile
|
|
||||||
|
|
||||||
from ..cache import BaseCache
|
|
||||||
from ..controller import CacheController
|
|
||||||
|
|
||||||
|
|
||||||
def _secure_open_write(filename, fmode):
|
|
||||||
# We only want to write to this file, so open it in write only mode
|
|
||||||
flags = os.O_WRONLY
|
|
||||||
|
|
||||||
# os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
|
|
||||||
# will open *new* files.
|
|
||||||
# We specify this because we want to ensure that the mode we pass is the
|
|
||||||
# mode of the file.
|
|
||||||
flags |= os.O_CREAT | os.O_EXCL
|
|
||||||
|
|
||||||
# Do not follow symlinks to prevent someone from making a symlink that
|
|
||||||
# we follow and insecurely open a cache file.
|
|
||||||
if hasattr(os, "O_NOFOLLOW"):
|
|
||||||
flags |= os.O_NOFOLLOW
|
|
||||||
|
|
||||||
# On Windows we'll mark this file as binary
|
|
||||||
if hasattr(os, "O_BINARY"):
|
|
||||||
flags |= os.O_BINARY
|
|
||||||
|
|
||||||
# Before we open our file, we want to delete any existing file that is
|
|
||||||
# there
|
|
||||||
try:
|
|
||||||
os.remove(filename)
|
|
||||||
except (IOError, OSError):
|
|
||||||
# The file must not exist already, so we can just skip ahead to opening
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
|
|
||||||
# race condition happens between the os.remove and this line, that an
|
|
||||||
# error will be raised. Because we utilize a lockfile this should only
|
|
||||||
# happen if someone is attempting to attack us.
|
|
||||||
fd = os.open(filename, flags, fmode)
|
|
||||||
try:
|
|
||||||
return os.fdopen(fd, "wb")
|
|
||||||
except:
|
|
||||||
# An error occurred wrapping our FD in a file object
|
|
||||||
os.close(fd)
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
class FileCache(BaseCache):
    """Cache back-end that stores each entry as a file under a directory.

    :param directory: root directory of the cache.
    :param forever: when true, ``delete()`` leaves entries in place.
    :param filemode: mode used when creating cache files.
    :param dirmode: mode passed to ``os.makedirs`` for entry directories.
    :param use_dir_lock: when true, lock with ``MkdirLockFile``.
    :param lock_class: lock context-manager class to use instead of the
        default ``LockFile``; may not be combined with *use_dir_lock*.
    :raises ValueError: if both *use_dir_lock* and *lock_class* are given.
    """

    def __init__(self, directory, forever=False, filemode=0o0600,
                 dirmode=0o0700, use_dir_lock=None, lock_class=None):

        if use_dir_lock is not None and lock_class is not None:
            raise ValueError("Cannot use use_dir_lock and lock_class together")

        if use_dir_lock:
            lock_class = MkdirLockFile

        if lock_class is None:
            lock_class = LockFile

        self.directory = directory
        self.forever = forever
        self.filemode = filemode
        self.dirmode = dirmode
        self.lock_class = lock_class

    @staticmethod
    def encode(x):
        """Return the hex SHA-224 digest of the string *x*."""
        return hashlib.sha224(x.encode()).hexdigest()

    def _fn(self, name):
        """Return the file path used to store the entry called *name*."""
        # NOTE: This method should not change as some may depend on it.
        #       See: https://github.com/ionrock/cachecontrol/issues/63
        hashed = self.encode(name)
        # One directory level per character of the first five hex digits,
        # then the full digest as the file name.
        parts = list(hashed[:5]) + [hashed]
        return os.path.join(self.directory, *parts)

    def get(self, key):
        """Return the stored bytes for *key*, or None when no file exists."""
        name = self._fn(key)
        if not os.path.exists(name):
            return None

        with open(name, 'rb') as fh:
            return fh.read()

    def set(self, key, value):
        """Write *value* (bytes) to the file for *key* while holding a lock."""
        name = self._fn(key)

        # Make sure the directory exists
        try:
            os.makedirs(os.path.dirname(name), self.dirmode)
        except (IOError, OSError):
            pass

        with self.lock_class(name) as lock:
            # Write our actual file
            with _secure_open_write(lock.path, self.filemode) as fh:
                fh.write(value)

    def delete(self, key):
        """Remove the file for *key* unless the cache is ``forever``.

        Deleting a key that was never stored is not an error, matching
        ``DictCache.delete``.
        """
        name = self._fn(key)
        if not self.forever:
            try:
                os.remove(name)
            except OSError:
                # A missing entry is already gone; anything else (for
                # example a permissions problem) is a real failure.
                if os.path.exists(name):
                    raise
|
|
||||||
|
|
||||||
|
|
||||||
def url_to_file_path(url, filecache):
    """Map *url* to the path *filecache* would use for it.

    The URL is first turned into a cache key with
    ``CacheController.cache_url``.  The file is not guaranteed to exist.
    """
    return filecache._fn(CacheController.cache_url(url))
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
from __future__ import division
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
|
|
||||||
def total_seconds(td):
    """Return the duration of timedelta *td* in seconds (Python 2.6 compatibility)."""
    if hasattr(td, 'total_seconds'):
        return td.total_seconds()

    # Fallback for timedelta objects without total_seconds().
    ms = td.microseconds
    secs = (td.seconds + td.days * 24 * 3600)
    return (ms + secs * 10**6) / 10**6


class RedisCache(object):
    """Cache back-end that delegates to a Redis connection object.

    *conn* must provide ``get``, ``set``, ``setex``, ``delete``, ``keys``
    and ``disconnect``.
    """

    def __init__(self, conn):
        self.conn = conn

    def get(self, key):
        return self.conn.get(key)

    def set(self, key, value, expires=None):
        """Store *value* under *key*.

        :param expires: optional ``datetime`` at which the entry should
            expire; when omitted the entry is stored without a TTL.
        """
        if not expires:
            self.conn.set(key, value)
        else:
            expires = expires - datetime.now()
            # SETEX takes a whole number of seconds, so truncate the
            # fractional part instead of sending a float.
            self.conn.setex(key, int(total_seconds(expires)), value)

    def delete(self, key):
        self.conn.delete(key)

    def clear(self):
        """Helper for clearing all the keys in a database. Use with
        caution!"""
        for key in self.conn.keys():
            self.conn.delete(key)

    def close(self):
        self.conn.disconnect()
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
try:
|
|
||||||
from urllib.parse import urljoin
|
|
||||||
except ImportError:
|
|
||||||
from urlparse import urljoin
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
import cPickle as pickle
|
|
||||||
except ImportError:
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
|
|
||||||
from pip._vendor.requests.packages.urllib3.response import HTTPResponse
|
|
||||||
from pip._vendor.requests.packages.urllib3.util import is_fp_closed
|
|
||||||
@@ -1,299 +0,0 @@
|
|||||||
"""
|
|
||||||
The httplib2 algorithms ported for use with requests.
|
|
||||||
"""
|
|
||||||
import re
|
|
||||||
import calendar
|
|
||||||
import time
|
|
||||||
from email.utils import parsedate_tz
|
|
||||||
|
|
||||||
from pip._vendor.requests.structures import CaseInsensitiveDict
|
|
||||||
|
|
||||||
from .cache import DictCache
|
|
||||||
from .serialize import Serializer
|
|
||||||
|
|
||||||
|
|
||||||
# URI-splitting regular expression from Appendix B of RFC 3986.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Split *uri* into its five components using the RFC 3986 regex.

    Returns ``(scheme, authority, path, query, fragment)``; a component that
    is absent from *uri* is ``None`` (the path is ``""`` when empty).
    """
    match = URI.match(uri)
    # Groups 2, 4, 5, 7 and 9 hold the bare components; the groups around
    # them also capture the delimiters (":", "//", "?", "#").
    return match.group(2, 4, 5, 7, 9)
|
|
||||||
|
|
||||||
|
|
||||||
class CacheController(object):
|
|
||||||
"""An interface to see if request should cached or not.
|
|
||||||
"""
|
|
||||||
def __init__(self, cache=None, cache_etags=True, serializer=None):
|
|
||||||
self.cache = cache or DictCache()
|
|
||||||
self.cache_etags = cache_etags
|
|
||||||
self.serializer = serializer or Serializer()
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _urlnorm(cls, uri):
|
|
||||||
"""Normalize the URL to create a safe key for the cache"""
|
|
||||||
(scheme, authority, path, query, fragment) = parse_uri(uri)
|
|
||||||
if not scheme or not authority:
|
|
||||||
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
|
|
||||||
|
|
||||||
scheme = scheme.lower()
|
|
||||||
authority = authority.lower()
|
|
||||||
|
|
||||||
if not path:
|
|
||||||
path = "/"
|
|
||||||
|
|
||||||
# Could do syntax based normalization of the URI before
|
|
||||||
# computing the digest. See Section 6.2.2 of Std 66.
|
|
||||||
request_uri = query and "?".join([path, query]) or path
|
|
||||||
defrag_uri = scheme + "://" + authority + request_uri
|
|
||||||
|
|
||||||
return defrag_uri
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def cache_url(cls, uri):
|
|
||||||
return cls._urlnorm(uri)
|
|
||||||
|
|
||||||
def parse_cache_control(self, headers):
|
|
||||||
"""
|
|
||||||
Parse the cache control headers returning a dictionary with values
|
|
||||||
for the different directives.
|
|
||||||
"""
|
|
||||||
retval = {}
|
|
||||||
|
|
||||||
cc_header = 'cache-control'
|
|
||||||
if 'Cache-Control' in headers:
|
|
||||||
cc_header = 'Cache-Control'
|
|
||||||
|
|
||||||
if cc_header in headers:
|
|
||||||
parts = headers[cc_header].split(',')
|
|
||||||
parts_with_args = [
|
|
||||||
tuple([x.strip().lower() for x in part.split("=", 1)])
|
|
||||||
for part in parts if -1 != part.find("=")
|
|
||||||
]
|
|
||||||
parts_wo_args = [
|
|
||||||
(name.strip().lower(), 1)
|
|
||||||
for name in parts if -1 == name.find("=")
|
|
||||||
]
|
|
||||||
retval = dict(parts_with_args + parts_wo_args)
|
|
||||||
return retval
|
|
||||||
|
|
||||||
def cached_request(self, request):
|
|
||||||
"""
|
|
||||||
Return a cached response if it exists in the cache, otherwise
|
|
||||||
return False.
|
|
||||||
"""
|
|
||||||
cache_url = self.cache_url(request.url)
|
|
||||||
cc = self.parse_cache_control(request.headers)
|
|
||||||
|
|
||||||
# non-caching states
|
|
||||||
no_cache = True if 'no-cache' in cc else False
|
|
||||||
if 'max-age' in cc and cc['max-age'] == 0:
|
|
||||||
no_cache = True
|
|
||||||
|
|
||||||
# Bail out if no-cache was set
|
|
||||||
if no_cache:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# It is in the cache, so lets see if it is going to be
|
|
||||||
# fresh enough
|
|
||||||
resp = self.serializer.loads(request, self.cache.get(cache_url))
|
|
||||||
|
|
||||||
# Check to see if we have a cached object
|
|
||||||
if not resp:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# If we have a cached 301, return it immediately. We don't
|
|
||||||
# need to test our response for other headers b/c it is
|
|
||||||
# intrinsically "cacheable" as it is Permanent.
|
|
||||||
# See:
|
|
||||||
# https://tools.ietf.org/html/rfc7231#section-6.4.2
|
|
||||||
#
|
|
||||||
# Client can try to refresh the value by repeating the request
|
|
||||||
# with cache busting headers as usual (ie no-cache).
|
|
||||||
if resp.status == 301:
|
|
||||||
return resp
|
|
||||||
|
|
||||||
headers = CaseInsensitiveDict(resp.headers)
|
|
||||||
if not headers or 'date' not in headers:
|
|
||||||
# With date or etag, the cached response can never be used
|
|
||||||
# and should be deleted.
|
|
||||||
if 'etag' not in headers:
|
|
||||||
self.cache.delete(cache_url)
|
|
||||||
return False
|
|
||||||
|
|
||||||
now = time.time()
|
|
||||||
date = calendar.timegm(
|
|
||||||
parsedate_tz(headers['date'])
|
|
||||||
)
|
|
||||||
current_age = max(0, now - date)
|
|
||||||
|
|
||||||
# TODO: There is an assumption that the result will be a
|
|
||||||
# urllib3 response object. This may not be best since we
|
|
||||||
# could probably avoid instantiating or constructing the
|
|
||||||
# response until we know we need it.
|
|
||||||
resp_cc = self.parse_cache_control(headers)
|
|
||||||
|
|
||||||
# determine freshness
|
|
||||||
freshness_lifetime = 0
|
|
||||||
|
|
||||||
# Check the max-age pragma in the cache control header
|
|
||||||
if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
|
|
||||||
freshness_lifetime = int(resp_cc['max-age'])
|
|
||||||
|
|
||||||
# If there isn't a max-age, check for an expires header
|
|
||||||
elif 'expires' in headers:
|
|
||||||
expires = parsedate_tz(headers['expires'])
|
|
||||||
if expires is not None:
|
|
||||||
expire_time = calendar.timegm(expires) - date
|
|
||||||
freshness_lifetime = max(0, expire_time)
|
|
||||||
|
|
||||||
# determine if we are setting freshness limit in the req
|
|
||||||
if 'max-age' in cc:
|
|
||||||
try:
|
|
||||||
freshness_lifetime = int(cc['max-age'])
|
|
||||||
except ValueError:
|
|
||||||
freshness_lifetime = 0
|
|
||||||
|
|
||||||
if 'min-fresh' in cc:
|
|
||||||
try:
|
|
||||||
min_fresh = int(cc['min-fresh'])
|
|
||||||
except ValueError:
|
|
||||||
min_fresh = 0
|
|
||||||
# adjust our current age by our min fresh
|
|
||||||
current_age += min_fresh
|
|
||||||
|
|
||||||
# see how fresh we actually are
|
|
||||||
fresh = (freshness_lifetime > current_age)
|
|
||||||
|
|
||||||
if fresh:
|
|
||||||
return resp
|
|
||||||
|
|
||||||
# we're not fresh. If we don't have an Etag, clear it out
|
|
||||||
if 'etag' not in headers:
|
|
||||||
self.cache.delete(cache_url)
|
|
||||||
|
|
||||||
# return the original handler
|
|
||||||
return False
|
|
||||||
|
|
||||||
def conditional_headers(self, request):
    """Return the validator headers for revalidating a cached response.

    Looks up the cached entry for ``request.url``. When one exists, its
    ``ETag`` is echoed back as ``If-None-Match`` and its ``Last-Modified``
    as ``If-Modified-Since``. With no usable cached entry the result is an
    empty dict.
    """
    key = self.cache_url(request.url)
    cached = self.serializer.loads(request, self.cache.get(key))
    if not cached:
        return {}

    stored = CaseInsensitiveDict(cached.headers)
    validators = {}

    if 'etag' in stored:
        validators['If-None-Match'] = stored['ETag']

    if 'last-modified' in stored:
        validators['If-Modified-Since'] = stored['Last-Modified']

    return validators
|
|
||||||
|
|
||||||
def cache_response(self, request, response, body=None):
    """
    Algorithm for caching requests.

    This assumes a requests Response object.

    Only 200, 203, 300 and 301 responses are considered. A ``no-store``
    directive on either the request or the response evicts any stored
    entry and prevents the response from being stored again. Otherwise
    the response is stored when it carries an ETag (and ``cache_etags``
    is enabled), when it is a 301, or when it has a Date header together
    with a positive ``max-age`` or a non-empty ``Expires``.

    ``body`` is handed to the serializer as-is; the 301 branch stores
    the response without it.
    """
    # From httplib2: Don't cache 206's since we aren't going to
    # handle byte range requests
    if response.status not in [200, 203, 300, 301]:
        return

    response_headers = CaseInsensitiveDict(response.headers)

    cc_req = self.parse_cache_control(request.headers)
    cc = self.parse_cache_control(response_headers)

    cache_url = self.cache_url(request.url)

    # Delete it from the cache if we happen to have it stored there
    no_store = cc.get('no-store') or cc_req.get('no-store')
    if no_store:
        if self.cache.get(cache_url):
            self.cache.delete(cache_url)
        # no-store means "do not keep this response": stop here rather
        # than falling through to the branches below that would store
        # it again.
        return

    # If we've been given an etag, then keep the response
    if self.cache_etags and 'etag' in response_headers:
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, response, body=body),
        )

    # Add to the cache any 301s. We do this before looking at
    # the Date headers.
    elif response.status == 301:
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, response)
        )

    # Add to the cache if the response headers demand it. If there
    # is no date header then we can't do anything about expiring
    # the cache.
    elif 'date' in response_headers:
        # cache when there is a max-age > 0
        if cc and cc.get('max-age'):
            try:
                max_age = int(cc['max-age'])
            except ValueError:
                # A malformed max-age must not abort the request; treat
                # it as "not cacheable".
                max_age = 0
            if max_age > 0:
                self.cache.set(
                    cache_url,
                    self.serializer.dumps(request, response, body=body),
                )

        # If the request can expire, it means we should cache it
        # in the meantime.
        elif 'expires' in response_headers:
            if response_headers['expires']:
                self.cache.set(
                    cache_url,
                    self.serializer.dumps(request, response, body=body),
                )
|
|
||||||
|
|
||||||
def update_cached_response(self, request, response):
    """Merge the headers of a 304 response into the cached response.

    This should only ever be called when we've sent an ETag and
    gotten a 304 as the response.

    Returns ``response`` untouched when nothing is cached for the
    request URL. Otherwise returns the cached response, with its headers
    refreshed, its status forced to 200, and the updated copy written
    back to the cache.
    """
    cache_url = self.cache_url(request.url)
    cached_response = self.serializer.loads(request, self.cache.get(cache_url))

    # Nothing cached: hand the caller back what it gave us.
    if not cached_response:
        return response

    # Update our headers with the headers from the new response:
    # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
    #
    # The server isn't supposed to send headers that would make the
    # cached body invalid, but just in case we strip the ones known to
    # be problematic under typical assumptions.
    excluded_headers = ["content-length"]

    refreshed = {}
    for name, value in response.headers.items():
        if name.lower() in excluded_headers:
            continue
        refreshed[name] = value
    cached_response.headers.update(refreshed)

    # We have content via the cache, so present the result as a 200.
    cached_response.status = 200

    # Persist the refreshed entry.
    serialized = self.serializer.dumps(request, cached_response)
    self.cache.set(cache_url, serialized)

    return cached_response
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
from io import BytesIO
|
|
||||||
|
|
||||||
|
|
||||||
class CallbackFileWrapper(object):
    """
    Tee-style wrapper around a file-like object.

    Every chunk returned by ``read`` is also copied into an internal
    buffer. Once the wrapped object reports itself closed, the callback
    is invoked once with the full buffered contents.

    Any attribute not defined here is looked up on the wrapped object.

    Members use a double underscore (__) prefix so that name mangling
    keeps them from shadowing attributes of the wrapped object.
    """

    def __init__(self, fp, callback):
        self.__fp = fp
        self.__callback = callback
        self.__buf = BytesIO()

    def __getattr__(self, name):
        # The vagaries of garbage collection mean that self.__fp is not
        # always set. Going through __getattribute__ with the mangled
        # name [0] raises AttributeError when it is missing, instead of
        # re-entering __getattr__ and recursing forever.
        #
        # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
        wrapped = self.__getattribute__('_CallbackFileWrapper__fp')
        return getattr(wrapped, name)

    def __is_fp_closed(self):
        # First ask for an inner ``fp`` attribute being None, then for a
        # plain ``closed`` flag; whichever the wrapped object supports
        # first decides the answer.
        probes = (
            lambda handle: handle.fp is None,
            lambda handle: handle.closed,
        )
        for probe in probes:
            try:
                return probe(self.__fp)
            except AttributeError:
                continue

        # We just don't cache it then.
        # TODO: Add some logging here...
        return False

    def read(self, amt=None):
        chunk = self.__fp.read(amt)
        self.__buf.write(chunk)

        if not self.__is_fp_closed():
            return chunk

        if self.__callback:
            self.__callback(self.__buf.getvalue())

        # Drop the callback afterwards. Otherwise the CPython interpreter
        # can deadlock when the callback holds a reference to something
        # with a __del__ method; clearing it breaks the cycle and lets
        # the garbage collector do its thing normally.
        self.__callback = None

        return chunk
|
|
||||||
@@ -1,134 +0,0 @@
|
|||||||
import calendar
|
|
||||||
import time
|
|
||||||
|
|
||||||
from email.utils import formatdate, parsedate, parsedate_tz
|
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
|
|
||||||
|
|
||||||
|
|
||||||
def expire_after(delta, date=None):
    """Return the moment ``delta`` after ``date``.

    ``date`` defaults to the current time expressed as a naive UTC
    datetime. It must be UTC because ``datetime_to_header`` feeds the
    result through ``calendar.timegm``, which reads the time tuple as
    UTC; a local-time default would skew the resulting Expires value by
    the machine's UTC offset.
    """
    if date is None:
        date = datetime.utcnow()
    return date + delta
|
|
||||||
|
|
||||||
|
|
||||||
def datetime_to_header(dt):
    """Format ``dt`` as an RFC 2822 date string, reading its fields as UTC."""
    epoch_seconds = calendar.timegm(dt.timetuple())
    return formatdate(epoch_seconds)
|
|
||||||
|
|
||||||
|
|
||||||
class BaseHeuristic(object):
    """Base class for heuristics that adjust a response's caching headers."""

    def warning(self, response):
        """
        Return a valid 1xx warning header value describing the cache
        adjustments.

        The response is provided to allow warnings like 113
        http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
        to explicitly say response is over 24 hours old.
        """
        return '110 - "Response is Stale"'

    def update_headers(self, response):
        """Return a dict of headers to merge into the response.

        NOTE: This SHOULD always include some Warning header to
              signify that the response was cached by the client, not
              by way of the provided headers.
        """
        return {}

    def apply(self, response):
        """Merge the heuristic's headers and warning into ``response``.

        The warning is computed first, then the extra headers are
        merged, and finally a ``Warning`` header is added unless
        ``warning`` returned None. Returns the same response object.
        """
        warning_value = self.warning(response)
        extra_headers = self.update_headers(response)
        response.headers.update(extra_headers)

        if warning_value is not None:
            response.headers.update({'Warning': warning_value})

        return response
|
|
||||||
|
|
||||||
|
|
||||||
class OneDayCache(BaseHeuristic):
    """
    Cache the response by providing an expires 1 day in the
    future.
    """

    def update_headers(self, response):
        """Return Expires/Cache-Control headers one day past the Date header.

        Returns an empty dict when the response already has an
        ``expires`` header.
        """
        if 'expires' in response.headers:
            return {}

        parsed = parsedate(response.headers['date'])
        served_at = datetime(*parsed[:6])
        expires = expire_after(timedelta(days=1), date=served_at)

        return {
            'expires': datetime_to_header(expires),
            'cache-control': 'public',
        }
|
|
||||||
|
|
||||||
|
|
||||||
class ExpiresAfter(BaseHeuristic):
    """
    Cache **all** requests for a defined time period.
    """

    def __init__(self, **kw):
        # Keyword arguments are passed straight to timedelta, e.g.
        # ExpiresAfter(days=1).
        self.delta = timedelta(**kw)

    def update_headers(self, response):
        """Return Expires/Cache-Control headers ``self.delta`` from now."""
        headers = {}
        headers['expires'] = datetime_to_header(expire_after(self.delta))
        headers['cache-control'] = 'public'
        return headers

    def warning(self, response):
        """Return a 110 warning that names the configured period."""
        return '110 - Automatically cached for %s. Response might be stale' % self.delta
|
|
||||||
|
|
||||||
|
|
||||||
class LastModified(BaseHeuristic):
    """
    If there is no Expires header already, fall back on Last-Modified
    using the heuristic from
    http://tools.ietf.org/html/rfc7234#section-4.2.2
    to calculate a reasonable value.

    Firefox also does something like this per
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
    http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
    Unlike mozilla we limit this to 24-hr.
    """
    cacheable_by_default_statuses = set([
        200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
    ])

    def update_headers(self, resp):
        """Return an ``expires`` header derived from Last-Modified, or {}.

        The freshness lifetime is one tenth of the interval between the
        Date and Last-Modified headers, capped at 24 hours. An empty
        dict is returned when the response already has Expires, has a
        Cache-Control other than ``public``, has a status outside
        ``cacheable_by_default_statuses``, lacks either date header, has
        an unparseable date header, or is already older than the
        computed lifetime.
        """
        headers = resp.headers

        if 'expires' in headers:
            return {}

        if 'cache-control' in headers and headers['cache-control'] != 'public':
            return {}

        if resp.status not in self.cacheable_by_default_statuses:
            return {}

        if 'date' not in headers or 'last-modified' not in headers:
            return {}

        # Both parsers return None for unparseable input. Check before
        # handing the tuple to calendar.timegm, which would otherwise
        # raise TypeError on None.
        date_tuple = parsedate_tz(headers['date'])
        last_modified = parsedate(headers['last-modified'])
        if date_tuple is None or last_modified is None:
            return {}

        date = calendar.timegm(date_tuple)
        now = time.time()
        current_age = max(0, now - date)
        delta = date - calendar.timegm(last_modified)
        freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
        if freshness_lifetime <= current_age:
            return {}

        expires = date + freshness_lifetime
        return {'expires': time.strftime(TIME_FMT, time.gmtime(expires))}

    def warning(self, resp):
        """Return None so that ``apply`` adds no Warning header."""
        return None
|
|
||||||
@@ -1,184 +0,0 @@
|
|||||||
import base64
|
|
||||||
import io
|
|
||||||
import json
|
|
||||||
import zlib
|
|
||||||
|
|
||||||
from pip._vendor.requests.structures import CaseInsensitiveDict
|
|
||||||
|
|
||||||
from .compat import HTTPResponse, pickle
|
|
||||||
|
|
||||||
|
|
||||||
def _b64_encode_bytes(b):
|
|
||||||
return base64.b64encode(b).decode("ascii")
|
|
||||||
|
|
||||||
|
|
||||||
def _b64_encode_str(s):
    """UTF-8 encode the text ``s``, then base64-encode it to ASCII text."""
    raw = s.encode("utf8")
    return _b64_encode_bytes(raw)
|
|
||||||
|
|
||||||
|
|
||||||
def _b64_decode_bytes(b):
|
|
||||||
return base64.b64decode(b.encode("ascii"))
|
|
||||||
|
|
||||||
|
|
||||||
def _b64_decode_str(s):
    """Decode the base64 text ``s`` and interpret the bytes as UTF-8."""
    raw = _b64_decode_bytes(s)
    return raw.decode("utf8")
|
|
||||||
|
|
||||||
|
|
||||||
class Serializer(object):
    """Serialize cached HTTP responses to bytes and load them back.

    The current on-disk format is ``b"cc=2,"`` followed by a
    zlib-compressed JSON document. Older formats (``cc=0`` legacy data,
    ``cc=1`` pickle) are recognised by ``loads``.
    """

    def dumps(self, request, response, body=None):
        """Return the ``cc=2`` serialization of ``response``.

        When ``body`` is None the body is read from ``response``. The
        request header values named by the response's Vary header are
        stored alongside, so that ``prepare_response`` can later check
        that a new request matches.
        """
        response_headers = CaseInsensitiveDict(response.headers)

        if body is None:
            body = response.read(decode_content=False)

            # NOTE: 99% sure this is dead code. I'm only leaving it
            #       here b/c I don't have a test yet to prove
            #       it. Basically, before using
            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
            #       this made an effort to reset the file handle. The
            #       `CallbackFileWrapper` short circuits this code by
            #       setting the body as the content is consumed, the
            #       result being a `body` argument is *always* passed
            #       into cache_response, and in turn,
            #       `Serializer.dump`.
            response._fp = io.BytesIO(body)

        data = {
            "response": {
                "body": _b64_encode_bytes(body),
                "headers": dict(
                    (_b64_encode_str(k), _b64_encode_str(v))
                    for k, v in response.headers.items()
                ),
                "status": response.status,
                "version": response.version,
                "reason": _b64_encode_str(response.reason),
                "strict": response.strict,
                "decode_content": response.decode_content,
            },
        }

        # Construct our vary headers
        data["vary"] = {}
        if "vary" in response_headers:
            varied_headers = response_headers['vary'].split(',')
            for header in varied_headers:
                header = header.strip()
                data["vary"][header] = request.headers.get(header, None)

        # Encode our Vary headers to ensure they can be serialized as JSON
        data["vary"] = dict(
            (_b64_encode_str(k), _b64_encode_str(v) if v is not None else v)
            for k, v in data["vary"].items()
        )

        return b",".join([
            b"cc=2",
            zlib.compress(
                json.dumps(
                    data, separators=(",", ":"), sort_keys=True,
                ).encode("utf8"),
            ),
        ])

    def loads(self, request, data):
        """Deserialize ``data`` into a response, or return None on a miss.

        Empty data, an unknown format version, and legacy (``cc=0``)
        data are all treated as cache misses.
        """
        # Short circuit if we've been given an empty set of data
        if not data:
            return

        # Determine what version of the serializer the data was serialized
        # with
        try:
            ver, data = data.split(b",", 1)
        except ValueError:
            ver = b"cc=0"

        # Make sure that our "ver" is actually a version and isn't a false
        # positive from a , being in the data stream.
        if ver[:3] != b"cc=":
            # Put back the comma that split() removed so the legacy
            # payload is reconstructed exactly.
            data = ver + b"," + data
            ver = b"cc=0"

        # Get the version number out of the cc=N
        ver = ver.split(b"=", 1)[-1].decode("ascii")

        # Dispatch to the actual load method for the given version
        try:
            return getattr(self, "_loads_v{0}".format(ver))(request, data)
        except AttributeError:
            # This is a version we don't have a loads function for, so we'll
            # just treat it as a miss and return None
            return

    def prepare_response(self, request, cached):
        """Verify our vary headers match and construct a real urllib3
        HTTPResponse object.

        Returns None when the cached entry cannot be used for this
        request.
        """
        # Special case the '*' Vary value as it means we cannot actually
        # determine if the cached response is suitable for this request.
        if "*" in cached.get("vary", {}):
            return

        # Ensure that the Vary headers for the cached response match our
        # request
        for header, value in cached.get("vary", {}).items():
            if request.headers.get(header, None) != value:
                return

        body_raw = cached["response"].pop("body")

        try:
            body = io.BytesIO(body_raw)
        except TypeError:
            # This can happen if cachecontrol serialized to v1 format (pickle)
            # using Python 2. A Python 2 str(byte string) will be unpickled as
            # a Python 3 str (unicode string), which will cause the above to
            # fail with:
            #
            #     TypeError: 'str' does not support the buffer interface
            body = io.BytesIO(body_raw.encode('utf8'))

        return HTTPResponse(
            body=body,
            preload_content=False,
            **cached["response"]
        )

    def _loads_v0(self, request, data):
        # The original legacy cache data. This doesn't contain enough
        # information to construct everything we need, so we'll treat this as
        # a miss.
        return

    def _loads_v1(self, request, data):
        # SECURITY NOTE(review): unpickling executes arbitrary code if the
        # cache contents can be written by an untrusted party. Kept for
        # compatibility with existing v1 cache entries.
        try:
            cached = pickle.loads(data)
        except ValueError:
            return

        return self.prepare_response(request, cached)

    def _loads_v2(self, request, data):
        # A truncated or corrupted entry makes zlib.decompress raise
        # zlib.error, which is not a ValueError. Treat it as a cache miss
        # like any other undecodable payload.
        try:
            cached = json.loads(zlib.decompress(data).decode("utf8"))
        except (ValueError, zlib.error):
            return

        # We need to decode the items that we've base64 encoded
        cached["response"]["body"] = _b64_decode_bytes(
            cached["response"]["body"]
        )
        cached["response"]["headers"] = dict(
            (_b64_decode_str(k), _b64_decode_str(v))
            for k, v in cached["response"]["headers"].items()
        )
        cached["response"]["reason"] = _b64_decode_str(
            cached["response"]["reason"],
        )
        cached["vary"] = dict(
            (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
            for k, v in cached["vary"].items()
        )

        return self.prepare_response(request, cached)
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
from .adapter import CacheControlAdapter
|
|
||||||
from .cache import DictCache
|
|
||||||
|
|
||||||
|
|
||||||
def CacheControl(sess,
                 cache=None,
                 cache_etags=True,
                 serializer=None,
                 heuristic=None):
    """Mount a caching adapter on ``sess`` for http:// and https://.

    ``cache`` defaults to a new ``DictCache``. The remaining keyword
    arguments are passed through to ``CacheControlAdapter``. Returns the
    same session object.
    """
    # Compare against None explicitly. A caller-supplied cache that is
    # merely falsy (for example an empty container-like cache) must not
    # be silently swapped for a DictCache.
    if cache is None:
        cache = DictCache()

    adapter = CacheControlAdapter(
        cache,
        cache_etags=cache_etags,
        serializer=serializer,
        heuristic=heuristic,
    )
    sess.mount('http://', adapter)
    sess.mount('https://', adapter)

    return sess
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
|
||||||
from .initialise import init, deinit, reinit
|
|
||||||
from .ansi import Fore, Back, Style, Cursor
|
|
||||||
from .ansitowin32 import AnsiToWin32
|
|
||||||
|
|
||||||
__version__ = '0.3.3'
|
|
||||||
|
|
||||||
-99
@@ -1,99 +0,0 @@
|
|||||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
|
||||||
'''
|
|
||||||
This module generates ANSI character codes to printing colors to terminals.
|
|
||||||
See: http://en.wikipedia.org/wiki/ANSI_escape_code
|
|
||||||
'''
|
|
||||||
|
|
||||||
CSI = '\033['
|
|
||||||
OSC = '\033]'
|
|
||||||
BEL = '\007'
|
|
||||||
|
|
||||||
|
|
||||||
def code_to_chars(code):
    """Return the escape string CSI + str(code) + 'm' for ``code``."""
    return ''.join((CSI, str(code), 'm'))
|
|
||||||
|
|
||||||
|
|
||||||
class AnsiCodes(object):
    """Copy of a codes class with each value turned into its escape string."""

    def __init__(self, codes):
        # Every attribute of ``codes`` whose name does not start with an
        # underscore is converted with code_to_chars and stored on this
        # instance under the same name.
        for name in dir(codes):
            if name.startswith('_'):
                continue
            setattr(self, name, code_to_chars(getattr(codes, name)))
|
|
||||||
|
|
||||||
|
|
||||||
class AnsiCursor(object):
    """Builders for cursor-movement escape strings."""

    def UP(self, n=1):
        """Return CSI + n + 'A'."""
        return "".join((CSI, str(n), "A"))

    def DOWN(self, n=1):
        """Return CSI + n + 'B'."""
        return "".join((CSI, str(n), "B"))

    def FORWARD(self, n=1):
        """Return CSI + n + 'C'."""
        return "".join((CSI, str(n), "C"))

    def BACK(self, n=1):
        """Return CSI + n + 'D'."""
        return "".join((CSI, str(n), "D"))

    def POS(self, x=1, y=1):
        """Return CSI + y + ';' + x + 'H' (note that y comes first)."""
        return "".join((CSI, str(y), ";", str(x), "H"))
|
|
||||||
|
|
||||||
def set_title(title):
    """Return the escape string OSC + '2;' + title + BEL."""
    return "".join((OSC, "2;", title, BEL))
|
|
||||||
|
|
||||||
def clear_screen(mode=2):
    """Return the escape string CSI + str(mode) + 'J'."""
    return "".join((CSI, str(mode), "J"))
|
|
||||||
|
|
||||||
def clear_line(mode=2):
    """Return the escape string CSI + str(mode) + 'K'."""
    return "".join((CSI, str(mode), "K"))
|
|
||||||
|
|
||||||
|
|
||||||
class AnsiFore:
    # Numeric codes for foreground colours: 30-37, plus RESET.
    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(30, 38)
    RESET = 39

    # These are fairly well supported, but not part of the standard.
    (LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX,
     LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX) = range(90, 98)
|
|
||||||
|
|
||||||
|
|
||||||
class AnsiBack:
    # Numeric codes for background colours: 40-47, plus RESET.
    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = range(40, 48)
    RESET = 49

    # These are fairly well supported, but not part of the standard.
    (LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX,
     LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX) = range(100, 108)
|
|
||||||
|
|
||||||
|
|
||||||
class AnsiStyle:
    # Numeric codes for text intensity; RESET_ALL is code 0.
    RESET_ALL = 0
    BRIGHT = 1
    DIM = 2
    NORMAL = 22
|
|
||||||
|
|
||||||
# Module-level instances: the code tables converted to escape strings,
# plus a cursor helper.
Fore = AnsiCodes(AnsiFore)
Back = AnsiCodes(AnsiBack)
Style = AnsiCodes(AnsiStyle)
Cursor = AnsiCursor()
|
|
||||||
-228
@@ -1,228 +0,0 @@
|
|||||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style
|
|
||||||
from .winterm import WinTerm, WinColor, WinStyle
|
|
||||||
from .win32 import windll
|
|
||||||
|
|
||||||
|
|
||||||
# A WinTerm is only created when windll was imported as something other
# than None; otherwise winterm stays None.
winterm = WinTerm() if windll is not None else None
|
|
||||||
|
|
||||||
|
|
||||||
def is_a_tty(stream):
    """Return ``stream.isatty()``, or False if the stream has no isatty."""
    if not hasattr(stream, 'isatty'):
        return False
    return stream.isatty()
|
|
||||||
|
|
||||||
|
|
||||||
class StreamWrapper(object):
    '''
    Transparent proxy around a stream (such as stdout). Every attribute
    lookup is forwarded to the wrapped stream, except 'write()', which is
    handed to the Converter instance instead.
    '''

    def __init__(self, wrapped, converter):
        # Double-underscore names are mangled, so they cannot clash with
        # attribute names on the wrapped stream object.
        self.__convertor = converter
        self.__wrapped = wrapped

    def __getattr__(self, name):
        # Only reached for names not found on this proxy itself.
        target = self.__wrapped
        return getattr(target, name)

    def write(self, text):
        # Writes go through the converter, never directly to the stream.
        self.__convertor.write(text)
|
|
||||||
|
|
||||||
|
|
||||||
class AnsiToWin32(object):
    '''
    Implements a 'write()' method which, on Windows, will strip ANSI character
    sequences from the text, and if outputting to a tty, will convert them into
    win32 function calls.
    '''
    # Raw strings keep the regex escapes (\[, \], \d) away from the Python
    # string-literal parser; the compiled patterns are unchanged.
    ANSI_CSI_RE = re.compile(r'\033\[((?:\d|;)*)([a-zA-Z])')  # Control Sequence Introducer
    ANSI_OSC_RE = re.compile(r'\033\]((?:.|;)*?)(\x07)')      # Operating System Command

    def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
        # The wrapped stream (normally sys.stdout or sys.stderr)
        self.wrapped = wrapped

        # should we reset colors to defaults after every .write()
        self.autoreset = autoreset

        # create the proxy wrapping our output stream
        self.stream = StreamWrapper(wrapped, self)

        on_windows = os.name == 'nt'
        on_emulated_windows = on_windows and 'TERM' in os.environ

        # should we strip ANSI sequences from our output?
        if strip is None:
            strip = on_windows and not on_emulated_windows
        self.strip = strip

        # should we convert ANSI sequences into win32 calls?
        if convert is None:
            convert = on_windows and not wrapped.closed and not on_emulated_windows and is_a_tty(wrapped)
        self.convert = convert

        # dict of ansi codes to win32 functions and parameters
        self.win32_calls = self.get_win32_calls()

        # are we wrapping stderr?
        self.on_stderr = self.wrapped is sys.stderr

    def should_wrap(self):
        '''
        True if this class is actually needed. If false, then the output
        stream will not be affected, nor will win32 calls be issued, so
        wrapping stdout is not actually required. This will generally be
        False on non-Windows platforms, unless optional functionality like
        autoreset has been requested using kwargs to init()
        '''
        return self.convert or self.strip or self.autoreset

    def get_win32_calls(self):
        '''
        Return the mapping from ANSI 'm' codes to (function, *args) tuples.
        The mapping is empty unless conversion is enabled and a winterm
        instance exists.
        '''
        if self.convert and winterm:
            return {
                AnsiStyle.RESET_ALL: (winterm.reset_all, ),
                AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
                AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
                AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
                AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
                AnsiFore.RED: (winterm.fore, WinColor.RED),
                AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
                AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
                AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
                AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
                AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
                AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
                AnsiFore.RESET: (winterm.fore, ),
                AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True),
                AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True),
                AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True),
                AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True),
                AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True),
                AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True),
                AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True),
                AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True),
                AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
                AnsiBack.RED: (winterm.back, WinColor.RED),
                AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
                AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
                AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
                AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
                AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
                AnsiBack.WHITE: (winterm.back, WinColor.GREY),
                AnsiBack.RESET: (winterm.back, ),
                AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True),
                AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True),
                AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True),
                AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True),
                AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True),
                AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True),
                AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True),
                AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True),
            }
        return dict()

    def write(self, text):
        '''
        Write text to the wrapped stream, stripping/converting ANSI
        sequences when enabled, then reset styles if autoreset is on.
        '''
        if self.strip or self.convert:
            self.write_and_convert(text)
        else:
            self.wrapped.write(text)
            self.wrapped.flush()
        if self.autoreset:
            self.reset_all()

    def reset_all(self):
        '''
        Reset styles: through win32 calls when converting, otherwise by
        writing Style.RESET_ALL if the wrapped stream is an open tty.
        '''
        if self.convert:
            self.call_win32('m', (0,))
        elif not self.wrapped.closed and is_a_tty(self.wrapped):
            self.wrapped.write(Style.RESET_ALL)

    def write_and_convert(self, text):
        '''
        Write the given text to our wrapped stream, stripping any ANSI
        sequences from the text, and optionally converting them into win32
        calls.
        '''
        cursor = 0
        text = self.convert_osc(text)
        for match in self.ANSI_CSI_RE.finditer(text):
            start, end = match.span()
            self.write_plain_text(text, cursor, start)
            self.convert_ansi(*match.groups())
            cursor = end
        self.write_plain_text(text, cursor, len(text))

    def write_plain_text(self, text, start, end):
        '''Write and flush text[start:end]; do nothing for an empty range.'''
        if start < end:
            self.wrapped.write(text[start:end])
            self.wrapped.flush()

    def convert_ansi(self, paramstring, command):
        '''Turn one CSI sequence into win32 calls when converting.'''
        if self.convert:
            params = self.extract_params(command, paramstring)
            self.call_win32(command, params)

    def extract_params(self, command, paramstring):
        '''
        Parse the ';'-separated numeric parameters of a CSI sequence into
        a tuple of ints, filling in the per-command defaults.
        '''
        if command in 'Hf':
            params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';'))
            while len(params) < 2:
                # defaults:
                params = params + (1,)
        else:
            params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0)
            if len(params) == 0:
                # defaults:
                if command in 'JKm':
                    params = (0,)
                elif command in 'ABCD':
                    params = (1,)

        return params

    def call_win32(self, command, params):
        '''Dispatch one parsed CSI command to the matching winterm call.'''
        if command == 'm':
            for param in params:
                if param in self.win32_calls:
                    func_args = self.win32_calls[param]
                    func = func_args[0]
                    args = func_args[1:]
                    kwargs = dict(on_stderr=self.on_stderr)
                    func(*args, **kwargs)
        elif command in 'J':
            winterm.erase_screen(params[0], on_stderr=self.on_stderr)
        elif command in 'K':
            winterm.erase_line(params[0], on_stderr=self.on_stderr)
        elif command in 'Hf':     # cursor position - absolute
            winterm.set_cursor_position(params, on_stderr=self.on_stderr)
        elif command in 'ABCD':   # cursor position - relative
            n = params[0]
            # A - up, B - down, C - forward, D - back
            x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command]
            winterm.cursor_adjust(x, y, on_stderr=self.on_stderr)

    def convert_osc(self, text):
        '''
        Return text with every OSC sequence removed, applying any
        window-title change through winterm when one is available.
        '''
        # Assemble the output from the gaps between matches. The match
        # spans refer to the ORIGINAL string, so slicing an already
        # shortened string with them would corrupt the text as soon as
        # there is more than one sequence.
        kept = []
        cursor = 0
        for match in self.ANSI_OSC_RE.finditer(text):
            start, end = match.span()
            kept.append(text[cursor:start])
            cursor = end
            paramstring, command = match.groups()
            if command == '\x07':       # \x07 = BEL
                params = paramstring.split(";")
                # 0 - change title and icon (we will only change title)
                # 1 - change icon (we don't support this)
                # 2 - change title
                # A title needs a second field, and winterm is None when
                # windll is unavailable.
                if (len(params) > 1 and params[0] in ('0', '2')
                        and winterm is not None):
                    winterm.set_title(params[1])
        kept.append(text[cursor:])
        return ''.join(kept)
|
|
||||||
@@ -1,66 +0,0 @@
|
|||||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
|
||||||
import atexit
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from .ansitowin32 import AnsiToWin32
|
|
||||||
|
|
||||||
|
|
||||||
orig_stdout = sys.stdout
|
|
||||||
orig_stderr = sys.stderr
|
|
||||||
|
|
||||||
wrapped_stdout = sys.stdout
|
|
||||||
wrapped_stderr = sys.stderr
|
|
||||||
|
|
||||||
atexit_done = False
|
|
||||||
|
|
||||||
|
|
||||||
def reset_all():
    """Reset console attributes through a wrapper around the original stdout."""
    resetter = AnsiToWin32(orig_stdout)
    resetter.reset_all()
|
|
||||||
|
|
||||||
|
|
||||||
def init(autoreset=False, convert=None, strip=None, wrap=True):
    """Install wrapped replacements for ``sys.stdout`` and ``sys.stderr``.

    The wrapped streams are remembered in module globals so they can be
    re-installed later.  A stream that is currently ``None`` is left alone.
    On the first call ``reset_all`` is registered to run at interpreter exit.

    Raises ValueError when ``wrap`` is false but another option is truthy.
    """
    global wrapped_stdout, wrapped_stderr, atexit_done

    if not wrap and (autoreset or convert or strip):
        raise ValueError('wrap=False conflicts with any other arg=True')

    if sys.stdout is None:
        wrapped_stdout = None
    else:
        replacement = wrap_stream(orig_stdout, convert, strip, autoreset, wrap)
        sys.stdout = replacement
        wrapped_stdout = replacement

    if sys.stderr is None:
        wrapped_stderr = None
    else:
        replacement = wrap_stream(orig_stderr, convert, strip, autoreset, wrap)
        sys.stderr = replacement
        wrapped_stderr = replacement

    if atexit_done:
        return
    atexit.register(reset_all)
    atexit_done = True
|
|
||||||
|
|
||||||
|
|
||||||
def deinit():
    """Put the originally captured streams back on ``sys`` (skipping ``None``)."""
    for attr, stream in (('stdout', orig_stdout), ('stderr', orig_stderr)):
        if stream is not None:
            setattr(sys, attr, stream)
|
|
||||||
|
|
||||||
|
|
||||||
def reinit():
    """Re-install the previously wrapped streams on ``sys`` (skipping ``None``)."""
    for attr, stream in (('stdout', wrapped_stdout), ('stderr', wrapped_stderr)):
        if stream is not None:
            setattr(sys, attr, stream)
|
|
||||||
|
|
||||||
|
|
||||||
def wrap_stream(stream, convert, strip, autoreset, wrap):
    """Return the stream to install in place of ``stream``.

    When ``wrap`` is false, or the ``AnsiToWin32`` wrapper reports that
    wrapping is unnecessary, ``stream`` itself is returned; otherwise the
    wrapper's ``stream`` attribute is returned.
    """
    if not wrap:
        return stream
    wrapper = AnsiToWin32(stream,
                          convert=convert, strip=strip, autoreset=autoreset)
    return wrapper.stream if wrapper.should_wrap() else stream
|
|
||||||
|
|
||||||
|
|
||||||
-146
@@ -1,146 +0,0 @@
|
|||||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
|
||||||
|
|
||||||
# from winbase.h
|
|
||||||
STDOUT = -11
|
|
||||||
STDERR = -12
|
|
||||||
|
|
||||||
try:
    import ctypes
    from ctypes import LibraryLoader
    windll = LibraryLoader(ctypes.WinDLL)
    from ctypes import wintypes
except (AttributeError, ImportError):
    # ctypes.WinDLL is missing (or ctypes cannot be imported): expose inert
    # stand-ins so that this module can still be imported.
    windll = None
    SetConsoleTextAttribute = lambda *_: None
else:
    from ctypes import byref, Structure, c_char, POINTER

    COORD = wintypes._COORD

    class CONSOLE_SCREEN_BUFFER_INFO(Structure):
        """struct in wincon.h."""
        _fields_ = [
            ("dwSize", COORD),
            ("dwCursorPosition", COORD),
            ("wAttributes", wintypes.WORD),
            ("srWindow", wintypes.SMALL_RECT),
            ("dwMaximumWindowSize", COORD),
        ]

        def __str__(self):
            return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % (
                self.dwSize.Y, self.dwSize.X,
                self.dwCursorPosition.Y, self.dwCursorPosition.X,
                self.wAttributes,
                self.srWindow.Top, self.srWindow.Left,
                self.srWindow.Bottom, self.srWindow.Right,
                self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X,
            )

    # Declare argument/return types for every kernel32 entry point used
    # below, so ctypes converts and checks arguments.
    _GetStdHandle = windll.kernel32.GetStdHandle
    _GetStdHandle.argtypes = [
        wintypes.DWORD,
    ]
    _GetStdHandle.restype = wintypes.HANDLE

    _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
    _GetConsoleScreenBufferInfo.argtypes = [
        wintypes.HANDLE,
        POINTER(CONSOLE_SCREEN_BUFFER_INFO),
    ]
    _GetConsoleScreenBufferInfo.restype = wintypes.BOOL

    _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
    _SetConsoleTextAttribute.argtypes = [
        wintypes.HANDLE,
        wintypes.WORD,
    ]
    _SetConsoleTextAttribute.restype = wintypes.BOOL

    _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition
    _SetConsoleCursorPosition.argtypes = [
        wintypes.HANDLE,
        COORD,
    ]
    _SetConsoleCursorPosition.restype = wintypes.BOOL

    _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA
    _FillConsoleOutputCharacterA.argtypes = [
        wintypes.HANDLE,
        c_char,
        wintypes.DWORD,
        COORD,
        POINTER(wintypes.DWORD),
    ]
    _FillConsoleOutputCharacterA.restype = wintypes.BOOL

    _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute
    _FillConsoleOutputAttribute.argtypes = [
        wintypes.HANDLE,
        wintypes.WORD,
        wintypes.DWORD,
        COORD,
        POINTER(wintypes.DWORD),
    ]
    _FillConsoleOutputAttribute.restype = wintypes.BOOL

    # Bind the wide-character entry point the name promises.  The previous
    # binding to SetConsoleTitleA/LPCSTR rejected text (unicode) titles.
    _SetConsoleTitleW = windll.kernel32.SetConsoleTitleW
    _SetConsoleTitleW.argtypes = [
        wintypes.LPCWSTR
    ]
    _SetConsoleTitleW.restype = wintypes.BOOL

    # Standard handles are looked up once, when the module is imported.
    handles = {
        STDOUT: _GetStdHandle(STDOUT),
        STDERR: _GetStdHandle(STDERR),
    }

    def GetConsoleScreenBufferInfo(stream_id=STDOUT):
        """Return a CONSOLE_SCREEN_BUFFER_INFO filled in for ``stream_id``.

        The BOOL result of the API call is not checked; the structure is
        returned as-is even when the call fails.
        """
        handle = handles[stream_id]
        csbi = CONSOLE_SCREEN_BUFFER_INFO()
        _GetConsoleScreenBufferInfo(handle, byref(csbi))
        return csbi

    def SetConsoleTextAttribute(stream_id, attrs):
        """Set the text attributes of ``stream_id``; return the API's BOOL."""
        handle = handles[stream_id]
        return _SetConsoleTextAttribute(handle, attrs)

    def SetConsoleCursorPosition(stream_id, position, adjust=True):
        """Move the cursor to the ANSI-style ``position`` given as (y, x).

        Returns ``None`` without moving when either coordinate is <= 0,
        otherwise the API's BOOL result.
        """
        position = COORD(*position)
        # If the position is out of range, do nothing.
        if position.Y <= 0 or position.X <= 0:
            return
        # Adjust for Windows' SetConsoleCursorPosition:
        #    1. being 0-based, while ANSI is 1-based.
        #    2. expecting (x,y), while ANSI uses (y,x).
        adjusted_position = COORD(position.Y - 1, position.X - 1)
        if adjust:
            # Adjust for viewport's scroll position
            sr = GetConsoleScreenBufferInfo(STDOUT).srWindow
            adjusted_position.Y += sr.Top
            adjusted_position.X += sr.Left
        # Resume normal processing
        handle = handles[stream_id]
        return _SetConsoleCursorPosition(handle, adjusted_position)

    def FillConsoleOutputCharacter(stream_id, char, length, start):
        """Write ``char`` ``length`` times from ``start``; return cells written."""
        handle = handles[stream_id]
        char = c_char(char.encode())
        length = wintypes.DWORD(length)
        num_written = wintypes.DWORD(0)
        # Note that this is hard-coded for ANSI (vs wide) bytes.
        _FillConsoleOutputCharacterA(
            handle, char, length, start, byref(num_written))
        return num_written.value

    def FillConsoleOutputAttribute(stream_id, attr, length, start):
        ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )'''
        handle = handles[stream_id]
        attribute = wintypes.WORD(attr)
        length = wintypes.DWORD(length)
        num_written = wintypes.DWORD(0)
        # Unlike FillConsoleOutputCharacter this returns the API's BOOL
        # result, not the number of cells written.
        return _FillConsoleOutputAttribute(
            handle, attribute, length, start, byref(num_written))

    def SetConsoleTitle(title):
        """Set the console window title; return the API's BOOL result."""
        return _SetConsoleTitleW(title)
|
|
||||||
-151
@@ -1,151 +0,0 @@
|
|||||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
|
||||||
from . import win32
|
|
||||||
|
|
||||||
|
|
||||||
# from wincon.h
|
|
||||||
class WinColor(object):
    """Console colour indices (values 0-7), in wincon.h order."""
    (BLACK, BLUE, GREEN, CYAN,
     RED, MAGENTA, YELLOW, GREY) = range(8)
|
|
||||||
|
|
||||||
# from wincon.h
|
|
||||||
class WinStyle(object):
    """Intensity bits combined with the colour indices."""
    NORMAL = 0                  # dim text, dim background
    BRIGHT = 1 << 3             # bright text, dim background
    BRIGHT_BACKGROUND = 1 << 7  # dim text, bright background
|
|
||||||
|
|
||||||
class WinTerm(object):
    """Track foreground/background/style state and apply it via ``win32``."""

    def __init__(self):
        # The attributes in effect at construction time become the defaults
        # that fore()/back()/style()/reset_all() fall back to.
        self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes
        self.set_attrs(self._default)
        self._default_fore = self._fore
        self._default_back = self._back
        self._default_style = self._style

    def _stream_id(self, on_stderr):
        """Return the win32 stream id selected by ``on_stderr``."""
        return win32.STDERR if on_stderr else win32.STDOUT

    def get_attrs(self):
        """Pack fore, back and style into one attribute value."""
        return self._fore + self._back * 16 + self._style

    def set_attrs(self, value):
        """Unpack an attribute value into fore, back and style."""
        self._fore = value & 7
        self._back = (value >> 4) & 7
        self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND)

    def reset_all(self, on_stderr=None):
        """Restore the default attributes.

        NOTE(review): ``on_stderr`` is accepted but not forwarded, so the
        reset is always applied through the stdout handle - confirm whether
        that is intended before changing it.
        """
        self.set_attrs(self._default)
        self.set_console(attrs=self._default)

    def fore(self, fore=None, light=False, on_stderr=False):
        """Set the foreground colour (default colour when ``fore`` is None)."""
        if fore is None:
            fore = self._default_fore
        self._fore = fore
        if light:
            self._style |= WinStyle.BRIGHT
        self.set_console(on_stderr=on_stderr)

    def back(self, back=None, light=False, on_stderr=False):
        """Set the background colour (default colour when ``back`` is None)."""
        if back is None:
            back = self._default_back
        self._back = back
        if light:
            self._style |= WinStyle.BRIGHT_BACKGROUND
        self.set_console(on_stderr=on_stderr)

    def style(self, style=None, on_stderr=False):
        """Set the style bits (default style when ``style`` is None)."""
        if style is None:
            style = self._default_style
        self._style = style
        self.set_console(on_stderr=on_stderr)

    def set_console(self, attrs=None, on_stderr=False):
        """Apply ``attrs`` (or the current state when None) to the console."""
        if attrs is None:
            attrs = self.get_attrs()
        win32.SetConsoleTextAttribute(self._stream_id(on_stderr), attrs)

    def get_position(self, handle):
        """Return the cursor position for ``handle``, shifted to 1-based."""
        position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition
        # Because Windows coordinates are 0-based,
        # and win32.SetConsoleCursorPosition expects 1-based.
        position.X += 1
        position.Y += 1
        return position

    def set_cursor_position(self, position=None, on_stderr=False):
        """Move the cursor to ``position``; do nothing when it is None."""
        if position is None:
            # The position is not tracked, so there is no default.
            return
        win32.SetConsoleCursorPosition(self._stream_id(on_stderr), position)

    def cursor_adjust(self, x, y, on_stderr=False):
        """Move the cursor by ``x`` columns and ``y`` rows."""
        handle = self._stream_id(on_stderr)
        position = self.get_position(handle)
        adjusted_position = (position.Y + y, position.X + x)
        win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False)

    def erase_screen(self, mode=0, on_stderr=False):
        """Erase part or all of the screen buffer.

        0 should clear from the cursor to the end of the screen.
        1 should clear from the cursor to the beginning of the screen.
        2 should clear the entire screen, and move cursor to (1,1)
        Any other mode is ignored (previously it failed on unbound locals).
        """
        if mode not in (0, 1, 2):
            return
        handle = self._stream_id(on_stderr)
        csbi = win32.GetConsoleScreenBufferInfo(handle)
        # get the number of character cells in the current buffer
        cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y
        # get number of character cells before current cursor position
        cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X
        if mode == 0:
            from_coord = csbi.dwCursorPosition
            cells_to_erase = cells_in_screen - cells_before_cursor
        elif mode == 1:
            from_coord = win32.COORD(0, 0)
            cells_to_erase = cells_before_cursor
        else:
            from_coord = win32.COORD(0, 0)
            cells_to_erase = cells_in_screen
        # fill the selected range with blanks
        win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
        # now set the buffer's attributes accordingly
        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)
        if mode == 2:
            # put the cursor where needed
            win32.SetConsoleCursorPosition(handle, (1, 1))

    def erase_line(self, mode=0, on_stderr=False):
        """Erase part or all of the current line.

        0 should clear from the cursor to the end of the line.
        1 should clear from the cursor to the beginning of the line.
        2 should clear the entire line.
        Any other mode is ignored (previously it failed on unbound locals).
        """
        if mode not in (0, 1, 2):
            return
        handle = self._stream_id(on_stderr)
        csbi = win32.GetConsoleScreenBufferInfo(handle)
        if mode == 0:
            from_coord = csbi.dwCursorPosition
            cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X
        elif mode == 1:
            from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
            cells_to_erase = csbi.dwCursorPosition.X
        else:
            from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
            cells_to_erase = csbi.dwSize.X
        # fill the selected part of the line with blanks
        win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
        # now set the buffer's attributes accordingly
        win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)

    def set_title(self, title):
        """Set the console window title."""
        win32.SetConsoleTitle(title)
|
|
||||||
-23
@@ -1,23 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2012-2014 Vinay Sajip.
|
|
||||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
import logging
|
|
||||||
|
|
||||||
__version__ = '0.2.1'
|
|
||||||
|
|
||||||
class DistlibException(Exception):
    """Exception type defined by this package."""
|
|
||||||
|
|
||||||
# Use logging.NullHandler when the stdlib provides it; otherwise define an
# equivalent do-nothing handler.
NullHandler = getattr(logging, 'NullHandler', None)
if NullHandler is None:  # pragma: no cover
    class NullHandler(logging.Handler):
        """Handler that discards every record."""

        def handle(self, record):
            pass

        def emit(self, record):
            pass

        def createLock(self):
            self.lock = None

# Package logger with a no-op handler attached.
logger = logging.getLogger(__name__)
logger.addHandler(NullHandler())
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
"""Modules copied from Python 3 standard libraries, for internal use only.
|
|
||||||
|
|
||||||
Individual classes and functions are found in d2._backport.misc. Intended
|
|
||||||
usage is to always import things missing from 3.1 from that module: the
|
|
||||||
built-in/stdlib objects will be used if found.
|
|
||||||
"""
|
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2012 The Python Software Foundation.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
"""Backports for individual classes and functions."""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
__all__ = ['cache_from_source', 'callable', 'fsencode']
|
|
||||||
|
|
||||||
|
|
||||||
try:
    from imp import cache_from_source
except ImportError:
    def cache_from_source(py_file, debug=__debug__):
        """Return ``py_file`` with 'c' appended when ``debug`` is true, else 'o'."""
        suffix = 'c' if debug else 'o'
        return py_file + suffix
|
|
||||||
|
|
||||||
|
|
||||||
# Re-export the builtin callable() when it exists; otherwise fall back to an
# isinstance check against collections.Callable.
try:
    callable = callable
except NameError:
    from collections import Callable

    def callable(obj):
        """Return True when ``obj`` is an instance of collections.Callable."""
        return isinstance(obj, Callable)
|
|
||||||
|
|
||||||
|
|
||||||
try:
    fsencode = os.fsencode
except AttributeError:
    def fsencode(filename):
        """Return ``filename`` as bytes.

        Bytes are returned unchanged, str is encoded with the filesystem
        encoding, and any other type raises TypeError.
        """
        if isinstance(filename, bytes):
            return filename
        if isinstance(filename, str):
            encoding = sys.getfilesystemencoding()
            return filename.encode(encoding)
        raise TypeError("expect bytes or str, not %s" %
                        type(filename).__name__)
|
|
||||||
@@ -1,761 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2012 The Python Software Foundation.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
"""Utility functions for copying and archiving files and directory trees.
|
|
||||||
|
|
||||||
XXX The functions here don't copy the resource fork or other metadata on Mac.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import stat
|
|
||||||
from os.path import abspath
|
|
||||||
import fnmatch
|
|
||||||
import collections
|
|
||||||
import errno
|
|
||||||
from . import tarfile
|
|
||||||
|
|
||||||
try:
|
|
||||||
import bz2
|
|
||||||
_BZ2_SUPPORTED = True
|
|
||||||
except ImportError:
|
|
||||||
_BZ2_SUPPORTED = False
|
|
||||||
|
|
||||||
try:
|
|
||||||
from pwd import getpwnam
|
|
||||||
except ImportError:
|
|
||||||
getpwnam = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
from grp import getgrnam
|
|
||||||
except ImportError:
|
|
||||||
getgrnam = None
|
|
||||||
|
|
||||||
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
|
|
||||||
"copytree", "move", "rmtree", "Error", "SpecialFileError",
|
|
||||||
"ExecError", "make_archive", "get_archive_formats",
|
|
||||||
"register_archive_format", "unregister_archive_format",
|
|
||||||
"get_unpack_formats", "register_unpack_format",
|
|
||||||
"unregister_unpack_format", "unpack_archive", "ignore_patterns"]
|
|
||||||
|
|
||||||
class Error(EnvironmentError):
    """Base error raised by the copy/move helpers in this module."""


class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)."""


class ExecError(EnvironmentError):
    """Raised when a command could not be executed."""


class ReadError(EnvironmentError):
    """Raised when an archive cannot be read."""


class RegistryError(Exception):
    """Raised when a registry operation with the archiving
    and unpacking registries fails."""
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
WindowsError
|
|
||||||
except NameError:
|
|
||||||
WindowsError = None
|
|
||||||
|
|
||||||
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy from file-like ``fsrc`` to file-like ``fdst`` in ``length``-sized reads.

    Copying stops at the first read that returns an empty (falsy) chunk.
    """
    chunk = fsrc.read(length)
    while chunk:
        fdst.write(chunk)
        chunk = fsrc.read(length)
|
|
||||||
|
|
||||||
def _samefile(src, dst):
|
|
||||||
# Macintosh, Unix.
|
|
||||||
if hasattr(os.path, 'samefile'):
|
|
||||||
try:
|
|
||||||
return os.path.samefile(src, dst)
|
|
||||||
except OSError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# All other platforms: check for same pathname.
|
|
||||||
return (os.path.normcase(os.path.abspath(src)) ==
|
|
||||||
os.path.normcase(os.path.abspath(dst)))
|
|
||||||
|
|
||||||
def copyfile(src, dst):
    """Copy the contents of the file ``src`` to the file ``dst``.

    Raises Error when both names refer to the same file and
    SpecialFileError when either one is a named pipe.
    """
    if _samefile(src, dst):
        raise Error("`%s` and `%s` are the same file" % (src, dst))

    for candidate in (src, dst):
        try:
            info = os.stat(candidate)
        except OSError:
            # File most likely does not exist
            continue
        # XXX What about other special files? (sockets, devices...)
        if stat.S_ISFIFO(info.st_mode):
            raise SpecialFileError("`%s` is a named pipe" % candidate)

    with open(src, 'rb') as reader:
        with open(dst, 'wb') as writer:
            copyfileobj(reader, writer)
|
|
||||||
|
|
||||||
def copymode(src, dst):
    """Copy the permission bits of ``src`` onto ``dst`` (no-op without os.chmod)."""
    if not hasattr(os, 'chmod'):
        return
    permissions = stat.S_IMODE(os.stat(src).st_mode)
    os.chmod(dst, permissions)
|
|
||||||
|
|
||||||
def copystat(src, dst):
    """Copy access/modification times, mode bits and flags from ``src`` to ``dst``.

    Each item is copied only when the platform offers the corresponding
    ``os`` function.  A failing chflags is ignored only for EOPNOTSUPP.
    """
    info = os.stat(src)
    permissions = stat.S_IMODE(info.st_mode)
    if hasattr(os, 'utime'):
        os.utime(dst, (info.st_atime, info.st_mtime))
    if hasattr(os, 'chmod'):
        os.chmod(dst, permissions)
    if hasattr(os, 'chflags') and hasattr(info, 'st_flags'):
        try:
            os.chflags(dst, info.st_flags)
        except OSError as why:
            unsupported = (hasattr(errno, 'EOPNOTSUPP') and
                           why.errno == errno.EOPNOTSUPP)
            if not unsupported:
                raise
|
|
||||||
|
|
||||||
def copy(src, dst):
    """Copy data and mode bits ("cp src dst").

    When ``dst`` is a directory the file is copied into it under the base
    name of ``src``.
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copymode(src, target)
|
|
||||||
|
|
||||||
def copy2(src, dst):
    """Copy data and all stat info ("cp -p src dst").

    When ``dst`` is a directory the file is copied into it under the base
    name of ``src``.
    """
    target = dst
    if os.path.isdir(target):
        target = os.path.join(target, os.path.basename(src))
    copyfile(src, target)
    copystat(src, target)
|
|
||||||
|
|
||||||
def ignore_patterns(*patterns):
    """Build a callable usable as the copytree() ``ignore`` parameter.

    ``patterns`` are glob-style patterns; the returned callable yields the
    set of names matching at least one of them.
    """
    def _ignore_patterns(path, names):
        matched = set()
        for pattern in patterns:
            matched.update(fnmatch.filter(names, pattern))
        return matched
    return _ignore_patterns
|
|
||||||
|
|
||||||
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
             ignore_dangling_symlinks=False):
    """Recursively copy a directory tree.

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of
    (source, destination, reason) tuples.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied. If the file pointed to by the symlink doesn't
    exist, an exception will be added to the list of errors raised in
    an Error exception at the end of the copy process.

    You can set the optional ignore_dangling_symlinks flag to true if you
    want to silence this exception; the flag applies to every directory
    level. Notice that this has no effect on platforms that don't support
    os.symlink.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    The optional copy_function argument is a callable that will be used
    to copy each file. It will be called with the source path and the
    destination path as arguments. By default, copy2() is used, but any
    function that supports the same signature (like copy()) can be used.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if os.path.islink(srcname):
                linkto = os.readlink(srcname)
                if symlinks:
                    os.symlink(linkto, dstname)
                else:
                    # ignore dangling symlink if the flag is on
                    if not os.path.exists(linkto) and ignore_dangling_symlinks:
                        continue
                    # otherwise let the copy occur; copy2 will raise an error
                    copy_function(srcname, dstname)
            elif os.path.isdir(srcname):
                # Forward ignore_dangling_symlinks too, so that it is
                # honoured below the top-level directory.
                copytree(srcname, dstname, symlinks, ignore, copy_function,
                         ignore_dangling_symlinks)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy_function(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            # append, not extend: each entry must stay one 3-tuple
            errors.append((src, dst, str(why)))
    if errors:
        raise Error(errors)
|
|
||||||
|
|
||||||
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def _swallow(*args):
            pass
        onerror = _swallow
    elif onerror is None:
        def _reraise(*args):
            # re-raises the exception currently being handled
            raise
        onerror = _reraise

    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return

    entries = []
    try:
        entries = os.listdir(path)
    except OSError:
        onerror(os.listdir, path, sys.exc_info())

    for entry in entries:
        child = os.path.join(path, entry)
        try:
            child_mode = os.lstat(child).st_mode
        except OSError:
            child_mode = 0
        if stat.S_ISDIR(child_mode):
            rmtree(child, ignore_errors, onerror)
            continue
        try:
            os.remove(child)
        except OSError:
            onerror(os.remove, child, sys.exc_info())

    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
|
|
||||||
|
|
||||||
|
|
||||||
def _basename(path):
|
|
||||||
# A basename() variant which first strips the trailing slash, if present.
|
|
||||||
# Thus we always get the last component of the path, even for directories.
|
|
||||||
return os.path.basename(path.rstrip(os.path.sep))
|
|
||||||
|
|
||||||
def move(src, dst):
    """Recursively move a file or directory to another location. This is
    similar to the Unix "mv" command.

    If the destination is a directory or a symlink to a directory, the source
    is moved inside the directory. The destination path must not already
    exist.

    If the destination already exists but is not a directory, it may be
    overwritten depending on os.rename() semantics.

    A rename is attempted first; when it fails with OSError, src is copied
    to the destination and then removed.
    A lot more could be done here...  A look at a mv.c shows a lot of
    the issues this implementation glosses over.

    """
    target = dst
    if os.path.isdir(dst):
        if _samefile(src, dst):
            # We might be on a case insensitive filesystem,
            # perform the rename anyway.
            os.rename(src, dst)
            return

        target = os.path.join(dst, _basename(src))
        if os.path.exists(target):
            raise Error("Destination path '%s' already exists" % target)

    try:
        os.rename(src, target)
    except OSError:
        # Rename failed: fall back to copy followed by delete.
        if not os.path.isdir(src):
            copy2(src, target)
            os.unlink(src)
            return
        if _destinsrc(src, dst):
            raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
        copytree(src, target, symlinks=True)
        rmtree(src)
|
|
||||||
|
|
||||||
def _destinsrc(src, dst):
|
|
||||||
src = abspath(src)
|
|
||||||
dst = abspath(dst)
|
|
||||||
if not src.endswith(os.path.sep):
|
|
||||||
src += os.path.sep
|
|
||||||
if not dst.endswith(os.path.sep):
|
|
||||||
dst += os.path.sep
|
|
||||||
return dst.startswith(src)
|
|
||||||
|
|
||||||
def _get_gid(name):
|
|
||||||
"""Returns a gid, given a group name."""
|
|
||||||
if getgrnam is None or name is None:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
result = getgrnam(name)
|
|
||||||
except KeyError:
|
|
||||||
result = None
|
|
||||||
if result is not None:
|
|
||||||
return result[2]
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _get_uid(name):
|
|
||||||
"""Returns an uid, given a user name."""
|
|
||||||
if getpwnam is None or name is None:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
result = getpwnam(name)
|
|
||||||
except KeyError:
|
|
||||||
result = None
|
|
||||||
if result is not None:
|
|
||||||
return result[2]
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
                  owner=None, group=None, logger=None):
    """Create a (possibly compressed) tar file from all the files under
    'base_dir'.

    'compress' must be "gzip" (the default), "bzip2", or None.

    'owner' and 'group' can be used to define an owner and a group for the
    archive that is being built. If not provided, the current owner and group
    will be used.

    The output tar file will be named 'base_name' +  ".tar", possibly plus
    the appropriate compression extension (".gz", or ".bz2").

    'verbose' is accepted but not used here.  With 'dry_run' set, nothing
    is created on disk.  Raises ValueError for an unsupported 'compress'.

    Returns the output filename.
    """
    tar_compression = {'gzip': 'gz', None: ''}
    compress_ext = {'gzip': '.gz'}

    if _BZ2_SUPPORTED:
        tar_compression['bzip2'] = 'bz2'
        compress_ext['bzip2'] = '.bz2'

    if compress is not None and compress not in compress_ext:
        raise ValueError("bad value for 'compress', or compression format not "
                         "supported : {0}".format(compress))

    archive_name = base_name + '.tar' + compress_ext.get(compress, '')
    archive_dir = os.path.dirname(archive_name)

    # An empty archive_dir means "current directory": there is nothing to
    # create, and os.makedirs('') would fail.
    if archive_dir and not os.path.exists(archive_dir):
        if logger is not None:
            logger.info("creating %s", archive_dir)
        if not dry_run:
            os.makedirs(archive_dir)

    # creating the tarball
    if logger is not None:
        logger.info('Creating tar archive')

    uid = _get_uid(owner)
    gid = _get_gid(group)

    def _set_uid_gid(tarinfo):
        # Override ownership only for the ids that could be resolved.
        if gid is not None:
            tarinfo.gid = gid
            tarinfo.gname = group
        if uid is not None:
            tarinfo.uid = uid
            tarinfo.uname = owner
        return tarinfo

    if not dry_run:
        tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
        try:
            tar.add(base_dir, filter=_set_uid_gid)
        finally:
            tar.close()

    return archive_name
|
|
||||||
|
|
||||||
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
    """Create 'zip_filename' from 'base_dir' by spawning the external 'zip'.

    The command runs recursively, and quietly unless 'verbose' is true.
    Raises ExecError when the command cannot be run or fails.
    """
    # XXX see if we want to keep an external call here
    zipoptions = "-r" if verbose else "-rq"
    from distutils.errors import DistutilsExecError
    from distutils.spawn import spawn
    try:
        spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
    except DistutilsExecError:
        # XXX really should distinguish between "couldn't find
        # external 'zip' command" and "zip failed".
        # The filename must be interpolated into the message itself;
        # applying '%' to the exception instance raises TypeError.
        raise ExecError(("unable to create zip file '%s': "
                         "could neither import the 'zipfile' module nor "
                         "find a standalone zip utility") % zip_filename)
|
|
||||||
|
|
||||||
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
|
|
||||||
"""Create a zip file from all the files under 'base_dir'.
|
|
||||||
|
|
||||||
The output zip file will be named 'base_name' + ".zip". Uses either the
|
|
||||||
"zipfile" Python module (if available) or the InfoZIP "zip" utility
|
|
||||||
(if installed and found on the default search path). If neither tool is
|
|
||||||
available, raises ExecError. Returns the name of the output zip
|
|
||||||
file.
|
|
||||||
"""
|
|
||||||
zip_filename = base_name + ".zip"
|
|
||||||
archive_dir = os.path.dirname(base_name)
|
|
||||||
|
|
||||||
if not os.path.exists(archive_dir):
|
|
||||||
if logger is not None:
|
|
||||||
logger.info("creating %s", archive_dir)
|
|
||||||
if not dry_run:
|
|
||||||
os.makedirs(archive_dir)
|
|
||||||
|
|
||||||
# If zipfile module is not available, try spawning an external 'zip'
|
|
||||||
# command.
|
|
||||||
try:
|
|
||||||
import zipfile
|
|
||||||
except ImportError:
|
|
||||||
zipfile = None
|
|
||||||
|
|
||||||
if zipfile is None:
|
|
||||||
_call_external_zip(base_dir, zip_filename, verbose, dry_run)
|
|
||||||
else:
|
|
||||||
if logger is not None:
|
|
||||||
logger.info("creating '%s' and adding '%s' to it",
|
|
||||||
zip_filename, base_dir)
|
|
||||||
|
|
||||||
if not dry_run:
|
|
||||||
zip = zipfile.ZipFile(zip_filename, "w",
|
|
||||||
compression=zipfile.ZIP_DEFLATED)
|
|
||||||
|
|
||||||
for dirpath, dirnames, filenames in os.walk(base_dir):
|
|
||||||
for name in filenames:
|
|
||||||
path = os.path.normpath(os.path.join(dirpath, name))
|
|
||||||
if os.path.isfile(path):
|
|
||||||
zip.write(path, path)
|
|
||||||
if logger is not None:
|
|
||||||
logger.info("adding '%s'", path)
|
|
||||||
zip.close()
|
|
||||||
|
|
||||||
return zip_filename
|
|
||||||
|
|
||||||
# Registry of archive creators: name -> (function, extra_args, description).
# 'extra_args' is a list of (keyword, value) pairs passed to the function.
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
}

# Only advertise 'bztar' when bz2 support is actually available; otherwise
# _make_tarball would reject the 'bzip2' compression anyway.
if _BZ2_SUPPORTED:
    _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
                                 "bzip2'ed tar-file")
|
|
||||||
|
|
||||||
def get_archive_formats():
    """Return the registered archive formats, sorted.

    Each element of the returned list is a tuple (name, description).
    """
    return sorted((fmt_name, entry[2])
                  for fmt_name, entry in _ARCHIVE_FORMATS.items())
|
|
||||||
|
|
||||||
def register_archive_format(name, function, extra_args=None, description=''):
    """Registers an archive format.

    name is the name of the format. function is the callable that will be
    used to create archives. If provided, extra_args is a sequence of
    (name, value) tuples that will be passed as arguments to the callable.
    description can be provided to describe the format, and will be returned
    by the get_archive_formats() function.

    Raises TypeError if function is not callable, or if extra_args is not
    a list/tuple of two-element lists/tuples.
    """
    if extra_args is None:
        extra_args = []
    # The builtin callable() replaces collections.Callable, which was
    # removed from the 'collections' namespace in Python 3.10.
    if not callable(function):
        raise TypeError('The %s object is not callable' % function)
    if not isinstance(extra_args, (tuple, list)):
        raise TypeError('extra_args needs to be a sequence')
    for element in extra_args:
        if not isinstance(element, (tuple, list)) or len(element) != 2:
            raise TypeError('extra_args elements are : (arg_name, value)')

    _ARCHIVE_FORMATS[name] = (function, extra_args, description)
|
|
||||||
|
|
||||||
def unregister_archive_format(name):
    """Remove the archive format 'name' from the registry.

    Raises KeyError if no such format is registered.
    """
    del _ARCHIVE_FORMATS[name]
|
|
||||||
|
|
||||||
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
                 dry_run=0, owner=None, group=None, logger=None):
    """Create an archive file (eg. zip or tar).

    'base_name' is the name of the file to create, minus any format-specific
    extension; 'format' is the archive format: one of "zip", "tar", "bztar"
    or "gztar".

    'root_dir' is a directory that will be the root directory of the
    archive; ie. we typically chdir into 'root_dir' before creating the
    archive.  'base_dir' is the directory where we start archiving from;
    ie. 'base_dir' will be the common prefix of all files and
    directories in the archive.  'root_dir' and 'base_dir' both default
    to the current directory.  Returns the name of the archive file.

    'owner' and 'group' are used when creating a tar archive. By default,
    uses the current owner and group.

    'verbose' is accepted but not forwarded to the archiving function.
    Raises ValueError if 'format' is not a registered archive format.
    """
    # Resolve the format before touching the working directory, so that an
    # unknown format cannot leave the process stranded inside 'root_dir'.
    try:
        format_info = _ARCHIVE_FORMATS[format]
    except KeyError:
        raise ValueError("unknown archive format '%s'" % format)

    save_cwd = os.getcwd()
    if root_dir is not None:
        if logger is not None:
            logger.debug("changing into '%s'", root_dir)
        # Make the output path absolute before the cwd changes.
        base_name = os.path.abspath(base_name)
        if not dry_run:
            os.chdir(root_dir)

    if base_dir is None:
        base_dir = os.curdir

    kwargs = {'dry_run': dry_run, 'logger': logger}

    func = format_info[0]
    for arg, val in format_info[1]:
        kwargs[arg] = val

    if format != 'zip':
        kwargs['owner'] = owner
        kwargs['group'] = group

    try:
        filename = func(base_name, base_dir, **kwargs)
    finally:
        if root_dir is not None:
            if logger is not None:
                logger.debug("changing back to '%s'", save_cwd)
            os.chdir(save_cwd)

    return filename
|
|
||||||
|
|
||||||
|
|
||||||
def get_unpack_formats():
    """Return the registered unpack formats, sorted.

    Each element of the returned list is a tuple
    (name, extensions, description)
    """
    return sorted((fmt_name, entry[0], entry[3])
                  for fmt_name, entry in _UNPACK_FORMATS.items())
|
|
||||||
|
|
||||||
def _check_unpack_options(extensions, function, extra_args):
    """Checks what gets registered as an unpacker.

    Raises RegistryError if one of 'extensions' is already claimed by a
    registered format, and TypeError if 'function' is not callable.
    'extra_args' is not validated here.
    """
    # first make sure no other unpacker is registered for this extension
    existing_extensions = {}
    for name, info in _UNPACK_FORMATS.items():
        for ext in info[0]:
            existing_extensions[ext] = name

    for extension in extensions:
        if extension in existing_extensions:
            msg = '%s is already registered for "%s"'
            raise RegistryError(msg % (extension,
                                       existing_extensions[extension]))

    # The builtin callable() replaces collections.Callable, which was
    # removed from the 'collections' namespace in Python 3.10.
    if not callable(function):
        raise TypeError('The registered function must be a callable')
|
|
||||||
|
|
||||||
|
|
||||||
def register_unpack_format(name, extensions, function, extra_args=None,
                           description=''):
    """Register an unpack format under `name`.

    `extensions` lists the filename extensions handled by the format.

    `function` is the callable used to unpack archives; it receives the
    archive to unpack and must raise ReadError when it cannot handle it.

    `extra_args`, when given, is a sequence of (name, value) tuples passed
    as arguments to the callable.  `description` describes the format and
    is returned by get_unpack_formats().
    """
    args = [] if extra_args is None else extra_args
    _check_unpack_options(extensions, function, args)
    _UNPACK_FORMATS[name] = (extensions, function, args, description)
|
|
||||||
|
|
||||||
def unregister_unpack_format(name):
    """Remove the unpack format `name` from the registry.

    Raises KeyError if no such format is registered.
    """
    del _UNPACK_FORMATS[name]
|
|
||||||
|
|
||||||
def _ensure_directory(path):
|
|
||||||
"""Ensure that the parent directory of `path` exists"""
|
|
||||||
dirname = os.path.dirname(path)
|
|
||||||
if not os.path.isdir(dirname):
|
|
||||||
os.makedirs(dirname)
|
|
||||||
|
|
||||||
def _unpack_zipfile(filename, extract_dir):
    """Extract the zip archive `filename` into `extract_dir`.

    Members with absolute names or with '..' anywhere in their name are
    skipped.  Raises ReadError if `filename` is not a zip file or the
    zipfile module cannot be imported.
    """
    try:
        import zipfile
    except ImportError:
        raise ReadError('zlib not supported, cannot unpack this archive.')

    if not zipfile.is_zipfile(filename):
        raise ReadError("%s is not a zip file" % filename)

    archive = zipfile.ZipFile(filename)
    try:
        for member in archive.infolist():
            member_name = member.filename

            # don't extract absolute paths or ones with .. in them
            if member_name.startswith('/') or '..' in member_name:
                continue

            destination = os.path.join(extract_dir, *member_name.split('/'))
            if not destination:
                continue

            _ensure_directory(destination)
            if member_name.endswith('/'):
                # directory entry: creating its parent was all there is to do
                continue

            payload = archive.read(member.filename)
            out = open(destination, 'wb')
            try:
                out.write(payload)
            finally:
                out.close()
                del payload
    finally:
        archive.close()
|
|
||||||
|
|
||||||
def _unpack_tarfile(filename, extract_dir):
|
|
||||||
"""Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
tarobj = tarfile.open(filename)
|
|
||||||
except tarfile.TarError:
|
|
||||||
raise ReadError(
|
|
||||||
"%s is not a compressed or uncompressed tar file" % filename)
|
|
||||||
try:
|
|
||||||
tarobj.extractall(extract_dir)
|
|
||||||
finally:
|
|
||||||
tarobj.close()
|
|
||||||
|
|
||||||
# Registry of unpackers:
# name -> (extensions, function, extra_args, description).
_UNPACK_FORMATS = {
    'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
    'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
    'zip': (['.zip'], _unpack_zipfile, [], "ZIP file"),
}

# bzip2'ed tarballs are only registered when bz2 support is available.
# NOTE(review): the '.bz2' extension matches any bzip2 file, not only
# tarballs -- confirm this is intended.
if _BZ2_SUPPORTED:
    _UNPACK_FORMATS['bztar'] = (
        ['.bz2'], _unpack_tarfile, [], "bzip2'ed tar-file")
|
|
||||||
|
|
||||||
def _find_unpack_format(filename):
    """Return the name of a registered format whose extension matches
    the end of `filename`, or None when there is no match."""
    for fmt_name, entry in _UNPACK_FORMATS.items():
        if any(filename.endswith(ext) for ext in entry[0]):
            return fmt_name
    return None
|
|
||||||
|
|
||||||
def unpack_archive(filename, extract_dir=None, format=None):
    """Unpack the archive `filename`.

    `extract_dir` is the target directory; it defaults to the current
    working directory.

    `format` is the archive format: one of "zip", "tar", or "gztar", or
    any other registered format.  When omitted, the format is looked up
    from the registered unpackers by filename extension.

    Raises ValueError for an unknown explicit `format`, and ReadError when
    no registered unpacker matches the filename.
    """
    if extract_dir is None:
        extract_dir = os.getcwd()

    if format is None:
        # we need to look at the registered unpackers supported extensions
        format = _find_unpack_format(filename)
        if format is None:
            raise ReadError("Unknown archive format '{0}'".format(filename))

        unpacker = _UNPACK_FORMATS[format][1]
        options = dict(_UNPACK_FORMATS[format][2])
        unpacker(filename, extract_dir, **options)
        return

    try:
        entry = _UNPACK_FORMATS[format]
    except KeyError:
        raise ValueError("Unknown unpack format '{0}'".format(format))

    unpacker = entry[1]
    unpacker(filename, extract_dir, **dict(entry[2]))
|
|
||||||
@@ -1,84 +0,0 @@
|
|||||||
[posix_prefix]
|
|
||||||
# Configuration directories. Some of these come straight out of the
|
|
||||||
# configure script. They are for implementing the other variables, not to
|
|
||||||
# be used directly in [resource_locations].
|
|
||||||
confdir = /etc
|
|
||||||
datadir = /usr/share
|
|
||||||
libdir = /usr/lib
|
|
||||||
statedir = /var
|
|
||||||
# User resource directory
|
|
||||||
local = ~/.local/{distribution.name}
|
|
||||||
|
|
||||||
stdlib = {base}/lib/python{py_version_short}
|
|
||||||
platstdlib = {platbase}/lib/python{py_version_short}
|
|
||||||
purelib = {base}/lib/python{py_version_short}/site-packages
|
|
||||||
platlib = {platbase}/lib/python{py_version_short}/site-packages
|
|
||||||
include = {base}/include/python{py_version_short}{abiflags}
|
|
||||||
platinclude = {platbase}/include/python{py_version_short}{abiflags}
|
|
||||||
data = {base}
|
|
||||||
|
|
||||||
[posix_home]
|
|
||||||
stdlib = {base}/lib/python
|
|
||||||
platstdlib = {base}/lib/python
|
|
||||||
purelib = {base}/lib/python
|
|
||||||
platlib = {base}/lib/python
|
|
||||||
include = {base}/include/python
|
|
||||||
platinclude = {base}/include/python
|
|
||||||
scripts = {base}/bin
|
|
||||||
data = {base}
|
|
||||||
|
|
||||||
[nt]
|
|
||||||
stdlib = {base}/Lib
|
|
||||||
platstdlib = {base}/Lib
|
|
||||||
purelib = {base}/Lib/site-packages
|
|
||||||
platlib = {base}/Lib/site-packages
|
|
||||||
include = {base}/Include
|
|
||||||
platinclude = {base}/Include
|
|
||||||
scripts = {base}/Scripts
|
|
||||||
data = {base}
|
|
||||||
|
|
||||||
[os2]
|
|
||||||
stdlib = {base}/Lib
|
|
||||||
platstdlib = {base}/Lib
|
|
||||||
purelib = {base}/Lib/site-packages
|
|
||||||
platlib = {base}/Lib/site-packages
|
|
||||||
include = {base}/Include
|
|
||||||
platinclude = {base}/Include
|
|
||||||
scripts = {base}/Scripts
|
|
||||||
data = {base}
|
|
||||||
|
|
||||||
[os2_home]
|
|
||||||
stdlib = {userbase}/lib/python{py_version_short}
|
|
||||||
platstdlib = {userbase}/lib/python{py_version_short}
|
|
||||||
purelib = {userbase}/lib/python{py_version_short}/site-packages
|
|
||||||
platlib = {userbase}/lib/python{py_version_short}/site-packages
|
|
||||||
include = {userbase}/include/python{py_version_short}
|
|
||||||
scripts = {userbase}/bin
|
|
||||||
data = {userbase}
|
|
||||||
|
|
||||||
[nt_user]
|
|
||||||
stdlib = {userbase}/Python{py_version_nodot}
|
|
||||||
platstdlib = {userbase}/Python{py_version_nodot}
|
|
||||||
purelib = {userbase}/Python{py_version_nodot}/site-packages
|
|
||||||
platlib = {userbase}/Python{py_version_nodot}/site-packages
|
|
||||||
include = {userbase}/Python{py_version_nodot}/Include
|
|
||||||
scripts = {userbase}/Scripts
|
|
||||||
data = {userbase}
|
|
||||||
|
|
||||||
[posix_user]
|
|
||||||
stdlib = {userbase}/lib/python{py_version_short}
|
|
||||||
platstdlib = {userbase}/lib/python{py_version_short}
|
|
||||||
purelib = {userbase}/lib/python{py_version_short}/site-packages
|
|
||||||
platlib = {userbase}/lib/python{py_version_short}/site-packages
|
|
||||||
include = {userbase}/include/python{py_version_short}
|
|
||||||
scripts = {userbase}/bin
|
|
||||||
data = {userbase}
|
|
||||||
|
|
||||||
[osx_framework_user]
|
|
||||||
stdlib = {userbase}/lib/python
|
|
||||||
platstdlib = {userbase}/lib/python
|
|
||||||
purelib = {userbase}/lib/python/site-packages
|
|
||||||
platlib = {userbase}/lib/python/site-packages
|
|
||||||
include = {userbase}/include
|
|
||||||
scripts = {userbase}/bin
|
|
||||||
data = {userbase}
|
|
||||||
@@ -1,788 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2012 The Python Software Foundation.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
"""Access to Python's configuration information."""
|
|
||||||
|
|
||||||
import codecs
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
from os.path import pardir, realpath
|
|
||||||
try:
|
|
||||||
import configparser
|
|
||||||
except ImportError:
|
|
||||||
import ConfigParser as configparser
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
'get_config_h_filename',
|
|
||||||
'get_config_var',
|
|
||||||
'get_config_vars',
|
|
||||||
'get_makefile_filename',
|
|
||||||
'get_path',
|
|
||||||
'get_path_names',
|
|
||||||
'get_paths',
|
|
||||||
'get_platform',
|
|
||||||
'get_python_version',
|
|
||||||
'get_scheme_names',
|
|
||||||
'parse_config_h',
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def _safe_realpath(path):
|
|
||||||
try:
|
|
||||||
return realpath(path)
|
|
||||||
except OSError:
|
|
||||||
return path
|
|
||||||
|
|
||||||
|
|
||||||
# Directory containing the interpreter.  sys.executable can be empty if
# argv[0] has been changed and Python is unable to retrieve the real
# program name; fall back to the current directory in that case.
_PROJECT_BASE = (os.path.dirname(_safe_realpath(sys.executable))
                 if sys.executable else _safe_realpath(os.getcwd()))

# On Windows, step out of the build sub-directories.  The three checks run
# in sequence, each against the possibly already adjusted path.
if os.name == "nt":
    if "pcbuild" in _PROJECT_BASE[-8:].lower():
        _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir))
    # PC/VS7.1
    if "\\pc\\v" in _PROJECT_BASE[-10:].lower():
        _PROJECT_BASE = _safe_realpath(
            os.path.join(_PROJECT_BASE, pardir, pardir))
    # PC/AMD64
    if "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower():
        _PROJECT_BASE = _safe_realpath(
            os.path.join(_PROJECT_BASE, pardir, pardir))
|
|
||||||
|
|
||||||
|
|
||||||
def is_python_build():
    """Return True if Modules/Setup.dist or Modules/Setup.local exists
    as a file under _PROJECT_BASE."""
    modules_dir = os.path.join(_PROJECT_BASE, "Modules")
    return any(os.path.isfile(os.path.join(modules_dir, setup_name))
               for setup_name in ("Setup.dist", "Setup.local"))


_PYTHON_BUILD = is_python_build()
|
|
||||||
|
|
||||||
# Set once sysconfig.cfg has been loaded into _SCHEMES.
_cfg_read = False


def _ensure_cfg_read():
    """Load 'sysconfig.cfg' into _SCHEMES the first time this is called.

    The file is located through the parent package's resource finder.
    When _PYTHON_BUILD is true, the include paths of the 'posix_prefix'
    and 'posix_home' schemes are overridden.
    """
    global _cfg_read
    if _cfg_read:
        return

    from ..resources import finder
    backport_package = __name__.rsplit('.', 1)[0]
    _finder = finder(backport_package)
    _cfgfile = _finder.find('sysconfig.cfg')
    assert _cfgfile, 'sysconfig.cfg exists'
    # readfp() was deprecated in Python 3.2 and removed in 3.12; use
    # read_file() where it exists and fall back to readfp() otherwise.
    reader = getattr(_SCHEMES, 'read_file', None) or _SCHEMES.readfp
    with _cfgfile.as_stream() as s:
        reader(s)
    if _PYTHON_BUILD:
        for scheme in ('posix_prefix', 'posix_home'):
            _SCHEMES.set(scheme, 'include', '{srcdir}/Include')
            _SCHEMES.set(scheme, 'platinclude', '{projectbase}/.')

    _cfg_read = True


# Parsed install schemes; populated lazily by _ensure_cfg_read().
_SCHEMES = configparser.RawConfigParser()
# Matches a '{name}' placeholder and captures the name.
_VAR_REPL = re.compile(r'\{([^{]*?)\}')
|
|
||||||
|
|
||||||
def _expand_globals(config):
    """Fold the 'globals' section of `config` into every other section,
    then expand '{name}' references within each section.

    Options a section already defines are not overwritten, and the
    'globals' section is removed.  References to names the section does
    not define are left untouched.
    """
    _ensure_cfg_read()
    if config.has_section('globals'):
        shared_options = config.items('globals')
    else:
        shared_options = tuple()

    for section in config.sections():
        if section == 'globals':
            continue
        for option, value in shared_options:
            if not config.has_option(section, option):
                config.set(section, option, value)
    config.remove_section('globals')

    # now expanding local variables defined in the cfg file
    #
    for section in config.sections():
        section_vars = dict(config.items(section))

        def _replacer(matchobj):
            key = matchobj.group(1)
            if key in section_vars:
                return section_vars[key]
            return matchobj.group(0)

        for option, value in config.items(section):
            config.set(section, option, _VAR_REPL.sub(_replacer, value))
|
|
||||||
|
|
||||||
#_expand_globals(_SCHEMES)
|
|
||||||
|
|
||||||
# FIXME don't rely on sys.version here, its format is an implementation detail
|
|
||||||
# of CPython, use sys.version_info or sys.hexversion
|
|
||||||
_PY_VERSION = sys.version.split()[0]
|
|
||||||
_PY_VERSION_SHORT = sys.version[:3]
|
|
||||||
_PY_VERSION_SHORT_NO_DOT = _PY_VERSION[0] + _PY_VERSION[2]
|
|
||||||
_PREFIX = os.path.normpath(sys.prefix)
|
|
||||||
_EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
|
|
||||||
_CONFIG_VARS = None
|
|
||||||
_USER_BASE = None
|
|
||||||
|
|
||||||
|
|
||||||
def _subst_vars(path, local_vars):
    """Replace tokens like {some.thing} in `path`.

    Each token is looked up in `local_vars` first and in os.environ
    second; a token found in neither is left unchanged.
    """
    def _lookup(matchobj):
        key = matchobj.group(1)
        for mapping in (local_vars, os.environ):
            if key in mapping:
                return mapping[key]
        return matchobj.group(0)

    return _VAR_REPL.sub(_lookup, path)
|
|
||||||
|
|
||||||
|
|
||||||
def _extend_dict(target_dict, other_dict):
|
|
||||||
target_keys = target_dict.keys()
|
|
||||||
for key, value in other_dict.items():
|
|
||||||
if key in target_keys:
|
|
||||||
continue
|
|
||||||
target_dict[key] = value
|
|
||||||
|
|
||||||
|
|
||||||
def _expand_vars(scheme, vars):
    """Return the paths of `scheme` with their variables expanded.

    `vars` (a fresh dict when None) is completed in place with the
    configuration variables it does not already define.  On 'posix' and
    'nt' a leading '~' is expanded before substitution, and every
    resulting path is normalized.
    """
    if vars is None:
        vars = {}
    _extend_dict(vars, get_config_vars())

    expand_user = os.name in ('posix', 'nt')
    expanded = {}
    for key, template in _SCHEMES.items(scheme):
        if expand_user:
            template = os.path.expanduser(template)
        expanded[key] = os.path.normpath(_subst_vars(template, vars))
    return expanded
|
|
||||||
|
|
||||||
|
|
||||||
def format_value(value, vars):
    """Replace each {name} token in `value` with vars[name]; tokens whose
    name is not in `vars` are left unchanged."""
    def _replacer(matchobj):
        key = matchobj.group(1)
        return vars[key] if key in vars else matchobj.group(0)

    return _VAR_REPL.sub(_replacer, value)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_default_scheme():
|
|
||||||
if os.name == 'posix':
|
|
||||||
# the default scheme for posix is posix_prefix
|
|
||||||
return 'posix_prefix'
|
|
||||||
return os.name
|
|
||||||
|
|
||||||
|
|
||||||
def _getuserbase():
|
|
||||||
env_base = os.environ.get("PYTHONUSERBASE", None)
|
|
||||||
|
|
||||||
def joinuser(*args):
|
|
||||||
return os.path.expanduser(os.path.join(*args))
|
|
||||||
|
|
||||||
# what about 'os2emx', 'riscos' ?
|
|
||||||
if os.name == "nt":
|
|
||||||
base = os.environ.get("APPDATA") or "~"
|
|
||||||
if env_base:
|
|
||||||
return env_base
|
|
||||||
else:
|
|
||||||
return joinuser(base, "Python")
|
|
||||||
|
|
||||||
if sys.platform == "darwin":
|
|
||||||
framework = get_config_var("PYTHONFRAMEWORK")
|
|
||||||
if framework:
|
|
||||||
if env_base:
|
|
||||||
return env_base
|
|
||||||
else:
|
|
||||||
return joinuser("~", "Library", framework, "%d.%d" %
|
|
||||||
sys.version_info[:2])
|
|
||||||
|
|
||||||
if env_base:
|
|
||||||
return env_base
|
|
||||||
else:
|
|
||||||
return joinuser("~", ".local")
|
|
||||||
|
|
||||||
|
|
||||||
def _parse_makefile(filename, vars=None):
|
|
||||||
"""Parse a Makefile-style file.
|
|
||||||
|
|
||||||
A dictionary containing name/value pairs is returned. If an
|
|
||||||
optional dictionary is passed in as the second argument, it is
|
|
||||||
used instead of a new dictionary.
|
|
||||||
"""
|
|
||||||
# Regexes needed for parsing Makefile (and similar syntaxes,
|
|
||||||
# like old-style Setup files).
|
|
||||||
_variable_rx = re.compile("([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
|
|
||||||
_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)")
|
|
||||||
_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}")
|
|
||||||
|
|
||||||
if vars is None:
|
|
||||||
vars = {}
|
|
||||||
done = {}
|
|
||||||
notdone = {}
|
|
||||||
|
|
||||||
with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f:
|
|
||||||
lines = f.readlines()
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
if line.startswith('#') or line.strip() == '':
|
|
||||||
continue
|
|
||||||
m = _variable_rx.match(line)
|
|
||||||
if m:
|
|
||||||
n, v = m.group(1, 2)
|
|
||||||
v = v.strip()
|
|
||||||
# `$$' is a literal `$' in make
|
|
||||||
tmpv = v.replace('$$', '')
|
|
||||||
|
|
||||||
if "$" in tmpv:
|
|
||||||
notdone[n] = v
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
v = int(v)
|
|
||||||
except ValueError:
|
|
||||||
# insert literal `$'
|
|
||||||
done[n] = v.replace('$$', '$')
|
|
||||||
else:
|
|
||||||
done[n] = v
|
|
||||||
|
|
||||||
# do variable interpolation here
|
|
||||||
variables = list(notdone.keys())
|
|
||||||
|
|
||||||
# Variables with a 'PY_' prefix in the makefile. These need to
|
|
||||||
# be made available without that prefix through sysconfig.
|
|
||||||
# Special care is needed to ensure that variable expansion works, even
|
|
||||||
# if the expansion uses the name without a prefix.
|
|
||||||
renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS')
|
|
||||||
|
|
||||||
while len(variables) > 0:
|
|
||||||
for name in tuple(variables):
|
|
||||||
value = notdone[name]
|
|
||||||
m = _findvar1_rx.search(value) or _findvar2_rx.search(value)
|
|
||||||
if m is not None:
|
|
||||||
n = m.group(1)
|
|
||||||
found = True
|
|
||||||
if n in done:
|
|
||||||
item = str(done[n])
|
|
||||||
elif n in notdone:
|
|
||||||
# get it on a subsequent round
|
|
||||||
found = False
|
|
||||||
elif n in os.environ:
|
|
||||||
# do it like make: fall back to environment
|
|
||||||
item = os.environ[n]
|
|
||||||
|
|
||||||
elif n in renamed_variables:
|
|
||||||
if (name.startswith('PY_') and
|
|
||||||
name[3:] in renamed_variables):
|
|
||||||
item = ""
|
|
||||||
|
|
||||||
elif 'PY_' + n in notdone:
|
|
||||||
found = False
|
|
||||||
|
|
||||||
else:
|
|
||||||
item = str(done['PY_' + n])
|
|
||||||
|
|
||||||
else:
|
|
||||||
done[n] = item = ""
|
|
||||||
|
|
||||||
if found:
|
|
||||||
after = value[m.end():]
|
|
||||||
value = value[:m.start()] + item + after
|
|
||||||
if "$" in after:
|
|
||||||
notdone[name] = value
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
value = int(value)
|
|
||||||
except ValueError:
|
|
||||||
done[name] = value.strip()
|
|
||||||
else:
|
|
||||||
done[name] = value
|
|
||||||
variables.remove(name)
|
|
||||||
|
|
||||||
if (name.startswith('PY_') and
|
|
||||||
name[3:] in renamed_variables):
|
|
||||||
|
|
||||||
name = name[3:]
|
|
||||||
if name not in done:
|
|
||||||
done[name] = value
|
|
||||||
|
|
||||||
else:
|
|
||||||
# bogus variable reference (e.g. "prefix=$/opt/python");
|
|
||||||
# just drop it since we can't deal
|
|
||||||
done[name] = value
|
|
||||||
variables.remove(name)
|
|
||||||
|
|
||||||
# strip spurious spaces
|
|
||||||
for k, v in done.items():
|
|
||||||
if isinstance(v, str):
|
|
||||||
done[k] = v.strip()
|
|
||||||
|
|
||||||
# save the results in the global dictionary
|
|
||||||
vars.update(done)
|
|
||||||
return vars
|
|
||||||
|
|
||||||
|
|
||||||
def get_makefile_filename():
    """Return the path of the Makefile."""
    if _PYTHON_BUILD:
        return os.path.join(_PROJECT_BASE, "Makefile")

    # The config directory name carries the version and ABI flags only
    # when the interpreter exposes sys.abiflags.
    config_dir_name = 'config'
    if hasattr(sys, 'abiflags'):
        config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags)
    stdlib_dir = get_path('stdlib')
    return os.path.join(stdlib_dir, config_dir_name, 'Makefile')
|
|
||||||
|
|
||||||
|
|
||||||
def _init_posix(vars):
    """Initialize the module as appropriate for POSIX systems.

    Fills `vars` from the installed Makefile and pyconfig.h.  An IOError
    while reading either file is re-raised as an IOError naming the file.
    """
    # load the installed Makefile:
    makefile = get_makefile_filename()
    try:
        _parse_makefile(makefile, vars)
    except IOError as err:
        msg = "invalid Python installation: unable to open %s" % makefile
        if hasattr(err, "strerror"):
            msg += " (%s)" % err.strerror
        raise IOError(msg)

    # load the installed pyconfig.h:
    config_h = get_config_h_filename()
    try:
        with open(config_h) as header:
            parse_config_h(header, vars)
    except IOError as err:
        msg = "invalid Python installation: unable to open %s" % config_h
        if hasattr(err, "strerror"):
            msg += " (%s)" % err.strerror
        raise IOError(msg)

    # On AIX, there are wrong paths to the linker scripts in the Makefile
    # -- these paths are relative to the Python source, but when installed
    # the scripts are in another directory.
    if _PYTHON_BUILD:
        vars['LDSHARED'] = vars['BLDSHARED']
|
|
||||||
|
|
||||||
|
|
||||||
def _init_non_posix(vars):
    """Initialize the module as appropriate for NT"""
    # set basic install directories
    for var_name, path_name in (('LIBDEST', 'stdlib'),
                                ('BINLIBDEST', 'platstdlib'),
                                ('INCLUDEPY', 'include')):
        vars[var_name] = get_path(path_name)
    vars['SO'] = '.pyd'
    vars['EXE'] = '.exe'
    vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT
    executable = _safe_realpath(sys.executable)
    vars['BINDIR'] = os.path.dirname(executable)
|
|
||||||
|
|
||||||
#
|
|
||||||
# public APIs
|
|
||||||
#
|
|
||||||
|
|
||||||
|
|
||||||
def parse_config_h(fp, vars=None):
    """Parse a config.h-style file read line by line from `fp`.

    Returns a dictionary of name/value pairs; when `vars` is given it is
    filled and returned instead of a new dictionary.  '#define' values
    that parse as integers are stored as int, other values as the raw
    string, and '/* #undef NAME */' lines are stored as 0.
    """
    if vars is None:
        vars = {}
    define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n")
    undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n")

    line = fp.readline()
    while line:
        defined = define_rx.match(line)
        if defined:
            key, raw = defined.group(1, 2)
            try:
                vars[key] = int(raw)
            except ValueError:
                vars[key] = raw
        else:
            undefined = undef_rx.match(line)
            if undefined:
                vars[undefined.group(1)] = 0
        line = fp.readline()
    return vars
|
|
||||||
|
|
||||||
|
|
||||||
def get_config_h_filename():
    """Return the path of pyconfig.h.

    When ``_PYTHON_BUILD`` is set the header is looked for under
    ``_PROJECT_BASE`` (in its ``PC`` subdirectory on NT); otherwise the
    ``platinclude`` install path is used.
    """
    if not _PYTHON_BUILD:
        header_dir = get_path('platinclude')
    elif os.name == "nt":
        header_dir = os.path.join(_PROJECT_BASE, "PC")
    else:
        header_dir = _PROJECT_BASE
    return os.path.join(header_dir, 'pyconfig.h')
|
|
||||||
|
|
||||||
|
|
||||||
def get_scheme_names():
    """Return a sorted tuple containing the scheme names."""
    # get_paths() calls _ensure_cfg_read() before it touches _SCHEMES; do
    # the same here so the answer does not depend on whether another
    # accessor happened to run first.
    # NOTE(review): assumes _ensure_cfg_read() is safe to call repeatedly,
    # as its use at the top of get_paths() suggests - confirm.
    _ensure_cfg_read()
    return tuple(sorted(_SCHEMES.sections()))
|
|
||||||
|
|
||||||
|
|
||||||
def get_path_names():
    """Return the path names defined by the ``posix_prefix`` scheme.

    NOTE(review): the historical docstring promised a tuple, but the value
    is returned exactly as ``_SCHEMES.options()`` produces it - confirm the
    concrete type against the ``_SCHEMES`` object before relying on it.
    """
    # get_paths() calls _ensure_cfg_read() before it touches _SCHEMES; do
    # the same here so the lookup does not depend on call order.
    # NOTE(review): assumes _ensure_cfg_read() is safe to call repeatedly.
    _ensure_cfg_read()
    # xxx see if we want a static list
    return _SCHEMES.options('posix_prefix')
|
|
||||||
|
|
||||||
|
|
||||||
def get_paths(scheme=_get_default_scheme(), vars=None, expand=True):
    """Return a mapping containing an install scheme.

    :param scheme: The install scheme name. The default is the value
                   ``_get_default_scheme()`` returned when this function
                   was defined.
    :param vars: Passed through to ``_expand_vars()`` when *expand* is
                 true; unused otherwise.
    :param expand: When false, the scheme's items are returned as a plain
                   ``dict`` without going through ``_expand_vars()``.
    """
    _ensure_cfg_read()
    if not expand:
        return dict(_SCHEMES.items(scheme))
    return _expand_vars(scheme, vars)
|
|
||||||
|
|
||||||
|
|
||||||
def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True):
    """Return the path called *name* from the given install scheme.

    ``scheme``, ``vars`` and ``expand`` are forwarded unchanged to
    ``get_paths()``; a ``KeyError`` propagates if the resulting mapping has
    no entry for *name*.
    """
    scheme_paths = get_paths(scheme, vars, expand)
    return scheme_paths[name]
|
|
||||||
|
|
||||||
|
|
||||||
def get_config_vars(*args):
    """With no arguments, return a dictionary of all configuration
    variables relevant for the current platform.

    On Unix, this means every variable defined in Python's installed Makefile;
    On Windows and Mac OS it's a much smaller set.

    With arguments, return a list of values that result from looking up
    each argument in the configuration variable dictionary (``None`` for
    names that are not present).

    The dictionary is built on the first call and cached in the module-level
    ``_CONFIG_VARS``; later calls return that same object.
    """
    global _CONFIG_VARS
    if _CONFIG_VARS is None:
        # Bind the cache *before* filling it: the init functions below are
        # handed this very dict and may call back into get_config_var(),
        # which must then see the partially-built mapping rather than
        # start a second initialisation.
        _CONFIG_VARS = {}
        # Normalized versions of prefix and exec_prefix are handy to have;
        # in fact, these are the standard versions used most places in the
        # distutils2 module.
        _CONFIG_VARS['prefix'] = _PREFIX
        _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX
        _CONFIG_VARS['py_version'] = _PY_VERSION
        _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT
        # Built from version_info rather than by indexing single characters
        # of the version string, which breaks once the minor version has
        # two digits ('3.10' would yield '31').
        _CONFIG_VARS['py_version_nodot'] = '%d%d' % sys.version_info[:2]
        _CONFIG_VARS['base'] = _PREFIX
        _CONFIG_VARS['platbase'] = _EXEC_PREFIX
        _CONFIG_VARS['projectbase'] = _PROJECT_BASE
        try:
            _CONFIG_VARS['abiflags'] = sys.abiflags
        except AttributeError:
            # sys.abiflags may not be defined on all platforms.
            _CONFIG_VARS['abiflags'] = ''

        if os.name in ('nt', 'os2'):
            _init_non_posix(_CONFIG_VARS)
        if os.name == 'posix':
            _init_posix(_CONFIG_VARS)
        # Setting 'userbase' is done below the call to the
        # init function to enable using 'get_config_var' in
        # the init-function.
        # Compared as a tuple: comparing the version *string* is
        # lexicographic and not a real version comparison.
        if sys.version_info >= (2, 6):
            _CONFIG_VARS['userbase'] = _getuserbase()

        if 'srcdir' not in _CONFIG_VARS:
            _CONFIG_VARS['srcdir'] = _PROJECT_BASE
        else:
            _CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir'])

        # Convert srcdir into an absolute path if it appears necessary.
        # Normally it is relative to the build directory.  However, during
        # testing, for example, we might be running a non-installed python
        # from a different directory.
        if _PYTHON_BUILD and os.name == "posix":
            base = _PROJECT_BASE
            try:
                cwd = os.getcwd()
            except OSError:
                cwd = None
            if (not os.path.isabs(_CONFIG_VARS['srcdir']) and
                    base != cwd):
                # srcdir is relative and we are not in the same directory
                # as the executable. Assume executable is in the build
                # directory and make srcdir absolute.
                srcdir = os.path.join(base, _CONFIG_VARS['srcdir'])
                _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir)

        if sys.platform == 'darwin':
            # The flag variables patched below; the last three are derived
            # from the first two and need to be patched up as well.
            flag_keys = ('LDFLAGS', 'BASECFLAGS',
                         'CFLAGS', 'PY_CFLAGS', 'BLDSHARED')

            kernel_version = os.uname()[2]  # Kernel version (8.4.3)
            major_version = int(kernel_version.split('.')[0])

            if major_version < 8:
                # On Mac OS X before 10.4, check if -arch and -isysroot
                # are in CFLAGS or LDFLAGS and remove them if they are.
                # This is needed when building extensions on a 10.3 system
                # using a universal build of python.
                for key in flag_keys:
                    flags = _CONFIG_VARS[key]
                    flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
                    flags = re.sub('-isysroot [^ \t]*', ' ', flags)
                    _CONFIG_VARS[key] = flags
            else:
                # Allow the user to override the architecture flags using
                # an environment variable.
                # NOTE: This name was introduced by Apple in OSX 10.5 and
                # is used by several scripting languages distributed with
                # that OS release.
                if 'ARCHFLAGS' in os.environ:
                    arch = os.environ['ARCHFLAGS']
                    for key in flag_keys:
                        flags = _CONFIG_VARS[key]
                        flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
                        flags = flags + ' ' + arch
                        _CONFIG_VARS[key] = flags

                # If we're on OSX 10.5 or later and the user tries to
                # compile an extension using an SDK that is not present
                # on the current machine it is better to not use an SDK
                # than to fail.
                #
                # The major usecase for this is users using a Python.org
                # binary installer  on OSX 10.6: that installer uses
                # the 10.4u SDK, but that SDK is not installed by default
                # when you install Xcode.
                #
                CFLAGS = _CONFIG_VARS.get('CFLAGS', '')
                m = re.search(r'-isysroot\s+(\S+)', CFLAGS)
                if m is not None:
                    sdk = m.group(1)
                    if not os.path.exists(sdk):
                        for key in flag_keys:
                            flags = _CONFIG_VARS[key]
                            flags = re.sub(r'-isysroot\s+\S+(\s|$)', ' ',
                                           flags)
                            _CONFIG_VARS[key] = flags

    if args:
        return [_CONFIG_VARS.get(name) for name in args]
    return _CONFIG_VARS
|
|
||||||
|
|
||||||
|
|
||||||
def get_config_var(name):
    """Return the value of the single configuration variable *name*.

    The lookup is made in the dictionary returned by ``get_config_vars()``,
    i.e. this is equivalent to ``get_config_vars().get(name)``; names that
    are not present give ``None``.
    """
    config = get_config_vars()
    return config.get(name)
|
|
||||||
|
|
||||||
|
|
||||||
def get_platform():
    """Return a string that identifies the current platform.

    This is used mainly to distinguish platform-specific build directories and
    platform-specific built distributions.  Typically includes the OS name
    and version and the architecture (as supplied by 'os.uname()'),
    although the exact information included depends on the OS; eg. for IRIX
    the architecture isn't particularly important (IRIX only runs on SGI
    hardware), but for Linux the kernel version isn't particularly
    important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.

    :raises ValueError: on Mac OS X, when CFLAGS names a combination of
                        ``-arch`` values that has no known machine name.
    """
    if os.name == 'nt':
        # sniff sys.version for architecture.
        prefix = " bit ("
        i = sys.version.find(prefix)
        if i == -1:
            return sys.platform
        j = sys.version.find(")", i)
        look = sys.version[i + len(prefix):j].lower()
        if look == 'amd64':
            return 'win-amd64'
        if look == 'itanium':
            return 'win-ia64'
        return sys.platform

    if os.name != "posix" or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix
    osname, host, release, version, machine = os.uname()

    # Convert the OS name to lowercase, remove '/' characters
    # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_')
    machine = machine.replace('/', '-')

    if osname[:5] == "linux":
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)
    elif osname[:5] == "sunos":
        if release[0] >= "5":           # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
        # fall through to standard osname-release-machine representation
    elif osname[:4] == "irix":              # could be "irix64"!
        return "%s-%s" % (osname, release)
    elif osname[:3] == "aix":
        return "%s-%s.%s" % (osname, version, release)
    elif osname[:6] == "cygwin":
        osname = "cygwin"
        rel_re = re.compile(r'[\d.]+')
        m = rel_re.match(release)
        if m:
            release = m.group()
    elif osname[:6] == "darwin":
        #
        # For our purposes, we'll assume that the system version from
        # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
        # to. This makes the compatibility story a bit more sane because the
        # machine is going to compile and link as if it were
        # MACOSX_DEPLOYMENT_TARGET.
        cfgvars = get_config_vars()
        macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')

        # Always calculate the release of the running machine,
        # needed to determine if we can build fat binaries or not.
        macrelease = macver
        # Get the system version. Reading this plist is a documented
        # way to get the system version (see the documentation for
        # the Gestalt Manager)
        try:
            f = open('/System/Library/CoreServices/SystemVersion.plist')
        except IOError:
            # We're on a plain darwin box, fall back to the default
            # behaviour.
            pass
        else:
            with f:
                m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
                              r'<string>(.*?)</string>', f.read())
            if m is not None:
                macrelease = '.'.join(m.group(1).split('.')[:2])
            # else: fall back to the default behaviour

        if not macver:
            macver = macrelease

        if macver:
            release = macver
            osname = "macosx"

            # Compare numerically: as strings, '10.10.' sorts *before*
            # '10.4.', which made every 10.10+ release look older than 10.4.
            if (_macos_version_tuple(macrelease) >= (10, 4) and
                    '-arch' in get_config_vars().get('CFLAGS', '').strip()):
                # The universal build will build fat binaries, but not on
                # systems before 10.4
                #
                # Try to detect 4-way universal builds, those have machine-type
                # 'universal' instead of 'fat'.
                cflags = get_config_vars().get('CFLAGS')

                archs = re.findall(r'-arch\s+(\S+)', cflags)
                archs = tuple(sorted(set(archs)))

                if len(archs) == 1:
                    machine = archs[0]
                else:
                    machine = {
                        ('i386', 'ppc'): 'fat',
                        ('i386', 'x86_64'): 'intel',
                        ('i386', 'ppc', 'x86_64'): 'fat3',
                        ('ppc64', 'x86_64'): 'fat64',
                        ('i386', 'ppc', 'ppc64', 'x86_64'): 'universal',
                    }.get(archs)
                    if machine is None:
                        raise ValueError(
                            "Don't know machine value for archs=%r" %
                            (archs,))

            elif machine == 'i386':
                # On OSX the machine type returned by uname is always the
                # 32-bit variant, even if the executable architecture is
                # the 64-bit variant
                if sys.maxsize >= 2**32:
                    machine = 'x86_64'

            elif machine in ('PowerPC', 'Power_Macintosh'):
                # Pick a sane name for the PPC architecture.
                # See 'i386' case
                if sys.maxsize >= 2**32:
                    machine = 'ppc64'
                else:
                    machine = 'ppc'

    return "%s-%s-%s" % (osname, release, machine)


def _macos_version_tuple(version):
    """Return the leading numeric components of *version* as a tuple of ints.

    ``'10.10.3'`` gives ``(10, 10, 3)``.  Parsing stops at the first
    dot-separated component that is not made of digits only, so an
    unparsable string gives ``()``.
    """
    numbers = []
    for piece in version.split('.'):
        if not piece.isdigit():
            break
        numbers.append(int(piece))
    return tuple(numbers)
|
|
||||||
|
|
||||||
|
|
||||||
def get_python_version():
    """Return the module-level ``_PY_VERSION_SHORT`` value unchanged."""
    return _PY_VERSION_SHORT
|
|
||||||
|
|
||||||
|
|
||||||
def _print_dict(title, data):
|
|
||||||
for index, (key, value) in enumerate(sorted(data.items())):
|
|
||||||
if index == 0:
|
|
||||||
print('%s: ' % (title))
|
|
||||||
print('\t%s = "%s"' % (key, value))
|
|
||||||
|
|
||||||
|
|
||||||
def _main():
    """Display all information sysconfig contains."""
    # One-line facts first, each computed right before it is printed.
    headline = (
        ('Platform: "%s"', get_platform),
        ('Python version: "%s"', get_python_version),
        ('Current installation scheme: "%s"', _get_default_scheme),
    )
    for template, getter in headline:
        print(template % getter())

    # Then the two tables, each preceded by a print() call.
    for title, getter in (('Paths', get_paths),
                          ('Variables', get_config_vars)):
        print()
        _print_dict(title, getter())
|
|
||||||
|
|
||||||
|
|
||||||
# Dump the collected information when this module is run as a script.
if __name__ == '__main__':
    _main()
|
|
||||||
-2607
File diff suppressed because it is too large
Load Diff
-1102
File diff suppressed because it is too large
Load Diff
-1305
File diff suppressed because it is too large
Load Diff
-513
@@ -1,513 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2013 Vinay Sajip.
|
|
||||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
import hashlib
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
import subprocess
|
|
||||||
import tempfile
|
|
||||||
try:
|
|
||||||
from threading import Thread
|
|
||||||
except ImportError:
|
|
||||||
from dummy_threading import Thread
|
|
||||||
|
|
||||||
from . import DistlibException
|
|
||||||
from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
|
|
||||||
urlparse, build_opener, string_types)
|
|
||||||
from .util import cached_property, zip_dir, ServerProxy
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
DEFAULT_INDEX = 'https://pypi.python.org/pypi'
|
|
||||||
DEFAULT_REALM = 'pypi'
|
|
||||||
|
|
||||||
class PackageIndex(object):
|
|
||||||
"""
|
|
||||||
This class represents a package index compatible with PyPI, the Python
|
|
||||||
Package Index.
|
|
||||||
"""
|
|
||||||
|
|
||||||
boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
|
|
||||||
|
|
||||||
def __init__(self, url=None):
    """
    Initialise an instance.

    :param url: The URL of the index. If not specified, the URL for PyPI is
                used.
    :raises DistlibException: if the URL (after ``read_configuration()``
                              may have replaced it) is not a plain
                              ``http``/``https`` URL without params, query
                              or fragment.
    """
    self.url = url or DEFAULT_INDEX
    self.read_configuration()
    scheme, netloc, path, params, query, frag = urlparse(self.url)
    if params or query or frag or scheme not in ('http', 'https'):
        raise DistlibException('invalid repository: %s' % self.url)
    self.password_handler = None
    self.ssl_verifier = None
    self.gpg = None
    self.gpg_home = None
    self.rpc_proxy = None
    # Probe for a usable GnuPG executable; ``self.gpg`` stays None when
    # neither candidate runs successfully.
    with open(os.devnull, 'w') as sink:
        for s in ('gpg2', 'gpg'):
            try:
                # call() reports a non-zero exit status as a return value.
                # check_call() would raise CalledProcessError instead,
                # which is not an OSError and would abort construction
                # rather than fall through to the next candidate.
                rc = subprocess.call([s, '--version'], stdout=sink,
                                     stderr=sink)
            except OSError:
                # Executable missing or not runnable: try the next one.
                continue
            if rc == 0:
                self.gpg = s
                break
|
|
||||||
|
|
||||||
def _get_pypirc_command(self):
|
|
||||||
"""
|
|
||||||
Get the distutils command for interacting with PyPI configurations.
|
|
||||||
:return: the command.
|
|
||||||
"""
|
|
||||||
from distutils.core import Distribution
|
|
||||||
from distutils.config import PyPIRCCommand
|
|
||||||
d = Distribution()
|
|
||||||
return PyPIRCCommand(d)
|
|
||||||
|
|
||||||
def read_configuration(self):
    """
    Read the PyPI access configuration as supported by distutils, getting
    distutils to do the actual work. This populates the ``username``,
    ``password``, ``realm`` and ``url`` attributes from the configuration.

    ``realm`` falls back to ``'pypi'`` and ``url`` to its current value
    when the configuration has no such entry; ``username`` and
    ``password`` become ``None`` in that case.
    """
    command = self._get_pypirc_command()
    command.repository = self.url
    settings = command._read_pypirc()
    for attr in ('username', 'password'):
        setattr(self, attr, settings.get(attr))
    self.realm = settings.get('realm', 'pypi')
    self.url = settings.get('repository', self.url)
|
|
||||||
|
|
||||||
def save_configuration(self):
    """
    Save the PyPI access configuration. The ``username`` and ``password``
    attributes must have been set before calling this method;
    ``check_credentials()`` is called first to enforce that.

    As with reading, distutils does the actual work.
    """
    self.check_credentials()
    self._get_pypirc_command()._store_pypirc(self.username, self.password)
|
|
||||||
|
|
||||||
def check_credentials(self):
    """
    Ensure that both ``username`` and ``password`` are set.

    On success a basic-auth handler for the index host is built from the
    credentials and stored in ``password_handler``.

    :raises DistlibException: if either attribute is ``None``.
    """
    have_both = self.username is not None and self.password is not None
    if not have_both:
        raise DistlibException('username and password must be set')
    manager = HTTPPasswordMgr()
    # Element 1 of the parse result is the network location.
    host = urlparse(self.url)[1]
    manager.add_password(self.realm, host, self.username, self.password)
    self.password_handler = HTTPBasicAuthHandler(manager)
|
|
||||||
|
|
||||||
def register(self, metadata):
    """
    Register a distribution on PyPI, using the provided metadata.

    Two requests are sent with the same metadata fields: first with
    ``:action`` set to ``verify``, then with ``:action`` set to ``submit``.

    :param metadata: A :class:`Metadata` instance defining at least a name
                     and version number for the distribution to be
                     registered.
    :return: The HTTP response received from PyPI upon submission of the
             request (the response to the ``submit`` step).
    """
    self.check_credentials()
    metadata.validate()
    fields = metadata.todict()

    fields[':action'] = 'verify'
    # The verify response is kept referenced until return but is not
    # inspected, exactly as before.
    verify_response = self.send_request(
        self.encode_request(fields.items(), []))

    fields[':action'] = 'submit'
    return self.send_request(self.encode_request(fields.items(), []))
|
|
||||||
|
|
||||||
def _reader(self, name, stream, outbuf):
    """
    Thread runner for reading lines from a subprocess into a buffer.

    Each line is decoded as UTF-8, stripped of trailing whitespace,
    appended to *outbuf* and logged at debug level. Reading stops at the
    first empty read.

    :param name: The logical name of the stream (used for logging only).
    :param stream: The stream to read from. This will typically be a pipe
                   connected to the output stream of a subprocess.
    :param outbuf: The list to append the read lines to.
    """
    try:
        while True:
            s = stream.readline()
            if not s:
                break
            s = s.decode('utf-8').rstrip()
            outbuf.append(s)
            # Arguments are passed separately so the message is only
            # formatted when debug logging is enabled.
            logger.debug('%s: %s', name, s)
    finally:
        # Close the stream even if decoding fails, so it is not left open
        # when this thread dies.
        stream.close()
|
|
||||||
|
|
||||||
def get_sign_command(self, filename, signer, sign_password,
                     keystore=None):
    """
    Build a command for producing a detached, armored signature of a file.

    A fresh temporary directory is created to hold the signature file.

    :param filename: The pathname to the file to be signed.
    :param signer: The identifier of the signer of the file.
    :param sign_password: The passphrase for the signer's
                          private key used for signing. When it is not
                          ``None`` the command is told to read the
                          passphrase from file descriptor 0.
    :param keystore: The path to a directory which contains the keys
                     used in signing. If not specified, the
                     instance's ``gpg_home`` attribute is used instead.
    :return: A ``(command, signature_path)`` tuple: the signing command as
             a list suitable to be passed to :class:`subprocess.Popen`,
             and the pathname the signature will be written to.
    """
    command = [self.gpg, '--status-fd', '2', '--no-tty']
    homedir = self.gpg_home if keystore is None else keystore
    if homedir:
        command += ['--homedir', homedir]
    if sign_password is not None:
        command += ['--batch', '--passphrase-fd', '0']
    workdir = tempfile.mkdtemp()
    signature_path = os.path.join(workdir,
                                  os.path.basename(filename) + '.asc')
    command += ['--detach-sign', '--armor', '--local-user',
                signer, '--output', signature_path, filename]
    logger.debug('invoking: %s', ' '.join(command))
    return command, signature_path
|
|
||||||
|
|
||||||
def run_command(self, cmd, input_data=None):
    """
    Run a command in a child process, passing it any input data specified.

    :param cmd: The command to run.
    :param input_data: If specified, this must be a byte string containing
                       data to be sent to the child process.
    :return: A tuple consisting of the subprocess' exit code, a list of
             lines read from the subprocess' ``stdout``, and a list of
             lines read from the subprocess' ``stderr``.
    """
    popen_kwargs = dict(stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if input_data is not None:
        popen_kwargs['stdin'] = subprocess.PIPE
    out_lines, err_lines = [], []
    process = subprocess.Popen(cmd, **popen_kwargs)
    # We don't use communicate() here because we may need to
    # get clever with interacting with the command
    readers = []
    for label, pipe, sink in (('stdout', process.stdout, out_lines),
                              ('stderr', process.stderr, err_lines)):
        worker = Thread(target=self._reader, args=(label, pipe, sink))
        worker.start()
        readers.append(worker)
    if input_data is not None:
        process.stdin.write(input_data)
        process.stdin.close()

    process.wait()
    for worker in readers:
        worker.join()
    return process.returncode, out_lines, err_lines
|
|
||||||
|
|
||||||
def sign_file(self, filename, signer, sign_password, keystore=None):
    """
    Sign a file.

    :param filename: The pathname to the file to be signed.
    :param signer: The identifier of the signer of the file.
    :param sign_password: The passphrase for the signer's
                          private key used for signing, or ``None`` when
                          no passphrase is to be supplied.
    :param keystore: The path to a directory which contains the keys
                     used in signing. If not specified, the instance's
                     ``gpg_home`` attribute is used instead.
    :return: The absolute pathname of the file where the signature is
             stored.
    :raises DistlibException: if the signing command exits with a non-zero
                              status.
    """
    cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
                                          keystore)
    # get_sign_command() only asks for a passphrase on stdin when one was
    # given, and upload_file() defaults sign_password to None, so None
    # must not be encoded here - send no input in that case.
    if sign_password is None:
        input_data = None
    else:
        input_data = sign_password.encode('utf-8')
    rc, stdout, stderr = self.run_command(cmd, input_data)
    if rc != 0:
        raise DistlibException('sign command failed with error '
                               'code %s' % rc)
    return sig_file
|
|
||||||
|
|
||||||
def upload_file(self, metadata, filename, signer=None, sign_password=None,
                filetype='sdist', pyversion='source', keystore=None):
    """
    Upload a release file to the index.

    :param metadata: A :class:`Metadata` instance defining at least a name
                     and version number for the file to be uploaded.
    :param filename: The pathname of the file to be uploaded.
    :param signer: The identifier of the signer of the file.
    :param sign_password: The passphrase for the signer's
                          private key used for signing.
    :param filetype: The type of the file being uploaded. This is the
                    distutils command which produced that file, e.g.
                    ``sdist`` or ``bdist_wheel``.
    :param pyversion: The version of Python which the release relates
                      to. For code compatible with any Python, this would
                      be ``source``, otherwise it would be e.g. ``3.2``.
    :param keystore: The path to a directory which contains the keys
                     used in signing. If not specified, the instance's
                     ``gpg_home`` attribute is used instead.
    :return: The HTTP response received from PyPI upon submission of the
             request.
    :raises DistlibException: if *filename* does not exist.
    """
    self.check_credentials()
    if not os.path.exists(filename):
        raise DistlibException('not found: %s' % filename)
    metadata.validate()
    d = metadata.todict()
    sig_file = None
    if signer:
        if not self.gpg:
            logger.warning('no signing program available - not signed')
        else:
            sig_file = self.sign_file(filename, signer, sign_password,
                                      keystore)
    try:
        with open(filename, 'rb') as f:
            file_data = f.read()
        md5_digest = hashlib.md5(file_data).hexdigest()
        sha256_digest = hashlib.sha256(file_data).hexdigest()
        d.update({
            ':action': 'file_upload',
            # Spelled 'protocol_version' as in the distutils upload
            # command; the previous key was the typo 'protcol_version'.
            'protocol_version': '1',
            'filetype': filetype,
            'pyversion': pyversion,
            'md5_digest': md5_digest,
            'sha256_digest': sha256_digest,
        })
        files = [('content', os.path.basename(filename), file_data)]
        if sig_file:
            with open(sig_file, 'rb') as f:
                sig_data = f.read()
            files.append(('gpg_signature', os.path.basename(sig_file),
                          sig_data))
    finally:
        # The signature lives in its own directory (the signing step
        # returned its path); remove that directory whether or not the
        # reads above succeeded.
        if sig_file:
            shutil.rmtree(os.path.dirname(sig_file))
    request = self.encode_request(d.items(), files)
    return self.send_request(request)
|
|
||||||
|
|
||||||
def upload_documentation(self, metadata, doc_dir):
    """
    Upload documentation to the index.

    The directory is zipped and sent as a ``doc_upload`` request together
    with the name and version taken from *metadata*.

    :param metadata: A :class:`Metadata` instance defining at least a name
                     and version number for the documentation to be
                     uploaded.
    :param doc_dir: The pathname of the directory which contains the
                    documentation. This should be the directory that
                    contains the ``index.html`` for the documentation.
    :return: The HTTP response received from PyPI upon submission of the
             request.
    :raises DistlibException: if *doc_dir* is not a directory or holds no
                              ``index.html``.
    """
    self.check_credentials()
    if not os.path.isdir(doc_dir):
        raise DistlibException('not a directory: %r' % doc_dir)
    index_page = os.path.join(doc_dir, 'index.html')
    if not os.path.exists(index_page):
        raise DistlibException('not found: %r' % index_page)
    metadata.validate()
    name = metadata.name
    version = metadata.version
    archive = zip_dir(doc_dir).getvalue()
    request = self.encode_request(
        [(':action', 'doc_upload'), ('name', name), ('version', version)],
        [('content', name, archive)])
    return self.send_request(request)
|
|
||||||
|
|
||||||
def get_verify_command(self, signature_filename, data_filename,
                       keystore=None):
    """
    Build a command for verifying a file against a signature.

    :param signature_filename: The pathname to the file containing the
                               signature.
    :param data_filename: The pathname to the file containing the
                          signed data.
    :param keystore: The path to a directory which contains the keys
                     used in verification. If not specified, the
                     instance's ``gpg_home`` attribute is used instead.
    :return: The verifying command as a list suitable to be
             passed to :class:`subprocess.Popen`.
    """
    command = [self.gpg, '--status-fd', '2', '--no-tty']
    homedir = self.gpg_home if keystore is None else keystore
    if homedir:
        command += ['--homedir', homedir]
    command += ['--verify', signature_filename, data_filename]
    logger.debug('invoking: %s', ' '.join(command))
    return command
|
|
||||||
|
|
||||||
def verify_signature(self, signature_filename, data_filename,
                     keystore=None):
    """
    Verify a signature for a file.

    :param signature_filename: The pathname to the file containing the
                               signature.
    :param data_filename: The pathname to the file containing the
                          signed data.
    :param keystore: The path to a directory which contains the keys
                     used in verification. If not specified, the
                     instance's ``gpg_home`` attribute is used instead.
    :return: True if the verify command exited with status 0, False if it
             exited with status 1.
    :raises DistlibException: if no gpg executable is configured, or the
                              verify command exits with any other status.
    """
    if not self.gpg:
        raise DistlibException('verification unavailable because gpg '
                               'unavailable')
    command = self.get_verify_command(signature_filename, data_filename,
                                      keystore)
    exit_code, _out_lines, _err_lines = self.run_command(command)
    if exit_code in (0, 1):
        return exit_code == 0
    raise DistlibException('verify command failed with error '
                           'code %s' % exit_code)
|
|
||||||
|
|
||||||
def download_file(self, url, destfile, digest=None, reporthook=None):
    """
    Download ``url`` to ``destfile``, optionally checking a digest.

    This works like :func:`urlretrieve` in the standard library, except
    that a digest can be computed while the data is being read and is
    compared with an expected value afterwards. No check is made that
    the URL belongs to the index.

    :param url: The URL of the file to be downloaded (assumed to be
                available via an HTTP GET request).
    :param destfile: The pathname where the downloaded file is to be
                     saved.
    :param digest: If specified, either a (hasher, value) list/tuple,
                   where hasher names a :mod:`hashlib` algorithm (e.g.
                   ``'md5'``) and ``value`` is the expected hex digest,
                   or a bare value, in which case ``'md5'`` is assumed.
    :param reporthook: The same as for :func:`urlretrieve` in the
                       standard library.
    :raises DistlibException: if fewer bytes than the advertised
                              Content-Length were received, or if the
                              computed digest differs from the expected
                              one.
    """
    digester = None
    if digest is None:
        logger.debug('No digest specified')
    else:
        hasher = 'md5'
        if isinstance(digest, (list, tuple)):
            hasher, digest = digest
        digester = getattr(hashlib, hasher)()
        logger.debug('Digest specified: %s' % digest)

    chunk_size = 8192
    expected_size = -1      # stays -1 when no Content-Length is sent
    received = 0

    # Done by hand rather than with urlretrieve so the digest can be fed
    # block by block while the file is written.
    with open(destfile, 'wb') as out:
        # addinfourl is not a context manager on 2.x, hence try/finally.
        response = self.send_request(Request(url))
        try:
            headers = response.info()
            if "content-length" in headers:
                expected_size = int(headers["Content-Length"])
            chunk_index = 0
            if reporthook:
                reporthook(chunk_index, chunk_size, expected_size)
            chunk = response.read(chunk_size)
            while chunk:
                received += len(chunk)
                out.write(chunk)
                if digester:
                    digester.update(chunk)
                chunk_index += 1
                if reporthook:
                    reporthook(chunk_index, chunk_size, expected_size)
                chunk = response.read(chunk_size)
        finally:
            response.close()

    # A short read can only be detected when a size was advertised.
    if expected_size >= 0 and received < expected_size:
        raise DistlibException(
            'retrieval incomplete: got only %d out of %d bytes'
            % (received, expected_size))
    if digester:
        actual = digester.hexdigest()
        if digest != actual:
            raise DistlibException('%s digest mismatch for %s: expected '
                                   '%s, got %s' % (hasher, destfile,
                                                   digest, actual))
        logger.debug('Digest verified: %s', digest)
|
|
||||||
|
|
||||||
def send_request(self, req):
    """
    Open a standard library :class:`Request` and return the response.

    An opener is built for each call from whichever of the instance's
    ``password_handler`` and ``ssl_verifier`` are set (in that order).

    :param req: The request to send.
    :return: Whatever the opener's ``open`` returns for the request
             (for HTTP, a standard library HTTPResponse).
    """
    candidates = (self.password_handler, self.ssl_verifier)
    opener = build_opener(*[handler for handler in candidates if handler])
    return opener.open(req)
|
|
||||||
|
|
||||||
def encode_request(self, fields, files):
    """
    Build a multipart/form-data POST request from fields and files.

    :param fields: The fields to send as a list of (fieldname, value)
                   tuples. A value may itself be a list or tuple, in
                   which case one part is emitted per element. Values
                   are encoded as UTF-8.
    :param files: The files to send as a list of (fieldname, filename,
                  file_bytes) tuples. ``file_bytes`` is inserted as is.
    :return: A :class:`Request` for ``self.url`` carrying the encoded
             body and matching Content-type / Content-length headers.
    """
    # Adapted from packaging, which in turn was adapted from
    # http://code.activestate.com/recipes/146306

    boundary = self.boundary
    delimiter = b'--' + boundary
    chunks = []

    for name, values in fields:
        if not isinstance(values, (list, tuple)):
            values = [values]
        for value in values:
            header = 'Content-Disposition: form-data; name="%s"' % name
            chunks += [delimiter, header.encode('utf-8'), b'',
                       value.encode('utf-8')]

    for name, filename, payload in files:
        header = ('Content-Disposition: form-data; name="%s"; filename="%s"' %
                  (name, filename))
        chunks += [delimiter, header.encode('utf-8'), b'', payload]

    # Closing delimiter, plus an empty chunk so the body ends with CRLF.
    chunks += [delimiter + b'--', b'']

    body = b'\r\n'.join(chunks)
    headers = {
        'Content-type': b'multipart/form-data; boundary=' + boundary,
        'Content-length': str(len(body)),
    }
    return Request(self.url, body, headers)
|
|
||||||
|
|
||||||
def search(self, terms, operator=None):
    """
    Run a search through the index's XML-RPC proxy.

    :param terms: Either a string, which is searched for as the
                  ``name`` field, or a value passed to the proxy's
                  ``search`` call unchanged.
    :param operator: Passed to the proxy's ``search`` call; ``'and'``
                     is used when it is omitted or falsy.
    :return: Whatever the proxy's ``search`` call returns.
    """
    query = {'name': terms} if isinstance(terms, string_types) else terms
    proxy = self.rpc_proxy
    if proxy is None:
        # Create the proxy on first use and keep it on the instance.
        proxy = self.rpc_proxy = ServerProxy(self.url, timeout=3.0)
    return proxy.search(query, operator or 'and')
|
|
||||||
-1264
File diff suppressed because it is too large
Load Diff
-367
@@ -1,367 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2012-2013 Python Software Foundation.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
"""
|
|
||||||
Class representing the list of files in a distribution.
|
|
||||||
|
|
||||||
Equivalent to distutils.filelist, but fixes some problems.
|
|
||||||
"""
|
|
||||||
import fnmatch
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
|
|
||||||
from . import DistlibException
|
|
||||||
from .compat import fsdecode
|
|
||||||
from .util import convert_path
|
|
||||||
|
|
||||||
|
|
||||||
__all__ = ['Manifest']
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# a \ followed by some spaces + EOL
|
|
||||||
_COLLAPSE_PATTERN = re.compile('\\\w*\n', re.M)
|
|
||||||
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
|
|
||||||
|
|
||||||
|
|
||||||
class Manifest(object):
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.
    """

    def __init__(self, base=None):
        """
        Initialise an instance.

        :param base: The base directory to explore under. The current
                     working directory is used if it is omitted or empty.
        """
        self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
        self.prefix = self.base + os.sep
        # Filled lazily by findall(); None means "not scanned yet".
        self.allfiles = None
        self.files = set()

    #
    # Public API
    #

    def findall(self):
        """Find all files under the base and set ``allfiles`` to the absolute
        pathnames of files found.
        """
        from stat import S_ISREG, S_ISDIR, S_ISLNK

        self.allfiles = allfiles = []
        root = self.base
        stack = [root]
        pop = stack.pop
        push = stack.append

        while stack:
            root = pop()
            names = os.listdir(root)

            for name in names:
                fullname = os.path.join(root, name)

                # Avoid excess stat calls -- just one will do, thank you!
                stat = os.stat(fullname)
                mode = stat.st_mode
                if S_ISREG(mode):
                    allfiles.append(fsdecode(fullname))
                elif S_ISDIR(mode) and not S_ISLNK(mode):
                    push(fullname)

    def add(self, item):
        """
        Add a file to the manifest.

        :param item: The pathname to add. This can be relative to the base.
        """
        if not item.startswith(self.prefix):
            item = os.path.join(self.base, item)
        self.files.add(os.path.normpath(item))

    def add_many(self, items):
        """
        Add a list of files to the manifest.

        :param items: The pathnames to add. These can be relative to the base.
        """
        for item in items:
            self.add(item)

    def sorted(self, wantdirs=False):
        """
        Return sorted files in directory order.

        :param wantdirs: If true, the directories containing the files (up
                         to and including the base) are part of the result.
        :return: A list of pathnames ordered by (directory, filename).
        """

        def add_dir(dirs, d):
            # Record d and, recursively, each parent up to the base.
            dirs.add(d)
            logger.debug('add_dir added %s', d)
            if d != self.base:
                parent, _ = os.path.split(d)
                assert parent not in ('', '/')
                add_dir(dirs, parent)

        result = set(self.files)    # make a copy!
        if wantdirs:
            dirs = set()
            for f in result:
                add_dir(dirs, os.path.dirname(f))
            result |= dirs
        return [os.path.join(*path_tuple) for path_tuple in
                sorted(os.path.split(path) for path in result)]

    def clear(self):
        """Clear all collected files."""
        self.files = set()
        self.allfiles = []

    def process_directive(self, directive):
        """
        Process a directive which either adds some files from ``allfiles`` to
        ``files``, or removes some files from ``files``.

        :param directive: The directive to process. This should be in a format
                     compatible with distutils ``MANIFEST.in`` files:

                     http://docs.python.org/distutils/sourcedist.html#commands
        :raises DistlibException: if the directive is malformed (raised by
                                  :meth:`_parse_directive`).
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dirpattern).
        action, patterns, thedir, dirpattern = self._parse_directive(directive)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        #
        # Inclusions that match nothing are reported with a warning;
        # exclusions that match nothing are deliberately silent.
        if action == 'include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=True):
                    logger.warning('no files found matching %r', pattern)

        elif action == 'exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, anchor=True)

        elif action == 'global-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=False):
                    logger.warning('no files found matching %r '
                                   'anywhere in distribution', pattern)

        elif action == 'global-exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, anchor=False)

        elif action == 'recursive-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, prefix=thedir):
                    logger.warning('no files found matching %r '
                                   'under directory %r', pattern, thedir)

        elif action == 'recursive-exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, prefix=thedir)

        elif action == 'graft':
            if not self._include_pattern(None, prefix=dirpattern):
                logger.warning('no directories found matching %r',
                               dirpattern)

        elif action == 'prune':
            if not self._exclude_pattern(None, prefix=dirpattern):
                logger.warning('no previously-included directories found '
                               'matching %r', dirpattern)
        else:   # pragma: no cover
            # This should never happen, as it should be caught in
            # _parse_directive
            raise DistlibException(
                'invalid action %r' % action)

    #
    # Private API
    #

    def _parse_directive(self, directive):
        """
        Validate a directive.

        :param directive: The directive to validate.
        :return: A tuple of action, patterns, thedir, dir_patterns. Entries
                 which do not apply to the action are None.
        :raises DistlibException: if the action is unknown or has the wrong
                                  number of arguments.
        """
        words = directive.split()
        if len(words) == 1 and words[0] not in ('include', 'exclude',
                                                'global-include',
                                                'global-exclude',
                                                'recursive-include',
                                                'recursive-exclude',
                                                'graft', 'prune'):
            # no action given, let's use the default 'include'
            words.insert(0, 'include')

        action = words[0]
        patterns = thedir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistlibException(
                    '%r expects <pattern1> <pattern2> ...' % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistlibException(
                    '%r expects <dir> <pattern1> <pattern2> ...' % action)

            thedir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistlibException(
                    '%r expects a single <dir_pattern>' % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistlibException('unknown action %r' % action)

        return action, patterns, thedir, dir_pattern

    def _include_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        Patterns are not quite the same as implemented by the 'fnmatch'
        module: '*' and '?'  match non-special characters, where "special"
        is platform-dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return True if files are found.
        """
        # XXX docstring lying about what the special chars are?
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.files.add(name)
                found = True
        return found

    def _exclude_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.

        Other parameters are the same as for '_include_pattern()', above.
        The set 'self.files' is modified in place. Return True if files are
        found.

        This API is public to allow e.g. exclusion of SCM subdirs, e.g. when
        packaging source distributions
        """
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
        # Iterate over a copy, since entries are removed while looping.
        for f in list(self.files):
            if pattern_re.search(f):
                self.files.remove(f)
                found = True
        return found

    def _translate_pattern(self, pattern, anchor=True, prefix=None,
                           is_regex=False):
        """Translate a shell-like wildcard pattern to a compiled regular
        expression.

        Return the compiled regex.  If 'is_regex' true,
        then 'pattern' is directly compiled to a regex (if it's a string)
        or just returned as-is (assumes it's a regex object).

        Otherwise the regex matches 'pattern' below the base directory:
        under 'prefix' at any depth if a prefix is given, directly in the
        base if 'anchor' is true, or as the tail of any path if neither
        applies.  A false 'pattern' stands for "anything".
        """
        if is_regex:
            if isinstance(pattern, str):
                return re.compile(pattern)
            return pattern

        # fnmatch.translate() wraps its result differently depending on the
        # Python version (e.g. '<re>\Z(?ms)' or '(?s:<re>)\Z'), so discover
        # the wrapper by translating a placeholder rather than assuming its
        # shape or length.
        start, _, end = self._glob_to_re('_').partition('_')

        def unwrap(glob):
            # Translate a glob and peel off the version-specific wrapper.
            translated = self._glob_to_re(glob)
            return translated[len(start):len(translated) - len(end)]

        # With no pattern, accept any remainder of the path.
        tail = unwrap(pattern) if pattern else '.*'

        base = re.escape(os.path.join(self.base, ''))
        if prefix is not None:
            sep = os.sep
            if os.sep == '\\':
                sep = r'\\'
            body = base + unwrap(prefix) + sep + '.*' + tail
            return re.compile('^' + start + body + end)

        # no prefix -- respect anchor flag
        if anchor:
            return re.compile('^' + start + base + tail + end)
        return re.compile(start + tail + end)

    def _glob_to_re(self, pattern):
        """Translate a shell-like glob pattern to a regular expression.

        Return a string containing the regex.  Differs from
        'fnmatch.translate()' in that '*' does not match "special characters"
        (which are platform-specific).
        """
        pattern_re = fnmatch.translate(pattern)

        # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
        # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
        # and by extension they shouldn't match such "special characters" under
        # any OS.  So change all non-escaped dots in the RE to match any
        # character except the special characters (currently: just os.sep).
        sep = os.sep
        if os.sep == '\\':
            # we're using a regex to manipulate a regex, so we need
            # to escape the backslash twice
            sep = r'\\\\'
        escaped = r'\1[^%s]' % sep
        pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
        return pattern_re
|
|
||||||
-190
@@ -1,190 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2012-2013 Vinay Sajip.
|
|
||||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
"""Parser for the environment markers micro-language defined in PEP 345."""
|
|
||||||
|
|
||||||
import ast
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import platform
|
|
||||||
|
|
||||||
from .compat import python_implementation, string_types
|
|
||||||
from .util import in_venv
|
|
||||||
|
|
||||||
__all__ = ['interpret']
|
|
||||||
|
|
||||||
|
|
||||||
class Evaluator(object):
    """
    A limited evaluator for Python expressions.

    Only the node types which have a ``do_<nodetype>`` method are
    understood; anything else is rejected with a :class:`SyntaxError`.
    Names are resolved against the instance's context first and then
    against ``allowed_values``.
    """

    # Keyed by the lower-cased class name of the ast operator node.
    operators = {
        'eq': lambda x, y: x == y,
        'gt': lambda x, y: x > y,
        'gte': lambda x, y: x >= y,
        'in': lambda x, y: x in y,
        'lt': lambda x, y: x < y,
        'lte': lambda x, y: x <= y,
        'not': lambda x: not x,
        'noteq': lambda x, y: x != y,
        'notin': lambda x, y: x not in y,
    }

    allowed_values = {
        'sys_platform': sys.platform,
        'python_version': '%s.%s' % sys.version_info[:2],
        # parsing sys.platform is not reliable, but there is no other
        # way to get e.g. 2.7.2+, and the PEP is defined with sys.version
        'python_full_version': sys.version.split(' ', 1)[0],
        'os_name': os.name,
        'platform_in_venv': str(in_venv()),
        'platform_release': platform.release(),
        'platform_version': platform.version(),
        'platform_machine': platform.machine(),
        'platform_python_implementation': python_implementation(),
    }

    def __init__(self, context=None):
        """
        Initialise an instance.

        :param context: If specified, names are looked up in this mapping.
        """
        self.context = context or {}
        self.source = None

    def get_fragment(self, offset):
        """
        Get the part of the source which is causing a problem.

        :param offset: Index into the source at which the fragment starts.
        :return: The repr of up to ten characters of source, followed by
                 ``...`` if more source follows.
        """
        fragment_len = 10
        s = '%r' % (self.source[offset:offset + fragment_len])
        if offset + fragment_len < len(self.source):
            s += '...'
        return s

    def _describe(self, node):
        """
        Return a source fragment for ``node`` suitable for error messages,
        or a placeholder when no source string was given to evaluate().
        """
        if self.source is None:
            return '(source not available)'
        return self.get_fragment(node.col_offset)

    @staticmethod
    def _is_string_literal(node):
        """
        Tell whether ``node`` is a string literal.

        The check goes by class name so that it works both where literals
        parse as ``Str`` nodes and where they parse as ``Constant`` nodes,
        without touching the deprecated ``ast.Str`` attribute.
        """
        kind = node.__class__.__name__
        if kind == 'Str':
            return True
        return kind == 'Constant' and isinstance(node.value, string_types)

    def get_handler(self, node_type):
        """
        Get a handler for the specified AST node type.

        :param node_type: The lower-cased class name of the node.
        :return: The bound ``do_<node_type>`` method, or None.
        """
        return getattr(self, 'do_%s' % node_type, None)

    def evaluate(self, node, filename=None):
        """
        Evaluate a source string or node, using ``filename`` when
        displaying errors.

        :raises SyntaxError: if the source cannot be parsed or contains a
                             construct for which there is no handler.
        """
        if isinstance(node, string_types):
            self.source = node
            kwargs = {'mode': 'eval'}
            if filename:
                kwargs['filename'] = filename
            try:
                node = ast.parse(node, **kwargs)
            except SyntaxError as e:
                # The offset is not always available on the exception.
                s = self.get_fragment(e.offset or 0)
                raise SyntaxError('syntax error %s' % s)
        node_type = node.__class__.__name__.lower()
        handler = self.get_handler(node_type)
        if handler is None:
            raise SyntaxError("don't know how to evaluate %r %s" % (
                node_type, self._describe(node)))
        return handler(node)

    def get_attr_key(self, node):
        """Return the dotted ``name.attr`` lookup key for an attribute node."""
        assert isinstance(node, ast.Attribute), 'attribute node expected'
        return '%s.%s' % (node.value.id, node.attr)

    def do_attribute(self, node):
        """
        Resolve ``name.attr`` as a single dotted key.

        :raises SyntaxError: if the attribute is not taken from a plain
                             name, or the key is unknown.
        """
        if not isinstance(node.value, ast.Name):
            # No dotted key can be formed (e.g. ``a.b.c``), so report the
            # offending source instead.
            raise SyntaxError('invalid expression: %s' % self._describe(node))
        key = self.get_attr_key(node)
        if key in self.context:
            return self.context[key]
        if key in self.allowed_values:
            return self.allowed_values[key]
        raise SyntaxError('invalid expression: %s' % key)

    def do_boolop(self, node):
        """Evaluate an ``and`` / ``or`` chain with short-circuiting."""
        result = self.evaluate(node.values[0])
        is_or = node.op.__class__ is ast.Or
        is_and = node.op.__class__ is ast.And
        assert is_or or is_and
        if (is_and and result) or (is_or and not result):
            for n in node.values[1:]:
                result = self.evaluate(n)
                if (is_or and result) or (is_and and not result):
                    break
        return result

    def do_compare(self, node):
        """
        Evaluate a (possibly chained) comparison.

        :raises SyntaxError: if two string literals are compared with each
                             other, or an operator is not supported.
        """
        def sanity_check(lhsnode, rhsnode):
            # Comparing two literals says nothing about the environment.
            if (self._is_string_literal(lhsnode) and
                    self._is_string_literal(rhsnode)):
                s = self.get_fragment(node.col_offset)
                raise SyntaxError('Invalid comparison: %s' % s)

        lhsnode = node.left
        lhs = self.evaluate(lhsnode)
        result = True
        for op, rhsnode in zip(node.ops, node.comparators):
            sanity_check(lhsnode, rhsnode)
            op = op.__class__.__name__.lower()
            if op not in self.operators:
                raise SyntaxError('unsupported operation: %r' % op)
            rhs = self.evaluate(rhsnode)
            result = self.operators[op](lhs, rhs)
            if not result:
                break
            # For a chain such as a < b < c, b becomes the next left side.
            lhs = rhs
            lhsnode = rhsnode
        return result

    def do_expression(self, node):
        """Evaluate the body of a top-level expression node."""
        return self.evaluate(node.body)

    def do_name(self, node):
        """
        Look a bare name up in the context, then in ``allowed_values``.

        :raises SyntaxError: if the name is found in neither.
        """
        if node.id in self.context:
            return self.context[node.id]
        if node.id in self.allowed_values:
            return self.allowed_values[node.id]
        raise SyntaxError('invalid expression: %s' % node.id)

    def do_str(self, node):
        """Return the value of a ``Str`` literal node."""
        return node.s

    def do_constant(self, node):
        """
        Return the value of a ``Constant`` literal node.

        Only string constants are accepted; other literals are rejected in
        the same way as node types without a handler.
        """
        value = node.value
        if not isinstance(value, string_types):
            raise SyntaxError("don't know how to evaluate %r %s" % (
                'constant', self._describe(node)))
        return value
|
|
||||||
|
|
||||||
|
|
||||||
def interpret(marker, execution_context=None):
    """
    Evaluate an environment marker and return the result.

    :param marker: The marker to interpret. Surrounding whitespace is
                   stripped before evaluation.
    :type marker: str
    :param execution_context: The context used for name lookup.
    :type execution_context: mapping
    """
    evaluator = Evaluator(execution_context)
    expression = marker.strip()
    return evaluator.evaluate(expression)
|
|
||||||
-1058
File diff suppressed because it is too large
Load Diff
-350
@@ -1,350 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2013 Vinay Sajip.
|
|
||||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import bisect
|
|
||||||
import io
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import pkgutil
|
|
||||||
import shutil
|
|
||||||
import sys
|
|
||||||
import types
|
|
||||||
import zipimport
|
|
||||||
|
|
||||||
from . import DistlibException
|
|
||||||
from .util import cached_property, get_cache_base, path_to_cache_dir, Cache
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
cache = None # created when needed
|
|
||||||
|
|
||||||
|
|
||||||
class ResourceCache(Cache):
    """A cache which stores the bytes of resources as files."""

    def __init__(self, base=None):
        """
        Initialise an instance.

        :param base: The cache directory. If omitted, a ``resource-cache``
                     directory below the cache base is used.
        """
        if base is None:
            # Use native string to avoid issues on 2.x: see Python #20140.
            base = os.path.join(get_cache_base(), str('resource-cache'))
        super(ResourceCache, self).__init__(base)

    def is_stale(self, resource, path):
        """
        Is the cache stale for the given resource?

        :param resource: The :class:`Resource` being cached.
        :param path: The path of the resource in the cache.
        :return: True if the cache is stale.
        """
        # Cache invalidation is a hard problem :-)
        return True

    def get(self, resource):
        """
        Get a resource into the cache.

        :param resource: A :class:`Resource` instance.
        :return: The pathname of the resource in the cache. When the
                 resource's finder reports no cache prefix, the path it
                 reports is returned and nothing is written.
        """
        prefix, path = resource.finder.get_cache_info(resource)
        if prefix is None:
            return path

        target = os.path.join(self.base, self.prefix_to_dir(prefix), path)
        parent = os.path.dirname(target)
        if not os.path.isdir(parent):
            os.makedirs(parent)
        # A missing entry always needs writing; an existing one only if
        # it is considered stale.
        if not os.path.exists(target) or self.is_stale(resource, path):
            # write the bytes of the resource to the cache location
            with open(target, 'wb') as f:
                f.write(resource.bytes)
        return target
|
|
||||||
|
|
||||||
|
|
||||||
class ResourceBase(object):
    """Common state of resources: the finder in charge and the name."""

    def __init__(self, finder, name):
        """
        Initialise an instance.

        :param finder: The finder which manages this resource.
        :param name: The name of the resource.
        """
        self.name = name
        self.finder = finder
|
|
||||||
|
|
||||||
|
|
||||||
class Resource(ResourceBase):
    """
    An in-package resource such as a data file.

    User code does not normally create instances; they are handed out by
    the :class:`ResourceFinder` which manages the resource, and all
    access to the content is delegated to that finder.
    """
    is_container = False        # Backwards compatibility

    def as_stream(self):
        """
        Get the resource as a stream.

        This is a method rather than a property to make it obvious that
        the finder is asked for a stream on every call.
        """
        finder = self.finder
        return finder.get_stream(self)

    @cached_property
    def file_path(self):
        """The pathname obtained by putting this resource in the cache."""
        global cache
        shared = cache
        if shared is None:
            # The module-level cache is created on first use.
            shared = cache = ResourceCache()
        return shared.get(self)

    @cached_property
    def bytes(self):
        """The content of the resource, as reported by the finder."""
        finder = self.finder
        return finder.get_bytes(self)

    @cached_property
    def size(self):
        """The size of the resource, as reported by the finder."""
        finder = self.finder
        return finder.get_size(self)
|
|
||||||
|
|
||||||
|
|
||||||
class ResourceContainer(ResourceBase):
    """A resource which contains other resources."""
    is_container = True     # Backwards compatibility

    @cached_property
    def resources(self):
        """The contained resources, as reported by the finder."""
        finder = self.finder
        return finder.get_resources(self)
|
|
||||||
|
|
||||||
|
|
||||||
class ResourceFinder(object):
    """
    Resource finder for file system resources.
    """

    # Compiled artefacts which are never reported as resources.
    if sys.platform.startswith('java'):
        skipped_extensions = ('.pyc', '.pyo', '.class')
    else:
        skipped_extensions = ('.pyc', '.pyo')

    def __init__(self, module):
        """
        Initialise an instance.

        :param module: The module (normally a package) whose directory
                       is searched for resources.
        """
        self.module = module
        self.loader = getattr(module, '__file__', None) and \
            getattr(module, '__loader__', None) or \
            getattr(module, '__loader__', None)
        # __file__ can be missing or None (e.g. for namespace packages);
        # fall back to an empty base instead of failing in dirname().
        self.base = os.path.dirname(getattr(module, '__file__', None) or '')

    def _adjust_path(self, path):
        """Return the canonical form of ``path`` (symlinks resolved)."""
        return os.path.realpath(path)

    def _make_path(self, resource_name):
        """
        Convert a '/'-separated resource name to a file system path
        below the base directory.
        """
        # Issue #50: need to preserve type of path on Python 2.x
        # like os.path._get_sep
        if isinstance(resource_name, bytes):    # should only happen on 2.x
            sep = b'/'
        else:
            sep = '/'
        parts = resource_name.split(sep)
        parts.insert(0, self.base)
        result = os.path.join(*parts)
        return self._adjust_path(result)

    def _find(self, path):
        """Return True if ``path`` exists."""
        return os.path.exists(path)

    def get_cache_info(self, resource):
        """
        Return a (prefix, path) tuple for caching the resource. The
        prefix is None here, together with the resource's own path.
        """
        return None, resource.path

    def find(self, resource_name):
        """
        Find a resource by name.

        :param resource_name: The '/'-separated name of the resource.
        :return: A :class:`ResourceContainer` for a directory, a
                 :class:`Resource` for anything else which exists, or
                 None if nothing is found. The result carries the file
                 system location in its ``path`` attribute.
        """
        path = self._make_path(resource_name)
        if not self._find(path):
            result = None
        else:
            if self._is_directory(path):
                result = ResourceContainer(self, resource_name)
            else:
                result = Resource(self, resource_name)
            result.path = path
        return result

    def get_stream(self, resource):
        """Return a new binary stream opened on the resource's path."""
        return open(resource.path, 'rb')

    def get_bytes(self, resource):
        """Return the content of the resource as bytes."""
        with open(resource.path, 'rb') as f:
            return f.read()

    def get_size(self, resource):
        """Return the size of the resource in bytes."""
        return os.path.getsize(resource.path)

    def get_resources(self, resource):
        """
        Return the set of names found in a container resource, leaving
        out ``__pycache__`` and names with a skipped extension.
        """
        def allowed(f):
            return (f != '__pycache__' and not
                    f.endswith(self.skipped_extensions))
        return set(f for f in os.listdir(resource.path) if allowed(f))

    def is_container(self, resource):
        """Return True if the resource's path is a directory."""
        return self._is_directory(resource.path)

    _is_directory = staticmethod(os.path.isdir)

    def iterator(self, resource_name):
        """
        Yield the named resource followed by everything below it.

        Containers are queued and yielded when their turn comes; other
        resources are yielded as soon as they are met. Nothing is
        yielded if the named resource cannot be found.
        """
        resource = self.find(resource_name)
        if resource is not None:
            todo = [resource]
            while todo:
                resource = todo.pop(0)
                yield resource
                if resource.is_container:
                    rname = resource.name
                    for name in resource.resources:
                        if not rname:
                            new_name = name
                        else:
                            new_name = '/'.join([rname, name])
                        child = self.find(new_name)
                        if child is None:
                            # Listed but not found (e.g. a dangling
                            # symlink, or removed in the meantime).
                            continue
                        if child.is_container:
                            todo.append(child)
                        else:
                            yield child
|
|
||||||
|
|
||||||
|
|
||||||
class ZipResourceFinder(ResourceFinder):
    """
    Resource finder for resources in .zip files.

    Lookups work on a sorted list of the archive's entry names, searched
    with ``bisect``.
    """
    def __init__(self, module):
        super(ZipResourceFinder, self).__init__(module)
        loader = self.loader
        archive = loader.archive
        # Paths handed to this finder start with "<archive><sep>".
        self.prefix_len = 1 + len(archive)
        # PyPy doesn't have a _files attr on zipimporter, and you can't set one
        self._files = (loader._files if hasattr(loader, '_files')
                       else zipimport._zip_directory_cache[archive])
        self.index = sorted(self._files)

    def _adjust_path(self, path):
        """Paths inside an archive are used as given."""
        return path

    def _find(self, path):
        """
        Return whether ``path`` names an archive entry, or is a prefix
        (directory) of at least one entry.
        """
        relative = path[self.prefix_len:]
        found = relative in self._files
        if not found:
            if relative and relative[-1] != os.sep:
                relative = relative + os.sep
            pos = bisect.bisect(self.index, relative)
            found = (pos < len(self.index) and
                     self.index[pos].startswith(relative))
        if found:
            logger.debug('_find worked: %r %r', relative, self.loader.prefix)
        else:
            logger.debug('_find failed: %r %r', relative, self.loader.prefix)
        return found

    def get_cache_info(self, resource):
        """Return ``(archive, path of the resource within the archive)``."""
        archive = self.loader.archive
        inner = resource.path[1 + len(archive):]
        return archive, inner

    def get_bytes(self, resource):
        """Return the resource's content via the loader's ``get_data``."""
        return self.loader.get_data(resource.path)

    def get_stream(self, resource):
        """Return an in-memory binary stream over the resource's bytes."""
        data = self.get_bytes(resource)
        return io.BytesIO(data)

    def get_size(self, resource):
        """Return field 3 of the archive's directory entry for the resource."""
        relative = resource.path[self.prefix_len:]
        entry = self._files[relative]
        return entry[3]

    def get_resources(self, resource):
        """Return the set of names of the container's immediate children."""
        prefix = resource.path[self.prefix_len:]
        if prefix and prefix[-1] != os.sep:
            prefix += os.sep
        skip = len(prefix)
        children = set()
        start = bisect.bisect(self.index, prefix)
        for pos in range(start, len(self.index)):
            entry = self.index[pos]
            if not entry.startswith(prefix):
                break
            # only immediate children
            children.add(entry[skip:].split(os.sep, 1)[0])
        return children

    def _is_directory(self, path):
        """Return whether some archive entry lies beneath ``path``."""
        relative = path[self.prefix_len:]
        if relative and relative[-1] != os.sep:
            relative += os.sep
        pos = bisect.bisect(self.index, relative)
        return (pos < len(self.index) and
                self.index[pos].startswith(relative))
|
|
||||||
|
|
||||||
# Maps a loader type to the callable that builds a finder for modules
# loaded by that kind of loader.
_finder_registry = {
    type(None): ResourceFinder,
    zipimport.zipimporter: ZipResourceFinder
}

try:
    # The file-system loader classes are exposed publicly through
    # importlib.machinery. From Python 3.5 they are no longer attributes of
    # _frozen_importlib, so looking them up only there silently registered
    # nothing. Fall back to _frozen_importlib for interpreters that lack
    # importlib.machinery.
    try:
        from importlib import machinery as _machinery
    except ImportError:
        import _frozen_importlib as _machinery
    _finder_registry[_machinery.SourceFileLoader] = ResourceFinder
    _finder_registry[_machinery.FileFinder] = ResourceFinder
    del _machinery
except (ImportError, AttributeError):
    # No importlib-based loaders available (e.g. Python 2).
    pass
|
|
||||||
|
|
||||||
|
|
||||||
def register_finder(loader, finder_maker):
    """
    Register ``finder_maker`` as the finder factory for all loaders that
    have the same type as ``loader``.
    """
    loader_type = type(loader)
    _finder_registry[loader_type] = finder_maker
|
|
||||||
|
|
||||||
# Finders already built, keyed by package name.
_finder_cache = {}


def finder(package):
    """
    Return a resource finder for a package.

    The package is imported if it is not already in ``sys.modules``, and the
    finder is cached so later calls return the same instance.

    :param package: The name of the package.
    :return: A :class:`ResourceFinder` instance for the package.
    :raises DistlibException: If the name refers to a plain module, or no
                              finder is registered for its loader type.
    """
    if package in _finder_cache:
        return _finder_cache[package]

    if package not in sys.modules:
        __import__(package)
    module = sys.modules[package]
    # Only packages have a __path__ attribute.
    if getattr(module, '__path__', None) is None:
        raise DistlibException('You cannot get a finder for a module, '
                               'only for a package')
    loader_type = type(getattr(module, '__loader__', None))
    finder_maker = _finder_registry.get(loader_type)
    if finder_maker is None:
        raise DistlibException('Unable to locate finder for %r' % package)
    made = finder_maker(module)
    _finder_cache[package] = made
    return made
|
|
||||||
|
|
||||||
|
|
||||||
# Kept for backward compatibility; finder_for_path() now builds a fresh
# placeholder module per call instead of mutating this shared object.
_dummy_module = types.ModuleType(str('__dummy__'))


def finder_for_path(path):
    """
    Return a resource finder for a path, which should represent a container.

    :param path: The path.
    :return: A :class:`ResourceFinder` instance for the path, or ``None`` if
             no finder is registered for the type of the path's importer.
    """
    # calls any path hooks, gets importer into cache
    pkgutil.get_importer(path)
    loader = sys.path_importer_cache.get(path)
    finder_maker = _finder_registry.get(type(loader))
    if not finder_maker:
        return None
    # Each finder keeps a reference to the module it was built from, so use
    # a new placeholder every time: sharing one object would change the
    # __file__/__loader__ seen through previously returned finders.
    module = types.ModuleType(str('__dummy__'))
    module.__file__ = os.path.join(path, '')
    module.__loader__ = loader
    return finder_maker(module)
|
|
||||||
-365
@@ -1,365 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2013-2015 Vinay Sajip.
|
|
||||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
from io import BytesIO
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import struct
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from .compat import sysconfig, detect_encoding, ZipFile
|
|
||||||
from .resources import finder
|
|
||||||
from .util import (FileOperator, get_export_entry, convert_path,
|
|
||||||
get_executable, in_venv)
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_DEFAULT_MANIFEST = '''
|
|
||||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
||||||
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
|
|
||||||
<assemblyIdentity version="1.0.0.0"
|
|
||||||
processorArchitecture="X86"
|
|
||||||
name="%s"
|
|
||||||
type="win32"/>
|
|
||||||
|
|
||||||
<!-- Identify the application security requirements. -->
|
|
||||||
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
|
|
||||||
<security>
|
|
||||||
<requestedPrivileges>
|
|
||||||
<requestedExecutionLevel level="asInvoker" uiAccess="false"/>
|
|
||||||
</requestedPrivileges>
|
|
||||||
</security>
|
|
||||||
</trustInfo>
|
|
||||||
</assembly>'''.strip()
|
|
||||||
|
|
||||||
# check if Python is called on the first line with this expression
|
|
||||||
FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
|
|
||||||
SCRIPT_TEMPLATE = '''# -*- coding: utf-8 -*-
|
|
||||||
if __name__ == '__main__':
|
|
||||||
import sys, re
|
|
||||||
|
|
||||||
def _resolve(module, func):
|
|
||||||
__import__(module)
|
|
||||||
mod = sys.modules[module]
|
|
||||||
parts = func.split('.')
|
|
||||||
result = getattr(mod, parts.pop(0))
|
|
||||||
for p in parts:
|
|
||||||
result = getattr(result, p)
|
|
||||||
return result
|
|
||||||
|
|
||||||
try:
|
|
||||||
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
|
||||||
|
|
||||||
func = _resolve('%(module)s', '%(func)s')
|
|
||||||
rc = func() # None interpreted as 0
|
|
||||||
except Exception as e: # only supporting Python >= 2.6
|
|
||||||
sys.stderr.write('%%s\\n' %% e)
|
|
||||||
rc = 1
|
|
||||||
sys.exit(rc)
|
|
||||||
'''
|
|
||||||
|
|
||||||
|
|
||||||
class ScriptMaker(object):
    """
    A class to copy or create scripts from source scripts or callable
    specifications.
    """
    script_template = SCRIPT_TEMPLATE

    executable = None  # for shebangs

    def __init__(self, source_dir, target_dir, add_launchers=True,
                 dry_run=False, fileop=None):
        """
        :param source_dir: Directory that script paths given to :meth:`make`
                           are joined onto.
        :param target_dir: Directory the scripts are written to.
        :param add_launchers: If true, scripts are wrapped in an executable
                              launcher when ``os.name == 'nt'``.
        :param dry_run: Passed to the default :class:`FileOperator`; not used
                        when ``fileop`` is supplied.
        :param fileop: The file operator used for all writes and copies.
        """
        self.source_dir = source_dir
        self.target_dir = target_dir
        self.add_launchers = add_launchers
        self.force = False
        self.clobber = False
        # It only makes sense to set mode bits on POSIX.
        self.set_mode = (os.name == 'posix') or (os.name == 'java' and
                                                 os._name == 'posix')
        self.variants = set(('', 'X.Y'))
        self._fileop = fileop or FileOperator(dry_run)

    def _get_alternate_executable(self, executable, options):
        """Return the ``pythonw`` variant of ``executable`` for GUI scripts on Windows."""
        if options.get('gui', False) and os.name == 'nt':
            dn, fn = os.path.split(executable)
            # Leave an executable that already is pythonw alone, otherwise
            # the replacement would produce "pythonww".
            if 'pythonw' not in fn:
                fn = fn.replace('python', 'pythonw')
            executable = os.path.join(dn, fn)
        return executable

    if sys.platform.startswith('java'):  # pragma: no cover
        def _is_shell(self, executable):
            """
            Determine if the specified executable is a script
            (contains a #! line)
            """
            try:
                with open(executable) as fp:
                    return fp.read(2) == '#!'
            except (OSError, IOError):
                logger.warning('Failed to open %s', executable)
                return False

        def _fix_jython_executable(self, executable):
            """Return the interpreter string to use in a shebang under Jython."""
            if self._is_shell(executable):
                # Workaround for Jython is not needed on Linux systems.
                import java

                if java.lang.System.getProperty('os.name') == 'Linux':
                    return executable
            elif executable.lower().endswith('jython.exe'):
                # Use wrapper exe for Jython on Windows
                return executable
            return '/usr/bin/env %s' % executable

    def _get_shebang(self, encoding, post_interp=b'', options=None):
        """
        Build the ``#!`` line for a script.

        :param encoding: The encoding of the script the line will precede.
        :param post_interp: Bytes appended after the interpreter path.
        :param options: Script options; a true ``gui`` entry selects the
                        alternate executable.
        :return: The shebang line as bytes, ending with a newline.
        :raises ValueError: If the line cannot be decoded as UTF-8 or as
                            the script's encoding.
        """
        enquote = True
        if self.executable:
            executable = self.executable
            enquote = False     # assume this will be taken care of
        elif not sysconfig.is_python_build():
            executable = get_executable()
        elif in_venv():
            executable = os.path.join(sysconfig.get_path('scripts'),
                            'python%s' % sysconfig.get_config_var('EXE'))
        else:
            executable = os.path.join(
                sysconfig.get_config_var('BINDIR'),
               'python%s%s' % (sysconfig.get_config_var('VERSION'),
                               sysconfig.get_config_var('EXE')))
        if options:
            executable = self._get_alternate_executable(executable, options)

        if sys.platform.startswith('java'):  # pragma: no cover
            executable = self._fix_jython_executable(executable)
        # Normalise case for Windows
        executable = os.path.normcase(executable)
        # If the user didn't specify an executable, it may be necessary to
        # cater for executable paths with spaces (not uncommon on Windows)
        if enquote and ' ' in executable:
            executable = '"%s"' % executable
        # Issue #51: don't use fsencode, since we later try to
        # check that the shebang is decodable using utf-8.
        executable = executable.encode('utf-8')
        # in case of IronPython, play safe and enable frames support.
        # post_interp is bytes, so the membership tests must use bytes too.
        if (sys.platform == 'cli' and b'-X:Frames' not in post_interp
                and b'-X:FullFrames' not in post_interp):
            post_interp += b' -X:Frames'
        shebang = b'#!' + executable + post_interp + b'\n'
        # Python parser starts to read a script using UTF-8 until
        # it gets a #coding:xxx cookie. The shebang has to be the
        # first line of a file, the #coding:xxx cookie cannot be
        # written before. So the shebang has to be decodable from
        # UTF-8.
        try:
            shebang.decode('utf-8')
        except UnicodeDecodeError:
            raise ValueError(
                'The shebang (%r) is not decodable from utf-8' % shebang)
        # If the script is encoded to a custom encoding (use a
        # #coding:xxx cookie), the shebang has to be decodable from
        # the script encoding too.
        if encoding != 'utf-8':
            try:
                shebang.decode(encoding)
            except UnicodeDecodeError:
                raise ValueError(
                    'The shebang (%r) is not decodable '
                    'from the script encoding (%r)' % (shebang, encoding))
        return shebang

    def _get_script_text(self, entry):
        """Fill the script template with the entry's ``prefix`` and ``suffix``."""
        return self.script_template % dict(module=entry.prefix,
                                           func=entry.suffix)

    manifest = _DEFAULT_MANIFEST

    def get_manifest(self, exename):
        """Return the manifest text with the base name of ``exename`` filled in."""
        base = os.path.basename(exename)
        return self.manifest % base

    def _write_script(self, names, shebang, script_bytes, filenames, ext):
        """
        Write one script body under each of ``names`` in the target directory.

        :param names: Iterable of script file names.
        :param shebang: The shebang line, as bytes.
        :param script_bytes: The script body, as bytes.
        :param filenames: List that every path written is appended to.
        :param ext: ``'py'`` or ``'pyw'``; selects the launcher kind and the
                    extension added on Windows.
        """
        use_launcher = self.add_launchers and os.name == 'nt'
        linesep = os.linesep.encode('utf-8')
        if not use_launcher:
            script_bytes = shebang + linesep + script_bytes
        else:
            if ext == 'py':
                launcher = self._get_launcher('t')
            else:
                launcher = self._get_launcher('w')
            # Launcher executables carry the script as an appended zip
            # archive holding __main__.py.
            stream = BytesIO()
            with ZipFile(stream, 'w') as zf:
                zf.writestr('__main__.py', script_bytes)
            zip_data = stream.getvalue()
            script_bytes = launcher + shebang + linesep + zip_data
        for name in names:
            outname = os.path.join(self.target_dir, name)
            if use_launcher:
                n, e = os.path.splitext(outname)
                if e.startswith('.py'):
                    outname = n
                outname = '%s.exe' % outname
                try:
                    self._fileop.write_binary_file(outname, script_bytes)
                except Exception:
                    # Failed writing an executable - it might be in use.
                    logger.warning('Failed to write executable - trying to '
                                   'use .deleteme logic')
                    dfname = '%s.deleteme' % outname
                    if os.path.exists(dfname):
                        os.remove(dfname)       # Not allowed to fail here
                    os.rename(outname, dfname)  # nor here
                    self._fileop.write_binary_file(outname, script_bytes)
                    logger.debug('Able to replace executable using '
                                 '.deleteme logic')
                    try:
                        os.remove(dfname)
                    except Exception:
                        pass    # still in use - ignore error
            else:
                if os.name == 'nt' and not outname.endswith('.' + ext):
                    outname = '%s.%s' % (outname, ext)
                if os.path.exists(outname) and not self.clobber:
                    logger.warning('Skipping existing file %s', outname)
                    continue
                self._fileop.write_binary_file(outname, script_bytes)
                if self.set_mode:
                    self._fileop.set_executable_mode([outname])
            filenames.append(outname)

    def _make_script(self, entry, filenames, options=None):
        """
        Generate a script that calls the callable described by ``entry``.

        One file is written per enabled variant: ``name``, ``nameX`` and
        ``name-X.Y``, where X and Y are the running interpreter's major and
        minor version numbers.
        """
        post_interp = b''
        if options:
            args = options.get('interpreter_args', [])
            if args:
                args = ' %s' % ' '.join(args)
                post_interp = args.encode('utf-8')
        shebang = self._get_shebang('utf-8', post_interp, options=options)
        script = self._get_script_text(entry).encode('utf-8')
        name = entry.name
        # Take the numbers from version_info: slicing sys.version yields
        # '3.1' for Python 3.10 and later.
        major, minor = sys.version_info[0], sys.version_info[1]
        scriptnames = set()
        if '' in self.variants:
            scriptnames.add(name)
        if 'X' in self.variants:
            scriptnames.add('%s%s' % (name, major))
        if 'X.Y' in self.variants:
            scriptnames.add('%s-%s.%s' % (name, major, minor))
        if options and options.get('gui', False):
            ext = 'pyw'
        else:
            ext = 'py'
        self._write_script(scriptnames, shebang, script, filenames, ext)

    def _copy_script(self, script, filenames):
        """
        Copy an existing script into the target directory.

        If its first line is a Python shebang, that line is replaced with
        one for the current configuration; otherwise the file is copied
        unchanged. Empty files are skipped with a warning.
        """
        adjust = False
        script = os.path.join(self.source_dir, convert_path(script))
        outname = os.path.join(self.target_dir, os.path.basename(script))
        if not self.force and not self._fileop.newer(script, outname):
            logger.debug('not copying %s (up-to-date)', script)
            return

        # Always open the file, but ignore failures in dry-run mode --
        # that way, we'll get accurate feedback if we can read the
        # script.
        try:
            f = open(script, 'rb')
        except IOError:
            if not self.dry_run:
                raise
            f = None

        # The finally clause guarantees the source file is closed on every
        # exit path, including the early return for empty files.
        try:
            if f is not None:
                encoding, _ = detect_encoding(f.readline)
                f.seek(0)
                first_line = f.readline()
                if not first_line:
                    logger.warning('%s is an empty file (skipping)', script)
                    return

                match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n'))
                if match:
                    adjust = True
                    post_interp = match.group(1) or b''

            if not adjust:
                if f is not None:
                    f.close()
                self._fileop.copy_file(script, outname)
                if self.set_mode:
                    self._fileop.set_executable_mode([outname])
                filenames.append(outname)
            else:
                logger.info('copying and adjusting %s -> %s', script,
                            self.target_dir)
                if not self._fileop.dry_run:
                    shebang = self._get_shebang(encoding, post_interp)
                    if b'pythonw' in first_line:
                        ext = 'pyw'
                    else:
                        ext = 'py'
                    n = os.path.basename(outname)
                    # The original shebang line has already been consumed,
                    # so f.read() returns the rest of the script.
                    self._write_script([n], shebang, f.read(), filenames, ext)
        finally:
            if f is not None:
                f.close()

    @property
    def dry_run(self):
        """The dry-run flag of the underlying file operator."""
        return self._fileop.dry_run

    @dry_run.setter
    def dry_run(self, value):
        self._fileop.dry_run = value

    if os.name == 'nt':
        # Executable launcher support.
        # Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/

        def _get_launcher(self, kind):
            """
            Return the bytes of the launcher executable of the given kind
            (``'t'`` or ``'w'``) matching the interpreter's pointer size.
            """
            if struct.calcsize('P') == 8:   # 64-bit
                bits = '64'
            else:
                bits = '32'
            name = '%s%s.exe' % (kind, bits)
            # Issue 31: don't hardcode an absolute package name, but
            # determine it relative to the current package
            distlib_package = __name__.rsplit('.', 1)[0]
            result = finder(distlib_package).find(name).bytes
            return result

    # Public API follows

    def make(self, specification, options=None):
        """
        Make a script.

        :param specification: The specification, which is either a valid export
                              entry specification (to make a script from a
                              callable) or a filename (to make a script by
                              copying from a source location).
        :param options: A dictionary of options controlling script generation.
        :return: A list of all absolute pathnames written to.
        """
        filenames = []
        entry = get_export_entry(specification)
        if entry is None:
            self._copy_script(specification, filenames)
        else:
            self._make_script(entry, filenames, options=options)
        return filenames

    def make_multiple(self, specifications, options=None):
        """
        Take a list of specifications and make scripts from them.

        :param specifications: A list of specifications.
        :param options: A dictionary of options, passed to :meth:`make` for
                        every specification.
        :return: A list of all absolute pathnames written to.
        """
        filenames = []
        for specification in specifications:
            filenames.extend(self.make(specification, options))
        return filenames
|
|
||||||
BIN
Binary file not shown.
BIN
Binary file not shown.
-1579
File diff suppressed because it is too large
Load Diff
-742
@@ -1,742 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2012-2014 The Python Software Foundation.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
"""
|
|
||||||
Implementation of a flexible versioning scheme providing support for PEP-386,
|
|
||||||
distribute-compatible and semantic versioning.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .compat import string_types
|
|
||||||
|
|
||||||
__all__ = ['NormalizedVersion', 'NormalizedMatcher',
|
|
||||||
'LegacyVersion', 'LegacyMatcher',
|
|
||||||
'SemanticVersion', 'SemanticMatcher',
|
|
||||||
'UnsupportedVersionError', 'get_scheme']
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class UnsupportedVersionError(ValueError):
    """Raised for a version string that a versioning scheme cannot handle."""
|
|
||||||
|
|
||||||
|
|
||||||
class Version(object):
    """
    Base class for version objects.

    Subclasses implement :meth:`parse`, which turns the version string into
    a non-empty tuple; comparisons and hashing use that tuple. Comparing
    instances of different classes raises :class:`TypeError`.
    """
    def __init__(self, s):
        text = s.strip()
        self._string = text
        parsed = self.parse(text)
        self._parts = parsed
        assert isinstance(parsed, tuple)
        assert len(parsed) > 0

    def parse(self, s):
        """Return the comparison key for ``s``; must be overridden."""
        raise NotImplementedError('please implement in a subclass')

    def _check_compatible(self, other):
        """Raise TypeError unless ``other`` has exactly the same class."""
        if type(self) != type(other):
            raise TypeError('cannot compare %r and %r' % (self, other))

    def __eq__(self, other):
        self._check_compatible(other)
        return self._parts == other._parts

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal

    def __lt__(self, other):
        self._check_compatible(other)
        return self._parts < other._parts

    def __gt__(self, other):
        return not self.__lt__(other) and not self.__eq__(other)

    def __le__(self, other):
        if self.__lt__(other):
            return True
        return self.__eq__(other)

    def __ge__(self, other):
        if self.__gt__(other):
            return True
        return self.__eq__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self._parts)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return "%s('%s')" % (cls_name, self._string)

    def __str__(self):
        return self._string

    @property
    def is_prerelease(self):
        """Whether this is a pre-release; must be overridden."""
        raise NotImplementedError('Please implement in subclasses.')
|
|
||||||
|
|
||||||
|
|
||||||
class Matcher(object):
    """
    Base class for matchers built from a string of the form
    ``name (constraint, constraint, ...)``.

    Subclasses must set :attr:`version_class`. They may override
    :attr:`_operators`, whose values are either callables taking
    ``(version, constraint, prefix)`` or names of methods with that
    signature.
    """
    version_class = None

    dist_re = re.compile(r"^(\w[\s\w'.-]*)(\((.*)\))?")
    comp_re = re.compile(r'^(<=|>=|<|>|!=|={2,3}|~=)?\s*([^\s,]+)$')
    num_re = re.compile(r'^\d+(\.\d+)*$')

    # value is either a callable or the name of a method
    _operators = {
        '<': lambda v, c, p: v < c,
        '>': lambda v, c, p: v > c,
        '<=': lambda v, c, p: v == c or v < c,
        '>=': lambda v, c, p: v == c or v > c,
        '==': lambda v, c, p: v == c,
        '===': lambda v, c, p: v == c,
        # by default, compatible => >=.
        '~=': lambda v, c, p: v == c or v > c,
        '!=': lambda v, c, p: v != c,
    }

    def __init__(self, s):
        """
        :param s: The requirement string, e.g. ``'foo (>= 1.0, != 1.5)'``.
        :raises ValueError: If no version class is set, or the string or
                            one of its constraints cannot be parsed.
        """
        if self.version_class is None:
            raise ValueError('Please specify a version class')
        self._string = s = s.strip()
        m = self.dist_re.match(s)
        if not m:
            raise ValueError('Not valid: %r' % s)
        groups = m.groups('')
        self.name = groups[0].strip()
        self.key = self.name.lower()    # for case-insensitive comparisons
        clist = []
        if groups[2]:
            constraints = [c.strip() for c in groups[2].split(',')]
            for c in constraints:
                m = self.comp_re.match(c)
                if not m:
                    # s is still the whole requirement string here: the
                    # per-constraint version text lives in vtext below.
                    raise ValueError('Invalid %r in %r' % (c, s))
                op, vtext = m.groups()
                # A constraint without an operator means "compatible with".
                op = op or '~='
                if vtext.endswith('.*'):
                    if op not in ('==', '!='):
                        raise ValueError('\'.*\' not allowed for '
                                         '%r constraints' % op)
                    # Could be a partial version (e.g. for '2.*') which
                    # won't parse as a version, so keep it as a string
                    vn, prefix = vtext[:-2], True
                    if not self.num_re.match(vn):
                        # Just to check that vn is a valid version
                        self.version_class(vn)
                else:
                    # Should parse as a version, so we can create an
                    # instance for the comparison
                    vn, prefix = self.version_class(vtext), False
                clist.append((op, vn, prefix))
        self._parts = tuple(clist)

    def match(self, version):
        """
        Check if the provided version matches the constraints.

        :param version: The version to match against this instance.
        :type version: String or :class:`Version` instance.
        :return: ``True`` if every constraint is satisfied, else ``False``.
        :raises NotImplementedError: If a constraint uses an operator that
                                     has no entry in :attr:`_operators`.
        """
        if isinstance(version, string_types):
            version = self.version_class(version)
        for operator, constraint, prefix in self._parts:
            f = self._operators.get(operator)
            if isinstance(f, string_types):
                f = getattr(self, f)
            if not f:
                msg = ('%r not implemented '
                       'for %s' % (operator, self.__class__.__name__))
                raise NotImplementedError(msg)
            if not f(version, constraint, prefix):
                return False
        return True

    @property
    def exact_version(self):
        """
        The constraint's version if the matcher consists of a single
        ``==`` or ``===`` constraint, otherwise ``None``.
        """
        result = None
        if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='):
            result = self._parts[0][1]
        return result

    def _check_compatible(self, other):
        """Raise TypeError unless ``other`` has the same class and name."""
        if type(self) != type(other) or self.name != other.name:
            raise TypeError('cannot compare %s and %s' % (self, other))

    def __eq__(self, other):
        self._check_compatible(other)
        return self.key == other.key and self._parts == other._parts

    def __ne__(self, other):
        return not self.__eq__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self.key) + hash(self._parts)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._string)

    def __str__(self):
        return self._string
|
|
||||||
|
|
||||||
|
|
||||||
PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|b|c|rc)(\d+))?'
|
|
||||||
r'(\.(post)(\d+))?(\.(dev)(\d+))?'
|
|
||||||
r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$')
|
|
||||||
|
|
||||||
|
|
||||||
def _pep_440_key(s):
|
|
||||||
s = s.strip()
|
|
||||||
m = PEP440_VERSION_RE.match(s)
|
|
||||||
if not m:
|
|
||||||
raise UnsupportedVersionError('Not a valid version: %s' % s)
|
|
||||||
groups = m.groups()
|
|
||||||
nums = tuple(int(v) for v in groups[1].split('.'))
|
|
||||||
while len(nums) > 1 and nums[-1] == 0:
|
|
||||||
nums = nums[:-1]
|
|
||||||
|
|
||||||
if not groups[0]:
|
|
||||||
epoch = 0
|
|
||||||
else:
|
|
||||||
epoch = int(groups[0])
|
|
||||||
pre = groups[4:6]
|
|
||||||
post = groups[7:9]
|
|
||||||
dev = groups[10:12]
|
|
||||||
local = groups[13]
|
|
||||||
if pre == (None, None):
|
|
||||||
pre = ()
|
|
||||||
else:
|
|
||||||
pre = pre[0], int(pre[1])
|
|
||||||
if post == (None, None):
|
|
||||||
post = ()
|
|
||||||
else:
|
|
||||||
post = post[0], int(post[1])
|
|
||||||
if dev == (None, None):
|
|
||||||
dev = ()
|
|
||||||
else:
|
|
||||||
dev = dev[0], int(dev[1])
|
|
||||||
if local is None:
|
|
||||||
local = ()
|
|
||||||
else:
|
|
||||||
parts = []
|
|
||||||
for part in local.split('.'):
|
|
||||||
# to ensure that numeric compares as > lexicographic, avoid
|
|
||||||
# comparing them directly, but encode a tuple which ensures
|
|
||||||
# correct sorting
|
|
||||||
if part.isdigit():
|
|
||||||
part = (1, int(part))
|
|
||||||
else:
|
|
||||||
part = (0, part)
|
|
||||||
parts.append(part)
|
|
||||||
local = tuple(parts)
|
|
||||||
if not pre:
|
|
||||||
# either before pre-release, or final release and after
|
|
||||||
if not post and dev:
|
|
||||||
# before pre-release
|
|
||||||
pre = ('a', -1) # to sort before a0
|
|
||||||
else:
|
|
||||||
pre = ('z',) # to sort after all pre-releases
|
|
||||||
# now look at the state of post and dev.
|
|
||||||
if not post:
|
|
||||||
post = ('_',) # sort before 'a'
|
|
||||||
if not dev:
|
|
||||||
dev = ('final',)
|
|
||||||
|
|
||||||
#print('%s -> %s' % (s, m.groups()))
|
|
||||||
return epoch, nums, pre, post, dev, local
|
|
||||||
|
|
||||||
|
|
||||||
_normalized_key = _pep_440_key
|
|
||||||
|
|
||||||
|
|
||||||
class NormalizedVersion(Version):
    """A rational version.

    Good:
        1.2         # equivalent to "1.2.0"
        1.2.0
        1.2a1
        1.2.3a2
        1.2.3b1
        1.2.3c1
        1.2.3.4
        TODO: fill this out

    Bad:
        1.2a        # release level must have a release serial
        1.2.3b
    """
    def parse(self, s):
        """
        Return the normalized key for ``s`` and record the release clause
        exactly as written in ``self._release_clause``.
        """
        result = _normalized_key(s)
        # _normalized_key loses trailing zeroes in the release
        # clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0
        # However, PEP 440 prefix matching needs it: for example,
        # (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0).
        m = PEP440_VERSION_RE.match(s)      # must succeed
        groups = m.groups()
        self._release_clause = tuple(int(v) for v in groups[1].split('.'))
        return result

    PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev'])

    @property
    def is_prerelease(self):
        """Whether any component of the key carries a pre-release tag."""
        # The first element of the key is the integer epoch, which cannot be
        # indexed; only the tuple components can carry a tag.
        return any(t[0] in self.PREREL_TAGS for t in self._parts
                   if isinstance(t, tuple) and t)
|
|
||||||
|
|
||||||
|
|
||||||
def _match_prefix(x, y):
    """Tell whether ``str(y)`` is a dot-delimited prefix of ``str(x)``.

    Returns True when both strings are equal, or when ``str(x)`` starts
    with ``str(y)`` and the very next character is a '.'; otherwise False.
    """
    text, prefix = str(x), str(y)
    if text == prefix:
        return True
    if text.startswith(prefix):
        # text differs from prefix yet starts with it, so it is strictly
        # longer and indexing just past the prefix is safe.
        return text[len(prefix)] == '.'
    return False
|
|
||||||
|
|
||||||
|
|
||||||
class NormalizedMatcher(Matcher):
    """Matcher whose operators compare ``NormalizedVersion`` instances."""

    version_class = NormalizedVersion

    # value is either a callable or the name of a method
    _operators = {
        '~=': '_match_compatible',
        '<': '_match_lt',
        '>': '_match_gt',
        '<=': '_match_le',
        '>=': '_match_ge',
        '==': '_match_eq',
        '===': '_match_arbitrary',
        '!=': '_match_ne',
    }

    def _adjust_local(self, version, constraint, prefix):
        """Drop the version's local part when the constraint has none.

        Returns the (possibly re-parsed) version together with the
        constraint, which is passed through unchanged.
        """
        if prefix:
            # In prefix mode the constraint is only tested for a '+'.
            constraint_has_local = '+' in constraint
        else:
            # both constraint and version are NormalizedVersion instances;
            # the last key component is the local part.
            constraint_has_local = bool(constraint._parts[-1])
        if not constraint_has_local and version._parts[-1]:
            # If constraint does not have a local component,
            # ensure the version doesn't, either.
            public = version._string.split('+', 1)[0]
            version = self.version_class(public)
        return version, constraint

    def _match_lt(self, version, constraint, prefix):
        """Strictly-less-than match that also rejects prefix matches of the
        constraint's release clause."""
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version >= constraint:
            return False
        pfx = '.'.join(str(i) for i in constraint._release_clause)
        return not _match_prefix(version, pfx)

    def _match_gt(self, version, constraint, prefix):
        """Strictly-greater-than match that also rejects prefix matches of
        the constraint's release clause."""
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version <= constraint:
            return False
        pfx = '.'.join(str(i) for i in constraint._release_clause)
        return not _match_prefix(version, pfx)

    def _match_le(self, version, constraint, prefix):
        """Less-than-or-equal match after local-part adjustment."""
        adjusted, target = self._adjust_local(version, constraint, prefix)
        return adjusted <= target

    def _match_ge(self, version, constraint, prefix):
        """Greater-than-or-equal match after local-part adjustment."""
        adjusted, target = self._adjust_local(version, constraint, prefix)
        return adjusted >= target

    def _match_eq(self, version, constraint, prefix):
        """Equality match; in prefix mode a dotted-prefix match instead."""
        version, constraint = self._adjust_local(version, constraint, prefix)
        if prefix:
            return _match_prefix(version, constraint)
        return version == constraint

    def _match_arbitrary(self, version, constraint, prefix):
        """Match on the plain string forms, with no version semantics."""
        return str(version) == str(constraint)

    def _match_ne(self, version, constraint, prefix):
        """Inequality match; in prefix mode the negated prefix match."""
        version, constraint = self._adjust_local(version, constraint, prefix)
        if prefix:
            return not _match_prefix(version, constraint)
        return version != constraint

    def _match_compatible(self, version, constraint, prefix):
        """Compatible-release (``~=``) match.

        Equal versions match and lower versions never do.  Otherwise the
        version must prefix-match the constraint's release clause with its
        last component removed (a single-component clause is kept whole).
        """
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version == constraint:
            return True
        if version < constraint:
            return False
        clause = constraint._release_clause
        clause = clause[:-1] if len(clause) > 1 else clause
        pfx = '.'.join(str(i) for i in clause)
        return _match_prefix(version, pfx)
|
|
||||||
|
|
||||||
# Ordered (pattern, replacement) pairs applied to the whole lower-cased
# candidate string by _suggest_semantic_version.
_REPLACEMENTS = (
    (re.compile('[.+-]$'), ''),                     # remove trailing puncts
    (re.compile(r'^[.](\d)'), r'0.\1'),             # .N -> 0.N at start
    (re.compile('^[.-]'), ''),                      # remove leading puncts
    (re.compile(r'^\((.*)\)$'), r'\1'),             # remove parentheses
    (re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'),    # remove leading v(ersion)
    (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'),        # remove leading r(ev)
    (re.compile('[.]{2,}'), '.'),                   # multiple runs of '.'
    (re.compile(r'\b(alfa|apha)\b'), 'alpha'),      # misspelt alpha
    (re.compile(r'\b(pre-alpha|prealpha)\b'),
     'pre.alpha'),                                  # standardise
    (re.compile(r'\(beta\)$'), 'beta'),             # remove parentheses
)

# Ordered (pattern, replacement) pairs applied to the text that follows the
# numeric prefix.
_SUFFIX_REPLACEMENTS = (
    (re.compile('^[:~._+-]+'), ''),                 # remove leading puncts
    # Raw string: the pattern needs a literal backslash before ']', which a
    # plain literal only yields through an invalid escape sequence.
    (re.compile(r'[,*")([\]]'), ''),                # remove unwanted chars
    (re.compile('[~:+_ -]'), '.'),                  # replace illegal chars
    (re.compile('[.]{2,}'), '.'),                   # multiple runs of '.'
    (re.compile(r'\.$'), ''),                       # trailing '.'
)

# Leading dotted run of integers, e.g. the '1.2.3' of '1.2.3rc1'.
_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)')
|
|
||||||
|
|
||||||
|
|
||||||
def _suggest_semantic_version(s):
    """
    Try to suggest a semantic form for a version for which
    _suggest_normalized_version couldn't come up with anything.

    Returns the suggested string, or None when the result still fails
    ``is_semver``.
    """
    candidate = s.strip().lower()
    for pattern, replacement in _REPLACEMENTS:
        candidate = pattern.sub(replacement, candidate)
    candidate = candidate or '0.0.0'

    # Now look for numeric prefix, and separate it out from
    # the rest.
    match = _NUMERIC_PREFIX.match(candidate)
    if match is None:
        prefix = '0.0.0'
        suffix = candidate
    else:
        numbers = [int(piece) for piece in match.groups()[0].split('.')]
        # Pad to exactly major.minor.patch when fewer numbers were given.
        numbers.extend([0] * (3 - len(numbers)))
        tail = candidate[match.end():]
        if len(numbers) > 3:
            # Numbers beyond the third are pushed into the suffix.
            tail = '.'.join(str(n) for n in numbers[3:]) + tail
        prefix = '.'.join(str(n) for n in numbers[:3])
        suffix = tail.strip()

    if suffix:
        # massage the suffix.
        for pattern, replacement in _SUFFIX_REPLACEMENTS:
            suffix = pattern.sub(replacement, suffix)

    if suffix:
        separator = '-' if 'dev' in suffix else '+'
        suggestion = prefix + separator + suffix
    else:
        suggestion = prefix
    return suggestion if is_semver(suggestion) else None
|
|
||||||
|
|
||||||
|
|
||||||
def _suggest_normalized_version(s):
    """Suggest a normalized version close to the given version string.

    If you have a version string that isn't rational (i.e. NormalizedVersion
    doesn't like it) then you might be able to get an equivalent (or close)
    rational version from this function.

    This does a number of simple normalizations to the given string, based
    on observation of versions currently in use on PyPI. Given a dump of
    those versions during PyCon 2009, 4287 of them:
    - 2312 (53.93%) match NormalizedVersion without change
      with the automatic suggestion
    - 3474 (81.04%) match when using this suggestion method

    @param s {str} An irrational version string.
    @returns A rational version string, or None, if couldn't determine one.
    """
    try:
        _normalized_key(s)
    except UnsupportedVersionError:
        pass
    else:
        return s   # already rational

    rs = s.lower()

    # Literal substitutions, applied strictly in this order.
    # part of this could use maketrans
    literal_swaps = (
        ('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
        ('beta', 'b'), ('rc', 'c'), ('-final', ''),
        ('-pre', 'c'),
        ('-release', ''), ('.release', ''), ('-stable', ''),
        ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
        ('final', ''),
    )
    for orig, repl in literal_swaps:
        rs = rs.replace(orig, repl)

    # Regex substitutions that run before the leading 'v' is stripped.
    # Order matters: each rule sees the output of the previous one.
    early_rules = (
        # if something ends with dev or pre, we add a 0
        (r"pre$", r"pre0"),
        (r"dev$", r"dev0"),
        # if we have something like "b-2" or "a.2" at the end of the
        # version, that is probably beta, alpha, etc
        # let's remove the dash or dot
        (r"([abc]|rc)[\-\.](\d+)$", r"\1\2"),
        # 1.0-dev-r371 -> 1.0.dev371
        # 0.1-dev-r79 -> 0.1.dev79
        (r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2"),
        # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
        (r"[.~]?([abc])\.?", r"\1"),
    )
    for pattern, replacement in early_rules:
        rs = re.sub(pattern, replacement, rs)

    # Clean: v0.3, v1.0
    if rs.startswith('v'):
        rs = rs[1:]

    # Regex substitutions that run after the leading 'v' is stripped.
    late_rules = (
        # Clean leading '0's on numbers.
        # TODO: unintended side-effect on, e.g., "2003.05.09"
        # PyPI stats: 77 (~2%) better
        (r"\b0+(\d+)(?!\d)", r"\1"),
        # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools
        # infers zero.
        # PyPI stats: 245 (7.56%) better
        (r"(\d+[abc])$", r"\g<1>0"),
        # the 'dev-rNNN' tag is a dev tag
        (r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2"),
        # clean the - when used as a pre delimiter
        (r"-(a|b|c)(\d+)$", r"\1\2"),
        # a terminal "dev" or "devel" can be changed into ".dev0"
        (r"[\.\-](dev|devel)$", r".dev0"),
        # a terminal "dev" can be changed into ".dev0"
        (r"(?![\.\-])dev$", r".dev0"),
        # a terminal "final" or "stable" can be removed
        (r"(final|stable)$", ""),
        # The 'r' and the '-' tags are post release tags
        #   0.4a1.r10       ->  0.4a1.post10
        #   0.9.33-17222    ->  0.9.33.post17222
        #   0.9.33-r17222   ->  0.9.33.post17222
        (r"\.?(r|-|-r)\.?(\d+)$", r".post\2"),
        # Clean 'r' instead of 'dev' usage:
        #   0.9.33+r17222   ->  0.9.33.dev17222
        #   1.0dev123       ->  1.0.dev123
        #   1.0.git123      ->  1.0.dev123
        #   1.0.bzr123      ->  1.0.dev123
        #   0.1a0dev.123    ->  0.1a0.dev123
        # PyPI stats:  ~150 (~4%) better
        (r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2"),
        # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
        #   0.2.pre1        ->  0.2c1
        #   0.2-c1         ->  0.2c1
        #   1.0preview123   ->  1.0c123
        # PyPI stats: ~21 (0.62%) better
        (r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>"),
        # Tcl/Tk uses "px" for their post release markers
        (r"p(\d+)$", r".post\1"),
    )
    for pattern, replacement in late_rules:
        rs = re.sub(pattern, replacement, rs)

    # Only hand back the rewritten string if it now parses.
    try:
        _normalized_key(rs)
    except UnsupportedVersionError:
        return None
    return rs
|
|
||||||
|
|
||||||
#
|
|
||||||
# Legacy version processing (distribute-compatible)
|
|
||||||
#
|
|
||||||
|
|
||||||
# Splits a version string into runs of letters, runs of digits and the
# separators '.' and '-'.
_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I)

# Canonical spelling for selected parts; parts mapped to None are dropped.
_VERSION_REPLACE = {
    'pre': 'c',
    'preview': 'c',
    '-': 'final-',
    'rc': 'c',
    'dev': '@',
    '': None,
    '.': None,
}


def _legacy_key(s):
    """Build the legacy sort key for ``s`` as a tuple of strings.

    Numeric parts are zero-filled to eight characters and every other part
    is prefixed with '*'; the key always ends with '*final'.
    """
    def tokens(text):
        # Yield the normalized parts of ``text`` followed by the
        # '*final' terminator.
        for piece in _VERSION_PART.split(text.lower()):
            piece = _VERSION_REPLACE.get(piece, piece)
            if not piece:
                continue
            if '0' <= piece[:1] <= '9':
                yield piece.zfill(8)
            else:
                yield '*' + piece
        yield '*final'

    key = []
    for token in tokens(s):
        if token.startswith('*'):
            if token < '*final':
                # A tag sorting before '*final' discards preceding
                # '*final-' markers.
                while key and key[-1] == '*final-':
                    key.pop()
            # Any '*' tag discards the all-zero numeric parts before it.
            while key and key[-1] == '00000000':
                key.pop()
        key.append(token)
    return tuple(key)
|
|
||||||
|
|
||||||
|
|
||||||
class LegacyVersion(Version):
    """Version parsed with the legacy key function ``_legacy_key``."""

    def parse(self, s):
        """Return the legacy sort key for ``s``."""
        return _legacy_key(s)

    @property
    def is_prerelease(self):
        """True when any string part is a '*' tag sorting before '*final'."""
        return any(
            isinstance(part, string_types) and part.startswith('*') and
            part < '*final'
            for part in self._parts
        )
|
|
||||||
|
|
||||||
|
|
||||||
class LegacyMatcher(Matcher):
    """Matcher for ``LegacyVersion``, adding a ``~=`` operator."""

    version_class = LegacyVersion

    # Start from the base operator table and add '~='.
    _operators = dict(Matcher._operators)
    _operators['~='] = '_match_compatible'

    # Leading dotted run of integers in a constraint.  Written as a raw
    # string so that \d and \. reach the regex engine without relying on
    # invalid string escape sequences.
    numeric_re = re.compile(r'^(\d+(\.\d+)*)')

    def _match_compatible(self, version, constraint, prefix):
        """Compatible-release (``~=``) match for legacy versions.

        Versions below the constraint never match.  If the constraint has
        no leading numeric part, a warning is logged and the version is
        accepted.  Otherwise the version must prefix-match the numeric part
        with its last dotted component removed (a single component is kept
        whole).
        """
        if version < constraint:
            return False
        m = self.numeric_re.match(str(constraint))
        if not m:
            logger.warning('Cannot compute compatible match for version %s '
                           ' and constraint %s', version, constraint)
            return True
        s = m.groups()[0]
        if '.' in s:
            s = s.rsplit('.', 1)[0]
        return _match_prefix(version, s)
|
|
||||||
|
|
||||||
#
|
|
||||||
# Semantic versioning
|
|
||||||
#
|
|
||||||
|
|
||||||
# major.minor.patch, then an optional '-' pre-release part and an optional
# '+' build part, each made of dot-separated identifiers.
_SEMVER_RE = re.compile(
    r'^(\d+)\.(\d+)\.(\d+)'
    r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?'
    r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$',
    re.I)


def is_semver(s):
    """Return the regex match object if ``s`` is a semantic version, else
    None."""
    match = _SEMVER_RE.match(s)
    return match
|
|
||||||
|
|
||||||
|
|
||||||
def _semantic_key(s):
    """Build the sort key for a semantic version string.

    Returns ``((major, minor, patch), pre, build)`` where ``pre`` and
    ``build`` are tuples of strings.  Raises UnsupportedVersionError when
    ``s`` fails ``is_semver``.
    """
    def make_tuple(s, absent):
        # A missing part becomes a one-element tuple holding the sentinel.
        if s is None:
            return (absent,)
        # Drop the leading '-' or '+' before splitting.
        # We can't compare ints and strings on Python 3, so fudge it
        # by zero-filling numeric values to simulate a numeric comparison.
        return tuple(p.zfill(8) if p.isdigit() else p
                     for p in s[1:].split('.'))

    m = is_semver(s)
    if not m:
        raise UnsupportedVersionError(s)
    groups = m.groups()
    major, minor, patch = (int(g) for g in groups[:3])
    # choose the '|' and '*' so that versions sort correctly
    pre = make_tuple(groups[3], '|')
    build = make_tuple(groups[5], '*')
    return (major, minor, patch), pre, build
|
|
||||||
|
|
||||||
|
|
||||||
class SemanticVersion(Version):
    """Version parsed with the semantic key function ``_semantic_key``."""

    def parse(self, s):
        """Return the semantic sort key for ``s``."""
        return _semantic_key(s)

    @property
    def is_prerelease(self):
        """True unless the pre-release part is the '|' "absent" sentinel."""
        pre = self._parts[1]
        return pre[0] != '|'
|
|
||||||
|
|
||||||
|
|
||||||
# Matcher registered for the 'semantic' scheme; the only thing it overrides
# is the version class.
class SemanticMatcher(Matcher):
    version_class = SemanticVersion
|
|
||||||
|
|
||||||
|
|
||||||
class VersionScheme(object):
    """Bundle of a key function, a matcher class and an optional suggester
    that together describe one versioning scheme."""

    def __init__(self, key, matcher, suggester=None):
        self.key = key
        self.matcher = matcher
        self.suggester = suggester

    def is_valid_version(self, s):
        """Return True if the matcher's version class accepts ``s``."""
        try:
            self.matcher.version_class(s)
        except UnsupportedVersionError:
            return False
        return True

    def is_valid_matcher(self, s):
        """Return True if the matcher class accepts ``s``."""
        try:
            self.matcher(s)
        except UnsupportedVersionError:
            return False
        return True

    def is_valid_constraint_list(self, s):
        """
        Used for processing some metadata fields
        """
        # Wrap the bare constraints in a placeholder requirement so they
        # can be validated by the matcher.
        return self.is_valid_matcher('dummy_name (%s)' % s)

    def suggest(self, s):
        """Return the suggester's result for ``s``, or None when no
        suggester was supplied."""
        if self.suggester is not None:
            return self.suggester(s)
        return None
|
|
||||||
|
|
||||||
# Registry of the known version schemes, keyed by name.
_SCHEMES = {
    'normalized': VersionScheme(_normalized_key, NormalizedMatcher,
                                _suggest_normalized_version),
    # The suggester is stored as a plain instance attribute and is called
    # as ``self.suggester(s)``, so it must accept exactly one argument.
    'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda s: s),
    'semantic': VersionScheme(_semantic_key, SemanticMatcher,
                              _suggest_semantic_version),
}

# 'default' is an alias for the normalized scheme.
_SCHEMES['default'] = _SCHEMES['normalized']
|
|
||||||
|
|
||||||
|
|
||||||
def get_scheme(name):
    """Return the registered VersionScheme called ``name``.

    Raises ValueError when no scheme is registered under that name.
    """
    scheme = _SCHEMES.get(name)
    if scheme is None:
        raise ValueError('unknown scheme name: %r' % name)
    return scheme
|
|
||||||
BIN
Binary file not shown.
BIN
Binary file not shown.
-976
@@ -1,976 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
#
|
|
||||||
# Copyright (C) 2013-2014 Vinay Sajip.
|
|
||||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
|
||||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
|
||||||
#
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import base64
|
|
||||||
import codecs
|
|
||||||
import datetime
|
|
||||||
import distutils.util
|
|
||||||
from email import message_from_file
|
|
||||||
import hashlib
|
|
||||||
import imp
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import posixpath
|
|
||||||
import re
|
|
||||||
import shutil
|
|
||||||
import sys
|
|
||||||
import tempfile
|
|
||||||
import zipfile
|
|
||||||
|
|
||||||
from . import __version__, DistlibException
|
|
||||||
from .compat import sysconfig, ZipFile, fsdecode, text_type, filter
|
|
||||||
from .database import InstalledDistribution
|
|
||||||
from .metadata import Metadata, METADATA_FILENAME
|
|
||||||
from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache,
|
|
||||||
cached_property, get_cache_base, read_exports, tempdir)
|
|
||||||
from .version import NormalizedVersion, UnsupportedVersionError
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)

cache = None    # created when needed

# Two-letter implementation prefix, chosen from interpreter markers on
# ``sys``; 'cp' is the fallback when none of the markers is present.
if hasattr(sys, 'pypy_version_info'):
    IMP_PREFIX = 'pp'
elif sys.platform.startswith('java'):
    IMP_PREFIX = 'jy'
elif sys.platform == 'cli':
    IMP_PREFIX = 'ip'
else:
    IMP_PREFIX = 'cp'

# Version digits without a dot, taken from sysconfig with a fallback built
# from sys.version_info.
VER_SUFFIX = sysconfig.get_config_var('py_version_nodot')
if not VER_SUFFIX:   # pragma: no cover
    VER_SUFFIX = '%s%s' % sys.version_info[:2]
PYVER = 'py' + VER_SUFFIX
IMPVER = IMP_PREFIX + VER_SUFFIX

# Platform string with '-' and '.' replaced by '_'.
ARCH = distutils.util.get_platform().replace('-', '_').replace('.', '_')

# ABI tag: derived from SOABI when it starts with 'cpython-'; in every
# other case (including a SOABI with a different prefix) it is rebuilt
# from individual config variables.
ABI = sysconfig.get_config_var('SOABI')
if ABI and ABI.startswith('cpython-'):
    ABI = ABI.replace('cpython-', 'cp')
else:
    def _derive_abi():
        # 'cp' + version digits, then one flag letter per enabled option.
        parts = ['cp', VER_SUFFIX]
        if sysconfig.get_config_var('Py_DEBUG'):
            parts.append('d')
        if sysconfig.get_config_var('WITH_PYMALLOC'):
            parts.append('m')
        if sysconfig.get_config_var('Py_UNICODE_SIZE') == 4:
            parts.append('u')
        return ''.join(parts)
    ABI = _derive_abi()
    # The helper is only needed once; remove it from the module namespace.
    del _derive_abi
|
|
||||||
|
|
||||||
# Full wheel filename.  Named groups: nm (name), vn (version), bn (optional
# build), py (python tags), bi (ABI tags), ar (architecture tags).
FILENAME_RE = re.compile(r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?
-(?P<py>\w+\d+(\.\w+\d+)*)
-(?P<bi>\w+)
-(?P<ar>\w+(\.\w+)*)
\.whl$
''', re.IGNORECASE | re.VERBOSE)

# Bare "name-version[-build]" with no tags and no '.whl' extension.
NAME_VERSION_RE = re.compile(r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?$
''', re.IGNORECASE | re.VERBOSE)

# Byte patterns: a '#!' line at the start of the data, and the same line
# split into the interpreter (optionally double-quoted) and its arguments.
SHEBANG_RE = re.compile(br'\s*#![^\r\n]*')
SHEBANG_DETAIL_RE = re.compile(br'^(\s*#!("[^"]+"|\S+))\s+(.*)$')
# Generic shebangs that process_shebang substitutes for the original one.
SHEBANG_PYTHON = b'#!python'
SHEBANG_PYTHONW = b'#!pythonw'
|
|
||||||
|
|
||||||
# to_posix(o) converts a native path string to '/' separators.
if os.sep == '/':
    def to_posix(o):
        # The native separator is already '/', so nothing changes.
        return o
else:
    def to_posix(o):
        # Swap every native separator for '/'.
        return o.replace(os.sep, '/')
|
|
||||||
|
|
||||||
|
|
||||||
class Mounter(object):
    """Registry of extension modules with ``find_module``/``load_module``.

    ``impure_wheels`` maps a wheel pathname to the extensions registered
    for it; ``libs`` maps each extension's module name to the file it is
    loaded from.
    """

    def __init__(self):
        self.impure_wheels = {}
        self.libs = {}

    def add(self, pathname, extensions):
        """Register ``extensions`` (name/path pairs) under ``pathname``."""
        self.impure_wheels[pathname] = extensions
        self.libs.update(extensions)

    def remove(self, pathname):
        """Forget ``pathname`` and every extension registered for it.

        Raises KeyError when ``pathname`` was never added.
        """
        for name, _ in self.impure_wheels.pop(pathname):
            self.libs.pop(name, None)

    def find_module(self, fullname, path=None):
        """Return this object if ``fullname`` is registered, else None."""
        return self if fullname in self.libs else None

    def load_module(self, fullname):
        """Return the module called ``fullname``, loading it if needed.

        A module already in ``sys.modules`` is returned untouched.
        Otherwise the registered file is loaded dynamically, and
        ``__loader__`` (plus ``__package__`` for dotted names) is set on
        the result.  Raises ImportError for unregistered names.
        """
        if fullname in sys.modules:
            return sys.modules[fullname]
        if fullname not in self.libs:
            raise ImportError('unable to find extension for %s' % fullname)
        module = imp.load_dynamic(fullname, self.libs[fullname])
        module.__loader__ = self
        package, dot, _ = fullname.rpartition('.')
        if dot:
            module.__package__ = package
        return module
|
|
||||||
|
|
||||||
# Module-level Mounter instance, created at import time.
_hook = Mounter()
|
|
||||||
|
|
||||||
|
|
||||||
class Wheel(object):
|
|
||||||
"""
|
|
||||||
Class to build and install from Wheel files (PEP 427).
|
|
||||||
"""
|
|
||||||
|
|
||||||
wheel_version = (1, 1)
|
|
||||||
hash_kind = 'sha256'
|
|
||||||
|
|
||||||
def __init__(self, filename=None, sign=False, verify=False):
|
|
||||||
"""
|
|
||||||
Initialise an instance using a (valid) filename.
|
|
||||||
"""
|
|
||||||
self.sign = sign
|
|
||||||
self.should_verify = verify
|
|
||||||
self.buildver = ''
|
|
||||||
self.pyver = [PYVER]
|
|
||||||
self.abi = ['none']
|
|
||||||
self.arch = ['any']
|
|
||||||
self.dirname = os.getcwd()
|
|
||||||
if filename is None:
|
|
||||||
self.name = 'dummy'
|
|
||||||
self.version = '0.1'
|
|
||||||
self._filename = self.filename
|
|
||||||
else:
|
|
||||||
m = NAME_VERSION_RE.match(filename)
|
|
||||||
if m:
|
|
||||||
info = m.groupdict('')
|
|
||||||
self.name = info['nm']
|
|
||||||
# Reinstate the local version separator
|
|
||||||
self.version = info['vn'].replace('_', '-')
|
|
||||||
self.buildver = info['bn']
|
|
||||||
self._filename = self.filename
|
|
||||||
else:
|
|
||||||
dirname, filename = os.path.split(filename)
|
|
||||||
m = FILENAME_RE.match(filename)
|
|
||||||
if not m:
|
|
||||||
raise DistlibException('Invalid name or '
|
|
||||||
'filename: %r' % filename)
|
|
||||||
if dirname:
|
|
||||||
self.dirname = os.path.abspath(dirname)
|
|
||||||
self._filename = filename
|
|
||||||
info = m.groupdict('')
|
|
||||||
self.name = info['nm']
|
|
||||||
self.version = info['vn']
|
|
||||||
self.buildver = info['bn']
|
|
||||||
self.pyver = info['py'].split('.')
|
|
||||||
self.abi = info['bi'].split('.')
|
|
||||||
self.arch = info['ar'].split('.')
|
|
||||||
|
|
||||||
@property
|
|
||||||
def filename(self):
|
|
||||||
"""
|
|
||||||
Build and return a filename from the various components.
|
|
||||||
"""
|
|
||||||
if self.buildver:
|
|
||||||
buildver = '-' + self.buildver
|
|
||||||
else:
|
|
||||||
buildver = ''
|
|
||||||
pyver = '.'.join(self.pyver)
|
|
||||||
abi = '.'.join(self.abi)
|
|
||||||
arch = '.'.join(self.arch)
|
|
||||||
# replace - with _ as a local version separator
|
|
||||||
version = self.version.replace('-', '_')
|
|
||||||
return '%s-%s%s-%s-%s-%s.whl' % (self.name, version, buildver,
|
|
||||||
pyver, abi, arch)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def exists(self):
|
|
||||||
path = os.path.join(self.dirname, self.filename)
|
|
||||||
return os.path.isfile(path)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def tags(self):
|
|
||||||
for pyver in self.pyver:
|
|
||||||
for abi in self.abi:
|
|
||||||
for arch in self.arch:
|
|
||||||
yield pyver, abi, arch
|
|
||||||
|
|
||||||
@cached_property
|
|
||||||
def metadata(self):
|
|
||||||
pathname = os.path.join(self.dirname, self.filename)
|
|
||||||
name_ver = '%s-%s' % (self.name, self.version)
|
|
||||||
info_dir = '%s.dist-info' % name_ver
|
|
||||||
wrapper = codecs.getreader('utf-8')
|
|
||||||
with ZipFile(pathname, 'r') as zf:
|
|
||||||
wheel_metadata = self.get_wheel_metadata(zf)
|
|
||||||
wv = wheel_metadata['Wheel-Version'].split('.', 1)
|
|
||||||
file_version = tuple([int(i) for i in wv])
|
|
||||||
if file_version < (1, 1):
|
|
||||||
fn = 'METADATA'
|
|
||||||
else:
|
|
||||||
fn = METADATA_FILENAME
|
|
||||||
try:
|
|
||||||
metadata_filename = posixpath.join(info_dir, fn)
|
|
||||||
with zf.open(metadata_filename) as bf:
|
|
||||||
wf = wrapper(bf)
|
|
||||||
result = Metadata(fileobj=wf)
|
|
||||||
except KeyError:
|
|
||||||
raise ValueError('Invalid wheel, because %s is '
|
|
||||||
'missing' % fn)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def get_wheel_metadata(self, zf):
|
|
||||||
name_ver = '%s-%s' % (self.name, self.version)
|
|
||||||
info_dir = '%s.dist-info' % name_ver
|
|
||||||
metadata_filename = posixpath.join(info_dir, 'WHEEL')
|
|
||||||
with zf.open(metadata_filename) as bf:
|
|
||||||
wf = codecs.getreader('utf-8')(bf)
|
|
||||||
message = message_from_file(wf)
|
|
||||||
return dict(message)
|
|
||||||
|
|
||||||
@cached_property
|
|
||||||
def info(self):
|
|
||||||
pathname = os.path.join(self.dirname, self.filename)
|
|
||||||
with ZipFile(pathname, 'r') as zf:
|
|
||||||
result = self.get_wheel_metadata(zf)
|
|
||||||
return result
|
|
||||||
|
|
||||||
def process_shebang(self, data):
|
|
||||||
m = SHEBANG_RE.match(data)
|
|
||||||
if m:
|
|
||||||
end = m.end()
|
|
||||||
shebang, data_after_shebang = data[:end], data[end:]
|
|
||||||
# Preserve any arguments after the interpreter
|
|
||||||
if b'pythonw' in shebang.lower():
|
|
||||||
shebang_python = SHEBANG_PYTHONW
|
|
||||||
else:
|
|
||||||
shebang_python = SHEBANG_PYTHON
|
|
||||||
m = SHEBANG_DETAIL_RE.match(shebang)
|
|
||||||
if m:
|
|
||||||
args = b' ' + m.groups()[-1]
|
|
||||||
else:
|
|
||||||
args = b''
|
|
||||||
shebang = shebang_python + args
|
|
||||||
data = shebang + data_after_shebang
|
|
||||||
else:
|
|
||||||
cr = data.find(b'\r')
|
|
||||||
lf = data.find(b'\n')
|
|
||||||
if cr < 0 or cr > lf:
|
|
||||||
term = b'\n'
|
|
||||||
else:
|
|
||||||
if data[cr:cr + 2] == b'\r\n':
|
|
||||||
term = b'\r\n'
|
|
||||||
else:
|
|
||||||
term = b'\r'
|
|
||||||
data = SHEBANG_PYTHON + term + data
|
|
||||||
return data
|
|
||||||
|
|
||||||
def get_hash(self, data, hash_kind=None):
|
|
||||||
if hash_kind is None:
|
|
||||||
hash_kind = self.hash_kind
|
|
||||||
try:
|
|
||||||
hasher = getattr(hashlib, hash_kind)
|
|
||||||
except AttributeError:
|
|
||||||
raise DistlibException('Unsupported hash algorithm: %r' % hash_kind)
|
|
||||||
result = hasher(data).digest()
|
|
||||||
result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii')
|
|
||||||
return hash_kind, result
|
|
||||||
|
|
||||||
def write_record(self, records, record_path, base):
|
|
||||||
with CSVWriter(record_path) as writer:
|
|
||||||
for row in records:
|
|
||||||
writer.writerow(row)
|
|
||||||
p = to_posix(os.path.relpath(record_path, base))
|
|
||||||
writer.writerow((p, '', ''))
|
|
||||||
|
|
||||||
def write_records(self, info, libdir, archive_paths):
|
|
||||||
records = []
|
|
||||||
distinfo, info_dir = info
|
|
||||||
hasher = getattr(hashlib, self.hash_kind)
|
|
||||||
for ap, p in archive_paths:
|
|
||||||
with open(p, 'rb') as f:
|
|
||||||
data = f.read()
|
|
||||||
digest = '%s=%s' % self.get_hash(data)
|
|
||||||
size = os.path.getsize(p)
|
|
||||||
records.append((ap, digest, size))
|
|
||||||
|
|
||||||
p = os.path.join(distinfo, 'RECORD')
|
|
||||||
self.write_record(records, p, libdir)
|
|
||||||
ap = to_posix(os.path.join(info_dir, 'RECORD'))
|
|
||||||
archive_paths.append((ap, p))
|
|
||||||
|
|
||||||
def build_zip(self, pathname, archive_paths):
|
|
||||||
with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf:
|
|
||||||
for ap, p in archive_paths:
|
|
||||||
logger.debug('Wrote %s to %s in wheel', p, ap)
|
|
||||||
zf.write(p, ap)
|
|
||||||
|
|
||||||
def build(self, paths, tags=None, wheel_version=None):
|
|
||||||
"""
|
|
||||||
Build a wheel from files in specified paths, and use any specified tags
|
|
||||||
when determining the name of the wheel.
|
|
||||||
"""
|
|
||||||
if tags is None:
|
|
||||||
tags = {}
|
|
||||||
|
|
||||||
libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0]
|
|
||||||
if libkey == 'platlib':
|
|
||||||
is_pure = 'false'
|
|
||||||
default_pyver = [IMPVER]
|
|
||||||
default_abi = [ABI]
|
|
||||||
default_arch = [ARCH]
|
|
||||||
else:
|
|
||||||
is_pure = 'true'
|
|
||||||
default_pyver = [PYVER]
|
|
||||||
default_abi = ['none']
|
|
||||||
default_arch = ['any']
|
|
||||||
|
|
||||||
self.pyver = tags.get('pyver', default_pyver)
|
|
||||||
self.abi = tags.get('abi', default_abi)
|
|
||||||
self.arch = tags.get('arch', default_arch)
|
|
||||||
|
|
||||||
libdir = paths[libkey]
|
|
||||||
|
|
||||||
name_ver = '%s-%s' % (self.name, self.version)
|
|
||||||
data_dir = '%s.data' % name_ver
|
|
||||||
info_dir = '%s.dist-info' % name_ver
|
|
||||||
|
|
||||||
archive_paths = []
|
|
||||||
|
|
||||||
# First, stuff which is not in site-packages
|
|
||||||
for key in ('data', 'headers', 'scripts'):
|
|
||||||
if key not in paths:
|
|
||||||
continue
|
|
||||||
path = paths[key]
|
|
||||||
if os.path.isdir(path):
|
|
||||||
for root, dirs, files in os.walk(path):
|
|
||||||
for fn in files:
|
|
||||||
p = fsdecode(os.path.join(root, fn))
|
|
||||||
rp = os.path.relpath(p, path)
|
|
||||||
ap = to_posix(os.path.join(data_dir, key, rp))
|
|
||||||
archive_paths.append((ap, p))
|
|
||||||
if key == 'scripts' and not p.endswith('.exe'):
|
|
||||||
with open(p, 'rb') as f:
|
|
||||||
data = f.read()
|
|
||||||
data = self.process_shebang(data)
|
|
||||||
with open(p, 'wb') as f:
|
|
||||||
f.write(data)
|
|
||||||
|
|
||||||
# Now, stuff which is in site-packages, other than the
|
|
||||||
# distinfo stuff.
|
|
||||||
path = libdir
|
|
||||||
distinfo = None
|
|
||||||
for root, dirs, files in os.walk(path):
|
|
||||||
if root == path:
|
|
||||||
# At the top level only, save distinfo for later
|
|
||||||
# and skip it for now
|
|
||||||
for i, dn in enumerate(dirs):
|
|
||||||
dn = fsdecode(dn)
|
|
||||||
if dn.endswith('.dist-info'):
|
|
||||||
distinfo = os.path.join(root, dn)
|
|
||||||
del dirs[i]
|
|
||||||
break
|
|
||||||
assert distinfo, '.dist-info directory expected, not found'
|
|
||||||
|
|
||||||
for fn in files:
|
|
||||||
# comment out next suite to leave .pyc files in
|
|
||||||
if fsdecode(fn).endswith(('.pyc', '.pyo')):
|
|
||||||
continue
|
|
||||||
p = os.path.join(root, fn)
|
|
||||||
rp = to_posix(os.path.relpath(p, path))
|
|
||||||
archive_paths.append((rp, p))
|
|
||||||
|
|
||||||
# Now distinfo. Assumed to be flat, i.e. os.listdir is enough.
|
|
||||||
files = os.listdir(distinfo)
|
|
||||||
for fn in files:
|
|
||||||
if fn not in ('RECORD', 'INSTALLER', 'SHARED', 'WHEEL'):
|
|
||||||
p = fsdecode(os.path.join(distinfo, fn))
|
|
||||||
ap = to_posix(os.path.join(info_dir, fn))
|
|
||||||
archive_paths.append((ap, p))
|
|
||||||
|
|
||||||
wheel_metadata = [
|
|
||||||
'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version),
|
|
||||||
'Generator: distlib %s' % __version__,
|
|
||||||
'Root-Is-Purelib: %s' % is_pure,
|
|
||||||
]
|
|
||||||
for pyver, abi, arch in self.tags:
|
|
||||||
wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch))
|
|
||||||
p = os.path.join(distinfo, 'WHEEL')
|
|
||||||
with open(p, 'w') as f:
|
|
||||||
f.write('\n'.join(wheel_metadata))
|
|
||||||
ap = to_posix(os.path.join(info_dir, 'WHEEL'))
|
|
||||||
archive_paths.append((ap, p))
|
|
||||||
|
|
||||||
# Now, at last, RECORD.
|
|
||||||
# Paths in here are archive paths - nothing else makes sense.
|
|
||||||
self.write_records((distinfo, info_dir), libdir, archive_paths)
|
|
||||||
# Now, ready to build the zip file
|
|
||||||
pathname = os.path.join(self.dirname, self.filename)
|
|
||||||
self.build_zip(pathname, archive_paths)
|
|
||||||
return pathname
|
|
||||||
|
|
||||||
def install(self, paths, maker, **kwargs):
    """
    Install a wheel to the specified paths.

    :param paths: A dictionary of target directories. ``purelib`` and
                  ``platlib`` are always read. Any ``<key>`` that appears
                  in the archive as ``<name>-<version>.data/<key>/...`` is
                  looked up as well, and ``scripts`` / ``prefix`` are read
                  when dist-info metadata is written.
    :param maker: The object used to generate scripts. Its ``dry_run``
                  attribute is honoured, its ``source_dir`` and
                  ``target_dir`` attributes are overwritten, and its
                  ``make()`` method is called for each script.

    If kwarg ``warner`` is specified, it should be a callable, which will
    be called with two tuples indicating the wheel version of this software
    and the wheel version in the file, if there is a discrepancy in the
    versions. This can be used to issue any warnings or raise any
    exceptions.

    If kwarg ``lib_only`` is True, only the purelib/platlib files are
    installed, and the headers, scripts, data and dist-info metadata are
    not written.

    The return value is a :class:`InstalledDistribution` instance unless
    kwarg ``lib_only`` is True, in which case the return value is ``None``.

    :raises DistlibException: if an archive entry contains a ``..`` path
                              component, or its size or digest does not
                              match RECORD (before or after writing).
    :raises KeyError: if a file in the archive is not listed in RECORD.
    :raises ValueError: if scripts need to be generated but
                        ``paths['scripts']`` is not an existing directory.

    Any exception raised while files are being written is logged, the
    recorded file operations are rolled back, and the exception is
    re-raised.
    """

    dry_run = maker.dry_run
    warner = kwargs.get('warner')
    lib_only = kwargs.get('lib_only', False)

    pathname = os.path.join(self.dirname, self.filename)
    name_ver = '%s-%s' % (self.name, self.version)
    data_dir = '%s.data' % name_ver
    info_dir = '%s.dist-info' % name_ver

    metadata_name = posixpath.join(info_dir, METADATA_FILENAME)
    wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
    record_name = posixpath.join(info_dir, 'RECORD')

    wrapper = codecs.getreader('utf-8')

    with ZipFile(pathname, 'r') as zf:
        with zf.open(wheel_metadata_name) as bwf:
            wf = wrapper(bwf)
            message = message_from_file(wf)
        wv = message['Wheel-Version'].split('.', 1)
        file_version = tuple([int(i) for i in wv])
        if (file_version != self.wheel_version) and warner:
            warner(self.wheel_version, file_version)

        if message['Root-Is-Purelib'] == 'true':
            libdir = paths['purelib']
        else:
            libdir = paths['platlib']

        # Map each archive path to its RECORD row (path, digest, size).
        records = {}
        with zf.open(record_name) as bf:
            with CSVReader(stream=bf) as reader:
                for row in reader:
                    p = row[0]
                    records[p] = row

        # The trailing '' makes each prefix end with a '/'.
        data_pfx = posixpath.join(data_dir, '')
        info_pfx = posixpath.join(info_dir, '')
        script_pfx = posixpath.join(data_dir, 'scripts', '')

        # make a new instance rather than a copy of maker's,
        # as we mutate it
        fileop = FileOperator(dry_run=dry_run)
        fileop.record = True    # so we can rollback if needed

        bc = not sys.dont_write_bytecode    # Double negatives. Lovely!

        outfiles = []   # for RECORD writing

        # for script copying/shebang processing
        workdir = tempfile.mkdtemp()
        # set target dir later
        # we default add_launchers to False, as the
        # Python Launcher should be used instead
        maker.source_dir = workdir
        maker.target_dir = None
        try:
            for zinfo in zf.infolist():
                arcname = zinfo.filename
                if isinstance(arcname, text_type):
                    u_arcname = arcname
                else:
                    u_arcname = arcname.decode('utf-8')
                # Directory entries are not listed in RECORD and carry
                # no content to install, so there is nothing to do.
                if u_arcname.endswith('/'):
                    continue
                # Refuse entries that would escape the target directory.
                # Only whole path components are checked, so that names
                # such as '__main__..py' remain installable.
                if '..' in u_arcname.split('/'):
                    raise DistlibException('invalid entry in '
                                           'wheel: %r' % u_arcname)
                # The signature file won't be in RECORD,
                # and we currently don't do anything with it
                if u_arcname.endswith('/RECORD.jws'):
                    continue
                row = records[u_arcname]
                if row[2] and str(zinfo.file_size) != row[2]:
                    raise DistlibException('size mismatch for '
                                           '%s' % u_arcname)
                if row[1]:
                    kind, value = row[1].split('=', 1)
                    with zf.open(arcname) as bf:
                        data = bf.read()
                    _, digest = self.get_hash(data, kind)
                    if digest != value:
                        raise DistlibException('digest mismatch for '
                                               '%s' % arcname)

                if lib_only and u_arcname.startswith((info_pfx, data_pfx)):
                    logger.debug('lib_only: skipping %s', u_arcname)
                    continue
                is_script = (u_arcname.startswith(script_pfx)
                             and not u_arcname.endswith('.exe'))

                if u_arcname.startswith(data_pfx):
                    # <name>-<ver>.data/<where>/<relative path>
                    _, where, rp = u_arcname.split('/', 2)
                    outfile = os.path.join(paths[where], convert_path(rp))
                else:
                    # meant for site-packages.
                    if u_arcname in (wheel_metadata_name, record_name):
                        continue
                    outfile = os.path.join(libdir, convert_path(u_arcname))
                if not is_script:
                    with zf.open(arcname) as bf:
                        fileop.copy_stream(bf, outfile)
                    outfiles.append(outfile)
                    # Double check the digest of the written file
                    # (kind/digest were set above because row[1] is set)
                    if not dry_run and row[1]:
                        with open(outfile, 'rb') as bf:
                            data = bf.read()
                            _, newdigest = self.get_hash(data, kind)
                            if newdigest != digest:
                                raise DistlibException('digest mismatch '
                                                       'on write for '
                                                       '%s' % outfile)
                    if bc and outfile.endswith('.py'):
                        try:
                            pyc = fileop.byte_compile(outfile)
                            outfiles.append(pyc)
                        except Exception:
                            # Don't give up if byte-compilation fails,
                            # but log it and perhaps warn the user
                            logger.warning('Byte-compilation failed',
                                           exc_info=True)
                else:
                    # Scripts are first copied to the work directory and
                    # then passed through the maker.
                    fn = os.path.basename(convert_path(arcname))
                    workname = os.path.join(workdir, fn)
                    with zf.open(arcname) as bf:
                        fileop.copy_stream(bf, workname)

                    dn, fn = os.path.split(outfile)
                    maker.target_dir = dn
                    filenames = maker.make(fn)
                    fileop.set_executable_mode(filenames)
                    outfiles.extend(filenames)

            if lib_only:
                logger.debug('lib_only: returning None')
                dist = None
            else:
                # Generate scripts

                # Try to get pydist.json so we can see if there are
                # any commands to generate. If this fails (e.g. because
                # of a legacy wheel), log a warning but don't give up.
                commands = None
                file_version = self.info['Wheel-Version']
                if file_version == '1.0':
                    # Use legacy info
                    ep = posixpath.join(info_dir, 'entry_points.txt')
                    try:
                        with zf.open(ep) as bwf:
                            epdata = read_exports(bwf)
                        commands = {}
                        for key in ('console', 'gui'):
                            k = '%s_scripts' % key
                            if k in epdata:
                                commands['wrap_%s' % key] = d = {}
                                for v in epdata[k].values():
                                    s = '%s:%s' % (v.prefix, v.suffix)
                                    if v.flags:
                                        s += ' %s' % v.flags
                                    d[v.name] = s
                    except Exception:
                        logger.warning('Unable to read legacy script '
                                       'metadata, so cannot generate '
                                       'scripts')
                else:
                    try:
                        with zf.open(metadata_name) as bwf:
                            wf = wrapper(bwf)
                            commands = json.load(wf).get('extensions')
                            if commands:
                                commands = commands.get('python.commands')
                    except Exception:
                        logger.warning('Unable to read JSON metadata, so '
                                       'cannot generate scripts')
                if commands:
                    console_scripts = commands.get('wrap_console', {})
                    gui_scripts = commands.get('wrap_gui', {})
                    if console_scripts or gui_scripts:
                        script_dir = paths.get('scripts', '')
                        if not os.path.isdir(script_dir):
                            raise ValueError('Valid script path not '
                                             'specified')
                        maker.target_dir = script_dir
                        for k, v in console_scripts.items():
                            script = '%s = %s' % (k, v)
                            filenames = maker.make(script)
                            fileop.set_executable_mode(filenames)

                        if gui_scripts:
                            options = {'gui': True}
                            for k, v in gui_scripts.items():
                                script = '%s = %s' % (k, v)
                                filenames = maker.make(script, options)
                                fileop.set_executable_mode(filenames)

                p = os.path.join(libdir, info_dir)
                dist = InstalledDistribution(p)

                # Write SHARED
                paths = dict(paths)     # don't change passed in dict
                del paths['purelib']
                del paths['platlib']
                paths['lib'] = libdir
                p = dist.write_shared_locations(paths, dry_run)
                if p:
                    outfiles.append(p)

                # Write RECORD
                dist.write_installed_files(outfiles, paths['prefix'],
                                           dry_run)
            return dist
        except Exception:  # pragma: no cover
            logger.exception('installation failed.')
            fileop.rollback()
            raise
        finally:
            shutil.rmtree(workdir)
|
|
||||||
|
|
||||||
def _get_dylib_cache(self):
    """
    Return the module-wide dylib cache, creating it on first use.

    The cache object is stored in the module-level ``cache`` global. Its
    base directory is ``<cache base>/dylib-cache/<major>.<minor>``.
    """
    global cache
    if cache is None:
        # Use native strings to avoid issues on 2.x: see Python #20140.
        # The version is built from sys.version_info rather than by
        # slicing sys.version, which truncates '3.10' to '3.1'.
        py_version = str('%d.%d' % sys.version_info[:2])
        base = os.path.join(get_cache_base(), str('dylib-cache'),
                            py_version)
        cache = Cache(base)
    return cache
|
|
||||||
|
|
||||||
def _get_extensions(self):
    """
    Extract the extension modules listed in the wheel's EXTENSIONS file.

    EXTENSIONS is read as JSON mapping names to paths inside the archive.
    Each listed file is extracted into the dylib cache when the cached
    copy is missing or older than the archive entry.

    Returns a list of ``(name, cached_path)`` tuples. A ``KeyError``
    raised while processing (for example when the archive has no
    EXTENSIONS entry) is swallowed, and whatever was collected up to that
    point is returned.
    """
    wheel_path = os.path.join(self.dirname, self.filename)
    info_dir = '%s.dist-info' % ('%s-%s' % (self.name, self.version))
    ext_entry = posixpath.join(info_dir, 'EXTENSIONS')
    utf8_reader = codecs.getreader('utf-8')
    found = []
    with ZipFile(wheel_path, 'r') as zf:
        try:
            with zf.open(ext_entry) as raw:
                extensions = json.load(utf8_reader(raw))
                dylib_cache = self._get_dylib_cache()
                cache_base = os.path.join(
                    dylib_cache.base, dylib_cache.prefix_to_dir(wheel_path))
                if not os.path.isdir(cache_base):
                    os.makedirs(cache_base)
                for name, relpath in extensions.items():
                    dest = os.path.join(cache_base, convert_path(relpath))
                    if os.path.exists(dest):
                        # Re-extract only when the archive entry is newer
                        # than the copy already in the cache.
                        cached_at = datetime.datetime.fromtimestamp(
                            os.stat(dest).st_mtime)
                        archived_at = datetime.datetime(
                            *zf.getinfo(relpath).date_time)
                        stale = archived_at > cached_at
                    else:
                        stale = True
                    if stale:
                        zf.extract(relpath, cache_base)
                    found.append((name, dest))
        except KeyError:
            pass
    return found
|
|
||||||
|
|
||||||
def is_compatible(self):
    """
    Report whether this wheel is compatible with the running system.

    Delegates to the module-level ``is_compatible`` function.
    """
    compatible = is_compatible(self)
    return compatible
|
|
||||||
|
|
||||||
def is_mountable(self):
    """
    Report whether the wheel's metadata asserts that it can be mounted.

    Always returns ``True`` for now, as the metadata details are still
    to be decided.
    """
    mountable = True    # for now - metadata details TBD
    return mountable
|
|
||||||
|
|
||||||
def mount(self, append=False):
    """
    Make the wheel importable by adding its absolute path to ``sys.path``.

    :param append: If true, the path is appended to ``sys.path``;
                   otherwise it is inserted at the front.
    :raises DistlibException: if the wheel is not compatible with this
                              Python or is not mountable.

    If the path is already on ``sys.path`` this only logs a debug
    message. Otherwise, when the wheel lists extensions, the import hook
    is added to ``sys.meta_path`` (if absent) and told about them.
    """
    pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
    if not self.is_compatible():
        raise DistlibException(
            'Wheel %s not compatible with this Python.' % pathname)
    if not self.is_mountable():
        raise DistlibException(
            'Wheel %s is marked as not mountable.' % pathname)

    if pathname in sys.path:
        logger.debug('%s already in path', pathname)
        return

    if not append:
        sys.path.insert(0, pathname)
    else:
        sys.path.append(pathname)

    extensions = self._get_extensions()
    if not extensions:
        return
    if _hook not in sys.meta_path:
        sys.meta_path.append(_hook)
    _hook.add(pathname, extensions)
|
|
||||||
|
|
||||||
def unmount(self):
    """
    Undo :meth:`mount` by removing the wheel's path from ``sys.path``.

    If the path is not on ``sys.path`` this only logs a debug message.
    Otherwise the wheel is also removed from the import hook, and the
    hook is taken off ``sys.meta_path`` once it has no wheels left.
    """
    pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
    if pathname not in sys.path:
        logger.debug('%s not in path', pathname)
        return

    sys.path.remove(pathname)
    if pathname in _hook.impure_wheels:
        _hook.remove(pathname)
    # Drop the hook altogether once it no longer serves any wheel.
    if not _hook.impure_wheels and _hook in sys.meta_path:
        sys.meta_path.remove(_hook)
|
|
||||||
|
|
||||||
def verify(self):
    """
    Check the wheel's contents against its RECORD file.

    Every file entry in the archive, apart from ``RECORD.jws``, is looked
    up in RECORD. Its size is compared where RECORD gives one, and its
    digest is recomputed and compared where RECORD gives one.

    :raises DistlibException: if an entry contains a ``..`` path
                              component, or its size or digest does not
                              match RECORD.
    :raises KeyError: if a file in the archive is not listed in RECORD.
    """
    pathname = os.path.join(self.dirname, self.filename)
    name_ver = '%s-%s' % (self.name, self.version)
    info_dir = '%s.dist-info' % name_ver

    wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
    record_name = posixpath.join(info_dir, 'RECORD')

    wrapper = codecs.getreader('utf-8')

    with ZipFile(pathname, 'r') as zf:
        with zf.open(wheel_metadata_name) as bwf:
            wf = wrapper(bwf)
            message = message_from_file(wf)
        wv = message['Wheel-Version'].split('.', 1)
        # Parsing fails with ValueError on a non-numeric Wheel-Version.
        file_version = tuple([int(i) for i in wv])
        # TODO version verification

        # Map each archive path to its RECORD row (path, digest, size).
        records = {}
        with zf.open(record_name) as bf:
            with CSVReader(stream=bf) as reader:
                for row in reader:
                    p = row[0]
                    records[p] = row

        for zinfo in zf.infolist():
            arcname = zinfo.filename
            if isinstance(arcname, text_type):
                u_arcname = arcname
            else:
                u_arcname = arcname.decode('utf-8')
            # Only whole path components are checked: a plain substring
            # test would also reject legitimate file names such as
            # '__main__..py'.
            if '..' in u_arcname.split('/'):
                raise DistlibException('invalid entry in '
                                       'wheel: %r' % u_arcname)

            # Directory entries are not listed in RECORD, so there is
            # nothing to verify for them.
            if u_arcname.endswith('/'):
                continue
            # The signature file won't be in RECORD,
            # and we currently don't do anything with it
            if u_arcname.endswith('/RECORD.jws'):
                continue
            row = records[u_arcname]
            if row[2] and str(zinfo.file_size) != row[2]:
                raise DistlibException('size mismatch for '
                                       '%s' % u_arcname)
            if row[1]:
                kind, value = row[1].split('=', 1)
                with zf.open(arcname) as bf:
                    data = bf.read()
                _, digest = self.get_hash(data, kind)
                if digest != value:
                    raise DistlibException('digest mismatch for '
                                           '%s' % arcname)
|
|
||||||
|
|
||||||
def update(self, modifier, dest_dir=None, **kwargs):
    """
    Update the contents of a wheel in a generic way. The modifier should
    be a callable which expects a dictionary argument: its keys are
    archive-entry paths, and its values are absolute filesystem paths
    where the contents of the corresponding archive entries can be found.
    The modifier is free to change the contents of the files pointed to,
    add new entries and remove entries, before returning. This method will
    extract the entire contents of the wheel to a temporary location, call
    the modifier, and then use the passed (and possibly updated)
    dictionary to write a new wheel. If ``dest_dir`` is specified, the new
    wheel is written there -- otherwise, the original wheel is overwritten.

    Any extra keyword arguments are passed through to the modifier.

    The modifier should return True if it updated the wheel, else False.
    This method returns the same value the modifier returns.

    :raises DistlibException: if an archive entry contains a ``..`` path
                              component, or ``dest_dir`` is given but is
                              not a directory.
    """

    def get_version(path_map, info_dir):
        # Read the version from the metadata file, falling back to
        # PKG-INFO; returns (None, None) when neither is present.
        version = path = None
        key = '%s/%s' % (info_dir, METADATA_FILENAME)
        if key not in path_map:
            key = '%s/PKG-INFO' % info_dir
        if key in path_map:
            path = path_map[key]
            version = Metadata(path=path).version
        return version, path

    def update_version(version, path):
        # Bump the version stored at ``path`` to mark the wheel as changed.
        updated = None
        try:
            # Called only to validate: raises UnsupportedVersionError
            # for versions that are not PEP 440 compliant.
            NormalizedVersion(version)
            # NOTE(review): the suffix is looked for after '-', while the
            # updated version is written with '+' -- confirm that this
            # mismatch is intended.
            i = version.find('-')
            if i < 0:
                updated = '%s+1' % version
            else:
                parts = [int(s) for s in version[i + 1:].split('.')]
                parts[-1] += 1
                updated = '%s+%s' % (version[:i],
                                     '.'.join(str(i) for i in parts))
        except UnsupportedVersionError:
            logger.debug('Cannot update non-compliant (PEP-440) '
                         'version %r', version)
        if updated:
            md = Metadata(path=path)
            md.version = updated
            legacy = not path.endswith(METADATA_FILENAME)
            md.write(path=path, legacy=legacy)
            logger.debug('Version updated from %r to %r', version,
                         updated)

    pathname = os.path.join(self.dirname, self.filename)
    name_ver = '%s-%s' % (self.name, self.version)
    info_dir = '%s.dist-info' % name_ver
    record_name = posixpath.join(info_dir, 'RECORD')
    with tempdir() as workdir:
        with ZipFile(pathname, 'r') as zf:
            path_map = {}
            for zinfo in zf.infolist():
                arcname = zinfo.filename
                if isinstance(arcname, text_type):
                    u_arcname = arcname
                else:
                    u_arcname = arcname.decode('utf-8')
                # RECORD is regenerated below, so it is not extracted.
                if u_arcname == record_name:
                    continue
                # Only whole path components are checked: a plain
                # substring test would also reject legitimate file names
                # such as '__main__..py'.
                if '..' in u_arcname.split('/'):
                    raise DistlibException('invalid entry in '
                                           'wheel: %r' % u_arcname)
                zf.extract(zinfo, workdir)
                path = os.path.join(workdir, convert_path(u_arcname))
                path_map[u_arcname] = path

        # Remember the version.
        original_version, _ = get_version(path_map, info_dir)
        # Files extracted. Call the modifier.
        modified = modifier(path_map, **kwargs)
        if modified:
            # Something changed - need to build a new wheel.
            current_version, path = get_version(path_map, info_dir)
            if current_version and (current_version == original_version):
                # Add or update local version to signify changes.
                update_version(current_version, path)
            # Decide where the new wheel goes.
            if dest_dir is None:
                # Build in the work directory and copy over the original
                # once the new archive is complete.
                fd, newpath = tempfile.mkstemp(suffix='.whl',
                                               prefix='wheel-update-',
                                               dir=workdir)
                os.close(fd)
            else:
                if not os.path.isdir(dest_dir):
                    raise DistlibException('Not a directory: %r' % dest_dir)
                newpath = os.path.join(dest_dir, self.filename)
            archive_paths = list(path_map.items())
            distinfo = os.path.join(workdir, info_dir)
            info = distinfo, info_dir
            self.write_records(info, workdir, archive_paths)
            self.build_zip(newpath, archive_paths)
            if dest_dir is None:
                shutil.copyfile(newpath, pathname)
    return modified
|
|
||||||
|
|
||||||
def compatible_tags():
    """
    Return (pyver, abi, arch) tuples compatible with this Python.

    The result is a set, built from ``VER_SUFFIX``, ``ABI``, ``ARCH`` and
    ``IMP_PREFIX`` plus the ``.abi*`` suffixes reported by
    ``imp.get_suffixes()``.
    """
    # This version first, then every earlier minor version down to 0.
    versions = [VER_SUFFIX]
    major = VER_SUFFIX[0]
    for minor in range(sys.version_info[1] - 1, - 1, -1):
        versions.append(''.join([major, str(minor)]))

    abis = []
    for suffix, _, _ in imp.get_suffixes():
        if suffix.startswith('.abi'):
            abis.append(suffix.split('.', 2)[1])
    abis.sort()
    if ABI != 'none':
        abis.insert(0, ABI)
    abis.append('none')
    result = []

    arches = [ARCH]
    if sys.platform == 'darwin':
        # A raw string keeps \w and \d as regex escapes rather than
        # invalid string escapes, which newer Pythons warn about.
        m = re.match(r'(\w+)_(\d+)_(\d+)_(\w+)$', ARCH)
        if m:
            name, os_major, os_minor, arch = m.groups()
            os_minor = int(os_minor)
            matches = [arch]
            if arch in ('i386', 'ppc'):
                matches.append('fat')
            if arch in ('i386', 'ppc', 'x86_64'):
                matches.append('fat3')
            if arch in ('ppc64', 'x86_64'):
                matches.append('fat64')
            if arch in ('i386', 'x86_64'):
                matches.append('intel')
            if arch in ('i386', 'x86_64', 'intel', 'ppc', 'ppc64'):
                matches.append('universal')
            # Add every matching arch for this and all lower minor
            # versions.
            while os_minor >= 0:
                for match in matches:
                    s = '%s_%s_%s_%s' % (name, os_major, os_minor, match)
                    if s != ARCH:   # already there
                        arches.append(s)
                os_minor -= 1

    # Most specific - our Python version, ABI and arch
    for abi in abis:
        for arch in arches:
            result.append((''.join((IMP_PREFIX, versions[0])), abi, arch))

    # where no ABI / arch dependency, but IMP_PREFIX dependency
    for i, version in enumerate(versions):
        result.append((''.join((IMP_PREFIX, version)), 'none', 'any'))
        if i == 0:
            # also the major-version-only tag
            result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any'))

    # no IMP_PREFIX, ABI or arch dependency
    for i, version in enumerate(versions):
        result.append((''.join(('py', version)), 'none', 'any'))
        if i == 0:
            result.append((''.join(('py', version[0])), 'none', 'any'))
    return set(result)


# Computed once at import time; the helper is then removed from the
# module namespace.
COMPATIBLE_TAGS = compatible_tags()

del compatible_tags
|
|
||||||
|
|
||||||
|
|
||||||
def is_compatible(wheel, tags=None):
    """
    Report whether a wheel matches at least one (pyver, abi, arch) tag.

    :param wheel: A :class:`Wheel` instance; anything else is passed to
                  the :class:`Wheel` constructor (assumed to be a
                  filename).
    :param tags: The tags to test against. Defaults to
                 ``COMPATIBLE_TAGS``.
    :return: ``True`` if some tag has all three of its parts contained
             in the wheel's ``pyver``, ``abi`` and ``arch``, else
             ``False``.
    """
    if not isinstance(wheel, Wheel):
        wheel = Wheel(wheel)    # assume it's a filename
    candidates = COMPATIBLE_TAGS if tags is None else tags
    return any(
        ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch
        for ver, abi, arch in candidates
    )
|
|
||||||
-23
@@ -1,23 +0,0 @@
|
|||||||
"""
|
|
||||||
HTML parsing library based on the WHATWG "HTML5"
|
|
||||||
specification. The parser is designed to be compatible with existing
|
|
||||||
HTML found in the wild and implements well-defined error recovery that
|
|
||||||
is largely compatible with modern desktop web browsers.
|
|
||||||
|
|
||||||
Example usage:
|
|
||||||
|
|
||||||
import html5lib
|
|
||||||
f = open("my_document.html")
|
|
||||||
tree = html5lib.parse(f)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from .html5parser import HTMLParser, parse, parseFragment
|
|
||||||
from .treebuilders import getTreeBuilder
|
|
||||||
from .treewalkers import getTreeWalker
|
|
||||||
from .serializer import serialize
|
|
||||||
|
|
||||||
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
|
||||||
"getTreeWalker", "serialize"]
|
|
||||||
__version__ = "1.0b5"
|
|
||||||
-3102
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
|
|
||||||
class Filter(object):
    """Base class for filters that wrap a source of tokens."""

    def __init__(self, source):
        """Remember ``source``, the object this filter wraps."""
        self.source = source

    def __iter__(self):
        """Return an iterator over the wrapped source."""
        wrapped = self.source
        return iter(wrapped)

    def __getattr__(self, name):
        """Look up attributes missing on the filter on the source."""
        wrapped = self.source
        return getattr(wrapped, name)
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
|
|
||||||
try:
|
|
||||||
from collections import OrderedDict
|
|
||||||
except ImportError:
|
|
||||||
from ordereddict import OrderedDict
|
|
||||||
|
|
||||||
|
|
||||||
class Filter(_base.Filter):
    """Filter that puts the attributes of each tag in sorted order."""

    def __iter__(self):
        """
        Yield every source token, replacing the ``data`` of StartTag and
        EmptyTag tokens with an ``OrderedDict`` sorted by attribute key.
        """
        for token in _base.Filter.__iter__(self):
            if token["type"] in ("StartTag", "EmptyTag"):
                ordered = sorted(token["data"].items(),
                                 key=lambda pair: pair[0])
                token["data"] = OrderedDict(ordered)
            yield token
|
|
||||||
@@ -1,65 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
|
|
||||||
|
|
||||||
class Filter(_base.Filter):
    """Filter that makes the document's meta charset match ``encoding``."""

    def __init__(self, source, encoding):
        """
        :param source: The token source to wrap.
        :param encoding: The encoding name to declare. When it is
                         ``None`` no meta element is ever inserted.
        """
        _base.Filter.__init__(self, source)
        self.encoding = encoding

    def __iter__(self):
        """
        Yield the source tokens with the charset declaration rewritten.

        Tokens seen between the head start tag and its end tag are held
        back, so that a meta element can be inserted right after the head
        start tag when the head declares no charset of its own.
        """
        state = "pre_head"
        meta_found = self.encoding is None
        pending = []

        for token in _base.Filter.__iter__(self):
            kind = token["type"]
            if kind == "StartTag":
                if token["name"].lower() == "head":
                    state = "in_head"

            elif kind == "EmptyTag":
                tag = token["name"].lower()
                if tag == "meta":
                    # replace charset with actual encoding
                    attrs = token["data"]
                    charset_replaced = False
                    is_content_type = False
                    for (namespace, name), value in attrs.items():
                        if namespace is not None:
                            continue
                        if name.lower() == 'charset':
                            attrs[(namespace, name)] = self.encoding
                            charset_replaced = True
                            break
                        if name == 'http-equiv' and value.lower() == 'content-type':
                            is_content_type = True
                    if charset_replaced:
                        meta_found = True
                    elif is_content_type and (None, "content") in attrs:
                        # no charset attribute: rewrite the content
                        # attribute of the http-equiv declaration
                        attrs[(None, "content")] = 'text/html; charset=%s' % self.encoding
                        meta_found = True

                elif tag == "head" and not meta_found:
                    # insert meta into empty head
                    yield {"type": "StartTag", "name": "head",
                           "data": token["data"]}
                    yield {"type": "EmptyTag", "name": "meta",
                           "data": {(None, "charset"): self.encoding}}
                    yield {"type": "EndTag", "name": "head"}
                    meta_found = True
                    continue

            elif kind == "EndTag":
                if token["name"].lower() == "head" and pending:
                    # insert meta into head (if necessary) and flush pending queue
                    yield pending.pop(0)
                    if not meta_found:
                        yield {"type": "EmptyTag", "name": "meta",
                               "data": {(None, "charset"): self.encoding}}
                    while pending:
                        yield pending.pop(0)
                    meta_found = True
                    state = "post_head"

            if state != "in_head":
                yield token
            else:
                pending.append(token)
|
|
||||||
@@ -1,90 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
from ..constants import cdataElements, rcdataElements, voidElements
|
|
||||||
|
|
||||||
from ..constants import spaceCharacters
|
|
||||||
spaceCharacters = "".join(spaceCharacters)
|
|
||||||
|
|
||||||
|
|
||||||
class LintError(Exception):
    """Raised by the lint filter when a token breaks one of its checks."""
|
|
||||||
|
|
||||||
|
|
||||||
class Filter(_base.Filter):
    """Filter that checks each token for consistency and passes it on."""

    def __iter__(self):
        """
        Yield every source token unchanged after validating it.

        Tracks the open elements and the current content model flag
        while iterating.

        :raises LintError: if a token fails one of the checks below.
        :raises IndexError: if an EndTag arrives while no element is open.
        """
        open_elements = []
        contentModelFlag = "PCDATA"
        for token in _base.Filter.__iter__(self):
            token_type = token["type"]
            if token_type in ("StartTag", "EmptyTag"):
                name = token["name"]
                if contentModelFlag != "PCDATA":
                    raise LintError("StartTag not in PCDATA content model flag: %(tag)s" % {"tag": name})
                if not isinstance(name, str):
                    raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
                if not name:
                    raise LintError("Empty tag name")
                if token_type == "StartTag" and name in voidElements:
                    raise LintError("Void element reported as StartTag token: %(tag)s" % {"tag": name})
                elif token_type == "EmptyTag" and name not in voidElements:
                    raise LintError("Non-void element reported as EmptyTag token: %(tag)s" % {"tag": token["name"]})
                if token_type == "StartTag":
                    open_elements.append(name)
                # Separate loop variables keep the tag name intact for
                # the content model checks that follow.
                # NOTE(review): this unpacking expects ``data`` to
                # iterate as (name, value) pairs -- confirm against the
                # token producers.
                for attr_name, attr_value in token["data"]:
                    if not isinstance(attr_name, str):
                        raise LintError("Attribute name is not a string: %(name)r" % {"name": attr_name})
                    if not attr_name:
                        raise LintError("Empty attribute name")
                    if not isinstance(attr_value, str):
                        raise LintError("Attribute value is not a string: %(value)r" % {"value": attr_value})
                # The element just opened decides the content model.
                if name in cdataElements:
                    contentModelFlag = "CDATA"
                elif name in rcdataElements:
                    contentModelFlag = "RCDATA"
                elif name == "plaintext":
                    contentModelFlag = "PLAINTEXT"

            elif token_type == "EndTag":
                name = token["name"]
                if not isinstance(name, str):
                    raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
                if not name:
                    raise LintError("Empty tag name")
                if name in voidElements:
                    raise LintError("Void element reported as EndTag token: %(tag)s" % {"tag": name})
                start_name = open_elements.pop()
                if start_name != name:
                    raise LintError("EndTag (%(end)s) does not match StartTag (%(start)s)" % {"end": name, "start": start_name})
                contentModelFlag = "PCDATA"

            elif token_type == "Comment":
                if contentModelFlag != "PCDATA":
                    raise LintError("Comment not in PCDATA content model flag")

            elif token_type in ("Characters", "SpaceCharacters"):
                data = token["data"]
                if not isinstance(data, str):
                    raise LintError("Attribute name is not a string: %(name)r" % {"name": data})
                if not data:
                    raise LintError("%(type)s token with empty data" % {"type": token_type})
                if token_type == "SpaceCharacters":
                    # Nothing may remain once space characters are
                    # stripped.
                    data = data.strip(spaceCharacters)
                    if data:
                        raise LintError("Non-space character(s) found in SpaceCharacters token: %(token)r" % {"token": data})

            elif token_type == "Doctype":
                name = token["name"]
                if contentModelFlag != "PCDATA":
                    raise LintError("Doctype not in PCDATA content model flag: %(name)s" % {"name": name})
                if not isinstance(name, str):
                    raise LintError("Tag name is not a string: %(tag)r" % {"tag": name})
                # XXX: what to do with token["data"] ?

            elif token_type in ("ParseError", "SerializeError"):
                pass

            else:
                raise LintError("Unknown token type: %(type)s" % {"type": token_type})

            yield token
|
|
||||||
@@ -1,205 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
|
|
||||||
|
|
||||||
class Filter(_base.Filter):
|
|
||||||
def slider(self):
    """
    Yield ``(previous, current, next)`` triples over ``self.source``.

    ``previous`` is ``None`` for the first token and ``next`` is ``None``
    for the last one. Nothing is yielded for an empty source.
    """
    previous1 = previous2 = None
    for token in self.source:
        if previous1 is not None:
            yield previous2, previous1, token
        previous2 = previous1
        previous1 = token
    # Emit the final token only if there was one: an empty source must
    # not produce a (None, None, None) triple for consumers to trip over.
    if previous1 is not None:
        yield previous2, previous1, None
|
|
||||||
|
|
||||||
def __iter__(self):
    """
    Yield the source tokens, leaving out optional start and end tags.

    A StartTag is kept when it has attribute data or
    ``is_optional_start`` says it is required. An EndTag is kept when
    ``is_optional_end`` says it is required. Every other token is kept.
    """
    for before, token, after in self.slider():
        kind = token["type"]
        if kind == "StartTag":
            keep = bool(token["data"]) or not self.is_optional_start(
                token["name"], before, after)
        elif kind == "EndTag":
            keep = not self.is_optional_end(token["name"], after)
        else:
            keep = True
        if keep:
            yield token
|
|
||||||
|
|
||||||
def is_optional_start(self, tagname, previous, next):
    """Return True when the start tag of ``tagname`` may be omitted.

    ``previous`` and ``next`` are the tokens immediately before and after
    the start tag (each a token dict, or ``None`` at the stream edges).
    Only html, head, body, colgroup and tbody start tags are ever
    considered optional; any other tag name returns False.
    """
    next_type = (next["type"] or None) if next else None
    # Exact comparison on purpose: a substring test against 'html' would
    # also accept names such as '', 'h', 'tm' or 'ml'.
    if tagname == 'html':
        # An html element's start tag may be omitted if the first thing
        # inside the html element is not a space character or a comment.
        return next_type not in ("Comment", "SpaceCharacters")
    elif tagname == 'head':
        # A head element's start tag may be omitted if the first thing
        # inside the head element is an element.
        # XXX: we also omit the start tag if the head element is empty
        if next_type in ("StartTag", "EmptyTag"):
            return True
        elif next_type == "EndTag":
            return next["name"] == "head"
    elif tagname == 'body':
        # A body element's start tag may be omitted if the first thing
        # inside the body element is not a space character or a comment,
        # except if the first thing inside the body element is a script
        # or style element and the node immediately preceding the body
        # element is a head element whose end tag has been omitted.
        if next_type in ("Comment", "SpaceCharacters"):
            return False
        elif next_type == "StartTag":
            # XXX: we do not look at the preceding event, so we never omit
            # the body element's start tag if it's followed by a script or
            # a style element.
            return next["name"] not in ('script', 'style')
        else:
            return True
    elif tagname == 'colgroup':
        # A colgroup element's start tag may be omitted if the first thing
        # inside the colgroup element is a col element, and if the element
        # is not immediately preceded by another colgroup element whose
        # end tag has been omitted.
        if next_type in ("StartTag", "EmptyTag"):
            # XXX: we do not look at the preceding event, so instead we never
            # omit the colgroup element's end tag when it is immediately
            # followed by another colgroup element. See is_optional_end.
            return next["name"] == "col"
        else:
            return False
    elif tagname == 'tbody':
        # A tbody element's start tag may be omitted if the first thing
        # inside the tbody element is a tr element, and if the element is
        # not immediately preceded by a tbody, thead, or tfoot element
        # whose end tag has been omitted.
        if next_type == "StartTag":
            # Keep the start tag whenever the preceding token closes a
            # tbody, thead or tfoot, because that end tag may itself be
            # omitted when a tbody follows. See is_optional_end.
            if previous and previous['type'] == 'EndTag' and \
                    previous['name'] in ('tbody', 'thead', 'tfoot'):
                return False
            return next["name"] == 'tr'
        else:
            return False
    return False
|
|
||||||
|
|
||||||
def is_optional_end(self, tagname, next):
    """Return True when the end tag of ``tagname`` may be omitted.

    ``next`` is the token immediately following the end tag (a token dict,
    or ``None`` when the stream ends).  Tag names not handled below always
    return False.
    """
    following = (next["type"] or None) if next else None
    # "No more content in the parent element": the parent closes next, or
    # the stream is exhausted.
    parent_ends = following == "EndTag" or following is None
    opens_element = following == "StartTag"

    if tagname in ('html', 'head', 'body'):
        # Omissible unless a space character or a comment comes next.
        return following not in ("Comment", "SpaceCharacters")

    if tagname in ('li', 'optgroup', 'tr'):
        # Omissible when followed by another element of the same kind, or
        # when nothing more follows in the parent.
        if opens_element:
            return next["name"] == tagname
        return parent_ends

    if tagname in ('dt', 'dd'):
        # Either may be dropped before another dt/dd; only dd may also be
        # dropped at the end of its parent.
        if opens_element:
            return next["name"] in ('dt', 'dd')
        return tagname == 'dd' and parent_ends

    if tagname == 'p':
        # Omissible before any of the block-level elements listed here, or
        # when nothing more follows in the parent.
        if following in ("StartTag", "EmptyTag"):
            return next["name"] in ('address', 'article', 'aside',
                                    'blockquote', 'datagrid', 'dialog',
                                    'dir', 'div', 'dl', 'fieldset', 'footer',
                                    'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                                    'header', 'hr', 'menu', 'nav', 'ol',
                                    'p', 'pre', 'section', 'table', 'ul')
        return parent_ends

    if tagname == 'option':
        # Omissible before another option or an optgroup, or at the end of
        # the parent.
        if opens_element:
            return next["name"] in ('option', 'optgroup')
        return parent_ends

    if tagname in ('rt', 'rp'):
        # Omissible before another rt/rp, or at the end of the parent.
        if opens_element:
            return next["name"] in ('rt', 'rp')
        return parent_ends

    if tagname == 'colgroup':
        # Omissible unless a space character or a comment comes next; it is
        # also kept when another colgroup starts right after it
        # (see is_optional_start).
        if following in ("Comment", "SpaceCharacters"):
            return False
        if opens_element:
            return next["name"] != 'colgroup'
        return True

    if tagname in ('thead', 'tbody'):
        # Both may be dropped before a tbody or tfoot start tag; only tbody
        # may also be dropped at the end of its parent.
        if opens_element:
            return next["name"] in ['tbody', 'tfoot']
        return tagname == 'tbody' and parent_ends

    if tagname == 'tfoot':
        # Omissible before a tbody start tag, or at the end of the parent.
        if opens_element:
            return next["name"] == 'tbody'
        return parent_ends

    if tagname in ('td', 'th'):
        # Omissible before another td/th, or at the end of the parent.
        if opens_element:
            return next["name"] in ('td', 'th')
        return parent_ends

    return False
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
from ..sanitizer import HTMLSanitizerMixin
|
|
||||||
|
|
||||||
|
|
||||||
class Filter(_base.Filter, HTMLSanitizerMixin):
    """Token filter that runs every upstream token through ``sanitize_token``."""

    def __iter__(self):
        """Yield each sanitized token, skipping those that come back falsy."""
        # sanitize_token is presumably supplied by HTMLSanitizerMixin; it is
        # not defined in this file.
        for raw in _base.Filter.__iter__(self):
            cleaned = self.sanitize_token(raw)
            if not cleaned:
                continue
            yield cleaned
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
from ..constants import rcdataElements, spaceCharacters
|
|
||||||
spaceCharacters = "".join(spaceCharacters)
|
|
||||||
|
|
||||||
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
|
|
||||||
|
|
||||||
|
|
||||||
class Filter(_base.Filter):
    """Token filter that collapses whitespace outside space-preserving elements."""

    # Elements inside which whitespace is left exactly as it is.
    spacePreserveElements = frozenset(rcdataElements) | frozenset(["pre", "textarea"])

    def __iter__(self):
        """Yield every upstream token, rewriting whitespace data in place.

        While no space-preserving element is open, a non-empty
        ``SpaceCharacters`` token is reduced to a single space and a
        ``Characters`` token is passed through ``collapse_spaces``.
        """
        # Number of start tags opened since entering a preserving element;
        # zero means whitespace may be collapsed.
        depth = 0
        for token in _base.Filter.__iter__(self):
            kind = token["type"]
            if kind == "StartTag" and (depth or token["name"] in self.spacePreserveElements):
                depth += 1
            elif kind == "EndTag" and depth:
                depth -= 1
            elif not depth:
                if kind == "SpaceCharacters" and token["data"]:
                    # Only non-empty data is rewritten, so no space is
                    # introduced where there was none.
                    token["data"] = " "
                elif kind == "Characters":
                    token["data"] = collapse_spaces(token["data"])
            yield token
|
|
||||||
|
|
||||||
|
|
||||||
def collapse_spaces(text):
    """Return ``text`` with every ``SPACES_REGEX`` match replaced by one space."""
    collapsed = SPACES_REGEX.sub(' ', text)
    return collapsed
|
|
||||||
-2724
File diff suppressed because it is too large
Load Diff
-285
@@ -1,285 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
from .constants import DataLossWarning
|
|
||||||
|
|
||||||
baseChar = """
|
|
||||||
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
|
|
||||||
[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
|
|
||||||
[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
|
|
||||||
[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
|
|
||||||
[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
|
|
||||||
[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
|
|
||||||
[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
|
|
||||||
[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
|
|
||||||
[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
|
|
||||||
[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
|
|
||||||
[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
|
|
||||||
[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
|
|
||||||
[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
|
|
||||||
[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
|
|
||||||
[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
|
|
||||||
[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
|
|
||||||
[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
|
|
||||||
[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
|
|
||||||
[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
|
|
||||||
[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
|
|
||||||
[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
|
|
||||||
[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
|
|
||||||
[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
|
|
||||||
[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
|
|
||||||
[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
|
|
||||||
[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
|
|
||||||
[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
|
|
||||||
[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
|
|
||||||
[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
|
|
||||||
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
|
|
||||||
#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
|
|
||||||
#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
|
|
||||||
#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
|
|
||||||
[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
|
|
||||||
[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
|
|
||||||
#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
|
|
||||||
[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
|
|
||||||
[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
|
|
||||||
[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
|
|
||||||
[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
|
|
||||||
[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
|
|
||||||
#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
|
|
||||||
[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
|
|
||||||
[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
|
|
||||||
[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
|
|
||||||
[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
|
|
||||||
|
|
||||||
ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
|
|
||||||
|
|
||||||
combiningCharacter = """
|
|
||||||
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
|
|
||||||
[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
|
|
||||||
[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
|
|
||||||
[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
|
|
||||||
#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
|
|
||||||
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
|
|
||||||
[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
|
|
||||||
#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
|
|
||||||
[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
|
|
||||||
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
|
|
||||||
#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
|
|
||||||
[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
|
|
||||||
[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
|
|
||||||
[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
|
|
||||||
[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
|
|
||||||
[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
|
|
||||||
#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
|
|
||||||
[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
|
|
||||||
#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
|
|
||||||
[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
|
|
||||||
[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
|
|
||||||
#x3099 | #x309A"""
|
|
||||||
|
|
||||||
digit = """
|
|
||||||
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
|
|
||||||
[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
|
|
||||||
[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
|
|
||||||
[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
|
|
||||||
|
|
||||||
extender = """
|
|
||||||
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
|
|
||||||
#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
|
|
||||||
|
|
||||||
letter = " | ".join([baseChar, ideographic])
|
|
||||||
|
|
||||||
# Without the
|
|
||||||
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
|
|
||||||
extender])
|
|
||||||
nameFirst = " | ".join([letter, "_"])
|
|
||||||
|
|
||||||
reChar = re.compile(r"#x([\d|A-F]{4,4})")
|
|
||||||
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
|
|
||||||
|
|
||||||
|
|
||||||
def charStringToList(chars):
    """Parse a ``" | "``-separated character specification into ranges.

    Each piece is matched against ``reChar`` (a single ``#xNNNN`` code
    point) and ``reCharRange`` (``[#xNNNN-#xNNNN]``); a piece that matches
    neither must be a single literal character.  The resulting
    ``[start, end]`` pairs are returned after ``normaliseCharList``.
    """
    ranges = []
    for piece in chars.split(" | "):
        piece = piece.strip()
        for pattern in (reChar, reCharRange):
            found = pattern.match(piece)
            if found is None:
                continue
            bounds = [hexToInt(group) for group in found.groups()]
            if len(bounds) == 1:
                # A single code point becomes a one-element range.
                bounds = bounds * 2
            ranges.append(bounds)
            break
        else:
            # Neither pattern matched: treat the piece as a literal character.
            assert len(piece) == 1
            ranges.append([ord(piece)] * 2)
    return normaliseCharList(ranges)
|
|
||||||
|
|
||||||
|
|
||||||
def normaliseCharList(charList):
    """Return ``charList`` sorted, with overlapping or adjacent ranges merged.

    ``charList`` is a sequence of ``[start, end]`` pairs (inclusive bounds).
    The result is a new list of new ``[start, end]`` lists; the caller's
    pairs are not modified.

    When a range is merged into its predecessor the end is only ever
    extended: a range fully contained in an earlier one (for example
    ``[2, 3]`` after ``[1, 10]``) no longer shrinks the merged range.
    """
    ordered = sorted(charList)
    for item in ordered:
        assert item[1] >= item[0]
    merged = []
    for item in ordered:
        if merged and item[0] <= merged[-1][1] + 1:
            # Overlaps or touches the previous range: widen it if needed.
            merged[-1][1] = max(merged[-1][1], item[1])
        else:
            # Copy so that later widening never aliases the input pair.
            merged.append([item[0], item[1]])
    return merged
|
|
||||||
|
|
||||||
# We don't really support characters above the BMP :(
|
|
||||||
max_unicode = int("FFFF", 16)
|
|
||||||
|
|
||||||
|
|
||||||
def missingRanges(charList, maxCodepoint=None):
    """Return the ``[start, end]`` ranges of ``0..maxCodepoint`` not in ``charList``.

    ``charList`` is expected to be sorted ``[start, end]`` pairs with
    inclusive bounds, such as the output of ``normaliseCharList``.
    ``maxCodepoint`` defaults to the module-level ``max_unicode``.

    Compared with the previous implementation this no longer emits an
    invalid ``[0, -1]`` range when the covered set starts at 0, never emits
    empty ranges, and returns the whole span for an empty ``charList``.
    """
    if maxCodepoint is None:
        maxCodepoint = max_unicode
    gaps = []
    # First code point not yet known to be covered.
    cursor = 0
    for item in charList:
        if item[0] > cursor:
            gaps.append([cursor, item[0] - 1])
        cursor = max(cursor, item[1] + 1)
    if cursor <= maxCodepoint:
        gaps.append([cursor, maxCodepoint])
    return gaps
|
|
||||||
|
|
||||||
|
|
||||||
def listToRegexpStr(charList):
    """Render ``[start, end]`` code-point pairs as a regex character class.

    A pair with equal bounds becomes a single escaped character; any other
    pair becomes an escaped ``start-end`` range.  The pieces are
    concatenated inside ``[...]``.
    """
    def render(bounds):
        low, high = bounds[0], bounds[1]
        if low == high:
            return escapeRegexp(chr(low))
        return escapeRegexp(chr(low)) + "-" + escapeRegexp(chr(high))

    return "[%s]" % "".join([render(item) for item in charList])
|
|
||||||
|
|
||||||
|
|
||||||
def hexToInt(hex_str):
    """Return the integer value of the hexadecimal string ``hex_str``."""
    base = 16
    return int(hex_str, base)
|
|
||||||
|
|
||||||
|
|
||||||
def escapeRegexp(string):
    """Return ``string`` with regex metacharacters backslash-escaped.

    The escaped characters are ``. ^ $ * + ? { } [ ] | ( ) -`` and the
    backslash itself.  The backslash was previously left alone, so a
    literal ``\\`` would have come out as an escape prefix in the
    generated pattern.  The string is processed in a single pass, so the
    backslashes added here are never escaped a second time.
    """
    specialCharacters = frozenset((".", "^", "$", "*", "+", "?", "{", "}",
                                   "[", "]", "|", "(", ")", "-", "\\"))
    return "".join(["\\" + char if char in specialCharacters else char
                    for char in string])
|
|
||||||
|
|
||||||
# output from the above
|
|
||||||
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
|
|
||||||
|
|
||||||
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
|
|
||||||
|
|
||||||
# Simpler things
|
|
||||||
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]")
|
|
||||||
|
|
||||||
|
|
||||||
class InfosetFilter(object):
    """Coerce names, comments, text and public ids into XML-acceptable forms.

    Characters that are not allowed are replaced by an escape of the form
    ``UXXXXX`` (``U`` followed by five upper-case hex digits); see
    ``escapeChar`` / ``unescapeChar``.  Every lossy change is reported
    through ``warnings.warn`` with ``DataLossWarning``.
    """

    # Matches the escapes produced by escapeChar.
    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")

    def __init__(self, replaceChars=None,
                 dropXmlnsLocalName=False,
                 dropXmlnsAttrNs=False,
                 preventDoubleDashComments=False,
                 preventDashAtCommentEnd=False,
                 replaceFormFeedCharacters=True,
                 preventSingleQuotePubid=False):
        """Store the coercion switches.

        ``replaceChars`` is accepted for compatibility but is not used by
        this class.  The remaining flags enable the matching behaviour in
        the ``coerce*`` methods.
        """
        self.dropXmlnsLocalName = dropXmlnsLocalName
        self.dropXmlnsAttrNs = dropXmlnsAttrNs

        self.preventDoubleDashComments = preventDoubleDashComments
        self.preventDashAtCommentEnd = preventDashAtCommentEnd

        self.replaceFormFeedCharacters = replaceFormFeedCharacters

        self.preventSingleQuotePubid = preventSingleQuotePubid

        # Maps an already-escaped character to its UXXXXX replacement.
        self.replaceCache = {}

    def coerceAttribute(self, name, namespace=None):
        """Return an XML-safe attribute name, or None when it is dropped.

        The attribute is dropped (with a warning) when ``dropXmlnsLocalName``
        is set and the name starts with ``xmlns:``, or when
        ``dropXmlnsAttrNs`` is set and ``namespace`` is the xmlns namespace.
        """
        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
            warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
            return None
        elif (self.dropXmlnsAttrNs and
              namespace == "http://www.w3.org/2000/xmlns/"):
            warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
            return None
        else:
            return self.toXmlName(name)

    def coerceElement(self, name, namespace=None):
        """Return an XML-safe element name; ``namespace`` is ignored."""
        return self.toXmlName(name)

    def coerceComment(self, data):
        """Return comment text adjusted according to the comment flags.

        With ``preventDoubleDashComments`` every ``--`` is rewritten to
        ``- -``.  With ``preventDashAtCommentEnd`` a comment ending in ``-``
        gets a trailing space appended; this flag was previously stored but
        never acted upon.
        """
        if self.preventDoubleDashComments:
            while "--" in data:
                warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                data = data.replace("--", "- -")
        if self.preventDashAtCommentEnd and data.endswith("-"):
            warnings.warn("Comments cannot end in a dash", DataLossWarning)
            data += " "
        return data

    def coerceCharacters(self, data):
        """Return text with U+000C replaced by a space when enabled.

        One warning is issued per form feed found.
        """
        if self.replaceFormFeedCharacters:
            for i in range(data.count("\x0C")):
                warnings.warn("Text cannot contain U+000C", DataLossWarning)
            data = data.replace("\x0C", " ")
        # Other non-xml characters
        return data

    def coercePubid(self, data):
        """Return a public id with disallowed characters escaped.

        Characters matched by ``nonPubidCharRegexp`` are replaced by their
        ``UXXXXX`` escape; with ``preventSingleQuotePubid`` single quotes
        are escaped as well.
        """
        dataOutput = data
        for char in nonPubidCharRegexp.findall(data):
            warnings.warn("Coercing non-XML pubid", DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            dataOutput = dataOutput.replace(char, replacement)
        if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
            warnings.warn("Pubid cannot contain single quote", DataLossWarning)
            dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
        return dataOutput

    def toXmlName(self, name):
        """Return ``name`` with characters not valid in an XML name escaped.

        The first character is checked against ``nonXmlNameFirstBMPRegexp``
        and the remainder against ``nonXmlNameBMPRegexp``.  ``name`` must be
        non-empty: the first character is read unconditionally.
        """
        nameFirst = name[0]
        nameRest = name[1:]
        m = nonXmlNameFirstBMPRegexp.match(nameFirst)
        if m:
            warnings.warn("Coercing non-XML name", DataLossWarning)
            nameFirstOutput = self.getReplacementCharacter(nameFirst)
        else:
            nameFirstOutput = nameFirst

        nameRestOutput = nameRest
        # De-duplicate so each offending character is warned about and
        # replaced once.
        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
        for char in replaceChars:
            warnings.warn("Coercing non-XML name", DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            nameRestOutput = nameRestOutput.replace(char, replacement)
        return nameFirstOutput + nameRestOutput

    def getReplacementCharacter(self, char):
        """Return the ``UXXXXX`` escape for ``char``, using the cache if possible."""
        if char in self.replaceCache:
            replacement = self.replaceCache[char]
        else:
            replacement = self.escapeChar(char)
        return replacement

    def fromXmlName(self, name):
        """Return ``name`` with every ``UXXXXX`` escape turned back into its character."""
        for item in set(self.replacementRegexp.findall(name)):
            name = name.replace(item, self.unescapeChar(item))
        return name

    def escapeChar(self, char):
        """Return the ``UXXXXX`` escape for ``char`` and remember it in the cache."""
        replacement = "U%05X" % ord(char)
        self.replaceCache[char] = replacement
        return replacement

    def unescapeChar(self, charcode):
        """Return the character encoded by a ``UXXXXX`` escape string."""
        return chr(int(charcode[1:], 16))
|
|
||||||
-903
@@ -1,903 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
from pip._vendor.six import text_type
|
|
||||||
from pip._vendor.six.moves import http_client
|
|
||||||
|
|
||||||
import codecs
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
|
||||||
from .constants import encodings, ReparseException
|
|
||||||
from . import utils
|
|
||||||
|
|
||||||
from io import StringIO
|
|
||||||
|
|
||||||
try:
|
|
||||||
from io import BytesIO
|
|
||||||
except ImportError:
|
|
||||||
BytesIO = StringIO
|
|
||||||
|
|
||||||
try:
|
|
||||||
from io import BufferedIOBase
|
|
||||||
except ImportError:
|
|
||||||
class BufferedIOBase(object):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Non-unicode versions of constants for use in the pre-parser
|
|
||||||
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
|
||||||
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
|
||||||
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
|
|
||||||
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
|
|
||||||
|
|
||||||
|
|
||||||
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"
|
|
||||||
|
|
||||||
if utils.supports_lone_surrogates:
|
|
||||||
# Use one extra step of indirection and create surrogates with
|
|
||||||
# unichr. Not using this indirection would introduce an illegal
|
|
||||||
# unicode literal on platforms not supporting such lone
|
|
||||||
# surrogates.
|
|
||||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate +
|
|
||||||
eval('"\\uD800-\\uDFFF"'))
|
|
||||||
else:
|
|
||||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
|
||||||
|
|
||||||
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
|
||||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
|
||||||
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
|
||||||
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
|
||||||
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
|
||||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
|
||||||
0x10FFFE, 0x10FFFF])
|
|
||||||
|
|
||||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
|
|
||||||
|
|
||||||
# Cache for charsUntil()
|
|
||||||
charsUntilRegEx = {}
|
|
||||||
|
|
||||||
|
|
||||||
class BufferedStream(object):
    """Buffering for streams that do not have buffering of their own

    Everything read from the wrapped stream is kept, so that seek() can
    rewind to any position that has already been read.

    The buffer is implemented as a list of chunks on the assumption that
    joining many strings will be slow since it is O(n**2)
    """

    def __init__(self, stream):
        """Wrap *stream*, an object with a read(size) method."""
        self.stream = stream
        self.buffer = []
        # [chunk number, offset within that chunk]; the chunk number is -1
        # until the first chunk has been read.
        self.position = [-1, 0]

    def tell(self):
        """Return the current position as an offset from the stream start."""
        # max() keeps the "nothing read yet" index of -1 from being
        # interpreted as "all chunks but the last".
        consumed = self.buffer[:max(self.position[0], 0)]
        return sum([len(chunk) for chunk in consumed]) + self.position[1]

    def seek(self, pos):
        """Move to absolute offset *pos*, which must already be buffered."""
        assert pos <= self._bufferedBytes()
        if not self.buffer:
            # Nothing has been read yet, so there is no chunk to index into
            # and the only reachable position is the start.
            self.position = [-1, 0]
            return
        offset = pos
        i = 0
        while len(self.buffer[i]) < offset:
            offset -= len(self.buffer[i])
            i += 1
        self.position = [i, offset]

    def read(self, bytes):
        """Read up to *bytes* bytes, from the buffer first, then the stream."""
        if not self.buffer:
            return self._readStream(bytes)
        elif (self.position[0] == len(self.buffer) - 1 and
              self.position[1] == len(self.buffer[-1])):
            # Positioned at the very end of the last chunk: nothing buffered
            # is left to serve, so go straight to the underlying stream.
            return self._readStream(bytes)
        else:
            return self._readFromBuffer(bytes)

    def _bufferedBytes(self):
        """Return the total number of bytes held in the buffer."""
        return sum([len(item) for item in self.buffer])

    def _readStream(self, bytes):
        """Read from the underlying stream and remember the data."""
        data = self.stream.read(bytes)
        self.buffer.append(data)
        self.position[0] += 1
        self.position[1] = len(data)
        return data

    def _readFromBuffer(self, bytes):
        """Serve a read from buffered chunks, topping up from the stream."""
        remainingBytes = bytes
        rv = []
        bufferIndex = self.position[0]
        bufferOffset = self.position[1]
        while bufferIndex < len(self.buffer) and remainingBytes != 0:
            assert remainingBytes > 0
            bufferedData = self.buffer[bufferIndex]

            if remainingBytes <= len(bufferedData) - bufferOffset:
                # The request ends inside this chunk.
                bytesToRead = remainingBytes
                self.position = [bufferIndex, bufferOffset + bytesToRead]
            else:
                # Take the rest of this chunk and move on to the next one.
                bytesToRead = len(bufferedData) - bufferOffset
                self.position = [bufferIndex, len(bufferedData)]
                bufferIndex += 1
            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
            remainingBytes -= bytesToRead

            bufferOffset = 0

        if remainingBytes:
            rv.append(self._readStream(remainingBytes))

        return b"".join(rv)
|
|
||||||
|
|
||||||
|
|
||||||
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
    """Return an input stream object suited to the type of *source*.

    Text sources (unicode strings, or file-like objects whose read()
    returns text) get an HTMLUnicodeInputStream; everything else gets an
    HTMLBinaryInputStream, to which encoding, parseMeta and chardet are
    forwarded.

    Raises TypeError if an encoding is given together with a text source.
    """
    if isinstance(source, http_client.HTTPResponse):
        # Work around Python bug #20007: read(0) closes the connection.
        # http://bugs.python.org/issue20007
        isText = False
    else:
        # For file-like objects a zero-length read reveals the data type
        # without consuming anything.
        sample = source.read(0) if hasattr(source, "read") else source
        isText = isinstance(sample, text_type)

    if not isText:
        return HTMLBinaryInputStream(source, encoding, parseMeta, chardet)

    if encoding is not None:
        raise TypeError("Cannot explicitly set an encoding with a unicode string")
    return HTMLUnicodeInputStream(source)
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLUnicodeInputStream(object):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of replacing lone surrogates, normalising
    newlines, recording invalid code points in self.errors, and also
    provides column and line tracking.

    """

    _defaultChunkSize = 10240

    def __init__(self, source):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-like object (anything with a read
        method) or a unicode string.

        """

        if not utils.supports_lone_surrogates:
            # Such platforms will have already checked for such
            # surrogate errors, so no need to do this checking.
            self.reportCharacterErrors = None
            self.replaceCharactersRegexp = None
        elif len("\U0010FFFF") == 1:
            # One string element per code point.
            self.reportCharacterErrors = self.characterErrorsUCS4
            self.replaceCharactersRegexp = re.compile(eval('"[\\uD800-\\uDFFF]"'))
        else:
            # Non-BMP characters are stored as surrogate pairs, so only
            # unpaired surrogates may be replaced.
            self.reportCharacterErrors = self.characterErrorsUCS2
            self.replaceCharactersRegexp = re.compile(
                eval('"([\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|(?<![\\uD800-\\uDBFF])[\\uDC00-\\uDFFF])"'))

        # List of where new lines occur
        self.newLines = [0]

        self.charEncoding = ("utf-8", "certain")
        self.dataStream = self.openStream(source)

        self.reset()

    def reset(self):
        """Discard the current chunk, the error list and position tracking."""
        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0
        self.errors = []

        # number of (complete) lines in previous chunks
        self.prevNumLines = 0
        # number of columns in the last line of the previous chunk
        self.prevNumCols = 0

        # Deal with CR LF and surrogates split over chunk boundaries
        self._bufferedCharacter = None

    def openStream(self, source):
        """Produces a file object from source.

        source can be either a file object or a string; a string is
        wrapped in a StringIO.

        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = StringIO(source)

        return stream

    def _position(self, offset):
        """Return the zero-based (line, column) of *offset* in the chunk."""
        chunk = self.chunk
        nLines = chunk.count('\n', 0, offset)
        positionLine = self.prevNumLines + nLines
        lastLinePos = chunk.rfind('\n', 0, offset)
        if lastLinePos == -1:
            # Still on the line that was open when this chunk started.
            positionColumn = self.prevNumCols + offset
        else:
            positionColumn = offset - (lastLinePos + 1)
        return (positionLine, positionColumn)

    def position(self):
        """Returns (line, col) of the current position in the stream."""
        line, col = self._position(self.chunkOffset)
        return (line + 1, col)

    def char(self):
        """ Read one character from the stream or queue if available. Return
            EOF when EOF is reached.
        """
        # Read a new chunk from the input stream if necessary
        if self.chunkOffset >= self.chunkSize:
            if not self.readChunk():
                return EOF

        chunkOffset = self.chunkOffset
        char = self.chunk[chunkOffset]
        self.chunkOffset = chunkOffset + 1

        return char

    def readChunk(self, chunkSize=None):
        """Load the next chunk from the data stream into self.chunk.

        Returns False when the stream is exhausted, True otherwise.
        """
        if chunkSize is None:
            chunkSize = self._defaultChunkSize

        # Carry the line/column bookkeeping over from the chunk being dropped.
        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)

        self.chunk = ""
        self.chunkSize = 0
        self.chunkOffset = 0

        data = self.dataStream.read(chunkSize)

        # Deal with CR LF and surrogates broken across chunks
        if self._bufferedCharacter:
            data = self._bufferedCharacter + data
            self._bufferedCharacter = None
        elif not data:
            # We have no more data, bye-bye stream
            return False

        if len(data) > 1:
            lastv = ord(data[-1])
            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
                # A trailing CR or high surrogate may be completed by the
                # first character of the next chunk; hold it back.
                self._bufferedCharacter = data[-1]
                data = data[:-1]

        if self.reportCharacterErrors:
            self.reportCharacterErrors(data)

        # Replace invalid characters
        # Note U+0000 is dealt with in the tokenizer
        # (the regexp is None on platforms without lone surrogates)
        if self.replaceCharactersRegexp is not None:
            data = self.replaceCharactersRegexp.sub("\ufffd", data)

        data = data.replace("\r\n", "\n")
        data = data.replace("\r", "\n")

        self.chunk = data
        self.chunkSize = len(data)

        return True

    def characterErrorsUCS4(self, data):
        """Record one "invalid-codepoint" error per invalid character."""
        count = len(invalid_unicode_re.findall(data))
        self.errors.extend(["invalid-codepoint"] * count)

    def characterErrorsUCS2(self, data):
        """Record "invalid-codepoint" errors where non-BMP characters are
        stored as surrogate pairs.

        A valid pair is decoded and only reported if the resulting code
        point is in non_bmp_invalid_codepoints.
        """
        # End offset of the most recently handled surrogate pair. Tracking
        # the offset (rather than a flag that is never cleared) ensures only
        # the low half of that pair is skipped, not every later match.
        pairEnd = 0
        for match in invalid_unicode_re.finditer(data):
            pos = match.start()
            if pos < pairEnd:
                # Low half of a pair that was already handled as a whole.
                continue
            # Pretty sure there should be endianness issues here
            if utils.isSurrogatePair(data[pos:pos + 2]):
                # We have a surrogate pair!
                char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
                if char_val in non_bmp_invalid_codepoints:
                    self.errors.append("invalid-codepoint")
                pairEnd = pos + 2
            else:
                # A lone surrogate or any other invalid character.
                self.errors.append("invalid-codepoint")

    def charsUntil(self, characters, opposite=False):
        """ Returns a string of characters from the stream up to but not
        including any character in 'characters' or EOF. 'characters' must be
        a container that supports the 'in' method and iteration over its
        characters.

        If 'opposite' is true the match is inverted: characters are
        consumed for as long as they ARE in 'characters'.
        """

        # Use a cache of regexps to find the required characters
        try:
            chars = charsUntilRegEx[(characters, opposite)]
        except KeyError:
            if __debug__:
                for c in characters:
                    assert(ord(c) < 128)
            regex = "".join(["\\x%02x" % ord(c) for c in characters])
            if not opposite:
                regex = "^%s" % regex
            chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)

        rv = []

        while True:
            # Find the longest matching prefix
            m = chars.match(self.chunk, self.chunkOffset)
            if m is None:
                # If nothing matched, and it wasn't because we ran out of chunk,
                # then stop
                if self.chunkOffset != self.chunkSize:
                    break
            else:
                end = m.end()
                # If not the whole chunk matched, return everything
                # up to the part that didn't match
                if end != self.chunkSize:
                    rv.append(self.chunk[self.chunkOffset:end])
                    self.chunkOffset = end
                    break
            # If the whole remainder of the chunk matched,
            # use it all and read the next chunk
            rv.append(self.chunk[self.chunkOffset:])
            if not self.readChunk():
                # Reached EOF
                break

        return "".join(rv)

    def unget(self, char):
        """Push *char* back so that the next char() returns it again.

        Passing None is a no-op.
        """
        # Only one character is allowed to be ungotten at once - it must
        # be consumed again before any further call to unget
        if char is not None:
            if self.chunkOffset == 0:
                # unget is called quite rarely, so it's a good idea to do
                # more work here if it saves a bit of work in the frequently
                # called char and charsUntil.
                # So, just prepend the ungotten character onto the current
                # chunk:
                self.chunk = char + self.chunk
                self.chunkSize += 1
            else:
                self.chunkOffset -= 1
                assert self.chunk[self.chunkOffset] == char
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
    """Provides a unicode stream of characters to the HTMLTokenizer.

    This class takes care of character encoding and removing or replacing
    incorrect byte-sequences and also provides column and line tracking.

    """

    def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
        """Initialises the HTMLInputStream.

        HTMLInputStream(source, [encoding]) -> Normalized stream from source
        for use by html5lib.

        source can be either a file-like object (anything with a read
        method) or a byte string.

        The optional encoding parameter must be a string that indicates
        the encoding.  If specified, that encoding will be used,
        regardless of any BOM or later declaration (such as in a meta
        element)

        parseMeta - Look for a <meta> element containing encoding information

        chardet - Fall back to the charade/chardet detector, if installed

        """
        # Raw byte stream; wrapped in a BufferedStream if it cannot seek.
        self.rawStream = self.openStream(source)

        HTMLUnicodeInputStream.__init__(self, self.rawStream)

        self.charEncoding = (codecName(encoding), "certain")

        # Encoding Information
        # Number of bytes to use when looking for a meta element with
        # encoding information
        self.numBytesMeta = 512
        # Number of bytes to use when using detecting encoding using chardet
        self.numBytesChardet = 100
        # Encoding to use if no other information can be found
        self.defaultEncoding = "windows-1252"

        # Detect encoding iff no explicit "transport level" encoding is supplied
        if (self.charEncoding[0] is None):
            self.charEncoding = self.detectEncoding(parseMeta, chardet)

        # Rebuild the decoder now that the final encoding is known
        self.reset()

    def reset(self):
        """Recreate the decoding reader for the current encoding and clear
        the chunk state."""
        self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
                                                                 'replace')
        HTMLUnicodeInputStream.reset(self)

    def openStream(self, source):
        """Produces a file object from source.

        source can be either a file object or a byte string; a byte string
        is wrapped in a BytesIO. Streams that cannot seek/tell are wrapped
        in a BufferedStream so that the encoding detection can rewind them.

        """
        # Already a file object
        if hasattr(source, 'read'):
            stream = source
        else:
            stream = BytesIO(source)

        try:
            stream.seek(stream.tell())
        except Exception:
            # Any failure here is taken to mean "not seekable". Exception
            # (rather than a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            stream = BufferedStream(stream)

        return stream

    def detectEncoding(self, parseMeta=True, chardet=True):
        """Return an (encoding, confidence) pair for the raw stream.

        Tries, in order: a BOM, a <meta> declaration (if parseMeta), the
        charade/chardet detector (if chardet and importable), and finally
        self.defaultEncoding. Only a BOM yields "certain" confidence.
        """
        # First look for a BOM
        # This will also read past the BOM if present
        encoding = self.detectBOM()
        confidence = "certain"
        # If there is no BOM need to look for meta elements with encoding
        # information
        if encoding is None and parseMeta:
            encoding = self.detectEncodingMeta()
            confidence = "tentative"
        # Guess with chardet, if available
        if encoding is None and chardet:
            confidence = "tentative"
            try:
                try:
                    from charade.universaldetector import UniversalDetector
                except ImportError:
                    from chardet.universaldetector import UniversalDetector
                detector = UniversalDetector()
                while not detector.done:
                    buffer = self.rawStream.read(self.numBytesChardet)
                    assert isinstance(buffer, bytes)
                    if not buffer:
                        break
                    detector.feed(buffer)
                detector.close()
                encoding = detector.result['encoding']
                self.rawStream.seek(0)
            except ImportError:
                pass
        # If all else fails use the default encoding
        if encoding is None:
            confidence = "tentative"
            encoding = self.defaultEncoding

        # Substitute for equivalent encodings:
        encodingSub = {"iso-8859-1": "windows-1252"}
        encoding = encodingSub.get(encoding.lower(), encoding)

        return encoding, confidence

    def changeEncoding(self, newEncoding):
        """Switch to *newEncoding* after a late declaration was found.

        Does nothing if the label is not recognised. If it names the
        encoding already in use, the confidence is upgraded to "certain".
        Otherwise the raw stream is rewound, the decoder rebuilt for the
        new encoding, and ReparseException raised.
        """
        assert self.charEncoding[1] != "certain"
        newEncoding = codecName(newEncoding)
        if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
            newEncoding = "utf-8"
        if newEncoding is None:
            return
        elif newEncoding == self.charEncoding[0]:
            self.charEncoding = (self.charEncoding[0], "certain")
        else:
            oldEncoding = self.charEncoding[0]
            self.rawStream.seek(0)
            # Update the encoding BEFORE reset(): reset() builds the decoder
            # from self.charEncoding, so the other order would leave the
            # stream decoding with the encoding that was just rejected.
            self.charEncoding = (newEncoding, "certain")
            self.reset()
            raise ReparseException("Encoding changed from %s to %s" % (oldEncoding, newEncoding))

    def detectBOM(self):
        """Attempts to detect at BOM at the start of the stream. If
        an encoding can be determined from the BOM return the name of the
        encoding otherwise return None"""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
            codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
        }

        # Read in the first 4 bytes
        string = self.rawStream.read(4)
        assert isinstance(string, bytes)

        # Try detecting the BOM using bytes from the string
        encoding = bomDict.get(string[:3])         # UTF-8
        seek = 3
        if not encoding:
            # Need to detect UTF-32 before UTF-16
            encoding = bomDict.get(string)         # UTF-32
            seek = 4
            if not encoding:
                encoding = bomDict.get(string[:2])  # UTF-16
                seek = 2

        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        self.rawStream.seek(seek if encoding else 0)

        return encoding

    def detectEncodingMeta(self):
        """Report the encoding declared by the meta element
        """
        buffer = self.rawStream.read(self.numBytesMeta)
        assert isinstance(buffer, bytes)
        parser = EncodingParser(buffer)
        self.rawStream.seek(0)
        encoding = parser.getEncoding()

        if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
            encoding = "utf-8"

        return encoding
|
|
||||||
|
|
||||||
|
|
||||||
class EncodingBytes(bytes):
    """Lower-cased byte string with a cursor and scanning helpers.

    The cursor starts before the first byte. Once it has moved past the
    last byte, reading the position raises StopIteration.
    """

    def __new__(cls, value):
        assert isinstance(value, bytes)
        return bytes.__new__(cls, value.lower())

    def __init__(self, value):
        # -1 means "before the first byte".
        self._position = -1

    def __iter__(self):
        return self

    def __next__(self):
        """Advance the cursor and return the byte it now points at."""
        self._position += 1
        index = self._position
        if index >= len(self):
            raise StopIteration
        if index < 0:
            raise TypeError
        return self[index:index + 1]

    def next(self):
        # Py2 compat
        return self.__next__()

    def previous(self):
        """Step the cursor back and return the byte it now points at."""
        index = self._position
        if index >= len(self):
            raise StopIteration
        if index < 0:
            raise TypeError
        index -= 1
        self._position = index
        return self[index:index + 1]

    def setPosition(self, position):
        # The check is on the CURRENT cursor, not on the requested one.
        if self._position >= len(self):
            raise StopIteration
        self._position = position

    def getPosition(self):
        if self._position >= len(self):
            raise StopIteration
        return self._position if self._position >= 0 else None

    position = property(getPosition, setPosition)

    def getCurrentByte(self):
        return self[self.position:self.position + 1]

    currentByte = property(getCurrentByte)

    def skip(self, chars=spaceCharactersBytes):
        """Move past bytes in *chars*; return the first byte not in it,
        or None if the end of the data is reached."""
        start = self.position  # use property for the error-checking
        for index in range(start, len(self)):
            byte = self[index:index + 1]
            if byte not in chars:
                self._position = index
                return byte
        self._position = len(self)
        return None

    def skipUntil(self, chars):
        """Move forward to the first byte in *chars* and return it,
        or None if the end of the data is reached."""
        start = self.position
        for index in range(start, len(self)):
            byte = self[index:index + 1]
            if byte in chars:
                self._position = index
                return byte
        self._position = len(self)
        return None

    def matchBytes(self, bytes):
        """Test whether the data at the cursor starts with *bytes*.

        On a match the cursor is advanced by len(bytes) and True is
        returned; otherwise the cursor is left alone and False returned.
        """
        start = self.position
        window = self[start:start + len(bytes)]
        matched = window.startswith(bytes)
        if matched:
            self.position = self.position + len(bytes)
        return matched

    def jumpTo(self, bytes):
        """Search forward from the cursor for *bytes*.

        If found, leave the cursor on the last byte of the match and
        return True; otherwise raise StopIteration.
        """
        found = self[self.position:].find(bytes)
        if found <= -1:
            raise StopIteration
        # XXX: This is ugly, but I can't see a nicer way to fix this.
        if self._position == -1:
            self._position = 0
        self._position += (found + len(bytes) - 1)
        return True
|
|
||||||
|
|
||||||
|
|
||||||
class EncodingParser(object):
    """Mini parser that scans bytes for an encoding declared in a meta element"""

    def __init__(self, data):
        """data - the byte string to scan for an encoding declaration"""
        self.data = EncodingBytes(data)
        self.encoding = None

    def getEncoding(self):
        """Scan the data and return the declared codec name, or None."""
        # Order matters: longer prefixes must be tried before "<".
        handlers = (
            (b"<!--", self.handleComment),
            (b"<meta", self.handleMeta),
            (b"</", self.handlePossibleEndTag),
            (b"<!", self.handleOther),
            (b"<?", self.handleOther),
            (b"<", self.handlePossibleStartTag))
        for _ in self.data:
            keepGoing = True
            for prefix, handler in handlers:
                if not self.data.matchBytes(prefix):
                    continue
                try:
                    keepGoing = handler()
                except StopIteration:
                    # Ran off the end of the data inside a construct.
                    keepGoing = False
                break
            if not keepGoing:
                break

        return self.encoding

    def handleComment(self):
        """Skip over comments"""
        return self.data.jumpTo(b"-->")

    def handleMeta(self):
        """Inspect the attributes of a meta element.

        Returns False (stop parsing) once self.encoding has been set,
        True otherwise.
        """
        if self.data.currentByte not in spaceCharactersBytes:
            # "<meta" not followed by a space: not a meta element, carry on
            return True

        sawPragma = False
        # Encoding from a content attribute seen before its http-equiv
        deferred = None
        while True:
            # Try to find the next attribute after the current position
            attr = self.getAttribute()
            if attr is None:
                return True
            name, value = attr
            if name == b"http-equiv":
                sawPragma = value == b"content-type"
                if sawPragma and deferred is not None:
                    self.encoding = deferred
                    return False
            elif name == b"charset":
                codec = codecName(value)
                if codec is not None:
                    self.encoding = codec
                    return False
            elif name == b"content":
                candidate = ContentAttrParser(EncodingBytes(value)).parse()
                if candidate is None:
                    continue
                codec = codecName(candidate)
                if codec is None:
                    continue
                if sawPragma:
                    self.encoding = codec
                    return False
                deferred = codec

    def handlePossibleStartTag(self):
        return self.handlePossibleTag(False)

    def handlePossibleEndTag(self):
        next(self.data)
        return self.handlePossibleTag(True)

    def handlePossibleTag(self, endTag):
        """Skip over a start or end tag, including its attributes."""
        data = self.data
        if data.currentByte not in asciiLettersBytes:
            # If the next byte is not an ascii letter either ignore this
            # fragment (possible start tag case) or treat it according to
            # handleOther
            if endTag:
                data.previous()
                self.handleOther()
            return True

        if data.skipUntil(spacesAngleBrackets) == b"<":
            # return to the first step in the overall "two step" algorithm
            # reprocessing the < byte
            data.previous()
            return True

        # Read (and discard) all attributes
        while self.getAttribute() is not None:
            pass
        return True

    def handleOther(self):
        return self.data.jumpTo(b">")

    def getAttribute(self):
        """Return a (name, value) pair of byte strings for the next
        attribute in the stream, if one is found, or None"""
        data = self.data
        # Step 1 (skip chars)
        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
        assert c is None or len(c) == 1
        # Step 2
        if c is None or c == b">":
            return None
        # Step 3
        nameParts = []
        valueParts = []
        # Step 4 attribute name
        while True:
            if c == b"=" and nameParts:
                break
            if c in spaceCharactersBytes:
                # Step 6!
                c = data.skip()
                break
            if c in (b"/", b">"):
                return b"".join(nameParts), b""
            if c is None:
                return None
            nameParts.append(c.lower() if c in asciiUppercaseBytes else c)
            # Step 5
            c = next(data)
        # Step 7
        if c != b"=":
            data.previous()
            return b"".join(nameParts), b""
        # Step 8
        next(data)
        # Step 9
        c = data.skip()
        # Step 10
        if c in (b"'", b'"'):
            # 10.1
            quote = c
            while True:
                # 10.2
                c = next(data)
                # 10.3
                if c == quote:
                    next(data)
                    return b"".join(nameParts), b"".join(valueParts)
                # 10.4 / 10.5
                valueParts.append(c.lower() if c in asciiUppercaseBytes else c)
        if c == b">":
            return b"".join(nameParts), b""
        if c is None:
            return None
        valueParts.append(c.lower() if c in asciiUppercaseBytes else c)
        # Step 11
        while True:
            c = next(data)
            if c in spacesAngleBrackets:
                return b"".join(nameParts), b"".join(valueParts)
            if c is None:
                return None
            valueParts.append(c.lower() if c in asciiUppercaseBytes else c)
|
|
||||||
|
|
||||||
|
|
||||||
class ContentAttrParser(object):
    """Extracts the charset value from the content attribute of a meta element."""

    def __init__(self, data):
        assert isinstance(data, bytes)
        self.data = data

    def parse(self):
        """Return the bytes following "charset=", or None if there is no
        usable charset declaration."""
        data = self.data
        try:
            # Check if the attr name is charset
            # otherwise return
            data.jumpTo(b"charset")
            data.position += 1
            data.skip()
            if data.currentByte != b"=":
                # If there is no = sign keep looking for attrs
                return None
            data.position += 1
            data.skip()

            # Look for an encoding between matching quote marks
            if data.currentByte in (b'"', b"'"):
                quote = data.currentByte
                data.position += 1
                start = data.position
                if data.jumpTo(quote):
                    return data[start:data.position]
                return None

            # Unquoted value
            start = data.position
            try:
                data.skipUntil(spaceCharactersBytes)
                return data[start:data.position]
            except StopIteration:
                # Return the whole remaining value
                return data[start:]
        except StopIteration:
            return None
|
|
||||||
|
|
||||||
|
|
||||||
def codecName(encoding):
    """Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding.

    Byte strings are decoded as ASCII first; a label that is not pure
    ASCII yields None, as does an empty or None label.
    """
    if isinstance(encoding, bytes):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            return None
    if not encoding:
        return None
    # Lookup keys have ASCII punctuation/whitespace stripped and are lower-case.
    canonicalName = ascii_punctuation_re.sub("", encoding).lower()
    return encodings.get(canonicalName, None)
|
|
||||||
-296
@@ -1,296 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
from xml.sax.saxutils import escape, unescape
|
|
||||||
from six.moves import urllib_parse as urlparse
|
|
||||||
|
|
||||||
from .tokenizer import HTMLTokenizer
|
|
||||||
from .constants import tokenTypes
|
|
||||||
|
|
||||||
|
|
||||||
# Matches the part of a data: URI after the scheme, i.e.
# "<type>/<subtype>[;charset=...][;base64],<data>", and captures the media
# type in the "content_type" group. Whitespace and "#" comments inside the
# pattern are ignored because it is compiled in verbose mode.
content_type_rgx = re.compile(
    r'''
^
# Match a content type <application>/<type>
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
# Match any character set and encoding
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
# Assume the rest is data
,.*
$
''',
    flags=re.VERBOSE)
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLSanitizerMixin(object):
|
|
||||||
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
|
|
||||||
|
|
||||||
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
|
|
||||||
'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
|
|
||||||
'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
|
|
||||||
'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
|
|
||||||
'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
|
|
||||||
'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
|
|
||||||
'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
|
|
||||||
'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
|
|
||||||
'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
|
|
||||||
'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
|
|
||||||
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
|
|
||||||
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
|
|
||||||
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
|
|
||||||
|
|
||||||
mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
|
|
||||||
'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
|
|
||||||
'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
|
|
||||||
'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
|
|
||||||
'munderover', 'none']
|
|
||||||
|
|
||||||
svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
|
|
||||||
'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
|
|
||||||
'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
|
|
||||||
'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
|
|
||||||
'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
|
|
||||||
'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
|
|
||||||
|
|
||||||
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
|
|
||||||
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
|
|
||||||
'background', 'balance', 'bgcolor', 'bgproperties', 'border',
|
|
||||||
'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
|
|
||||||
'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
|
|
||||||
'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
|
|
||||||
'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
|
|
||||||
'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
|
|
||||||
'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
|
|
||||||
'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
|
|
||||||
'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
|
|
||||||
'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
|
|
||||||
'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
|
|
||||||
'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
|
|
||||||
'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
|
|
||||||
'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
|
|
||||||
'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
|
|
||||||
'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
|
|
||||||
'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
|
|
||||||
'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
|
|
||||||
'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
|
|
||||||
'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
|
|
||||||
'width', 'wrap', 'xml:lang']
|
|
||||||
|
|
||||||
mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
|
|
||||||
'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
|
|
||||||
'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
|
|
||||||
'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
|
|
||||||
'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
|
|
||||||
'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
|
|
||||||
'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
|
|
||||||
'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
|
|
||||||
'xlink:type', 'xmlns', 'xmlns:xlink']
|
|
||||||
|
|
||||||
svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
|
|
||||||
'arabic-form', 'ascent', 'attributeName', 'attributeType',
|
|
||||||
'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
|
|
||||||
'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
|
|
||||||
'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
|
|
||||||
'fill-opacity', 'fill-rule', 'font-family', 'font-size',
|
|
||||||
'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
|
|
||||||
'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
|
|
||||||
'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
|
|
||||||
'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
|
|
||||||
'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
|
|
||||||
'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
|
|
||||||
'opacity', 'orient', 'origin', 'overline-position',
|
|
||||||
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
|
|
||||||
'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
|
|
||||||
'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
|
|
||||||
'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
|
|
||||||
'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
|
|
||||||
'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
|
|
||||||
'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
|
|
||||||
'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
|
|
||||||
'transform', 'type', 'u1', 'u2', 'underline-position',
|
|
||||||
'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
|
|
||||||
'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
|
|
||||||
'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
|
|
||||||
'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
|
|
||||||
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
|
|
||||||
'y1', 'y2', 'zoomAndPan']
|
|
||||||
|
|
||||||
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background', 'datasrc',
|
|
||||||
'dynsrc', 'lowsrc', 'ping', 'poster', 'xlink:href', 'xml:base']
|
|
||||||
|
|
||||||
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
|
|
||||||
'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
|
|
||||||
'mask', 'stroke']
|
|
||||||
|
|
||||||
svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
|
|
||||||
'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
|
|
||||||
'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
|
|
||||||
'set', 'use']
|
|
||||||
|
|
||||||
acceptable_css_properties = ['azimuth', 'background-color',
|
|
||||||
'border-bottom-color', 'border-collapse', 'border-color',
|
|
||||||
'border-left-color', 'border-right-color', 'border-top-color', 'clear',
|
|
||||||
'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
|
|
||||||
'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
|
|
||||||
'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
|
|
||||||
'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
|
|
||||||
'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
|
|
||||||
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
|
|
||||||
'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
|
|
||||||
'white-space', 'width']
|
|
||||||
|
|
||||||
acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
|
|
||||||
'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
|
|
||||||
'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
|
|
||||||
'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
|
|
||||||
'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
|
|
||||||
'transparent', 'underline', 'white', 'yellow']
|
|
||||||
|
|
||||||
acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
|
|
||||||
'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
|
|
||||||
'stroke-opacity']
|
|
||||||
|
|
||||||
acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
|
|
||||||
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
|
|
||||||
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
|
|
||||||
'ssh', 'sftp', 'rtsp', 'afs', 'data']
|
|
||||||
|
|
||||||
acceptable_content_types = ['image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain']
|
|
||||||
|
|
||||||
# subclasses may define their own versions of these constants
|
|
||||||
allowed_elements = acceptable_elements + mathml_elements + svg_elements
|
|
||||||
allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
|
|
||||||
allowed_css_properties = acceptable_css_properties
|
|
||||||
allowed_css_keywords = acceptable_css_keywords
|
|
||||||
allowed_svg_properties = acceptable_svg_properties
|
|
||||||
allowed_protocols = acceptable_protocols
|
|
||||||
allowed_content_types = acceptable_content_types
|
|
||||||
|
|
||||||
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
|
|
||||||
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
|
|
||||||
# attributes are parsed, and a restricted set, # specified by
|
|
||||||
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
|
|
||||||
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
|
|
||||||
# in ALLOWED_PROTOCOLS are allowed.
|
|
||||||
#
|
|
||||||
# sanitize_html('<script> do_nasty_stuff() </script>')
|
|
||||||
# => <script> do_nasty_stuff() </script>
|
|
||||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
|
||||||
# => <a>Click here for $100</a>
|
|
||||||
def sanitize_token(self, token):
|
|
||||||
|
|
||||||
# accommodate filters which use token_type differently
|
|
||||||
token_type = token["type"]
|
|
||||||
if token_type in list(tokenTypes.keys()):
|
|
||||||
token_type = tokenTypes[token_type]
|
|
||||||
|
|
||||||
if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
|
|
||||||
tokenTypes["EmptyTag"]):
|
|
||||||
if token["name"] in self.allowed_elements:
|
|
||||||
return self.allowed_token(token, token_type)
|
|
||||||
else:
|
|
||||||
return self.disallowed_token(token, token_type)
|
|
||||||
elif token_type == tokenTypes["Comment"]:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
return token
|
|
||||||
|
|
||||||
def allowed_token(self, token, token_type):
|
|
||||||
if "data" in token:
|
|
||||||
attrs = dict([(name, val) for name, val in
|
|
||||||
token["data"][::-1]
|
|
||||||
if name in self.allowed_attributes])
|
|
||||||
for attr in self.attr_val_is_uri:
|
|
||||||
if attr not in attrs:
|
|
||||||
continue
|
|
||||||
val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
|
|
||||||
unescape(attrs[attr])).lower()
|
|
||||||
# remove replacement characters from unescaped characters
|
|
||||||
val_unescaped = val_unescaped.replace("\ufffd", "")
|
|
||||||
uri = urlparse.urlparse(val_unescaped)
|
|
||||||
if uri:
|
|
||||||
if uri.scheme not in self.allowed_protocols:
|
|
||||||
del attrs[attr]
|
|
||||||
if uri.scheme == 'data':
|
|
||||||
m = content_type_rgx.match(uri.path)
|
|
||||||
if not m:
|
|
||||||
del attrs[attr]
|
|
||||||
if m.group('content_type') not in self.allowed_content_types:
|
|
||||||
del attrs[attr]
|
|
||||||
|
|
||||||
for attr in self.svg_attr_val_allows_ref:
|
|
||||||
if attr in attrs:
|
|
||||||
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
|
|
||||||
' ',
|
|
||||||
unescape(attrs[attr]))
|
|
||||||
if (token["name"] in self.svg_allow_local_href and
|
|
||||||
'xlink:href' in attrs and re.search('^\s*[^#\s].*',
|
|
||||||
attrs['xlink:href'])):
|
|
||||||
del attrs['xlink:href']
|
|
||||||
if 'style' in attrs:
|
|
||||||
attrs['style'] = self.sanitize_css(attrs['style'])
|
|
||||||
token["data"] = [[name, val] for name, val in list(attrs.items())]
|
|
||||||
return token
|
|
||||||
|
|
||||||
def disallowed_token(self, token, token_type):
|
|
||||||
if token_type == tokenTypes["EndTag"]:
|
|
||||||
token["data"] = "</%s>" % token["name"]
|
|
||||||
elif token["data"]:
|
|
||||||
attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
|
|
||||||
token["data"] = "<%s%s>" % (token["name"], attrs)
|
|
||||||
else:
|
|
||||||
token["data"] = "<%s>" % token["name"]
|
|
||||||
if token.get("selfClosing"):
|
|
||||||
token["data"] = token["data"][:-1] + "/>"
|
|
||||||
|
|
||||||
if token["type"] in list(tokenTypes.keys()):
|
|
||||||
token["type"] = "Characters"
|
|
||||||
else:
|
|
||||||
token["type"] = tokenTypes["Characters"]
|
|
||||||
|
|
||||||
del token["name"]
|
|
||||||
return token
|
|
||||||
|
|
||||||
def sanitize_css(self, style):
|
|
||||||
# disallow urls
|
|
||||||
style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
|
|
||||||
|
|
||||||
# gauntlet
|
|
||||||
if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
|
|
||||||
return ''
|
|
||||||
if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
|
|
||||||
return ''
|
|
||||||
|
|
||||||
clean = []
|
|
||||||
for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
|
|
||||||
if not value:
|
|
||||||
continue
|
|
||||||
if prop.lower() in self.allowed_css_properties:
|
|
||||||
clean.append(prop + ': ' + value + ';')
|
|
||||||
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
|
|
||||||
'padding']:
|
|
||||||
for keyword in value.split():
|
|
||||||
if keyword not in self.acceptable_css_keywords and \
|
|
||||||
not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
clean.append(prop + ': ' + value + ';')
|
|
||||||
elif prop.lower() in self.allowed_svg_properties:
|
|
||||||
clean.append(prop + ': ' + value + ';')
|
|
||||||
|
|
||||||
return ' '.join(clean)
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
    """Tokenizer whose output is passed through ``sanitize_token``."""

    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
        # Change case matching defaults as we only output lowercase html anyway
        # This solution doesn't seem ideal...
        HTMLTokenizer.__init__(
            self, stream, encoding, parseMeta, useChardet,
            lowercaseElementName, lowercaseAttrName, parser=parser)

    def __iter__(self):
        """Yield each sanitized token, skipping falsy results."""
        for raw_token in HTMLTokenizer.__iter__(self):
            cleaned = self.sanitize_token(raw_token)
            if not cleaned:
                continue
            yield cleaned
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from .. import treewalkers
|
|
||||||
|
|
||||||
from .htmlserializer import HTMLSerializer
|
|
||||||
|
|
||||||
|
|
||||||
def serialize(input, tree="etree", format="html", encoding=None,
              **serializer_opts):
    """Walk ``input`` with the tree walker for ``tree`` and render it.

    Only ``format="html"`` is supported; anything else raises
    ``ValueError``.  Remaining keyword arguments are forwarded to
    ``HTMLSerializer``; ``encoding`` is forwarded to its ``render`` method.
    """
    # XXX: Should we cache this?
    walker = treewalkers.getTreeWalker(tree)
    if format != "html":
        raise ValueError("type must be html")
    serializer = HTMLSerializer(**serializer_opts)
    return serializer.render(walker(input), encoding)
|
|
||||||
@@ -1,317 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
from pip._vendor.six import text_type
|
|
||||||
|
|
||||||
try:
|
|
||||||
from functools import reduce
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
from ..constants import voidElements, booleanAttributes, spaceCharacters
|
|
||||||
from ..constants import rcdataElements, entities, xmlEntities
|
|
||||||
from .. import utils
|
|
||||||
from xml.sax.saxutils import escape
|
|
||||||
|
|
||||||
spaceCharacters = "".join(spaceCharacters)
|
|
||||||
|
|
||||||
try:
    from codecs import register_error, xmlcharrefreplace_errors
except ImportError:
    unicode_encode_errors = "strict"
else:
    unicode_encode_errors = "htmlentityreplace"

    # Maps a code point to the entity name used when it cannot be encoded.
    encode_entity_map = {}
    is_ucs4 = len("\U0010FFFF") == 1
    for k, v in list(entities.items()):
        # skip multi-character entities (on a narrow build a single
        # character may occupy two code units)
        if len(v) > (1 if is_ucs4 else 2):
            continue
        if v == "&":
            continue
        v = utils.surrogatePairToCodepoint(v) if len(v) == 2 else ord(v)
        # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
        if k.islower() or v not in encode_entity_map:
            encode_entity_map[v] = k

    def htmlentityreplace_errors(exc):
        """Codec error handler that substitutes HTML entities.

        For encode/translate errors each offending character becomes a
        named entity when ``encode_entity_map`` has one, and a hexadecimal
        character reference otherwise.  Any other error is delegated to
        ``xmlcharrefreplace_errors``.
        """
        if not isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
            return xmlcharrefreplace_errors(exc)

        text = exc.object
        pieces = []
        pos = exc.start
        stop = min(exc.end, len(text))
        while pos < stop:
            if utils.isSurrogatePair(text[pos:min([exc.end, pos + 2])]):
                # Two code units form one code point; consume both.
                codepoint = utils.surrogatePairToCodepoint(text[pos:pos + 2])
                pos += 2
            else:
                codepoint = ord(text[pos])
                pos += 1
            entity = encode_entity_map.get(codepoint)
            if entity:
                pieces.append("&")
                pieces.append(entity)
                if not entity.endswith(";"):
                    pieces.append(";")
            else:
                pieces.append("&#x%s;" % (hex(codepoint)[2:]))
        return ("".join(pieces), exc.end)

    register_error(unicode_encode_errors, htmlentityreplace_errors)

    del register_error
|
|
||||||
|
|
||||||
|
|
||||||
class HTMLSerializer(object):
    """Serialize a stream of tree-walker tokens as HTML text."""

    # attribute quoting options
    quote_attr_values = False
    quote_char = '"'
    use_best_quote_char = True

    # tag syntax options
    omit_optional_tags = True
    minimize_boolean_attributes = True
    use_trailing_solidus = False
    space_before_trailing_solidus = True

    # escaping options
    escape_lt_in_attrs = False
    escape_rcdata = False
    resolve_entities = True

    # miscellaneous options
    alphabetical_attributes = False
    inject_meta_charset = True
    strip_whitespace = False
    sanitize = False

    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
               "omit_optional_tags", "minimize_boolean_attributes",
               "use_trailing_solidus", "space_before_trailing_solidus",
               "escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
               "alphabetical_attributes", "inject_meta_charset",
               "strip_whitespace", "sanitize")

    def __init__(self, **kwargs):
        """Initialize HTMLSerializer.

        Keyword options (default given first unless specified) include:

        inject_meta_charset=True|False
          Whether to insert a meta element to define the character set of
          the document.
        quote_attr_values=True|False
          Whether to quote attribute values that don't require quoting
          per HTML5 parsing rules.
        quote_char=u'"'|u"'"
          Use given quote character for attribute quoting. Default is to
          use double quote unless attribute value contains a double quote,
          in which case single quotes are used instead.
        escape_lt_in_attrs=False|True
          Whether to escape < in attribute values.
        escape_rcdata=False|True
          Whether to escape characters that need to be escaped within normal
          elements within rcdata elements such as style.
        resolve_entities=True|False
          Whether to resolve named character entities that appear in the
          source tree. The XML predefined entities &lt; &gt; &amp; &quot;
          &apos; are unaffected by this setting.
        strip_whitespace=False|True
          Whether to remove semantically meaningless whitespace. (This
          compresses all whitespace to a single space except within pre.)
        minimize_boolean_attributes=True|False
          Shortens boolean attributes to give just the attribute value,
          for example <input disabled="disabled"> becomes <input disabled>.
        use_trailing_solidus=False|True
          Includes a close-tag slash at the end of the start tag of void
          elements (empty elements whose end tag is forbidden). E.g. <hr/>.
        space_before_trailing_solidus=True|False
          Places a space immediately before the closing slash in a tag
          using a trailing solidus. E.g. <hr />. Requires
          use_trailing_solidus.
        sanitize=False|True
          Strip all unsafe or unknown constructs from output.
          See `html5lib user documentation`_
        omit_optional_tags=True|False
          Omit start/end tags that are optional.
        alphabetical_attributes=False|True
          Reorder attributes to be in alphabetical order.

        .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
        """
        # An explicitly chosen quote character disables automatic selection.
        if 'quote_char' in kwargs:
            self.use_best_quote_char = False
        for attr in self.options:
            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
        self.errors = []
        self.strict = False

    def encode(self, string):
        """Encode ``string`` with the current encoding, replacing
        unencodable characters via ``unicode_encode_errors``.

        Returns ``string`` unchanged when no encoding is set.
        """
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, unicode_encode_errors)
        else:
            return string

    def encodeStrict(self, string):
        """Encode ``string`` with the current encoding using the "strict"
        error handler; returns it unchanged when no encoding is set."""
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, "strict")
        else:
            return string

    def serialize(self, treewalker, encoding=None):
        """Generate the serialized chunks for the tokens of ``treewalker``.

        The configured filters are wrapped around ``treewalker`` first.
        Chunks are text when ``encoding`` is falsy and encoded otherwise.
        Problems are reported through ``serializeError``.
        """
        self.encoding = encoding
        in_cdata = False
        self.errors = []

        if encoding and self.inject_meta_charset:
            from ..filters.inject_meta_charset import Filter
            treewalker = Filter(treewalker, encoding)
        # WhitespaceFilter should be used before OptionalTagFilter
        # for maximum efficiency of this latter filter
        if self.strip_whitespace:
            from ..filters.whitespace import Filter
            treewalker = Filter(treewalker)
        if self.sanitize:
            from ..filters.sanitizer import Filter
            treewalker = Filter(treewalker)
        if self.omit_optional_tags:
            from ..filters.optionaltags import Filter
            treewalker = Filter(treewalker)
        # Alphabetical attributes must be last, as other filters
        # could add attributes and alter the order
        if self.alphabetical_attributes:
            from ..filters.alphabeticalattributes import Filter
            treewalker = Filter(treewalker)

        for token in treewalker:
            token_kind = token["type"]
            if token_kind == "Doctype":
                doctype = "<!DOCTYPE %s" % token["name"]

                if token["publicId"]:
                    doctype += ' PUBLIC "%s"' % token["publicId"]
                elif token["systemId"]:
                    doctype += " SYSTEM"
                if token["systemId"]:
                    if token["systemId"].find('"') >= 0:
                        if token["systemId"].find("'") >= 0:
                            self.serializeError("System identifer contains both single and double quote characters")
                        quote_char = "'"
                    else:
                        quote_char = '"'
                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

                doctype += ">"
                yield self.encodeStrict(doctype)

            elif token_kind in ("Characters", "SpaceCharacters"):
                if token_kind == "SpaceCharacters" or in_cdata:
                    if in_cdata and token["data"].find("</") >= 0:
                        self.serializeError("Unexpected </ in CDATA")
                    yield self.encode(token["data"])
                else:
                    yield self.encode(escape(token["data"]))

            elif token_kind in ("StartTag", "EmptyTag"):
                name = token["name"]
                yield self.encodeStrict("<%s" % name)
                if name in rcdataElements and not self.escape_rcdata:
                    in_cdata = True
                elif in_cdata:
                    self.serializeError("Unexpected child element of a CDATA element")
                for (attr_namespace, attr_name), attr_value in token["data"].items():
                    # TODO: Add namespace support here
                    k = attr_name
                    v = attr_value
                    yield self.encodeStrict(' ')

                    yield self.encodeStrict(k)
                    if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple())
                         and k not in booleanAttributes.get("", tuple())):
                        yield self.encodeStrict("=")
                        if self.quote_attr_values or not v:
                            quote_attr = True
                        else:
                            # Quotes are needed as soon as the value holds
                            # whitespace or one of > " ' =
                            quote_attr = any(c in v for c in
                                             spaceCharacters + ">\"'=")
                        v = v.replace("&", "&amp;")
                        if self.escape_lt_in_attrs:
                            v = v.replace("<", "&lt;")
                        if quote_attr:
                            quote_char = self.quote_char
                            if self.use_best_quote_char:
                                if "'" in v and '"' not in v:
                                    quote_char = '"'
                                elif '"' in v and "'" not in v:
                                    quote_char = "'"
                            # Escape occurrences of the chosen delimiter so
                            # the value cannot terminate the attribute.
                            if quote_char == "'":
                                v = v.replace("'", "&#39;")
                            else:
                                v = v.replace('"', "&quot;")
                            yield self.encodeStrict(quote_char)
                            yield self.encode(v)
                            yield self.encodeStrict(quote_char)
                        else:
                            yield self.encode(v)
                if name in voidElements and self.use_trailing_solidus:
                    if self.space_before_trailing_solidus:
                        yield self.encodeStrict(" /")
                    else:
                        yield self.encodeStrict("/")
                yield self.encode(">")

            elif token_kind == "EndTag":
                name = token["name"]
                if name in rcdataElements:
                    in_cdata = False
                elif in_cdata:
                    self.serializeError("Unexpected child element of a CDATA element")
                yield self.encodeStrict("</%s>" % name)

            elif token_kind == "Comment":
                data = token["data"]
                if data.find("--") >= 0:
                    self.serializeError("Comment contains --")
                yield self.encodeStrict("<!--%s-->" % token["data"])

            elif token_kind == "Entity":
                name = token["name"]
                key = name + ";"
                if key not in entities:
                    self.serializeError("Entity %s not recognized" % name)
                if self.resolve_entities and key not in xmlEntities:
                    data = entities[key]
                else:
                    data = "&%s;" % name
                yield self.encodeStrict(data)

            else:
                self.serializeError(token["data"])

    def render(self, treewalker, encoding=None):
        """Serialize ``treewalker`` and return the joined result: bytes
        when ``encoding`` is given, text otherwise."""
        if encoding:
            return b"".join(list(self.serialize(treewalker, encoding)))
        else:
            return "".join(list(self.serialize(treewalker)))

    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
        """Record ``data`` in ``self.errors``; raise ``SerializeError``
        when ``self.strict`` is set."""
        # XXX The idea is to make data mandatory.
        self.errors.append(data)
        if self.strict:
            raise SerializeError
|
|
||||||
|
|
||||||
|
|
||||||
class SerializeError(Exception):
    """Error in serialized tree"""
    # Must be a class (not a function) so that ``raise SerializeError``
    # raises this exception rather than a TypeError.
    pass
|
|
||||||
-1731
File diff suppressed because it is too large
Load Diff
@@ -1,44 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from xml.sax.xmlreader import AttributesNSImpl
|
|
||||||
|
|
||||||
from ..constants import adjustForeignAttributes, unadjustForeignAttributes
|
|
||||||
|
|
||||||
# Namespace prefix -> namespace URI, collected from the entries of
# adjustForeignAttributes that carry a prefix.
prefix_mapping = {}
for prefix, localName, namespace in adjustForeignAttributes.values():
    if prefix is None:
        continue
    prefix_mapping[prefix] = namespace
|
|
||||||
|
|
||||||
|
|
||||||
def to_sax(walker, handler):
    """Replay the tokens of ``walker`` as calls on a SAX-like ``handler``.

    The document is bracketed by ``startDocument``/``endDocument`` and by
    prefix-mapping events for every entry of ``prefix_mapping``.  Doctype
    and Comment tokens produce no events.
    """
    handler.startDocument()
    for prefix, namespace in prefix_mapping.items():
        handler.startPrefixMapping(prefix, namespace)

    for token in walker:
        kind = token["type"]
        if kind in ("Doctype", "Comment"):
            continue

        if kind in ("StartTag", "EmptyTag"):
            attrs = AttributesNSImpl(token["data"],
                                     unadjustForeignAttributes)
            qualified = (token["namespace"], token["name"])
            handler.startElementNS(qualified, token["name"], attrs)
            # An empty tag is closed immediately after being opened.
            if kind == "EmptyTag":
                handler.endElementNS((token["namespace"], token["name"]),
                                     token["name"])
        elif kind == "EndTag":
            handler.endElementNS((token["namespace"], token["name"]),
                                 token["name"])
        elif kind in ("Characters", "SpaceCharacters"):
            handler.characters(token["data"])
        else:
            assert False, "Unknown token type"

    for prefix in prefix_mapping:
        handler.endPrefixMapping(prefix)
    handler.endDocument()
|
|
||||||
@@ -1,76 +0,0 @@
|
|||||||
"""A collection of modules for building different kinds of tree from
|
|
||||||
HTML documents.
|
|
||||||
|
|
||||||
To create a treebuilder for a new type of tree, you need to
|
|
||||||
implement several things:
|
|
||||||
|
|
||||||
1) A set of classes for various types of elements: Document, Doctype,
|
|
||||||
Comment, Element. These must implement the interface of
|
|
||||||
_base.treebuilders.Node (although comment nodes have a different
|
|
||||||
signature for their constructor, see treebuilders.etree.Comment)
|
|
||||||
Textual content may also be implemented as another node type, or not, as
|
|
||||||
your tree implementation requires.
|
|
||||||
|
|
||||||
2) A treebuilder object (called TreeBuilder by convention) that
|
|
||||||
inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
|
|
||||||
documentClass - the class to use for the bottommost node of a document
|
|
||||||
elementClass - the class to use for HTML Elements
|
|
||||||
commentClass - the class to use for comments
|
|
||||||
doctypeClass - the class to use for doctypes
|
|
||||||
It also has one required method:
|
|
||||||
getDocument - Returns the root node of the complete document tree
|
|
||||||
|
|
||||||
3) If you wish to run the unit tests, you must also create a
|
|
||||||
testSerializer method on your treebuilder which accepts a node and
|
|
||||||
returns a string containing Node and its children serialized according
|
|
||||||
to the format used in the unittests
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from ..utils import default_etree
|
|
||||||
|
|
||||||
# TreeBuilder classes memoised by lower-cased tree type name.
treeBuilderCache = {}


def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of tree with built-in support

    treeType - the name of the tree type required (case-insensitive). Supported
               values are:

               "dom" - A generic builder for DOM implementations, defaulting to
                       a xml.dom.minidom based implementation.
               "etree" - A generic builder for tree implementations exposing an
                         ElementTree-like interface, defaulting to
                         xml.etree.cElementTree if available and
                         xml.etree.ElementTree if not.
               "lxml" - An etree-based builder for lxml.etree, handling
                        limitations of lxml's implementation.

    implementation - (Currently applies to the "etree" and "dom" tree types). A
                     module implementing the tree type e.g.
                     xml.etree.ElementTree or xml.etree.cElementTree.

    Any extra keyword arguments are forwarded to the "dom" / "etree" module
    factory. Raises ValueError for an unrecognised tree type.
    """
    kind = treeType.lower()

    # Anything already stored in the cache is returned as-is.
    if kind in treeBuilderCache:
        return treeBuilderCache.get(kind)

    if kind == "lxml":
        from . import etree_lxml
        treeBuilderCache[kind] = etree_lxml.TreeBuilder
        return treeBuilderCache.get(kind)

    if kind == "dom":
        from . import dom
        # Come up with a sane default (pref. from the stdlib)
        if implementation is None:
            from xml.dom import minidom
            implementation = minidom
        # NEVER cache here, caching is done in the dom submodule
        return dom.getDomModule(implementation, **kwargs).TreeBuilder

    if kind == "etree":
        from . import etree
        if implementation is None:
            implementation = default_etree
        # NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeBuilder

    raise ValueError("""Unrecognised treebuilder "%s" """ % kind)
|
|
||||||
@@ -1,377 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
from pip._vendor.six import text_type
|
|
||||||
|
|
||||||
from ..constants import scopingElements, tableInsertModeElements, namespaces
|
|
||||||
|
|
||||||
# Marker is the sentinel placed in the list of active formatting elements when
# entering object elements, marquees, table cells, and table captions. It is
# used to prevent formatting from "leaking" into tables, object elements, and
# marquees.
Marker = None

# Maps an elementInScope() variant to a pair (element name tuples, invert).
# While walking the open elements, the scan gives up (returns False) at the
# first node for which "nameTuple in the set" XOR invert is true.
listElementsMap = {
    None: (frozenset(scopingElements), False),
    "button": (
        frozenset(scopingElements | {(namespaces["html"], "button")}),
        False,
    ),
    "list": (
        frozenset(scopingElements | {(namespaces["html"], "ol"),
                                     (namespaces["html"], "ul")}),
        False,
    ),
    "table": (
        frozenset(((namespaces["html"], "html"),
                   (namespaces["html"], "table"))),
        False,
    ),
    "select": (
        frozenset(((namespaces["html"], "optgroup"),
                   (namespaces["html"], "option"))),
        True,
    ),
}
|
|
||||||
|
|
||||||
|
|
||||||
class Node(object):
    """Interface every treebuilder node has to provide.

    The tree-manipulating methods raise NotImplementedError here and are meant
    to be supplied by concrete node classes.
    """

    def __init__(self, name):
        """Node representing an item in the tree.

        name - The tag name associated with the node
        parent - The parent of the current node (or None for the document node)
        value - The value of the current node (applies to text nodes and
                comments)
        attributes - a dict holding name, value pairs for attributes of the node
        childNodes - a list of child nodes of the current node. This must
                     include all elements but not necessarily other node types
        _flags - A list of miscellaneous flags that can be set on the node
        """
        self.name = name
        self.parent = None
        self.value = None
        self.attributes = {}
        self.childNodes = []
        self._flags = []

    def __str__(self):
        """Render the node as an opening tag, attributes included."""
        rendered = []
        for attrName, attrValue in self.attributes.items():
            rendered.append("%s=\"%s\"" % (attrName, attrValue))
        if not rendered:
            return "<%s>" % (self.name)
        return "<%s %s>" % (self.name, " ".join(rendered))

    def __repr__(self):
        """Render the node as a bare opening tag, without attributes."""
        return "<%s>" % (self.name)

    def appendChild(self, node):
        """Insert node as a child of the current node."""
        raise NotImplementedError

    def insertText(self, data, insertBefore=None):
        """Insert data as text in the current node, positioned before the
        start of node insertBefore or to the end of the node's text.
        """
        raise NotImplementedError

    def insertBefore(self, node, refNode):
        """Insert node as a child of the current node, before refNode in the
        list of child nodes. Raises ValueError if refNode is not a child of
        the current node.
        """
        raise NotImplementedError

    def removeChild(self, node):
        """Remove node from the children of the current node."""
        raise NotImplementedError

    def reparentChildren(self, newParent):
        """Move all the children of the current node to newParent.

        This is needed so that trees that don't store text as nodes move the
        text in the correct way.
        """
        # XXX - should this method be made more general?
        adopt = newParent.appendChild
        for child in self.childNodes:
            adopt(child)
        self.childNodes = []

    def cloneNode(self):
        """Return a shallow copy of the current node i.e. a node with the same
        name and attributes but with no parent or child nodes.
        """
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text, false otherwise."""
        raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class ActiveFormattingElements(list):
    """List of active formatting elements.

    Appending a node keeps at most three equal entries after the last Marker.
    """

    def append(self, node):
        """Append node to the list.

        When node is not the Marker, entries are scanned from the end back to
        the last Marker; on reaching a third entry equal to node (see
        nodesEqual) that entry is removed before node is added.
        """
        if node != Marker:
            matches = 0
            for candidate in reversed(self):
                if candidate == Marker:
                    break
                if not self.nodesEqual(candidate, node):
                    continue
                matches += 1
                if matches == 3:
                    # Stop right after mutating the list being walked.
                    self.remove(candidate)
                    break
        list.append(self, node)

    def nodesEqual(self, node1, node2):
        """Return True if both nodes have equal nameTuple and attributes."""
        sameName = node1.nameTuple == node2.nameTuple
        if sameName and node1.attributes == node2.attributes:
            return True
        return False
|
|
||||||
|
|
||||||
|
|
||||||
class TreeBuilder(object):
    """Base treebuilder implementation

    documentClass - the class to use for the bottommost node of a document
    elementClass - the class to use for HTML Elements
    commentClass - the class to use for comments
    doctypeClass - the class to use for doctypes
    """

    # Document class
    documentClass = None

    # The class to use for creating a node
    elementClass = None

    # The class to use for creating comments
    commentClass = None

    # The class to use for creating doctypes
    doctypeClass = None

    # Fragment class
    fragmentClass = None

    # Names of the elements popped by generateImpliedEndTags(); built once
    # here rather than on every call.
    # XXX td, th and tr are not actually needed
    _impliedEndTagNames = frozenset(("dd", "dt", "li", "option", "optgroup",
                                     "p", "rp", "rt"))

    def __init__(self, namespaceHTMLElements):
        """Choose the default element namespace and reset the builder.

        namespaceHTMLElements - if true, elements created from tokens without
                                a "namespace" entry get the XHTML namespace,
                                otherwise they get None
        """
        if namespaceHTMLElements:
            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
        else:
            self.defaultNamespace = None
        self.reset()

    def reset(self):
        """Discard all parse state and start a fresh document."""
        self.openElements = []
        self.activeFormattingElements = ActiveFormattingElements()

        # XXX - rename these to headElement, formElement
        self.headPointer = None
        self.formPointer = None

        # Assigning through the property also selects insertElementNormal
        # as the current insertElement.
        self.insertFromTable = False

        self.document = self.documentClass()

    def elementInScope(self, target, variant=None):
        """Return True if target is in scope on the stack of open elements.

        target - either a node (anything with a nameTuple attribute), which
                 is matched with ==, or a tag name matched against node.name
        variant - key into listElementsMap selecting the set of elements that
                  end the search
        """
        # If we pass a node in we match that. if we pass a string
        # match any node with that name
        exactNode = hasattr(target, "nameTuple")

        listElements, invert = listElementsMap[variant]

        for node in reversed(self.openElements):
            if (node.name == target and not exactNode or
                    node == target and exactNode):
                return True
            elif (invert ^ (node.nameTuple in listElements)):
                return False

        assert False  # We should never reach this point

    def reconstructActiveFormattingElements(self):
        """Re-open the trailing active formatting elements that are no longer
        on the stack of open elements, replacing each list entry with the
        newly inserted clone."""
        # Within this algorithm the order of steps described in the
        # specification is not quite the same as the order of steps in the
        # code. It should still do the same though.

        # Step 1: stop the algorithm when there's nothing to do.
        if not self.activeFormattingElements:
            return

        # Step 2 and step 3: we start with the last element. So i is -1.
        i = len(self.activeFormattingElements) - 1
        entry = self.activeFormattingElements[i]
        if entry == Marker or entry in self.openElements:
            return

        # Step 6
        while entry != Marker and entry not in self.openElements:
            if i == 0:
                # This will be reset to 0 below
                i = -1
                break
            i -= 1
            # Step 5: let entry be one earlier in the list.
            entry = self.activeFormattingElements[i]

        while True:
            # Step 7
            i += 1

            # Step 8
            entry = self.activeFormattingElements[i]
            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes

            # Step 9
            element = self.insertElement({"type": "StartTag",
                                          "name": clone.name,
                                          "namespace": clone.namespace,
                                          "data": clone.attributes})

            # Step 10
            self.activeFormattingElements[i] = element

            # Step 11
            if element == self.activeFormattingElements[-1]:
                break

    def clearActiveFormattingElements(self):
        """Pop active formatting elements up to and including the last
        Marker (or until the list is empty)."""
        entry = self.activeFormattingElements.pop()
        while self.activeFormattingElements and entry != Marker:
            entry = self.activeFormattingElements.pop()

    def elementInActiveFormattingElements(self, name):
        """Check if an element exists between the end of the active
        formatting elements and the last marker. If it does, return it, else
        return false"""

        for item in self.activeFormattingElements[::-1]:
            # Check for Marker first because if it's a Marker it doesn't have a
            # name attribute.
            if item == Marker:
                break
            elif item.name == name:
                return item
        return False

    def insertRoot(self, token):
        """Create the element for token, push it on the stack of open
        elements and append it to the document."""
        element = self.createElement(token)
        self.openElements.append(element)
        self.document.appendChild(element)

    def insertDoctype(self, token):
        """Append a doctype built from token's name, publicId and systemId
        to the document."""
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = self.doctypeClass(name, publicId, systemId)
        self.document.appendChild(doctype)

    def insertComment(self, token, parent=None):
        """Append a comment holding token["data"] to parent, or to the
        current (last open) element when parent is None."""
        if parent is None:
            parent = self.openElements[-1]
        parent.appendChild(self.commentClass(token["data"]))

    def createElement(self, token):
        """Create an element but don't insert it anywhere"""
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        return element

    def _getInsertFromTable(self):
        return self._insertFromTable

    def _setInsertFromTable(self, value):
        """Switch the function used to insert an element from the
        normal one to the misnested table one and back again"""
        self._insertFromTable = value
        if value:
            self.insertElement = self.insertElementTable
        else:
            self.insertElement = self.insertElementNormal

    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)

    def insertElementNormal(self, token):
        """Create an element for token, append it to the current element and
        push it on the stack of open elements. Returns the new element."""
        name = token["name"]
        assert isinstance(name, text_type), "Element %s not unicode" % name
        namespace = token.get("namespace", self.defaultNamespace)
        element = self.elementClass(name, namespace)
        element.attributes = token["data"]
        self.openElements[-1].appendChild(element)
        self.openElements.append(element)
        return element

    def insertElementTable(self, token):
        """Create an element and insert it into the tree"""
        if self.openElements[-1].name not in tableInsertModeElements:
            # insertElementNormal builds its own element, so nothing is
            # created here only to be thrown away.
            return self.insertElementNormal(token)

        element = self.createElement(token)
        # We should be in the InTable mode. This means we want to do
        # special magic element rearranging
        parent, insertBefore = self.getTableMisnestedNodePosition()
        if insertBefore is None:
            parent.appendChild(element)
        else:
            parent.insertBefore(element, insertBefore)
        self.openElements.append(element)
        return element

    def insertText(self, data, parent=None):
        """Insert text data."""
        if parent is None:
            parent = self.openElements[-1]

        if (not self.insertFromTable or
                self.openElements[-1].name not in tableInsertModeElements):
            parent.insertText(data)
        else:
            # We should be in the InTable mode. This means we want to do
            # special magic element rearranging
            parent, insertBefore = self.getTableMisnestedNodePosition()
            parent.insertText(data, insertBefore)

    def getTableMisnestedNodePosition(self):
        """Get the foster parent element, and sibling to insert before
        (or None) when inserting a misnested table node"""
        # The foster parent element is the one which comes before the most
        # recently opened table element
        # XXX - this is really inelegant
        lastTable = None
        fosterParent = None
        insertBefore = None
        for elm in self.openElements[::-1]:
            if elm.name == "table":
                lastTable = elm
                break
        if lastTable:
            # XXX - we should really check that this parent is actually a
            # node here
            if lastTable.parent:
                fosterParent = lastTable.parent
                insertBefore = lastTable
            else:
                fosterParent = self.openElements[
                    self.openElements.index(lastTable) - 1]
        else:
            fosterParent = self.openElements[0]
        return fosterParent, insertBefore

    def generateImpliedEndTags(self, exclude=None):
        """Pop open elements while the current one is a dd, dt, li, option,
        optgroup, p, rp or rt element whose name differs from exclude.

        Implemented as a loop so that a very deep run of such elements
        cannot exhaust the interpreter's recursion limit.
        """
        impliedNames = self._impliedEndTagNames
        while True:
            name = self.openElements[-1].name
            if name not in impliedNames or name == exclude:
                return
            # XXX This is not entirely what the specification says. We should
            # investigate it more closely.
            self.openElements.pop()

    def getDocument(self):
        "Return the final tree"
        return self.document

    def getFragment(self):
        "Return the final fragment"
        # assert self.innerHTML
        fragment = self.fragmentClass()
        self.openElements[0].reparentChildren(fragment)
        return fragment

    def testSerializer(self, node):
        """Serialize the subtree of node in the format required by unit tests
        node - the node from which to start serializing"""
        raise NotImplementedError
|
|
||||||
@@ -1,227 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
|
|
||||||
from xml.dom import minidom, Node
|
|
||||||
import weakref
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
from .. import constants
|
|
||||||
from ..constants import namespaces
|
|
||||||
from ..utils import moduleFactoryFactory
|
|
||||||
|
|
||||||
|
|
||||||
def getDomBuilder(DomImplementation):
    """Build the DOM treebuilder classes bound to DomImplementation.

    DomImplementation - a module-like object providing getDOMImplementation()
                        (for example xml.dom.minidom)

    Returns locals(), i.e. a dict with the keys DomImplementation, Dom,
    AttrList, NodeBuilder, TreeBuilder and testSerializer. Because of that,
    no further function-level names may be introduced in this body.
    """
    Dom = DomImplementation

    class AttrList(object):
        """Dict-like view over the attributes of a wrapped DOM element."""

        def __init__(self, element):
            self.element = element

        def __iter__(self):
            return iter(list(self.element.attributes.items()))

        def __setitem__(self, name, value):
            self.element.setAttribute(name, value)

        def __len__(self):
            return len(list(self.element.attributes.items()))

        def items(self):
            return [(item[0], item[1]) for item in
                    list(self.element.attributes.items())]

        def keys(self):
            return list(self.element.attributes.keys())

        def __getitem__(self, name):
            return self.element.getAttribute(name)

        def __contains__(self, name):
            # Namespaced (tuple) attribute names are not supported here.
            if isinstance(name, tuple):
                raise NotImplementedError
            else:
                return self.element.hasAttribute(name)

    class NodeBuilder(_base.Node):
        """Treebuilder node wrapping a DOM node in self.element."""

        def __init__(self, element):
            _base.Node.__init__(self, element.nodeName)
            self.element = element

        @property
        def namespace(self):
            """namespaceURI of the wrapped node, or None if missing/empty."""
            if hasattr(self.element, "namespaceURI"):
                return self.element.namespaceURI or None
            return None

        def appendChild(self, node):
            node.parent = self
            self.element.appendChild(node.element)

        def insertText(self, data, insertBefore=None):
            text = self.element.ownerDocument.createTextNode(data)
            if insertBefore:
                self.element.insertBefore(text, insertBefore.element)
            else:
                self.element.appendChild(text)

        def insertBefore(self, node, refNode):
            self.element.insertBefore(node.element, refNode.element)
            node.parent = self

        def removeChild(self, node):
            # Only detach the DOM node if it really is our child; the
            # wrapper's parent is cleared either way.
            if node.element.parentNode == self.element:
                self.element.removeChild(node.element)
            node.parent = None

        def reparentChildren(self, newParent):
            while self.element.hasChildNodes():
                child = self.element.firstChild
                self.element.removeChild(child)
                newParent.element.appendChild(child)
            self.childNodes = []

        def getAttributes(self):
            return AttrList(self.element)

        def setAttributes(self, attributes):
            if attributes:
                for name, value in list(attributes.items()):
                    if isinstance(name, tuple):
                        # Tuple names are indexed as (prefix, local name,
                        # namespace).
                        if name[0] is not None:
                            qualifiedName = (name[0] + ":" + name[1])
                        else:
                            qualifiedName = name[1]
                        self.element.setAttributeNS(name[2], qualifiedName,
                                                    value)
                    else:
                        self.element.setAttribute(
                            name, value)
        attributes = property(getAttributes, setAttributes)

        def cloneNode(self):
            return NodeBuilder(self.element.cloneNode(False))

        def hasContent(self):
            return self.element.hasChildNodes()

        def getNameTuple(self):
            if self.namespace is None:
                return namespaces["html"], self.name
            else:
                return self.namespace, self.name

        nameTuple = property(getNameTuple)

    class TreeBuilder(_base.TreeBuilder):
        """Treebuilder producing a DOM document held in self.dom."""

        def documentClass(self):
            # The builder itself plays the role of the document node: a new
            # DOM document is created and a weak proxy to self is returned.
            self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
            return weakref.proxy(self)

        def insertDoctype(self, token):
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]

            domimpl = Dom.getDOMImplementation()
            doctype = domimpl.createDocumentType(name, publicId, systemId)
            self.document.appendChild(NodeBuilder(doctype))
            if Dom == minidom:
                doctype.ownerDocument = self.dom

        def elementClass(self, name, namespace=None):
            if namespace is None and self.defaultNamespace is None:
                node = self.dom.createElement(name)
            else:
                node = self.dom.createElementNS(namespace, name)

            return NodeBuilder(node)

        def commentClass(self, data):
            return NodeBuilder(self.dom.createComment(data))

        def fragmentClass(self):
            return NodeBuilder(self.dom.createDocumentFragment())

        def appendChild(self, node):
            self.dom.appendChild(node.element)

        def testSerializer(self, element):
            return testSerializer(element)

        def getDocument(self):
            return self.dom

        def getFragment(self):
            return _base.TreeBuilder.getFragment(self).element

        def insertText(self, data, parent=None):
            if parent != self:
                _base.TreeBuilder.insertText(self, data, parent)
            else:
                # HACK: allow text nodes as children of the document node
                if hasattr(self.dom, '_child_node_types'):
                    if Node.TEXT_NODE not in self.dom._child_node_types:
                        self.dom._child_node_types = list(self.dom._child_node_types)
                        self.dom._child_node_types.append(Node.TEXT_NODE)
                self.dom.appendChild(self.dom.createTextNode(data))

        implementation = DomImplementation
        name = None

    def testSerializer(element):
        """Serialize the DOM subtree at element in the unit-test format."""
        element.normalize()
        rv = []

        def serializeElement(element, indent=0):
            if element.nodeType == Node.DOCUMENT_TYPE_NODE:
                if element.name:
                    if element.publicId or element.systemId:
                        publicId = element.publicId or ""
                        systemId = element.systemId or ""
                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
                                  (' ' * indent, element.name, publicId, systemId))
                    else:
                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
                else:
                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
            elif element.nodeType == Node.DOCUMENT_NODE:
                rv.append("#document")
            elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
                rv.append("#document-fragment")
            elif element.nodeType == Node.COMMENT_NODE:
                rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
            elif element.nodeType == Node.TEXT_NODE:
                rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
            else:
                if (hasattr(element, "namespaceURI") and
                        element.namespaceURI is not None):
                    name = "%s %s" % (constants.prefixes[element.namespaceURI],
                                      element.nodeName)
                else:
                    name = element.nodeName
                rv.append("|%s<%s>" % (' ' * indent, name))
                if element.hasAttributes():
                    attributes = []
                    for i in range(len(element.attributes)):
                        attr = element.attributes.item(i)
                        value = attr.value
                        ns = attr.namespaceURI
                        if ns:
                            name = "%s %s" % (constants.prefixes[ns], attr.localName)
                        else:
                            name = attr.nodeName
                        attributes.append((name, value))

                    # Attributes are emitted in sorted order.
                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
            indent += 2
            for child in element.childNodes:
                serializeElement(child, indent)
        serializeElement(element, 0)

        return "\n".join(rv)

    return locals()
|
|
||||||
|
|
||||||
|
|
||||||
# The actual means to get a module!
|
|
||||||
getDomModule = moduleFactoryFactory(getDomBuilder)
|
|
||||||
@@ -1,337 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
from pip._vendor.six import text_type
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
from .. import ihatexml
|
|
||||||
from .. import constants
|
|
||||||
from ..constants import namespaces
|
|
||||||
from ..utils import moduleFactoryFactory
|
|
||||||
|
|
||||||
tag_regexp = re.compile("{([^}]*)}(.*)")
|
|
||||||
|
|
||||||
|
|
||||||
def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
|
||||||
ElementTree = ElementTreeImplementation
|
|
||||||
ElementTreeCommentType = ElementTree.Comment("asd").tag
|
|
||||||
|
|
||||||
class Element(_base.Node):
|
|
||||||
def __init__(self, name, namespace=None):
|
|
||||||
self._name = name
|
|
||||||
self._namespace = namespace
|
|
||||||
self._element = ElementTree.Element(self._getETreeTag(name,
|
|
||||||
namespace))
|
|
||||||
if namespace is None:
|
|
||||||
self.nameTuple = namespaces["html"], self._name
|
|
||||||
else:
|
|
||||||
self.nameTuple = self._namespace, self._name
|
|
||||||
self.parent = None
|
|
||||||
self._childNodes = []
|
|
||||||
self._flags = []
|
|
||||||
|
|
||||||
def _getETreeTag(self, name, namespace):
|
|
||||||
if namespace is None:
|
|
||||||
etree_tag = name
|
|
||||||
else:
|
|
||||||
etree_tag = "{%s}%s" % (namespace, name)
|
|
||||||
return etree_tag
|
|
||||||
|
|
||||||
def _setName(self, name):
|
|
||||||
self._name = name
|
|
||||||
self._element.tag = self._getETreeTag(self._name, self._namespace)
|
|
||||||
|
|
||||||
def _getName(self):
|
|
||||||
return self._name
|
|
||||||
|
|
||||||
name = property(_getName, _setName)
|
|
||||||
|
|
||||||
def _setNamespace(self, namespace):
|
|
||||||
self._namespace = namespace
|
|
||||||
self._element.tag = self._getETreeTag(self._name, self._namespace)
|
|
||||||
|
|
||||||
def _getNamespace(self):
|
|
||||||
return self._namespace
|
|
||||||
|
|
||||||
namespace = property(_getNamespace, _setNamespace)
|
|
||||||
|
|
||||||
def _getAttributes(self):
|
|
||||||
return self._element.attrib
|
|
||||||
|
|
||||||
def _setAttributes(self, attributes):
|
|
||||||
# Delete existing attributes first
|
|
||||||
# XXX - there may be a better way to do this...
|
|
||||||
for key in list(self._element.attrib.keys()):
|
|
||||||
del self._element.attrib[key]
|
|
||||||
for key, value in attributes.items():
|
|
||||||
if isinstance(key, tuple):
|
|
||||||
name = "{%s}%s" % (key[2], key[1])
|
|
||||||
else:
|
|
||||||
name = key
|
|
||||||
self._element.set(name, value)
|
|
||||||
|
|
||||||
attributes = property(_getAttributes, _setAttributes)
|
|
||||||
|
|
||||||
def _getChildNodes(self):
|
|
||||||
return self._childNodes
|
|
||||||
|
|
||||||
def _setChildNodes(self, value):
|
|
||||||
del self._element[:]
|
|
||||||
self._childNodes = []
|
|
||||||
for element in value:
|
|
||||||
self.insertChild(element)
|
|
||||||
|
|
||||||
childNodes = property(_getChildNodes, _setChildNodes)
|
|
||||||
|
|
||||||
def hasContent(self):
|
|
||||||
"""Return true if the node has children or text"""
|
|
||||||
return bool(self._element.text or len(self._element))
|
|
||||||
|
|
||||||
def appendChild(self, node):
|
|
||||||
self._childNodes.append(node)
|
|
||||||
self._element.append(node._element)
|
|
||||||
node.parent = self
|
|
||||||
|
|
||||||
def insertBefore(self, node, refNode):
|
|
||||||
index = list(self._element).index(refNode._element)
|
|
||||||
self._element.insert(index, node._element)
|
|
||||||
node.parent = self
|
|
||||||
|
|
||||||
def removeChild(self, node):
|
|
||||||
self._element.remove(node._element)
|
|
||||||
node.parent = None
|
|
||||||
|
|
||||||
def insertText(self, data, insertBefore=None):
|
|
||||||
if not(len(self._element)):
|
|
||||||
if not self._element.text:
|
|
||||||
self._element.text = ""
|
|
||||||
self._element.text += data
|
|
||||||
elif insertBefore is None:
|
|
||||||
# Insert the text as the tail of the last child element
|
|
||||||
if not self._element[-1].tail:
|
|
||||||
self._element[-1].tail = ""
|
|
||||||
self._element[-1].tail += data
|
|
||||||
else:
|
|
||||||
# Insert the text before the specified node
|
|
||||||
children = list(self._element)
|
|
||||||
index = children.index(insertBefore._element)
|
|
||||||
if index > 0:
|
|
||||||
if not self._element[index - 1].tail:
|
|
||||||
self._element[index - 1].tail = ""
|
|
||||||
self._element[index - 1].tail += data
|
|
||||||
else:
|
|
||||||
if not self._element.text:
|
|
||||||
self._element.text = ""
|
|
||||||
self._element.text += data
|
|
||||||
|
|
||||||
def cloneNode(self):
|
|
||||||
element = type(self)(self.name, self.namespace)
|
|
||||||
for name, value in self.attributes.items():
|
|
||||||
element.attributes[name] = value
|
|
||||||
return element
|
|
||||||
|
|
||||||
def reparentChildren(self, newParent):
|
|
||||||
if newParent.childNodes:
|
|
||||||
newParent.childNodes[-1]._element.tail += self._element.text
|
|
||||||
else:
|
|
||||||
if not newParent._element.text:
|
|
||||||
newParent._element.text = ""
|
|
||||||
if self._element.text is not None:
|
|
||||||
newParent._element.text += self._element.text
|
|
||||||
self._element.text = ""
|
|
||||||
_base.Node.reparentChildren(self, newParent)
|
|
||||||
|
|
||||||
class Comment(Element):
|
|
||||||
def __init__(self, data):
|
|
||||||
# Use the superclass constructor to set all properties on the
|
|
||||||
# wrapper element
|
|
||||||
self._element = ElementTree.Comment(data)
|
|
||||||
self.parent = None
|
|
||||||
self._childNodes = []
|
|
||||||
self._flags = []
|
|
||||||
|
|
||||||
def _getData(self):
|
|
||||||
return self._element.text
|
|
||||||
|
|
||||||
def _setData(self, value):
|
|
||||||
self._element.text = value
|
|
||||||
|
|
||||||
data = property(_getData, _setData)
|
|
||||||
|
|
||||||
class DocumentType(Element):
    """Doctype node stored as a pseudo-element tagged ``<!DOCTYPE>``.

    The doctype name is kept in the element text; the public and system
    identifiers are kept as the ``publicId`` / ``systemId`` attributes of
    the wrapped element.
    """

    def __init__(self, name, publicId, systemId):
        Element.__init__(self, "<!DOCTYPE>")
        self._element.text = name
        # These go through the property setters defined below.
        self.publicId = publicId
        self.systemId = systemId

    def _getPublicId(self):
        """Return the stored public identifier, or "" when none is set."""
        return self._element.get("publicId", "")

    def _setPublicId(self, value):
        """Store the public identifier; a value of None is ignored."""
        if value is None:
            return
        self._element.set("publicId", value)

    publicId = property(_getPublicId, _setPublicId)

    def _getSystemId(self):
        """Return the stored system identifier, or "" when none is set."""
        return self._element.get("systemId", "")

    def _setSystemId(self, value):
        """Store the system identifier; a value of None is ignored."""
        if value is None:
            return
        self._element.set("systemId", value)

    systemId = property(_getSystemId, _setSystemId)
|
|
||||||
|
|
||||||
class Document(Element):
    """Document node, represented as an element tagged ``DOCUMENT_ROOT``."""

    def __init__(self):
        super(Document, self).__init__("DOCUMENT_ROOT")
|
|
||||||
|
|
||||||
class DocumentFragment(Element):
    """Fragment node, represented as an element tagged ``DOCUMENT_FRAGMENT``."""

    def __init__(self):
        super(DocumentFragment, self).__init__("DOCUMENT_FRAGMENT")
|
|
||||||
|
|
||||||
def testSerializer(element):
|
|
||||||
rv = []
|
|
||||||
|
|
||||||
def serializeElement(element, indent=0):
|
|
||||||
if not(hasattr(element, "tag")):
|
|
||||||
element = element.getroot()
|
|
||||||
if element.tag == "<!DOCTYPE>":
|
|
||||||
if element.get("publicId") or element.get("systemId"):
|
|
||||||
publicId = element.get("publicId") or ""
|
|
||||||
systemId = element.get("systemId") or ""
|
|
||||||
rv.append("""<!DOCTYPE %s "%s" "%s">""" %
|
|
||||||
(element.text, publicId, systemId))
|
|
||||||
else:
|
|
||||||
rv.append("<!DOCTYPE %s>" % (element.text,))
|
|
||||||
elif element.tag == "DOCUMENT_ROOT":
|
|
||||||
rv.append("#document")
|
|
||||||
if element.text is not None:
|
|
||||||
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
|
|
||||||
if element.tail is not None:
|
|
||||||
raise TypeError("Document node cannot have tail")
|
|
||||||
if hasattr(element, "attrib") and len(element.attrib):
|
|
||||||
raise TypeError("Document node cannot have attributes")
|
|
||||||
elif element.tag == ElementTreeCommentType:
|
|
||||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
|
|
||||||
else:
|
|
||||||
assert isinstance(element.tag, text_type), \
|
|
||||||
"Expected unicode, got %s, %s" % (type(element.tag), element.tag)
|
|
||||||
nsmatch = tag_regexp.match(element.tag)
|
|
||||||
|
|
||||||
if nsmatch is None:
|
|
||||||
name = element.tag
|
|
||||||
else:
|
|
||||||
ns, name = nsmatch.groups()
|
|
||||||
prefix = constants.prefixes[ns]
|
|
||||||
name = "%s %s" % (prefix, name)
|
|
||||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
|
||||||
|
|
||||||
if hasattr(element, "attrib"):
|
|
||||||
attributes = []
|
|
||||||
for name, value in element.attrib.items():
|
|
||||||
nsmatch = tag_regexp.match(name)
|
|
||||||
if nsmatch is not None:
|
|
||||||
ns, name = nsmatch.groups()
|
|
||||||
prefix = constants.prefixes[ns]
|
|
||||||
attr_string = "%s %s" % (prefix, name)
|
|
||||||
else:
|
|
||||||
attr_string = name
|
|
||||||
attributes.append((attr_string, value))
|
|
||||||
|
|
||||||
for name, value in sorted(attributes):
|
|
||||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
|
||||||
if element.text:
|
|
||||||
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
|
|
||||||
indent += 2
|
|
||||||
for child in element:
|
|
||||||
serializeElement(child, indent)
|
|
||||||
if element.tail:
|
|
||||||
rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
|
|
||||||
serializeElement(element, 0)
|
|
||||||
|
|
||||||
return "\n".join(rv)
|
|
||||||
|
|
||||||
def tostring(element):
    """Serialize an element and its child nodes to a string.

    Accepts either an element or an ``ElementTree.ElementTree`` (whose
    root is then used). Doctype, document-root and comment pseudo-elements
    get their own markup; every other element is written as a start tag,
    its text, its children and an end tag, followed by its tail.

    Raises TypeError when a ``DOCUMENT_ROOT`` element carries a tail or
    attributes.
    """
    rv = []
    # Named infosetFilter rather than ``filter`` so the builtin is not
    # shadowed.
    infosetFilter = ihatexml.InfosetFilter()

    def serializeElement(element):
        if isinstance(element, ElementTree.ElementTree):
            element = element.getroot()

        if element.tag == "<!DOCTYPE>":
            if element.get("publicId") or element.get("systemId"):
                publicId = element.get("publicId") or ""
                systemId = element.get("systemId") or ""
                rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                          (element.text, publicId, systemId))
            else:
                rv.append("<!DOCTYPE %s>" % (element.text,))
        elif element.tag == "DOCUMENT_ROOT":
            if element.text is not None:
                rv.append(element.text)
            if element.tail is not None:
                raise TypeError("Document node cannot have tail")
            if hasattr(element, "attrib") and len(element.attrib):
                raise TypeError("Document node cannot have attributes")

            for child in element:
                serializeElement(child)

        elif element.tag == ElementTreeCommentType:
            rv.append("<!--%s-->" % (element.text,))
        else:
            # This is assumed to be an ordinary element. The tag name is
            # decoded once and reused so the start tag (with or without
            # attributes) and the end tag always agree.
            tagName = infosetFilter.fromXmlName(element.tag)
            if not element.attrib:
                rv.append("<%s>" % (tagName,))
            else:
                attr = " ".join(["%s=\"%s\"" % (
                    infosetFilter.fromXmlName(name), value)
                    for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (tagName, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (tagName,))

        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
|
|
||||||
|
|
||||||
class TreeBuilder(_base.TreeBuilder):
    """Tree builder producing the ElementTree-backed node wrappers."""

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = Comment
    fragmentClass = DocumentFragment
    implementation = ElementTreeImplementation

    def testSerializer(self, element):
        """Delegate to the ``testSerializer`` function in scope."""
        return testSerializer(element)

    def getDocument(self):
        """Return the built document.

        With ``fullTree`` set this is the element wrapped by the document
        node; otherwise it is the result of looking up the ``html`` child
        (namespace-qualified when a default namespace is set).
        """
        documentElement = self.document._element
        if fullTree:
            return documentElement
        if self.defaultNamespace is None:
            return documentElement.find("html")
        return documentElement.find("{%s}html" % self.defaultNamespace)

    def getFragment(self):
        """Return the element wrapped by the base builder's fragment."""
        fragment = _base.TreeBuilder.getFragment(self)
        return fragment._element
|
|
||||||
|
|
||||||
return locals()
|
|
||||||
|
|
||||||
|
|
||||||
getETreeModule = moduleFactoryFactory(getETreeBuilder)
|
|
||||||
@@ -1,369 +0,0 @@
|
|||||||
"""Module for supporting the lxml.etree library. The idea here is to use as much
|
|
||||||
of the native library as possible, without using fragile hacks like custom element
|
|
||||||
names that break between releases. The downside of this is that we cannot represent
|
|
||||||
all possible trees; specifically the following are known to cause problems:
|
|
||||||
|
|
||||||
Text or comments as siblings of the root element
|
|
||||||
Doctypes with no name
|
|
||||||
|
|
||||||
When any of these things occur, we emit a DataLossWarning
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
import warnings
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
from ..constants import DataLossWarning
|
|
||||||
from .. import constants
|
|
||||||
from . import etree as etree_builders
|
|
||||||
from .. import ihatexml
|
|
||||||
|
|
||||||
import lxml.etree as etree
|
|
||||||
|
|
||||||
|
|
||||||
fullTree = True
|
|
||||||
tag_regexp = re.compile("{([^}]*)}(.*)")
|
|
||||||
|
|
||||||
comment_type = etree.Comment("asd").tag
|
|
||||||
|
|
||||||
|
|
||||||
class DocumentType(object):
    """Plain holder for a doctype's name, public id and system id."""

    def __init__(self, name, publicId, systemId):
        self.name, self.publicId, self.systemId = name, publicId, systemId
|
|
||||||
|
|
||||||
|
|
||||||
class Document(object):
    """Document wrapper holding an element tree and a list of child nodes."""

    def __init__(self):
        self._childNodes = []
        self._elementTree = None

    def appendChild(self, element):
        """Insert the node's wrapped element right after the tree's root."""
        root = self._elementTree.getroot()
        root.addnext(element._element)

    def _getChildNodes(self):
        """Return the internal child-node list itself (not a copy)."""
        return self._childNodes

    # Read-only: no setter is provided.
    childNodes = property(_getChildNodes)
|
|
||||||
|
|
||||||
|
|
||||||
def testSerializer(element):
    """Render a tree, element, fragment or text as ``|``-prefixed lines.

    An object without a ``tag`` is treated as a full tree (if it has
    ``getroot``), as fragment text (if it is ``str`` or ``bytes``) or as
    an iterable fragment. Comments and ordinary elements are rendered
    with their tails. The lines are joined with newlines and returned.
    """
    rv = []
    # Never reassigned below, so the final-text branch at the end is inert.
    finalText = None
    infosetFilter = ihatexml.InfosetFilter()

    def serializeElement(node, indent=0):
        if not hasattr(node, "tag"):
            if hasattr(node, "getroot"):
                # Full tree case
                rv.append("#document")
                docinfo = node.docinfo
                if docinfo.internalDTD:
                    if docinfo.public_id or docinfo.system_url:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                            docinfo.root_name,
                            docinfo.public_id,
                            docinfo.system_url)
                    else:
                        dtd_str = "<!DOCTYPE %s>" % docinfo.root_name
                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                # Rewind to the first sibling of the root, then walk
                # forward so nodes before and after the root are included.
                sibling = node.getroot()
                while sibling.getprevious() is not None:
                    sibling = sibling.getprevious()
                while sibling is not None:
                    serializeElement(sibling, indent + 2)
                    sibling = sibling.getnext()
                return
            if isinstance(node, str) or isinstance(node, bytes):
                # Text in a fragment
                assert isinstance(node, str) or sys.version_info.major == 2
                rv.append("|%s\"%s\"" % (' ' * indent, node))
                return
            # Fragment case
            rv.append("#document-fragment")
            for item in node:
                serializeElement(item, indent + 2)
            return

        if node.tag == comment_type:
            rv.append("|%s<!-- %s -->" % (' ' * indent, node.text))
            if hasattr(node, "tail") and node.tail:
                rv.append("|%s\"%s\"" % (' ' * indent, node.tail))
            return

        assert isinstance(node, etree._Element)
        tagMatch = etree_builders.tag_regexp.match(node.tag)
        if tagMatch is None:
            rv.append("|%s<%s>" % (' ' * indent,
                                   infosetFilter.fromXmlName(node.tag)))
        else:
            ns = tagMatch.group(1)
            tag = tagMatch.group(2)
            prefix = constants.prefixes[ns]
            rv.append("|%s<%s %s>" % (' ' * indent, prefix,
                                      infosetFilter.fromXmlName(tag)))

        if hasattr(node, "attrib"):
            attributes = []
            for name, value in node.attrib.items():
                attrMatch = tag_regexp.match(name)
                if attrMatch is None:
                    attr_string = infosetFilter.fromXmlName(name)
                else:
                    ns, name = attrMatch.groups()
                    name = infosetFilter.fromXmlName(name)
                    prefix = constants.prefixes[ns]
                    attr_string = "%s %s" % (prefix, name)
                attributes.append((attr_string, value))

            # Sorted so the output does not depend on attribute order.
            for name, value in sorted(attributes):
                rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))

        if node.text:
            rv.append("|%s\"%s\"" % (' ' * (indent + 2), node.text))
        for child in node:
            serializeElement(child, indent + 2)
        if hasattr(node, "tail") and node.tail:
            rv.append("|%s\"%s\"" % (' ' * indent, node.tail))

    serializeElement(element, 0)

    if finalText is not None:
        rv.append("|%s\"%s\"" % (' ' * 2, finalText))

    return "\n".join(rv)
|
|
||||||
|
|
||||||
|
|
||||||
def tostring(element):
    """Serialize an element and its child nodes to a string.

    An object without a ``tag`` attribute is treated as a tree: its
    doctype (when ``docinfo.internalDTD`` is set) is written first and
    serialization continues with ``getroot()``. Comments are written as
    ``<!--text-->``; any other element as start tag, text, children and
    end tag. Tails are appended where present.
    """
    rv = []

    def serializeElement(element):
        if not hasattr(element, "tag"):
            if element.docinfo.internalDTD:
                if element.docinfo.doctype:
                    dtd_str = element.docinfo.doctype
                else:
                    dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                rv.append(dtd_str)
            serializeElement(element.getroot())

        elif element.tag == comment_type:
            rv.append("<!--%s-->" % (element.text,))

        else:
            # This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>" % (element.tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (name, value)
                                 for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (element.tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (element.tag,))

        if hasattr(element, "tail") and element.tail:
            rv.append(element.tail)

    serializeElement(element)

    # The former ``finalText`` local was never assigned a value other than
    # None, and the branch guarded by it used a format string with one
    # placeholder for two arguments (a TypeError had it ever run); both
    # were removed.
    return "".join(rv)
|
|
||||||
|
|
||||||
|
|
||||||
class TreeBuilder(_base.TreeBuilder):
    """Tree builder that constructs an lxml.etree document.

    The element and comment classes are created per instance in
    ``__init__`` because they close over that instance's infoset filter.
    """

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        """Create the per-instance node classes and initialise the base.

        ``fullTree`` is forwarded to ``etree_builders.getETreeModule``.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        def attribName(key):
            # Tuple keys are rendered as "{key[2]}<coerced key[1]>";
            # plain keys are just coerced.
            if isinstance(key, tuple):
                return "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
            return infosetFilter.coerceAttribute(key)

        class Attributes(dict):
            """dict that mirrors every entry onto the lxml element's attrib."""

            def __init__(self, element, value=None):
                # None replaces the former mutable ``{}`` default.
                self._element = element
                dict.__init__(self, {} if value is None else value)
                for key, item in self.items():
                    self._element._element.attrib[attribName(key)] = item

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[attribName(key)] = value

        class Element(builder.Element):
            """Element whose name, attributes and text are coerced for lxml."""

            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            """Comment whose data is passed through coerceComment."""

            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        # Use the coercing subclass defined above; assigning
        # builder.Comment here left that subclass unused, so comment data
        # was never coerced.
        self.commentClass = Comment
        # self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        """Reset the base builder and the pre-root comment/doctype state."""
        _base.TreeBuilder.reset(self)
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        """Delegate to the module-level ``testSerializer`` function."""
        return testSerializer(element)

    def getDocument(self):
        """Return the element tree, or only its root element.

        NOTE(review): this reads the module-level ``fullTree`` name, not
        the ``fullTree`` argument given to ``__init__`` -- confirm that is
        intended.
        """
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        """Return the first open element's text, children and tail as a list."""
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        """Record the doctype from token; it is emitted later by insertRoot.

        Warns with DataLossWarning when the name is empty (no doctype is
        recorded) or had to be coerced.
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        """Queue a comment token seen before the root element exists."""
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        """Insert a comment once the root exists, via the base insertComment.

        Warns with DataLossWarning when the comment targets the document
        and the root's last child is already a comment.
        """
        if (parent == self.document and
                self.document._elementTree.getroot()[-1].tag == comment_type):
            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        """Create the document root"""
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    if sysid.find("'") >= 0 and sysid.find('"') >= 0:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    # Quote with whichever quote character the id does
                    # not contain.
                    if sysid.find("'") >= 0:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments:
        for comment_token in self.initial_comments:
            root.addprevious(etree.Comment(comment_token["data"]))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        root.tag = etree_tag

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        # Reset to the default insert comment function
        self.insertComment = self.insertCommentMain
|
|
||||||
@@ -1,147 +0,0 @@
|
|||||||
"""A collection of modules for iterating through different kinds of
|
|
||||||
tree, generating tokens identical to those produced by the tokenizer
|
|
||||||
module.
|
|
||||||
|
|
||||||
To create a tree walker for a new type of tree, you need to
|
|
||||||
implement a tree walker object (called TreeWalker by convention) that
|
|
||||||
implements a 'serialize' method taking a tree as sole argument and
|
|
||||||
returning an iterator generating tokens.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
|
|
||||||
"pulldom"]
|
|
||||||
|
|
||||||
import sys
|
|
||||||
|
|
||||||
from .. import constants
|
|
||||||
from ..utils import default_etree
|
|
||||||
|
|
||||||
treeWalkerCache = {}
|
|
||||||
|
|
||||||
|
|
||||||
def getTreeWalker(treeType, implementation=None, **kwargs):
    """Get a TreeWalker class for various types of tree with built-in support

    treeType - the name of the tree type required (case-insensitive). Supported
               values are:

                "dom" - The xml.dom.minidom DOM implementation
                "pulldom" - The xml.dom.pulldom event stream
                "etree" - A generic walker for tree implementations exposing an
                          elementtree-like interface (known to work with
                          ElementTree, cElementTree and lxml.etree).
                "lxml" - Optimized walker for lxml.etree
                "genshi" - a Genshi stream

    implementation - (Currently applies to the "etree" tree type only). A module
                      implementing the tree type e.g. xml.etree.ElementTree or
                      cElementTree.

    Returns None for a tree type that is not listed above."""

    kind = treeType.lower()
    if kind in treeWalkerCache:
        return treeWalkerCache[kind]

    if kind == "etree":
        from . import etree
        etreeImplementation = default_etree if implementation is None else implementation
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(etreeImplementation, **kwargs).TreeWalker

    if kind in ("dom", "pulldom"):
        moduleName = "%s.%s" % (__name__, kind)
        __import__(moduleName)
        treeWalkerCache[kind] = sys.modules[moduleName].TreeWalker
    elif kind == "genshi":
        from . import genshistream
        treeWalkerCache[kind] = genshistream.TreeWalker
    elif kind == "lxml":
        from . import lxmletree
        treeWalkerCache[kind] = lxmletree.TreeWalker

    return treeWalkerCache.get(kind)
|
|
||||||
|
|
||||||
|
|
||||||
def concatenateCharacterTokens(tokens):
    """Yield tokens with each run of character tokens merged into one.

    Consecutive "Characters" and "SpaceCharacters" tokens are replaced by
    a single "Characters" token whose data is their concatenation; every
    other token is passed through unchanged.
    """
    buffered = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            buffered.append(token["data"])
            continue
        if buffered:
            # Flush the pending run before the non-character token.
            yield {"type": "Characters", "data": "".join(buffered)}
            buffered = []
        yield token
    if buffered:
        yield {"type": "Characters", "data": "".join(buffered)}
|
|
||||||
|
|
||||||
|
|
||||||
def pprint(walker):
    """Pretty printer for tree walkers

    Consumes the walker's tokens (with adjacent character tokens merged)
    and returns one line per start/empty tag, attribute, comment, doctype
    and text run, indented two spaces per open element and joined with
    newlines.

    Raises ValueError for a token type that is not handled.
    """
    output = []
    indent = 0

    def prefixed(namespace, localname):
        # Use the known short prefix when there is one, else the raw
        # namespace.
        ns = constants.prefixes.get(namespace, namespace)
        return "%s %s" % (ns, localname)

    def attrSortKey(item):
        # Attribute keys are (namespace, localname) pairs whose namespace
        # may be None; Python 3 cannot order None against str, so None is
        # mapped to "" (which still sorts first).
        (namespace, localname), _ = item
        return (namespace or "", localname)

    for token in concatenateCharacterTokens(walker):
        tokenType = token["type"]
        if tokenType in ("StartTag", "EmptyTag"):
            # tag name
            if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
                name = prefixed(token["namespace"], token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            # attributes (sorted for consistent ordering)
            attrs = token["data"]
            for (namespace, localname), value in sorted(attrs.items(), key=attrSortKey):
                if namespace:
                    name = prefixed(namespace, localname)
                else:
                    name = localname
                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
            # self-closing
            if tokenType == "EmptyTag":
                indent -= 2

        elif tokenType == "EndTag":
            indent -= 2

        elif tokenType == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))

        elif tokenType == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["publicId"],
                                   token["systemId"] if token["systemId"] else ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" " * indent,))

        elif tokenType == "Characters":
            output.append("%s\"%s\"" % (" " * indent, token["data"]))

        elif tokenType == "SpaceCharacters":
            assert False, "concatenateCharacterTokens should have got rid of all Space tokens"

        else:
            raise ValueError("Unknown token type, %s" % tokenType)

    return "\n".join(output)
|
|
||||||
@@ -1,200 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
from pip._vendor.six import text_type, string_types
|
|
||||||
|
|
||||||
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
|
|
||||||
"TreeWalker", "NonRecursiveTreeWalker"]
|
|
||||||
|
|
||||||
from xml.dom import Node
|
|
||||||
|
|
||||||
DOCUMENT = Node.DOCUMENT_NODE
|
|
||||||
DOCTYPE = Node.DOCUMENT_TYPE_NODE
|
|
||||||
TEXT = Node.TEXT_NODE
|
|
||||||
ELEMENT = Node.ELEMENT_NODE
|
|
||||||
COMMENT = Node.COMMENT_NODE
|
|
||||||
ENTITY = Node.ENTITY_NODE
|
|
||||||
UNKNOWN = "<#UNKNOWN#>"
|
|
||||||
|
|
||||||
from ..constants import voidElements, spaceCharacters
|
|
||||||
spaceCharacters = "".join(spaceCharacters)
|
|
||||||
|
|
||||||
|
|
||||||
def to_text(s, blank_if_none=True):
    """Convert s to text, with special handling for None.

    None becomes "" when blank_if_none is true and stays None otherwise.
    A value that is already ``text_type`` is returned unchanged; anything
    else is passed through ``text_type``.
    """
    if s is None:
        return "" if blank_if_none else None
    if isinstance(s, text_type):
        return s
    return text_type(s)
|
|
||||||
|
|
||||||
|
|
||||||
def is_text_or_none(string):
    """Return True when string is None or an instance of ``string_types``."""
    if string is None:
        return True
    return isinstance(string, string_types)
|
|
||||||
|
|
||||||
|
|
||||||
class TreeWalker(object):
    """Base class for tree walkers.

    Subclasses implement ``__iter__``; the other methods build the token
    dictionaries that iteration yields.
    """

    def __init__(self, tree):
        """Remember the tree to walk."""
        self.tree = tree

    def __iter__(self):
        raise NotImplementedError

    def error(self, msg):
        """Return a SerializeError token carrying msg."""
        return {"type": "SerializeError", "data": msg}

    def emptyTag(self, namespace, name, attrs, hasChildren=False):
        """Yield an EmptyTag token, then an error token if hasChildren."""
        assert namespace is None or isinstance(namespace, string_types), type(namespace)
        assert isinstance(name, string_types), type(name)
        assert all((namespace is None or isinstance(namespace, string_types)) and
                   isinstance(name, string_types) and
                   isinstance(value, string_types)
                   for (namespace, name), value in attrs.items())

        yield {"type": "EmptyTag", "name": to_text(name, False),
               "namespace": to_text(namespace),
               "data": attrs}
        if hasChildren:
            yield self.error("Void element has children")

    def startTag(self, namespace, name, attrs):
        """Return a StartTag token; attribute keys and values become text."""
        assert namespace is None or isinstance(namespace, string_types), type(namespace)
        assert isinstance(name, string_types), type(name)
        assert all((namespace is None or isinstance(namespace, string_types)) and
                   isinstance(name, string_types) and
                   isinstance(value, string_types)
                   for (namespace, name), value in attrs.items())

        return {"type": "StartTag",
                "name": text_type(name),
                "namespace": to_text(namespace),
                "data": dict(((to_text(namespace, False), to_text(name)),
                              to_text(value, False))
                             for (namespace, name), value in attrs.items())}

    def endTag(self, namespace, name):
        """Return an EndTag token for the given namespace and name."""
        assert namespace is None or isinstance(namespace, string_types), type(namespace)
        # The failure message reports the type of the value actually
        # being checked (it previously reported type(namespace)).
        assert isinstance(name, string_types), type(name)

        return {"type": "EndTag",
                "name": to_text(name, False),
                "namespace": to_text(namespace),
                "data": {}}

    def text(self, data):
        """Yield up to three tokens for data.

        Leading characters from ``spaceCharacters``, the middle part and
        trailing characters from ``spaceCharacters`` are yielded as
        SpaceCharacters / Characters / SpaceCharacters; empty parts are
        skipped.
        """
        assert isinstance(data, string_types), type(data)

        data = to_text(data)
        middle = data.lstrip(spaceCharacters)
        left = data[:len(data) - len(middle)]
        if left:
            yield {"type": "SpaceCharacters", "data": left}
        data = middle
        middle = data.rstrip(spaceCharacters)
        right = data[len(middle):]
        if middle:
            yield {"type": "Characters", "data": middle}
        if right:
            yield {"type": "SpaceCharacters", "data": right}

    def comment(self, data):
        """Return a Comment token."""
        assert isinstance(data, string_types), type(data)

        return {"type": "Comment", "data": text_type(data)}

    def doctype(self, name, publicId=None, systemId=None, correct=True):
        """Return a Doctype token; every field is passed through to_text."""
        assert is_text_or_none(name), type(name)
        assert is_text_or_none(publicId), type(publicId)
        assert is_text_or_none(systemId), type(systemId)

        return {"type": "Doctype",
                "name": to_text(name),
                "publicId": to_text(publicId),
                "systemId": to_text(systemId),
                "correct": to_text(correct)}

    def entity(self, name):
        """Return an Entity token."""
        assert isinstance(name, string_types), type(name)

        return {"type": "Entity", "name": text_type(name)}

    def unknown(self, nodeType):
        """Return an error token describing an unrecognised node type.

        nodeType is formatted with ``%s`` rather than concatenated, so a
        non-string value such as an integer node-type constant no longer
        raises TypeError.
        """
        return self.error("Unknown node type: %s" % (nodeType,))
|
|
||||||
|
|
||||||
|
|
||||||
class NonRecursiveTreeWalker(TreeWalker):
    """Tree walker that iterates with explicit navigation, not recursion.

    Subclasses supply the four navigation hooks below. ``getNodeDetails``
    must return a sequence whose first item is one of the node-type
    constants; the remaining items are passed to the matching token
    method.
    """

    def getNodeDetails(self, node):
        raise NotImplementedError

    def getFirstChild(self, node):
        raise NotImplementedError

    def getNextSibling(self, node):
        raise NotImplementedError

    def getParentNode(self, node):
        raise NotImplementedError

    def __iter__(self):
        """Yield tokens for ``self.tree`` in document order."""
        node = self.tree
        while node is not None:
            info = self.getNodeDetails(node)
            kind, rest = info[0], info[1:]
            descend = False

            if kind == DOCTYPE:
                yield self.doctype(*rest)

            elif kind == TEXT:
                for token in self.text(*rest):
                    yield token

            elif kind == ELEMENT:
                namespace, name, attributes, descend = rest
                if name in voidElements:
                    for token in self.emptyTag(namespace, name, attributes,
                                               descend):
                        yield token
                    # Children of a void element are never walked.
                    descend = False
                else:
                    yield self.startTag(namespace, name, attributes)

            elif kind == COMMENT:
                yield self.comment(rest[0])

            elif kind == ENTITY:
                yield self.entity(rest[0])

            elif kind == DOCUMENT:
                descend = True

            else:
                yield self.unknown(rest[0])

            child = self.getFirstChild(node) if descend else None
            if child is not None:
                node = child
                continue

            # No child to enter: close elements while climbing until a
            # next sibling is found or the starting node is reached.
            while node is not None:
                info = self.getNodeDetails(node)
                kind, rest = info[0], info[1:]
                if kind == ELEMENT:
                    namespace, name, attributes, _ = rest
                    if name not in voidElements:
                        yield self.endTag(namespace, name)
                if self.tree is node:
                    node = None
                    break
                sibling = self.getNextSibling(node)
                if sibling is not None:
                    node = sibling
                    break
                node = self.getParentNode(node)
|
|
||||||
@@ -1,43 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from xml.dom import Node
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
|
|
||||||
|
|
||||||
class TreeWalker(_base.NonRecursiveTreeWalker):
    """Non-recursive tree walker over ``xml.dom`` style nodes."""

    def getNodeDetails(self, node):
        """Map *node* to a ``_base`` node-type tuple based on its nodeType."""
        node_type = node.nodeType

        if node_type == Node.DOCUMENT_TYPE_NODE:
            return _base.DOCTYPE, node.name, node.publicId, node.systemId

        if node_type in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return _base.TEXT, node.nodeValue

        if node_type == Node.ELEMENT_NODE:
            attrs = {}
            for attr_name in list(node.attributes.keys()):
                attr = node.getAttributeNode(attr_name)
                # Attributes are keyed by (namespace, name); attributes
                # without a namespace use None and the plain name.
                if attr.namespaceURI:
                    attr_key = (attr.namespaceURI, attr.localName)
                else:
                    attr_key = (None, attr.name)
                attrs[attr_key] = attr.value
            return (_base.ELEMENT, node.namespaceURI, node.nodeName,
                    attrs, node.hasChildNodes())

        if node_type == Node.COMMENT_NODE:
            return _base.COMMENT, node.nodeValue

        if node_type in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (_base.DOCUMENT,)

        return _base.UNKNOWN, node.nodeType

    def getFirstChild(self, node):
        """Return ``node.firstChild``."""
        return node.firstChild

    def getNextSibling(self, node):
        """Return ``node.nextSibling``."""
        return node.nextSibling

    def getParentNode(self, node):
        """Return ``node.parentNode``."""
        return node.parentNode
|
|
||||||
@@ -1,136 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
try:
|
|
||||||
from collections import OrderedDict
|
|
||||||
except ImportError:
|
|
||||||
try:
|
|
||||||
from ordereddict import OrderedDict
|
|
||||||
except ImportError:
|
|
||||||
OrderedDict = dict
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from pip._vendor.six import text_type
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
from ..utils import moduleFactoryFactory
|
|
||||||
|
|
||||||
# Splits a "{namespace}localname" string into its namespace (group 1) and
# local name (group 2); strings without a leading "{...}" do not match.
tag_regexp = re.compile("{([^}]*)}(.*)")
|
|
||||||
|
|
||||||
|
|
||||||
def getETreeBuilder(ElementTreeImplementation):
    """Build the tree walker for the given ElementTree implementation.

    Returns ``locals()``, i.e. a dict exposing ``ElementTree``,
    ``ElementTreeCommentType``, ``TreeWalker`` and the argument itself.
    """
    ElementTree = ElementTreeImplementation
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(_base.NonRecursiveTreeWalker):
        """Walker over an ElementTree structure.

        To avoid recursion, "nodes" handed between the navigation methods
        are tuples of four items:

        1. The current element

        2. The index of the element relative to its parent

        3. A stack of ancestor elements

        4. A flag "text", "tail" or None telling whether the node is a text
           node, namely the text or the tail of the current element (1)

        The root may also be passed around as a bare (non-tuple) object.
        """

        def getNodeDetails(self, node):
            """Describe *node* as a ``_base`` node-type tuple."""
            if isinstance(node, tuple):  # It might be the root Element
                elt, key, parents, flag = node
                if flag in ("text", "tail"):
                    return _base.TEXT, getattr(elt, flag)
                node = elt

            # Objects without a tag are asked for their root element.
            if not hasattr(node, "tag"):
                node = node.getroot()

            if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (_base.DOCUMENT,)

            if node.tag == "<!DOCTYPE>":
                return (_base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            if node.tag == ElementTreeCommentType:
                return _base.COMMENT, node.text

            assert type(node.tag) == text_type, type(node.tag)
            # This is assumed to be an ordinary element
            tag_match = tag_regexp.match(node.tag)
            if tag_match:
                namespace, tag = tag_match.groups()
            else:
                namespace, tag = None, node.tag

            attrs = OrderedDict()
            for name, value in list(node.attrib.items()):
                attr_match = tag_regexp.match(name)
                if attr_match:
                    attr_key = (attr_match.group(1), attr_match.group(2))
                else:
                    attr_key = (None, name)
                attrs[attr_key] = value
            return (_base.ELEMENT, namespace, tag,
                    attrs, len(node) or node.text)

        def getFirstChild(self, node):
            """Return the first child node tuple of *node*, or None."""
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                element, key, parents, flag = node, None, [], None

            if flag in ("text", "tail"):
                return None
            if element.text:
                # The element's own text comes before its child elements.
                return element, key, parents, "text"
            if len(element):
                parents.append(element)
                return element[0], 0, parents, None
            return None

        def getNextSibling(self, node):
            """Return the node tuple following *node*, or None."""
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # After an element's text comes its first child, if any.
                if len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                return None

            if element.tail and flag != "tail":
                return element, key, parents, "tail"
            if key < len(parents[-1]) - 1:
                return parents[-1][key + 1], key + 1, parents, None
            return None

        def getParentNode(self, node):
            """Return the parent of *node*, or None for a non-tuple node."""
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # The parent of a text node is the element that owns it.
                if not parents:
                    return element
                return element, key, parents, None

            parent = parents.pop()
            if not parents:
                return parent
            return parent, list(parents[-1]).index(parent), parents, None

    return locals()
|
|
||||||
|
|
||||||
getETreeModule = moduleFactoryFactory(getETreeBuilder)
|
|
||||||
@@ -1,69 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from genshi.core import QName
|
|
||||||
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
|
|
||||||
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
|
|
||||||
from ..constants import voidElements, namespaces
|
|
||||||
|
|
||||||
|
|
||||||
class TreeWalker(_base.TreeWalker):
    """Tree walker that converts a genshi event stream into tokens."""

    def __iter__(self):
        """Yield tokens for every event in ``self.tree``.

        Events are processed one step behind so that ``tokens`` can look at
        the event that follows the one being converted.
        """
        # Buffer the events so we can pass in the following one
        previous = None
        for event in self.tree:
            if previous is not None:
                for token in self.tokens(previous, event):
                    yield token
            previous = event

        # Don't forget the final event!
        if previous is not None:
            for token in self.tokens(previous, None):
                yield token

    def tokens(self, event, next):
        """Yield the tokens for a single genshi *event*.

        *event* is a ``(kind, data, pos)`` triple and *next* is the event
        following it, or None when *event* is the last one.
        """
        kind, data, pos = event
        if kind == START:
            tag, attribs = data
            name = tag.localname
            namespace = tag.namespace
            converted_attribs = {}
            for k, v in attribs:
                if isinstance(k, QName):
                    converted_attribs[(k.namespace, k.localname)] = v
                else:
                    converted_attribs[(None, k)] = v

            if namespace == namespaces["html"] and name in voidElements:
                # The last argument is true when the start event is not
                # immediately followed by the matching END event.
                for token in self.emptyTag(namespace, name, converted_attribs,
                                           not next or next[0] != END or
                                           next[1] != tag):
                    yield token
            else:
                yield self.startTag(namespace, name, converted_attribs)

        elif kind == END:
            name = data.localname
            namespace = data.namespace
            # Mirror the START branch: only HTML-namespace void elements
            # were emitted via emptyTag, so only they skip the end tag.
            # Checking the name alone would leave a start tag unclosed for
            # a non-HTML element whose name matches a void element.
            if namespace != namespaces["html"] or name not in voidElements:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(data)

        elif kind == TEXT:
            for token in self.text(data):
                yield token

        elif kind == DOCTYPE:
            yield self.doctype(*data)

        elif kind in (XML_NAMESPACE, START_NS, END_NS,
                      START_CDATA, END_CDATA, PI):
            # These event kinds produce no tokens.
            pass

        else:
            yield self.unknown(kind)
|
|
||||||
@@ -1,201 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
from pip._vendor.six import text_type
|
|
||||||
|
|
||||||
from lxml import etree
|
|
||||||
from ..treebuilders.etree import tag_regexp
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
|
|
||||||
from .. import ihatexml
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_str(s):
    """Return *s* as text.

    None and ``text_type`` instances are returned unchanged; anything else
    is decoded as strict UTF-8.
    """
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("utf-8", "strict")
|
|
||||||
|
|
||||||
|
|
||||||
class Root(object):
    """Synthetic document node wrapping an element tree *et*.

    ``children`` holds an optional Doctype followed by every top-level
    node of the tree (the root element and all its siblings).
    """

    def __init__(self, et):
        self.elementtree = et
        self.children = []

        docinfo = et.docinfo
        if docinfo.internalDTD:
            doctype = Doctype(self,
                              ensure_str(docinfo.root_name),
                              ensure_str(docinfo.public_id),
                              ensure_str(docinfo.system_url))
            self.children.append(doctype)

        # Rewind to the first top-level sibling of the root element...
        current = et.getroot()
        while True:
            earlier = current.getprevious()
            if earlier is None:
                break
            current = earlier

        # ...then collect every top-level node in document order.
        while current is not None:
            self.children.append(current)
            current = current.getnext()

        self.text = None
        self.tail = None

    def __getitem__(self, key):
        """Return the child (or slice of children) at *key*."""
        return self.children[key]

    def getnext(self):
        """The document node has no sibling; always None."""
        return None

    def __len__(self):
        """Always 1, regardless of how many children were collected."""
        return 1
|
|
||||||
|
|
||||||
|
|
||||||
class Doctype(object):
    """Doctype pseudo-node stored as a child of a root node."""

    def __init__(self, root_node, name, public_id, system_id):
        self.root_node = root_node
        self.name, self.public_id, self.system_id = name, public_id, system_id

        # A doctype carries no text of its own.
        self.text = self.tail = None

    def getnext(self):
        """Return the entry at index 1 of the root node's children."""
        siblings = self.root_node.children
        return siblings[1]
|
|
||||||
|
|
||||||
|
|
||||||
class FragmentRoot(Root):
    """Root node for a fragment given as a list of children.

    Each child is wrapped in a FragmentWrapper that points back here.
    """

    def __init__(self, children):
        wrapped = []
        for child in children:
            wrapped.append(FragmentWrapper(self, child))
        self.children = wrapped
        self.text = None
        self.tail = None

    def getnext(self):
        """A fragment root has no sibling; always None."""
        return None
|
|
||||||
|
|
||||||
|
|
||||||
class FragmentWrapper(object):
    """Proxy around one top-level object of a fragment.

    Attribute access not defined here is forwarded to the wrapped object;
    sibling navigation goes through the owning fragment root instead.
    """

    def __init__(self, fragment_root, obj):
        self.root_node = fragment_root
        self.obj = obj
        # text/tail are normalised with ensure_str when the wrapped object
        # has them, and are None otherwise.
        self.text = ensure_str(self.obj.text) if hasattr(self.obj, 'text') else None
        self.tail = ensure_str(self.obj.tail) if hasattr(self.obj, 'tail') else None

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped object."""
        return getattr(self.obj, name)

    def getnext(self):
        """Return the next wrapper among the root's children, or None."""
        siblings = self.root_node.children
        following = siblings.index(self) + 1
        if following < len(siblings):
            return siblings[following]
        return None

    def __getitem__(self, key):
        """Index into the wrapped object."""
        return self.obj[key]

    def __bool__(self):
        """Truthiness of the wrapped object."""
        return bool(self.obj)

    def getparent(self):
        """Wrappers report no parent; always None."""
        return None

    def __str__(self):
        return str(self.obj)

    def __unicode__(self):
        return str(self.obj)

    def __len__(self):
        """Length of the wrapped object."""
        return len(self.obj)
|
|
||||||
|
|
||||||
|
|
||||||
class TreeWalker(_base.NonRecursiveTreeWalker):
    """Non-recursive tree walker over lxml trees and fragments.

    Text nodes are represented as ``(element, "text")`` or
    ``(element, "tail")`` tuples.
    """

    def __init__(self, tree):
        # Objects exposing getroot() get a Root wrapper; plain lists are
        # treated as fragments.  Anything else is walked as given.
        if hasattr(tree, "getroot"):
            tree = Root(tree)
        elif isinstance(tree, list):
            tree = FragmentRoot(tree)
        _base.NonRecursiveTreeWalker.__init__(self, tree)
        self.filter = ihatexml.InfosetFilter()

    def getNodeDetails(self, node):
        """Describe *node* as a ``_base`` node-type tuple."""
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            return _base.TEXT, ensure_str(getattr(node, key))

        if isinstance(node, Root):
            return (_base.DOCUMENT,)

        if isinstance(node, Doctype):
            return _base.DOCTYPE, node.name, node.public_id, node.system_id

        if isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
            # A wrapped object without a tag is reported as text.
            return _base.TEXT, node.obj

        if node.tag == etree.Comment:
            return _base.COMMENT, ensure_str(node.text)

        if node.tag == etree.Entity:
            return _base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;

        # This is assumed to be an ordinary element
        tag_match = tag_regexp.match(ensure_str(node.tag))
        if tag_match:
            namespace, tag = tag_match.groups()
        else:
            namespace = None
            tag = ensure_str(node.tag)

        attrs = {}
        for name, value in list(node.attrib.items()):
            name = ensure_str(name)
            value = ensure_str(value)
            attr_match = tag_regexp.match(name)
            if attr_match:
                attr_key = (attr_match.group(1), attr_match.group(2))
            else:
                attr_key = (None, name)
            attrs[attr_key] = value
        return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                attrs, len(node) > 0 or node.text)

    def getFirstChild(self, node):
        """Return the element's text node if it has text, else its first child."""
        assert not isinstance(node, tuple), "Text nodes have no children"

        assert len(node) or node.text, "Node has no children"
        if node.text:
            return (node, "text")
        return node[0]

    def getNextSibling(self, node):
        """Return the node following *node*, or None."""
        if not isinstance(node, tuple):
            # An element is followed by its tail text, when it has one.
            if node.tail:
                return (node, "tail")
            return node.getnext()

        owner, key = node  # Text node
        assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
        if key != "text":  # tail
            return owner.getnext()

        # XXX: we cannot use a "bool(node) and node[0] or None" construct here
        # because node[0] might evaluate to False if it has no child element
        if len(owner):
            return owner[0]
        return None

    def getParentNode(self, node):
        """Return the parent of *node*."""
        if isinstance(node, tuple):  # Text node
            node, key = node
            assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
            if key == "text":
                return node
            # else: fallback to "normal" processing

        return node.getparent()
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
|
|
||||||
COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
|
|
||||||
|
|
||||||
from . import _base
|
|
||||||
|
|
||||||
from ..constants import voidElements
|
|
||||||
|
|
||||||
|
|
||||||
class TreeWalker(_base.TreeWalker):
    """Tree walker that converts an ``xml.dom.pulldom`` event stream to tokens."""

    def __iter__(self):
        """Yield tokens for every event in ``self.tree``.

        Events are processed one step behind so ``tokens`` can inspect the
        following event.  After an "EmptyTag" token is produced, events are
        skipped until the event for that same node is reached again.

        Raises:
            ValueError: if the stream ends while still skipping, i.e. a void
                element never received its END_ELEMENT event.
        """
        ignore_until = None
        previous = None
        for event in self.tree:
            if previous is not None and \
                    (ignore_until is None or previous[1] is ignore_until):
                if previous[1] is ignore_until:
                    ignore_until = None
                for token in self.tokens(previous, event):
                    yield token
                    if token["type"] == "EmptyTag":
                        ignore_until = previous[1]
            previous = event

        if previous is None:
            # Empty event stream: nothing to flush.  Without this guard the
            # final step would try to unpack None as an event.
            return

        if ignore_until is None or previous[1] is ignore_until:
            for token in self.tokens(previous, None):
                yield token
        else:
            raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")

    def tokens(self, event, next):
        """Yield the tokens for a single pulldom *event*.

        *event* is an ``(event type, node)`` pair and *next* is the event
        following it, or None when *event* is the last one.
        """
        kind, node = event
        if kind == START_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            attrs = {}
            for attr_name in list(node.attributes.keys()):
                attr = node.getAttributeNode(attr_name)
                attrs[(attr.namespaceURI, attr.localName)] = attr.value
            if name in voidElements:
                # The last argument is true when the following event does
                # not belong to this same node.
                for token in self.emptyTag(namespace,
                                           name,
                                           attrs,
                                           not next or next[1] is not node):
                    yield token
            else:
                yield self.startTag(namespace, name, attrs)

        elif kind == END_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            if name not in voidElements:
                yield self.endTag(namespace, name)

        elif kind == COMMENT:
            yield self.comment(node.nodeValue)

        elif kind in (IGNORABLE_WHITESPACE, CHARACTERS):
            for token in self.text(node.nodeValue):
                yield token

        else:
            yield self.unknown(kind)
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from .py import Trie as PyTrie
|
|
||||||
|
|
||||||
Trie = PyTrie
|
|
||||||
|
|
||||||
try:
|
|
||||||
from .datrie import Trie as DATrie
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
Trie = DATrie
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from collections import Mapping
|
|
||||||
|
|
||||||
|
|
||||||
class Trie(Mapping):
    """Abstract base class for tries"""

    def keys(self, prefix=None):
        """Return the set of keys, optionally only those starting with *prefix*."""
        # Explicit two-argument super(): the zero-argument form is
        # Python 3 only and raises TypeError on Python 2.
        keys = super(Trie, self).keys()

        if prefix is None:
            return set(keys)

        # Python 2.6: no set comprehensions
        return set([x for x in keys if x.startswith(prefix)])

    def has_keys_with_prefix(self, prefix):
        """Return True if at least one key starts with *prefix*."""
        for key in self.keys():
            if key.startswith(prefix):
                return True

        return False

    def longest_prefix(self, prefix):
        """Return the longest key that is a prefix of *prefix*.

        *prefix* itself is tried first, then ever shorter leading slices
        down to the empty string.

        Raises:
            KeyError: if no leading slice of *prefix* is a key.
        """
        if prefix in self:
            return prefix

        for i in range(1, len(prefix) + 1):
            if prefix[:-i] in self:
                return prefix[:-i]

        raise KeyError(prefix)

    def longest_prefix_item(self, prefix):
        """Return ``(key, value)`` for the key found by ``longest_prefix``."""
        lprefix = self.longest_prefix(prefix)
        return (lprefix, self[lprefix])
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
from __future__ import absolute_import, division, unicode_literals
|
|
||||||
|
|
||||||
from datrie import Trie as DATrie
|
|
||||||
from pip._vendor.six import text_type
|
|
||||||
|
|
||||||
from ._base import Trie as ABCTrie
|
|
||||||
|
|
||||||
|
|
||||||
class Trie(ABCTrie):
    """Trie implementation that delegates to a ``datrie`` trie."""

    def __init__(self, data):
        """Build the trie from the mapping *data*.

        Raises:
            TypeError: if any key is not a ``text_type`` instance.
        """
        alphabet = set()
        for key in data.keys():
            if not isinstance(key, text_type):
                raise TypeError("All keys must be strings")
            alphabet.update(key)

        # The underlying trie is constructed from every character that
        # occurs in the keys.
        self._data = DATrie("".join(alphabet))
        for key, value in data.items():
            self._data[key] = value

    def __contains__(self, key):
        return key in self._data

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        """Iteration is not supported by this implementation."""
        raise NotImplementedError()

    def __getitem__(self, key):
        return self._data[key]

    def keys(self, prefix=None):
        """Delegate to the underlying trie's ``keys``."""
        return self._data.keys(prefix)

    def has_keys_with_prefix(self, prefix):
        """Delegate to the underlying trie's ``has_keys_with_prefix``."""
        return self._data.has_keys_with_prefix(prefix)

    def longest_prefix(self, prefix):
        """Delegate to the underlying trie's ``longest_prefix``."""
        return self._data.longest_prefix(prefix)

    def longest_prefix_item(self, prefix):
        """Delegate to the underlying trie's ``longest_prefix_item``."""
        return self._data.longest_prefix_item(prefix)
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user