mirror of
https://github.com/kennethreitz/pipenv.git
synced 2026-06-05 22:50:18 +00:00
Rename notpip to pip. Vendor in pip-22.2.1 and latest requirementslib and vistir.
This commit is contained in:
+1
-1
@@ -12,7 +12,7 @@ recursive-include pipenv/vendor vendor.txt
|
||||
recursive-include pipenv *.json
|
||||
recursive-include pipenv *.rst
|
||||
|
||||
include pipenv/patched/notpip/_vendor/vendor.txt
|
||||
include pipenv/patched/pip/_vendor/vendor.txt
|
||||
include pipenv/patched/patched.txt
|
||||
include pipenv/vendor/Makefile
|
||||
include pipenv/pipenv.1
|
||||
|
||||
@@ -11,7 +11,7 @@ CLEAN_TARGETS := $(addprefix clean-py,$(PY_VERSIONS))
|
||||
DATE_STRING := $(shell date +%Y.%m.%d)
|
||||
THIS_MONTH_DATE := $(shell date +%Y.%m.01)
|
||||
NEXT_MONTH_DATE := $(shell date -d "+1 month" +%Y.%m.01)
|
||||
PATCHED_PIP_VERSION := $(shell awk '/__version__/{gsub(/"/,"",$$3); print $$3}' pipenv/patched/notpip/__init__.py)
|
||||
PATCHED_PIP_VERSION := $(shell awk '/__version__/{gsub(/"/,"",$$3); print $$3}' pipenv/patched/pip/__init__.py)
|
||||
GITDIR_STAMPFILE := $(CURDIR)/.git-checkout-dir
|
||||
create_git_tmpdir = $(shell mktemp -dt pipenv-vendor-XXXXXXXX 2>/dev/null || mktemp -d 2>/dev/null)
|
||||
write_git_tmpdir = $(file > $(GITDIR_STAMPFILE),$(create_git_tmpdir))
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
* Rename patched ``notpip`` to ``pip`` in order to be clear that its a patched version of pip.
|
||||
* Remove the part of _post_pip_import.patch that overrode the standalone pip to be the user installed pip,
|
||||
now we fully rely on our vendored and patched ``pip``, even for all types of installs.
|
||||
* Vendor in the next newest version of ``pip==22.2``
|
||||
* Modify patch for ``pipdeptree`` to not use ``pip-shims``
|
||||
+1
-1
@@ -24,7 +24,7 @@ warnings.filterwarnings("ignore", category=ResourceWarning)
|
||||
warnings.filterwarnings("ignore", category=UserWarning)
|
||||
|
||||
# Load patched pip instead of system pip
|
||||
os.environ["PIP_SHIMS_BASE_MODULE"] = "pipenv.patched.notpip"
|
||||
os.environ["PIP_SHIMS_BASE_MODULE"] = "pipenv.patched.pip"
|
||||
os.environ["PIP_DISABLE_PIP_VERSION_CHECK"] = "1"
|
||||
|
||||
# Hack to make things work better.
|
||||
|
||||
+9
-9
@@ -84,7 +84,7 @@ else:
|
||||
|
||||
|
||||
def do_clear(project):
|
||||
from pipenv.patched.notpip._internal import locations
|
||||
from pipenv.patched.pip._internal import locations
|
||||
|
||||
click.secho(fix_utf8("Clearing caches..."), bold=True)
|
||||
try:
|
||||
@@ -160,10 +160,10 @@ def cleanup_virtualenv(project, bare=True):
|
||||
|
||||
|
||||
def import_requirements(project, r=None, dev=False):
|
||||
from pipenv.patched.notpip._internal.req.constructors import (
|
||||
from pipenv.patched.pip._internal.req.constructors import (
|
||||
install_req_from_parsed_requirement,
|
||||
)
|
||||
from pipenv.patched.notpip._vendor import requests
|
||||
from pipenv.patched.pip._vendor import requests
|
||||
from pipenv.vendor.pip_shims.shims import parse_requirements
|
||||
|
||||
# Parse requirements.txt file with Pip's parser.
|
||||
@@ -1141,7 +1141,7 @@ def do_lock(
|
||||
|
||||
# Support for --keep-outdated...
|
||||
if keep_outdated:
|
||||
from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
for section_name, section in (
|
||||
("default", project.packages),
|
||||
@@ -1372,7 +1372,7 @@ def get_pip_args(
|
||||
selective_upgrade: bool = False,
|
||||
src_dir: Optional[str] = None,
|
||||
) -> List[str]:
|
||||
from pipenv.patched.notpip._vendor.packaging.version import parse as parse_version
|
||||
from pipenv.patched.pip._vendor.packaging.version import parse as parse_version
|
||||
|
||||
arg_map = {
|
||||
"pre": ["--pre"],
|
||||
@@ -1472,7 +1472,7 @@ def pip_install(
|
||||
trusted_hosts=None,
|
||||
use_pep517=True,
|
||||
):
|
||||
piplogger = logging.getLogger("pipenv.patched.notpip._internal.commands.install")
|
||||
piplogger = logging.getLogger("pipenv.patched.pip._internal.commands.install")
|
||||
if not trusted_hosts:
|
||||
trusted_hosts = []
|
||||
trusted_hosts.extend(os.environ.get("PIP_TRUSTED_HOSTS", []))
|
||||
@@ -1850,7 +1850,7 @@ def do_outdated(project, pypi_mirror=None, pre=False, clear=False):
|
||||
from collections import namedtuple
|
||||
from collections.abc import Mapping
|
||||
|
||||
from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from .vendor.requirementslib.models.requirements import Requirement
|
||||
from .vendor.requirementslib.models.utils import get_version
|
||||
@@ -2293,7 +2293,7 @@ def do_uninstall(
|
||||
pypi_mirror=None,
|
||||
ctx=None,
|
||||
):
|
||||
from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from .vendor.requirementslib.models.requirements import Requirement
|
||||
|
||||
@@ -3010,7 +3010,7 @@ def do_clean(
|
||||
system=False,
|
||||
):
|
||||
# Ensure that virtualenv is available.
|
||||
from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
ensure_project(
|
||||
project, three=three, python=python, validate=False, pypi_mirror=pypi_mirror
|
||||
|
||||
@@ -14,7 +14,7 @@ from sysconfig import get_paths, get_python_version, get_scheme_names
|
||||
import pkg_resources
|
||||
|
||||
import pipenv
|
||||
from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.utils.constants import is_type_checking
|
||||
from pipenv.utils.indexes import prepare_pip_source_args
|
||||
from pipenv.utils.processes import subprocess_run
|
||||
@@ -29,7 +29,7 @@ if is_type_checking():
|
||||
import pip_shims.shims
|
||||
import tomlkit
|
||||
|
||||
from pipenv.patched.notpip._vendor.packaging.version import Version
|
||||
from pipenv.patched.pip._vendor.packaging.version import Version
|
||||
from pipenv.project import Project, TPipfile, TSource
|
||||
|
||||
BASE_WORKING_SET = pkg_resources.WorkingSet(sys.path)
|
||||
@@ -545,9 +545,7 @@ class Environment:
|
||||
Get the pip version in the environment. Useful for knowing which args we can use
|
||||
when installing.
|
||||
"""
|
||||
from pipenv.patched.notpip._vendor.packaging.version import (
|
||||
parse as parse_version,
|
||||
)
|
||||
from pipenv.patched.pip._vendor.packaging.version import parse as parse_version
|
||||
|
||||
pip = next(
|
||||
iter(pkg for pkg in self.get_installed_packages() if pkg.key == "pip"), None
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
from distutils.errors import DistutilsArgError
|
||||
from distutils.fancy_getopt import FancyGetopt
|
||||
from typing import Dict, List
|
||||
|
||||
_options = [
|
||||
("exec-prefix=", None, ""),
|
||||
("home=", None, ""),
|
||||
("install-base=", None, ""),
|
||||
("install-data=", None, ""),
|
||||
("install-headers=", None, ""),
|
||||
("install-lib=", None, ""),
|
||||
("install-platlib=", None, ""),
|
||||
("install-purelib=", None, ""),
|
||||
("install-scripts=", None, ""),
|
||||
("prefix=", None, ""),
|
||||
("root=", None, ""),
|
||||
("user", None, ""),
|
||||
]
|
||||
|
||||
|
||||
# typeshed doesn't permit Tuple[str, None, str], see python/typeshed#3469.
|
||||
_distutils_getopt = FancyGetopt(_options) # type: ignore
|
||||
|
||||
|
||||
def parse_distutils_args(args: List[str]) -> Dict[str, str]:
|
||||
"""Parse provided arguments, returning an object that has the
|
||||
matched arguments.
|
||||
|
||||
Any unknown arguments are ignored.
|
||||
"""
|
||||
result = {}
|
||||
for arg in args:
|
||||
try:
|
||||
_, match = _distutils_getopt.getopt(args=[arg])
|
||||
except DistutilsArgError:
|
||||
# We don't care about any other options, which here may be
|
||||
# considered unrecognized since our option list is not
|
||||
# exhaustive.
|
||||
pass
|
||||
else:
|
||||
result.update(match.__dict__)
|
||||
return result
|
||||
@@ -1,3 +0,0 @@
|
||||
from .core import contents, where
|
||||
|
||||
__version__ = "2021.10.08"
|
||||
@@ -1,83 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
from .universaldetector import UniversalDetector
|
||||
from .enums import InputState
|
||||
from .version import __version__, VERSION
|
||||
|
||||
|
||||
__all__ = ['UniversalDetector', 'detect', 'detect_all', '__version__', 'VERSION']
|
||||
|
||||
|
||||
def detect(byte_str):
|
||||
"""
|
||||
Detect the encoding of the given byte string.
|
||||
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:type byte_str: ``bytes`` or ``bytearray``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
raise TypeError('Expected object of type bytes or bytearray, got: '
|
||||
'{}'.format(type(byte_str)))
|
||||
else:
|
||||
byte_str = bytearray(byte_str)
|
||||
detector = UniversalDetector()
|
||||
detector.feed(byte_str)
|
||||
return detector.close()
|
||||
|
||||
|
||||
def detect_all(byte_str):
|
||||
"""
|
||||
Detect all the possible encodings of the given byte string.
|
||||
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:type byte_str: ``bytes`` or ``bytearray``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
raise TypeError('Expected object of type bytes or bytearray, got: '
|
||||
'{}'.format(type(byte_str)))
|
||||
else:
|
||||
byte_str = bytearray(byte_str)
|
||||
|
||||
detector = UniversalDetector()
|
||||
detector.feed(byte_str)
|
||||
detector.close()
|
||||
|
||||
if detector._input_state == InputState.HIGH_BYTE:
|
||||
results = []
|
||||
for prober in detector._charset_probers:
|
||||
if prober.get_confidence() > detector.MINIMUM_THRESHOLD:
|
||||
charset_name = prober.charset_name
|
||||
lower_charset_name = prober.charset_name.lower()
|
||||
# Use Windows encoding name instead of ISO-8859 if we saw any
|
||||
# extra Windows-specific bytes
|
||||
if lower_charset_name.startswith('iso-8859'):
|
||||
if detector._has_win_bytes:
|
||||
charset_name = detector.ISO_WIN_MAP.get(lower_charset_name,
|
||||
charset_name)
|
||||
results.append({
|
||||
'encoding': charset_name,
|
||||
'confidence': prober.get_confidence(),
|
||||
'language': prober.language,
|
||||
})
|
||||
if len(results) > 0:
|
||||
return sorted(results, key=lambda result: -result['confidence'])
|
||||
|
||||
return [detector.result]
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -1,246 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
HZ_CLS = (
|
||||
1,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,4,0,5,2,0, # 78 - 7f
|
||||
1,1,1,1,1,1,1,1, # 80 - 87
|
||||
1,1,1,1,1,1,1,1, # 88 - 8f
|
||||
1,1,1,1,1,1,1,1, # 90 - 97
|
||||
1,1,1,1,1,1,1,1, # 98 - 9f
|
||||
1,1,1,1,1,1,1,1, # a0 - a7
|
||||
1,1,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,1,1,1,1,1,1, # c0 - c7
|
||||
1,1,1,1,1,1,1,1, # c8 - cf
|
||||
1,1,1,1,1,1,1,1, # d0 - d7
|
||||
1,1,1,1,1,1,1,1, # d8 - df
|
||||
1,1,1,1,1,1,1,1, # e0 - e7
|
||||
1,1,1,1,1,1,1,1, # e8 - ef
|
||||
1,1,1,1,1,1,1,1, # f0 - f7
|
||||
1,1,1,1,1,1,1,1, # f8 - ff
|
||||
)
|
||||
|
||||
HZ_ST = (
|
||||
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
|
||||
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
|
||||
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
|
||||
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
|
||||
)
|
||||
|
||||
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
HZ_SM_MODEL = {'class_table': HZ_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': HZ_ST,
|
||||
'char_len_table': HZ_CHAR_LEN_TABLE,
|
||||
'name': "HZ-GB-2312",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022CN_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,4,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022CN_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
|
||||
)
|
||||
|
||||
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
|
||||
'class_factor': 9,
|
||||
'state_table': ISO2022CN_ST,
|
||||
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-CN",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022JP_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,2,2, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,7,0,0,0, # 20 - 27
|
||||
3,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
6,0,4,0,8,0,0,0, # 40 - 47
|
||||
0,9,5,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022JP_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
|
||||
)
|
||||
|
||||
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': ISO2022JP_ST,
|
||||
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-JP",
|
||||
'language': 'Japanese'}
|
||||
|
||||
ISO2022KR_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,3,0,0,0, # 20 - 27
|
||||
0,4,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,5,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022KR_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
|
||||
)
|
||||
|
||||
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': ISO2022KR_ST,
|
||||
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-KR",
|
||||
'language': 'Korean'}
|
||||
|
||||
|
||||
@@ -1,387 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# EUCTW frequency table
|
||||
# Converted from big5 work
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http:#www.edu.tw:81/mandr/>
|
||||
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98
|
||||
# Random Distribution Ration = 512/(5401-512)=0.105
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCTW_TABLE_SIZE = 5376
|
||||
|
||||
EUCTW_CHAR_TO_FREQ_ORDER = (
|
||||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
||||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
||||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
||||
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
|
||||
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
|
||||
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
|
||||
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
|
||||
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
|
||||
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
|
||||
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
|
||||
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
|
||||
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
|
||||
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
|
||||
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
|
||||
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
|
||||
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
|
||||
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
|
||||
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
|
||||
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
|
||||
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
|
||||
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
|
||||
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
|
||||
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
|
||||
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
|
||||
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
|
||||
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
|
||||
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
|
||||
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
|
||||
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
|
||||
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
|
||||
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
|
||||
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
|
||||
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
|
||||
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
|
||||
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
|
||||
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
|
||||
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
|
||||
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
|
||||
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
|
||||
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
|
||||
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
|
||||
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
|
||||
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
|
||||
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
|
||||
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
|
||||
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
|
||||
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
|
||||
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
|
||||
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
|
||||
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
|
||||
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
|
||||
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
|
||||
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
|
||||
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
|
||||
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
|
||||
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
|
||||
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
|
||||
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
|
||||
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
|
||||
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
|
||||
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
|
||||
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
|
||||
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
|
||||
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
|
||||
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
|
||||
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
|
||||
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
|
||||
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
|
||||
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
|
||||
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
|
||||
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
|
||||
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
|
||||
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
|
||||
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
|
||||
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
|
||||
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
|
||||
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
|
||||
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
|
||||
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
|
||||
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
|
||||
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
|
||||
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
|
||||
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
|
||||
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
|
||||
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
|
||||
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
|
||||
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
|
||||
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
|
||||
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
|
||||
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
|
||||
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
|
||||
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
|
||||
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
|
||||
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
|
||||
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
|
||||
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
|
||||
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
|
||||
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
|
||||
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
|
||||
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
|
||||
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
|
||||
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
|
||||
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
|
||||
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
|
||||
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
|
||||
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
|
||||
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
|
||||
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
|
||||
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
|
||||
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
|
||||
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
|
||||
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
|
||||
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
|
||||
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
|
||||
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
|
||||
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
|
||||
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
|
||||
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
|
||||
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
|
||||
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
|
||||
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
|
||||
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
|
||||
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
|
||||
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
|
||||
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
|
||||
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
|
||||
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
|
||||
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
|
||||
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
|
||||
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
|
||||
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
|
||||
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
|
||||
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
|
||||
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
|
||||
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
|
||||
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
|
||||
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
|
||||
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
|
||||
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
|
||||
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
|
||||
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
|
||||
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
|
||||
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
|
||||
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
|
||||
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
|
||||
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
|
||||
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
|
||||
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
|
||||
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
|
||||
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
|
||||
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
|
||||
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
|
||||
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
|
||||
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
|
||||
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
|
||||
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
|
||||
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
|
||||
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
|
||||
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
|
||||
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
|
||||
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
|
||||
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
|
||||
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
|
||||
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
|
||||
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
|
||||
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
|
||||
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
|
||||
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
|
||||
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
|
||||
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
|
||||
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
|
||||
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
|
||||
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
|
||||
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
|
||||
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
|
||||
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
|
||||
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
|
||||
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
|
||||
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
|
||||
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
|
||||
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
|
||||
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
|
||||
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
|
||||
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
|
||||
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
|
||||
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
|
||||
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
|
||||
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
|
||||
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
|
||||
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
|
||||
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
|
||||
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
|
||||
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
|
||||
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
|
||||
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
|
||||
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
|
||||
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
|
||||
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
|
||||
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
|
||||
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
|
||||
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
|
||||
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
|
||||
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
|
||||
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
|
||||
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
|
||||
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
|
||||
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
|
||||
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
|
||||
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
|
||||
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
|
||||
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
|
||||
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
|
||||
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
|
||||
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
|
||||
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
|
||||
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
|
||||
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
|
||||
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
|
||||
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
|
||||
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
|
||||
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
|
||||
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
|
||||
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
|
||||
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
|
||||
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
|
||||
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
|
||||
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
|
||||
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
|
||||
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
|
||||
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
|
||||
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
|
||||
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
|
||||
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
|
||||
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
|
||||
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
|
||||
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
|
||||
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
|
||||
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
|
||||
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
|
||||
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
|
||||
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
|
||||
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
|
||||
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
|
||||
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
|
||||
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
|
||||
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
|
||||
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
|
||||
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
|
||||
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
|
||||
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
|
||||
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
|
||||
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
|
||||
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
|
||||
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
|
||||
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
|
||||
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
|
||||
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
|
||||
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
|
||||
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
|
||||
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
|
||||
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
|
||||
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
|
||||
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
|
||||
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
|
||||
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
|
||||
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
|
||||
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
|
||||
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
|
||||
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
|
||||
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
|
||||
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
|
||||
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
|
||||
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
|
||||
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
|
||||
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
|
||||
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
|
||||
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
|
||||
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
|
||||
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
|
||||
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
|
||||
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
|
||||
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
|
||||
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
|
||||
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
|
||||
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
|
||||
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
|
||||
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
|
||||
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
|
||||
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
|
||||
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
|
||||
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
|
||||
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
|
||||
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
|
||||
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
|
||||
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
|
||||
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
|
||||
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
|
||||
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
|
||||
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
|
||||
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
|
||||
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
|
||||
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
|
||||
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
|
||||
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
|
||||
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
|
||||
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
|
||||
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
|
||||
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
|
||||
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
|
||||
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
|
||||
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
|
||||
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
|
||||
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
|
||||
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
|
||||
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
|
||||
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
|
||||
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
|
||||
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
|
||||
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
|
||||
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
|
||||
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
|
||||
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
|
||||
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
|
||||
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
|
||||
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
|
||||
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
|
||||
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
|
||||
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
|
||||
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
|
||||
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
|
||||
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
|
||||
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
|
||||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
||||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
||||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
||||
)
|
||||
|
||||
@@ -1,233 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
||||
jp2CharContext = (
|
||||
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
|
||||
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
|
||||
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
|
||||
(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
|
||||
(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
|
||||
(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
|
||||
(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
|
||||
(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
|
||||
(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
|
||||
(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
|
||||
(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
|
||||
(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
|
||||
(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
|
||||
(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
|
||||
(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
|
||||
(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
|
||||
(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
|
||||
(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
|
||||
(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
|
||||
(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
|
||||
(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
|
||||
(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
|
||||
(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
|
||||
(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
|
||||
(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
|
||||
(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
|
||||
(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
|
||||
(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
|
||||
(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
|
||||
(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
|
||||
(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
|
||||
(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
|
||||
(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
|
||||
(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
|
||||
(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
|
||||
(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
|
||||
(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
|
||||
(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
|
||||
(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
|
||||
(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
|
||||
(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
|
||||
(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
|
||||
(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
|
||||
(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
|
||||
(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
|
||||
(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
|
||||
(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
|
||||
(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
|
||||
(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
|
||||
(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
|
||||
(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
|
||||
(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
|
||||
(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
|
||||
(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
|
||||
(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
|
||||
(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
|
||||
(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
|
||||
(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
|
||||
(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
|
||||
(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
|
||||
(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
|
||||
(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
|
||||
(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
|
||||
(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
|
||||
(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
|
||||
(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
|
||||
(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
|
||||
(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
|
||||
(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
|
||||
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
||||
)
|
||||
|
||||
class JapaneseContextAnalysis(object):
|
||||
NUM_OF_CATEGORY = 6
|
||||
DONT_KNOW = -1
|
||||
ENOUGH_REL_THRESHOLD = 100
|
||||
MAX_REL_THRESHOLD = 1000
|
||||
MINIMUM_DATA_THRESHOLD = 4
|
||||
|
||||
def __init__(self):
|
||||
self._total_rel = None
|
||||
self._rel_sample = None
|
||||
self._need_to_skip_char_num = None
|
||||
self._last_char_order = None
|
||||
self._done = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._total_rel = 0 # total sequence received
|
||||
# category counters, each integer counts sequence in its category
|
||||
self._rel_sample = [0] * self.NUM_OF_CATEGORY
|
||||
# if last byte in current buffer is not the last byte of a character,
|
||||
# we need to know how many bytes to skip in next buffer
|
||||
self._need_to_skip_char_num = 0
|
||||
self._last_char_order = -1 # The order of previous char
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._done = False
|
||||
|
||||
def feed(self, byte_str, num_bytes):
|
||||
if self._done:
|
||||
return
|
||||
|
||||
# The buffer we got is byte oriented, and a character may span in more than one
|
||||
# buffers. In case the last one or two byte in last buffer is not
|
||||
# complete, we record how many byte needed to complete that character
|
||||
# and skip these bytes here. We can choose to record those bytes as
|
||||
# well and analyse the character once it is complete, but since a
|
||||
# character will not make much difference, by simply skipping
|
||||
# this character will simply our logic and improve performance.
|
||||
i = self._need_to_skip_char_num
|
||||
while i < num_bytes:
|
||||
order, char_len = self.get_order(byte_str[i:i + 2])
|
||||
i += char_len
|
||||
if i > num_bytes:
|
||||
self._need_to_skip_char_num = i - num_bytes
|
||||
self._last_char_order = -1
|
||||
else:
|
||||
if (order != -1) and (self._last_char_order != -1):
|
||||
self._total_rel += 1
|
||||
if self._total_rel > self.MAX_REL_THRESHOLD:
|
||||
self._done = True
|
||||
break
|
||||
self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
|
||||
self._last_char_order = order
|
||||
|
||||
def got_enough_data(self):
|
||||
return self._total_rel > self.ENOUGH_REL_THRESHOLD
|
||||
|
||||
def get_confidence(self):
|
||||
# This is just one way to calculate confidence. It works well for me.
|
||||
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
|
||||
return (self._total_rel - self._rel_sample[0]) / self._total_rel
|
||||
else:
|
||||
return self.DONT_KNOW
|
||||
|
||||
def get_order(self, byte_str):
|
||||
return -1, 1
|
||||
|
||||
class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||
def __init__(self):
|
||||
super(SJISContextAnalysis, self).__init__()
|
||||
self._charset_name = "SHIFT_JIS"
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._charset_name
|
||||
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = byte_str[0]
|
||||
if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
|
||||
char_len = 2
|
||||
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
||||
self._charset_name = "CP932"
|
||||
else:
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
||||
return second_char - 0x9F, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = byte_str[0]
|
||||
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
||||
char_len = 2
|
||||
elif first_char == 0x8F:
|
||||
char_len = 3
|
||||
else:
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
||||
return second_char - 0xA1, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
|
||||
@@ -1,572 +0,0 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
# BIG5
|
||||
|
||||
BIG5_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
4,4,4,4,4,4,4,4, # 80 - 87
|
||||
4,4,4,4,4,4,4,4, # 88 - 8f
|
||||
4,4,4,4,4,4,4,4, # 90 - 97
|
||||
4,4,4,4,4,4,4,4, # 98 - 9f
|
||||
4,3,3,3,3,3,3,3, # a0 - a7
|
||||
3,3,3,3,3,3,3,3, # a8 - af
|
||||
3,3,3,3,3,3,3,3, # b0 - b7
|
||||
3,3,3,3,3,3,3,3, # b8 - bf
|
||||
3,3,3,3,3,3,3,3, # c0 - c7
|
||||
3,3,3,3,3,3,3,3, # c8 - cf
|
||||
3,3,3,3,3,3,3,3, # d0 - d7
|
||||
3,3,3,3,3,3,3,3, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
BIG5_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
|
||||
)
|
||||
|
||||
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
|
||||
|
||||
BIG5_SM_MODEL = {'class_table': BIG5_CLS,
|
||||
'class_factor': 5,
|
||||
'state_table': BIG5_ST,
|
||||
'char_len_table': BIG5_CHAR_LEN_TABLE,
|
||||
'name': 'Big5'}
|
||||
|
||||
# CP949
|
||||
|
||||
CP949_CLS = (
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
|
||||
1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
|
||||
4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
|
||||
1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
|
||||
5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
|
||||
0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
|
||||
6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
|
||||
6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
|
||||
7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
|
||||
7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
|
||||
)
|
||||
|
||||
CP949_ST = (
|
||||
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
|
||||
)
|
||||
|
||||
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||
|
||||
CP949_SM_MODEL = {'class_table': CP949_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': CP949_ST,
|
||||
'char_len_table': CP949_CHAR_LEN_TABLE,
|
||||
'name': 'CP949'}
|
||||
|
||||
# EUC-JP
|
||||
|
||||
EUCJP_CLS = (
|
||||
4,4,4,4,4,4,4,4, # 00 - 07
|
||||
4,4,4,4,4,4,5,5, # 08 - 0f
|
||||
4,4,4,4,4,4,4,4, # 10 - 17
|
||||
4,4,4,5,4,4,4,4, # 18 - 1f
|
||||
4,4,4,4,4,4,4,4, # 20 - 27
|
||||
4,4,4,4,4,4,4,4, # 28 - 2f
|
||||
4,4,4,4,4,4,4,4, # 30 - 37
|
||||
4,4,4,4,4,4,4,4, # 38 - 3f
|
||||
4,4,4,4,4,4,4,4, # 40 - 47
|
||||
4,4,4,4,4,4,4,4, # 48 - 4f
|
||||
4,4,4,4,4,4,4,4, # 50 - 57
|
||||
4,4,4,4,4,4,4,4, # 58 - 5f
|
||||
4,4,4,4,4,4,4,4, # 60 - 67
|
||||
4,4,4,4,4,4,4,4, # 68 - 6f
|
||||
4,4,4,4,4,4,4,4, # 70 - 77
|
||||
4,4,4,4,4,4,4,4, # 78 - 7f
|
||||
5,5,5,5,5,5,5,5, # 80 - 87
|
||||
5,5,5,5,5,5,1,3, # 88 - 8f
|
||||
5,5,5,5,5,5,5,5, # 90 - 97
|
||||
5,5,5,5,5,5,5,5, # 98 - 9f
|
||||
5,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,0,5 # f8 - ff
|
||||
)
|
||||
|
||||
EUCJP_ST = (
|
||||
3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
|
||||
3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
|
||||
)
|
||||
|
||||
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
|
||||
|
||||
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': EUCJP_ST,
|
||||
'char_len_table': EUCJP_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-JP'}
|
||||
|
||||
# EUC-KR
|
||||
|
||||
EUCKR_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
1,1,1,1,1,1,1,1, # 40 - 47
|
||||
1,1,1,1,1,1,1,1, # 48 - 4f
|
||||
1,1,1,1,1,1,1,1, # 50 - 57
|
||||
1,1,1,1,1,1,1,1, # 58 - 5f
|
||||
1,1,1,1,1,1,1,1, # 60 - 67
|
||||
1,1,1,1,1,1,1,1, # 68 - 6f
|
||||
1,1,1,1,1,1,1,1, # 70 - 77
|
||||
1,1,1,1,1,1,1,1, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,3,3,3, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,3,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCKR_ST = (
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
|
||||
)
|
||||
|
||||
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
|
||||
|
||||
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
|
||||
'class_factor': 4,
|
||||
'state_table': EUCKR_ST,
|
||||
'char_len_table': EUCKR_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-KR'}
|
||||
|
||||
# EUC-TW
|
||||
|
||||
EUCTW_CLS = (
|
||||
2,2,2,2,2,2,2,2, # 00 - 07
|
||||
2,2,2,2,2,2,0,0, # 08 - 0f
|
||||
2,2,2,2,2,2,2,2, # 10 - 17
|
||||
2,2,2,0,2,2,2,2, # 18 - 1f
|
||||
2,2,2,2,2,2,2,2, # 20 - 27
|
||||
2,2,2,2,2,2,2,2, # 28 - 2f
|
||||
2,2,2,2,2,2,2,2, # 30 - 37
|
||||
2,2,2,2,2,2,2,2, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,2, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,6,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,3,4,4,4,4,4,4, # a0 - a7
|
||||
5,5,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,3,1,3,3,3,3, # c0 - c7
|
||||
3,3,3,3,3,3,3,3, # c8 - cf
|
||||
3,3,3,3,3,3,3,3, # d0 - d7
|
||||
3,3,3,3,3,3,3,3, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCTW_ST = (
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
|
||||
MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
|
||||
MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
|
||||
|
||||
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': EUCTW_ST,
|
||||
'char_len_table': EUCTW_CHAR_LEN_TABLE,
|
||||
'name': 'x-euc-tw'}
|
||||
|
||||
# GB2312
|
||||
|
||||
GB2312_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
3,3,3,3,3,3,3,3, # 30 - 37
|
||||
3,3,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,4, # 78 - 7f
|
||||
5,6,6,6,6,6,6,6, # 80 - 87
|
||||
6,6,6,6,6,6,6,6, # 88 - 8f
|
||||
6,6,6,6,6,6,6,6, # 90 - 97
|
||||
6,6,6,6,6,6,6,6, # 98 - 9f
|
||||
6,6,6,6,6,6,6,6, # a0 - a7
|
||||
6,6,6,6,6,6,6,6, # a8 - af
|
||||
6,6,6,6,6,6,6,6, # b0 - b7
|
||||
6,6,6,6,6,6,6,6, # b8 - bf
|
||||
6,6,6,6,6,6,6,6, # c0 - c7
|
||||
6,6,6,6,6,6,6,6, # c8 - cf
|
||||
6,6,6,6,6,6,6,6, # d0 - d7
|
||||
6,6,6,6,6,6,6,6, # d8 - df
|
||||
6,6,6,6,6,6,6,6, # e0 - e7
|
||||
6,6,6,6,6,6,6,6, # e8 - ef
|
||||
6,6,6,6,6,6,6,6, # f0 - f7
|
||||
6,6,6,6,6,6,6,0 # f8 - ff
|
||||
)
|
||||
|
||||
GB2312_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
|
||||
4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
# To be accurate, the length of class 6 can be either 2 or 4.
|
||||
# But it is not necessary to discriminate between the two since
|
||||
# it is used for frequency analysis only, and we are validating
|
||||
# each code range there as well. So it is safe to set it to be
|
||||
# 2 here.
|
||||
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
|
||||
|
||||
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': GB2312_ST,
|
||||
'char_len_table': GB2312_CHAR_LEN_TABLE,
|
||||
'name': 'GB2312'}
|
||||
|
||||
# Shift_JIS
|
||||
|
||||
SJIS_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
3,3,3,3,3,2,2,3, # 80 - 87
|
||||
3,3,3,3,3,3,3,3, # 88 - 8f
|
||||
3,3,3,3,3,3,3,3, # 90 - 97
|
||||
3,3,3,3,3,3,3,3, # 98 - 9f
|
||||
#0xa0 is illegal in sjis encoding, but some pages does
|
||||
#contain such byte. We need to be more error forgiven.
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,4,4,4, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,0,0,0) # f8 - ff
|
||||
|
||||
|
||||
SJIS_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
|
||||
)
|
||||
|
||||
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
|
||||
|
||||
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': SJIS_ST,
|
||||
'char_len_table': SJIS_CHAR_LEN_TABLE,
|
||||
'name': 'Shift_JIS'}
|
||||
|
||||
# UCS2-BE
|
||||
|
||||
UCS2BE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,3,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,3,3,3,3,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,0,0,0,0,0,0,0, # a0 - a7
|
||||
0,0,0,0,0,0,0,0, # a8 - af
|
||||
0,0,0,0,0,0,0,0, # b0 - b7
|
||||
0,0,0,0,0,0,0,0, # b8 - bf
|
||||
0,0,0,0,0,0,0,0, # c0 - c7
|
||||
0,0,0,0,0,0,0,0, # c8 - cf
|
||||
0,0,0,0,0,0,0,0, # d0 - d7
|
||||
0,0,0,0,0,0,0,0, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2BE_ST = (
|
||||
5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
|
||||
6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
|
||||
5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
|
||||
6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
|
||||
|
||||
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2BE_ST,
|
||||
'char_len_table': UCS2BE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16BE'}
|
||||
|
||||
# UCS2-LE
|
||||
|
||||
UCS2LE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,3,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,3,3,3,3,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,0,0,0,0,0,0,0, # a0 - a7
|
||||
0,0,0,0,0,0,0,0, # a8 - af
|
||||
0,0,0,0,0,0,0,0, # b0 - b7
|
||||
0,0,0,0,0,0,0,0, # b8 - bf
|
||||
0,0,0,0,0,0,0,0, # c0 - c7
|
||||
0,0,0,0,0,0,0,0, # c8 - cf
|
||||
0,0,0,0,0,0,0,0, # d0 - d7
|
||||
0,0,0,0,0,0,0,0, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2LE_ST = (
|
||||
6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
|
||||
7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
|
||||
5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
|
||||
|
||||
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2LE_ST,
|
||||
'char_len_table': UCS2LE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16LE'}
|
||||
|
||||
# UTF-8
|
||||
|
||||
UTF8_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
1,1,1,1,1,1,1,1, # 40 - 47
|
||||
1,1,1,1,1,1,1,1, # 48 - 4f
|
||||
1,1,1,1,1,1,1,1, # 50 - 57
|
||||
1,1,1,1,1,1,1,1, # 58 - 5f
|
||||
1,1,1,1,1,1,1,1, # 60 - 67
|
||||
1,1,1,1,1,1,1,1, # 68 - 6f
|
||||
1,1,1,1,1,1,1,1, # 70 - 77
|
||||
1,1,1,1,1,1,1,1, # 78 - 7f
|
||||
2,2,2,2,3,3,3,3, # 80 - 87
|
||||
4,4,4,4,4,4,4,4, # 88 - 8f
|
||||
4,4,4,4,4,4,4,4, # 90 - 97
|
||||
4,4,4,4,4,4,4,4, # 98 - 9f
|
||||
5,5,5,5,5,5,5,5, # a0 - a7
|
||||
5,5,5,5,5,5,5,5, # a8 - af
|
||||
5,5,5,5,5,5,5,5, # b0 - b7
|
||||
5,5,5,5,5,5,5,5, # b8 - bf
|
||||
0,0,6,6,6,6,6,6, # c0 - c7
|
||||
6,6,6,6,6,6,6,6, # c8 - cf
|
||||
6,6,6,6,6,6,6,6, # d0 - d7
|
||||
6,6,6,6,6,6,6,6, # d8 - df
|
||||
7,8,8,8,8,8,8,8, # e0 - e7
|
||||
8,8,8,8,8,9,8,8, # e8 - ef
|
||||
10,11,11,11,11,11,11,11, # f0 - f7
|
||||
12,13,13,13,14,15,0,0 # f8 - ff
|
||||
)
|
||||
|
||||
UTF8_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
|
||||
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
|
||||
MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
|
||||
MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
|
||||
MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
|
||||
)
|
||||
|
||||
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
||||
|
||||
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
|
||||
'class_factor': 16,
|
||||
'state_table': UTF8_ST,
|
||||
'char_len_table': UTF8_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-8'}
|
||||
@@ -1,310 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Metadata about languages used by our model training code for our
|
||||
SingleByteCharSetProbers. Could be used for other things in the future.
|
||||
|
||||
This code is based on the language metadata from the uchardet project.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function
|
||||
|
||||
from string import ascii_letters
|
||||
|
||||
|
||||
# TODO: Add Ukranian (KOI8-U)
|
||||
|
||||
class Language(object):
|
||||
"""Metadata about a language useful for training models
|
||||
|
||||
:ivar name: The human name for the language, in English.
|
||||
:type name: str
|
||||
:ivar iso_code: 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise,
|
||||
or use another catalog as a last resort.
|
||||
:type iso_code: str
|
||||
:ivar use_ascii: Whether or not ASCII letters should be included in trained
|
||||
models.
|
||||
:type use_ascii: bool
|
||||
:ivar charsets: The charsets we want to support and create data for.
|
||||
:type charsets: list of str
|
||||
:ivar alphabet: The characters in the language's alphabet. If `use_ascii` is
|
||||
`True`, you only need to add those not in the ASCII set.
|
||||
:type alphabet: str
|
||||
:ivar wiki_start_pages: The Wikipedia pages to start from if we're crawling
|
||||
Wikipedia for training data.
|
||||
:type wiki_start_pages: list of str
|
||||
"""
|
||||
def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None,
|
||||
alphabet=None, wiki_start_pages=None):
|
||||
super(Language, self).__init__()
|
||||
self.name = name
|
||||
self.iso_code = iso_code
|
||||
self.use_ascii = use_ascii
|
||||
self.charsets = charsets
|
||||
if self.use_ascii:
|
||||
if alphabet:
|
||||
alphabet += ascii_letters
|
||||
else:
|
||||
alphabet = ascii_letters
|
||||
elif not alphabet:
|
||||
raise ValueError('Must supply alphabet if use_ascii is False')
|
||||
self.alphabet = ''.join(sorted(set(alphabet))) if alphabet else None
|
||||
self.wiki_start_pages = wiki_start_pages
|
||||
|
||||
def __repr__(self):
|
||||
return '{}({})'.format(self.__class__.__name__,
|
||||
', '.join('{}={!r}'.format(k, v)
|
||||
for k, v in self.__dict__.items()
|
||||
if not k.startswith('_')))
|
||||
|
||||
|
||||
LANGUAGES = {'Arabic': Language(name='Arabic',
|
||||
iso_code='ar',
|
||||
use_ascii=False,
|
||||
# We only support encodings that use isolated
|
||||
# forms, because the current recommendation is
|
||||
# that the rendering system handles presentation
|
||||
# forms. This means we purposefully skip IBM864.
|
||||
charsets=['ISO-8859-6', 'WINDOWS-1256',
|
||||
'CP720', 'CP864'],
|
||||
alphabet=u'ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ',
|
||||
wiki_start_pages=[u'الصفحة_الرئيسية']),
|
||||
'Belarusian': Language(name='Belarusian',
|
||||
iso_code='be',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'IBM866', 'MacCyrillic'],
|
||||
alphabet=(u'АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ'
|
||||
u'абвгдеёжзійклмнопрстуўфхцчшыьэюяʼ'),
|
||||
wiki_start_pages=[u'Галоўная_старонка']),
|
||||
'Bulgarian': Language(name='Bulgarian',
|
||||
iso_code='bg',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'IBM855'],
|
||||
alphabet=(u'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯ'
|
||||
u'абвгдежзийклмнопрстуфхцчшщъьюя'),
|
||||
wiki_start_pages=[u'Начална_страница']),
|
||||
'Czech': Language(name='Czech',
|
||||
iso_code='cz',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=u'áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ',
|
||||
wiki_start_pages=[u'Hlavní_strana']),
|
||||
'Danish': Language(name='Danish',
|
||||
iso_code='da',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'æøåÆØÅ',
|
||||
wiki_start_pages=[u'Forside']),
|
||||
'German': Language(name='German',
|
||||
iso_code='de',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'WINDOWS-1252'],
|
||||
alphabet=u'äöüßÄÖÜ',
|
||||
wiki_start_pages=[u'Wikipedia:Hauptseite']),
|
||||
'Greek': Language(name='Greek',
|
||||
iso_code='el',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-7', 'WINDOWS-1253'],
|
||||
alphabet=(u'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ'
|
||||
u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ'),
|
||||
wiki_start_pages=[u'Πύλη:Κύρια']),
|
||||
'English': Language(name='English',
|
||||
iso_code='en',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'WINDOWS-1252'],
|
||||
wiki_start_pages=[u'Main_Page']),
|
||||
'Esperanto': Language(name='Esperanto',
|
||||
iso_code='eo',
|
||||
# Q, W, X, and Y not used at all
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-3'],
|
||||
alphabet=(u'abcĉdefgĝhĥijĵklmnoprsŝtuŭvz'
|
||||
u'ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ'),
|
||||
wiki_start_pages=[u'Vikipedio:Ĉefpaĝo']),
|
||||
'Spanish': Language(name='Spanish',
|
||||
iso_code='es',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'ñáéíóúüÑÁÉÍÓÚÜ',
|
||||
wiki_start_pages=[u'Wikipedia:Portada']),
|
||||
'Estonian': Language(name='Estonian',
|
||||
iso_code='et',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-4', 'ISO-8859-13',
|
||||
'WINDOWS-1257'],
|
||||
# C, F, Š, Q, W, X, Y, Z, Ž are only for
|
||||
# loanwords
|
||||
alphabet=(u'ABDEGHIJKLMNOPRSTUVÕÄÖÜ'
|
||||
u'abdeghijklmnoprstuvõäöü'),
|
||||
wiki_start_pages=[u'Esileht']),
|
||||
'Finnish': Language(name='Finnish',
|
||||
iso_code='fi',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'ÅÄÖŠŽåäöšž',
|
||||
wiki_start_pages=[u'Wikipedia:Etusivu']),
|
||||
'French': Language(name='French',
|
||||
iso_code='fr',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ',
|
||||
wiki_start_pages=[u'Wikipédia:Accueil_principal',
|
||||
u'Bœuf (animal)']),
|
||||
'Hebrew': Language(name='Hebrew',
|
||||
iso_code='he',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-8', 'WINDOWS-1255'],
|
||||
alphabet=u'אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ',
|
||||
wiki_start_pages=[u'עמוד_ראשי']),
|
||||
'Croatian': Language(name='Croatian',
|
||||
iso_code='hr',
|
||||
# Q, W, X, Y are only used for foreign words.
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=(u'abcčćdđefghijklmnoprsštuvzž'
|
||||
u'ABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ'),
|
||||
wiki_start_pages=[u'Glavna_stranica']),
|
||||
'Hungarian': Language(name='Hungarian',
|
||||
iso_code='hu',
|
||||
# Q, W, X, Y are only used for foreign words.
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=(u'abcdefghijklmnoprstuvzáéíóöőúüű'
|
||||
u'ABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ'),
|
||||
wiki_start_pages=[u'Kezdőlap']),
|
||||
'Italian': Language(name='Italian',
|
||||
iso_code='it',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'ÀÈÉÌÒÓÙàèéìòóù',
|
||||
wiki_start_pages=[u'Pagina_principale']),
|
||||
'Lithuanian': Language(name='Lithuanian',
|
||||
iso_code='lt',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-13', 'WINDOWS-1257',
|
||||
'ISO-8859-4'],
|
||||
# Q, W, and X not used at all
|
||||
alphabet=(u'AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽ'
|
||||
u'aąbcčdeęėfghiįyjklmnoprsštuųūvzž'),
|
||||
wiki_start_pages=[u'Pagrindinis_puslapis']),
|
||||
'Latvian': Language(name='Latvian',
|
||||
iso_code='lv',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-13', 'WINDOWS-1257',
|
||||
'ISO-8859-4'],
|
||||
# Q, W, X, Y are only for loanwords
|
||||
alphabet=(u'AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽ'
|
||||
u'aābcčdeēfgģhiījkķlļmnņoprsštuūvzž'),
|
||||
wiki_start_pages=[u'Sākumlapa']),
|
||||
'Macedonian': Language(name='Macedonian',
|
||||
iso_code='mk',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'MacCyrillic', 'IBM855'],
|
||||
alphabet=(u'АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ'
|
||||
u'абвгдѓежзѕијклљмнњопрстќуфхцчџш'),
|
||||
wiki_start_pages=[u'Главна_страница']),
|
||||
'Dutch': Language(name='Dutch',
|
||||
iso_code='nl',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'WINDOWS-1252'],
|
||||
wiki_start_pages=[u'Hoofdpagina']),
|
||||
'Polish': Language(name='Polish',
|
||||
iso_code='pl',
|
||||
# Q and X are only used for foreign words.
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=(u'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ'
|
||||
u'aąbcćdeęfghijklłmnńoóprsśtuwyzźż'),
|
||||
wiki_start_pages=[u'Wikipedia:Strona_główna']),
|
||||
'Portuguese': Language(name='Portuguese',
|
||||
iso_code='pt',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú',
|
||||
wiki_start_pages=[u'Wikipédia:Página_principal']),
|
||||
'Romanian': Language(name='Romanian',
|
||||
iso_code='ro',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=u'ăâîșțĂÂÎȘȚ',
|
||||
wiki_start_pages=[u'Pagina_principală']),
|
||||
'Russian': Language(name='Russian',
|
||||
iso_code='ru',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'KOI8-R', 'MacCyrillic', 'IBM866',
|
||||
'IBM855'],
|
||||
alphabet=(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
|
||||
u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'),
|
||||
wiki_start_pages=[u'Заглавная_страница']),
|
||||
'Slovak': Language(name='Slovak',
|
||||
iso_code='sk',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=u'áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ',
|
||||
wiki_start_pages=[u'Hlavná_stránka']),
|
||||
'Slovene': Language(name='Slovene',
|
||||
iso_code='sl',
|
||||
# Q, W, X, Y are only used for foreign words.
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=(u'abcčdefghijklmnoprsštuvzž'
|
||||
u'ABCČDEFGHIJKLMNOPRSŠTUVZŽ'),
|
||||
wiki_start_pages=[u'Glavna_stran']),
|
||||
# Serbian can be written in both Latin and Cyrillic, but there's no
|
||||
# simple way to get the Latin alphabet pages from Wikipedia through
|
||||
# the API, so for now we just support Cyrillic.
|
||||
'Serbian': Language(name='Serbian',
|
||||
iso_code='sr',
|
||||
alphabet=(u'АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ'
|
||||
u'абвгдђежзијклљмнњопрстћуфхцчџш'),
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'MacCyrillic', 'IBM855'],
|
||||
wiki_start_pages=[u'Главна_страна']),
|
||||
'Thai': Language(name='Thai',
|
||||
iso_code='th',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-11', 'TIS-620', 'CP874'],
|
||||
alphabet=u'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛',
|
||||
wiki_start_pages=[u'หน้าหลัก']),
|
||||
'Turkish': Language(name='Turkish',
|
||||
iso_code='tr',
|
||||
# Q, W, and X are not used by Turkish
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-3', 'ISO-8859-9',
|
||||
'WINDOWS-1254'],
|
||||
alphabet=(u'abcçdefgğhıijklmnoöprsştuüvyzâîû'
|
||||
u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ'),
|
||||
wiki_start_pages=[u'Ana_Sayfa']),
|
||||
'Vietnamese': Language(name='Vietnamese',
|
||||
iso_code='vi',
|
||||
use_ascii=False,
|
||||
# Windows-1258 is the only common 8-bit
|
||||
# Vietnamese encoding supported by Python.
|
||||
# From Wikipedia:
|
||||
# For systems that lack support for Unicode,
|
||||
# dozens of 8-bit Vietnamese code pages are
|
||||
# available.[1] The most common are VISCII
|
||||
# (TCVN 5712:1993), VPS, and Windows-1258.[3]
|
||||
# Where ASCII is required, such as when
|
||||
# ensuring readability in plain text e-mail,
|
||||
# Vietnamese letters are often encoded
|
||||
# according to Vietnamese Quoted-Readable
|
||||
# (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4]
|
||||
# though usage of either variable-width
|
||||
# scheme has declined dramatically following
|
||||
# the adoption of Unicode on the World Wide
|
||||
# Web.
|
||||
charsets=['WINDOWS-1258'],
|
||||
alphabet=(u'aăâbcdđeêghiklmnoôơpqrstuưvxy'
|
||||
u'AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY'),
|
||||
wiki_start_pages=[u'Chữ_Quốc_ngữ']),
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
"""Modules copied from Python 3 standard libraries, for internal use only.
|
||||
|
||||
Individual classes and functions are found in d2._backport.misc. Intended
|
||||
usage is to always import things missing from 3.1 from that module: the
|
||||
built-in/stdlib objects will be used if found.
|
||||
"""
|
||||
@@ -1,41 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012 The Python Software Foundation.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""Backports for individual classes and functions."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
__all__ = ['cache_from_source', 'callable', 'fsencode']
|
||||
|
||||
|
||||
try:
|
||||
from imp import cache_from_source
|
||||
except ImportError:
|
||||
def cache_from_source(py_file, debug=__debug__):
|
||||
ext = debug and 'c' or 'o'
|
||||
return py_file + ext
|
||||
|
||||
|
||||
try:
|
||||
callable = callable
|
||||
except NameError:
|
||||
from collections import Callable
|
||||
|
||||
def callable(obj):
|
||||
return isinstance(obj, Callable)
|
||||
|
||||
|
||||
try:
|
||||
fsencode = os.fsencode
|
||||
except AttributeError:
|
||||
def fsencode(filename):
|
||||
if isinstance(filename, bytes):
|
||||
return filename
|
||||
elif isinstance(filename, str):
|
||||
return filename.encode(sys.getfilesystemencoding())
|
||||
else:
|
||||
raise TypeError("expect bytes or str, not %s" %
|
||||
type(filename).__name__)
|
||||
@@ -1,764 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012 The Python Software Foundation.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""Utility functions for copying and archiving files and directory trees.
|
||||
|
||||
XXX The functions here don't copy the resource fork or other metadata on Mac.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import stat
|
||||
from os.path import abspath
|
||||
import fnmatch
|
||||
try:
|
||||
from collections.abc import Callable
|
||||
except ImportError:
|
||||
from collections import Callable
|
||||
import errno
|
||||
from . import tarfile
|
||||
|
||||
try:
|
||||
import bz2
|
||||
_BZ2_SUPPORTED = True
|
||||
except ImportError:
|
||||
_BZ2_SUPPORTED = False
|
||||
|
||||
try:
|
||||
from pwd import getpwnam
|
||||
except ImportError:
|
||||
getpwnam = None
|
||||
|
||||
try:
|
||||
from grp import getgrnam
|
||||
except ImportError:
|
||||
getgrnam = None
|
||||
|
||||
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
|
||||
"copytree", "move", "rmtree", "Error", "SpecialFileError",
|
||||
"ExecError", "make_archive", "get_archive_formats",
|
||||
"register_archive_format", "unregister_archive_format",
|
||||
"get_unpack_formats", "register_unpack_format",
|
||||
"unregister_unpack_format", "unpack_archive", "ignore_patterns"]
|
||||
|
||||
class Error(EnvironmentError):
|
||||
pass
|
||||
|
||||
class SpecialFileError(EnvironmentError):
|
||||
"""Raised when trying to do a kind of operation (e.g. copying) which is
|
||||
not supported on a special file (e.g. a named pipe)"""
|
||||
|
||||
class ExecError(EnvironmentError):
|
||||
"""Raised when a command could not be executed"""
|
||||
|
||||
class ReadError(EnvironmentError):
|
||||
"""Raised when an archive cannot be read"""
|
||||
|
||||
class RegistryError(Exception):
|
||||
"""Raised when a registry operation with the archiving
|
||||
and unpacking registries fails"""
|
||||
|
||||
|
||||
try:
|
||||
WindowsError
|
||||
except NameError:
|
||||
WindowsError = None
|
||||
|
||||
def copyfileobj(fsrc, fdst, length=16*1024):
|
||||
"""copy data from file-like object fsrc to file-like object fdst"""
|
||||
while 1:
|
||||
buf = fsrc.read(length)
|
||||
if not buf:
|
||||
break
|
||||
fdst.write(buf)
|
||||
|
||||
def _samefile(src, dst):
|
||||
# Macintosh, Unix.
|
||||
if hasattr(os.path, 'samefile'):
|
||||
try:
|
||||
return os.path.samefile(src, dst)
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
# All other platforms: check for same pathname.
|
||||
return (os.path.normcase(os.path.abspath(src)) ==
|
||||
os.path.normcase(os.path.abspath(dst)))
|
||||
|
||||
def copyfile(src, dst):
|
||||
"""Copy data from src to dst"""
|
||||
if _samefile(src, dst):
|
||||
raise Error("`%s` and `%s` are the same file" % (src, dst))
|
||||
|
||||
for fn in [src, dst]:
|
||||
try:
|
||||
st = os.stat(fn)
|
||||
except OSError:
|
||||
# File most likely does not exist
|
||||
pass
|
||||
else:
|
||||
# XXX What about other special files? (sockets, devices...)
|
||||
if stat.S_ISFIFO(st.st_mode):
|
||||
raise SpecialFileError("`%s` is a named pipe" % fn)
|
||||
|
||||
with open(src, 'rb') as fsrc:
|
||||
with open(dst, 'wb') as fdst:
|
||||
copyfileobj(fsrc, fdst)
|
||||
|
||||
def copymode(src, dst):
|
||||
"""Copy mode bits from src to dst"""
|
||||
if hasattr(os, 'chmod'):
|
||||
st = os.stat(src)
|
||||
mode = stat.S_IMODE(st.st_mode)
|
||||
os.chmod(dst, mode)
|
||||
|
||||
def copystat(src, dst):
|
||||
"""Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
|
||||
st = os.stat(src)
|
||||
mode = stat.S_IMODE(st.st_mode)
|
||||
if hasattr(os, 'utime'):
|
||||
os.utime(dst, (st.st_atime, st.st_mtime))
|
||||
if hasattr(os, 'chmod'):
|
||||
os.chmod(dst, mode)
|
||||
if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
|
||||
try:
|
||||
os.chflags(dst, st.st_flags)
|
||||
except OSError as why:
|
||||
if (not hasattr(errno, 'EOPNOTSUPP') or
|
||||
why.errno != errno.EOPNOTSUPP):
|
||||
raise
|
||||
|
||||
def copy(src, dst):
|
||||
"""Copy data and mode bits ("cp src dst").
|
||||
|
||||
The destination may be a directory.
|
||||
|
||||
"""
|
||||
if os.path.isdir(dst):
|
||||
dst = os.path.join(dst, os.path.basename(src))
|
||||
copyfile(src, dst)
|
||||
copymode(src, dst)
|
||||
|
||||
def copy2(src, dst):
|
||||
"""Copy data and all stat info ("cp -p src dst").
|
||||
|
||||
The destination may be a directory.
|
||||
|
||||
"""
|
||||
if os.path.isdir(dst):
|
||||
dst = os.path.join(dst, os.path.basename(src))
|
||||
copyfile(src, dst)
|
||||
copystat(src, dst)
|
||||
|
||||
def ignore_patterns(*patterns):
|
||||
"""Function that can be used as copytree() ignore parameter.
|
||||
|
||||
Patterns is a sequence of glob-style patterns
|
||||
that are used to exclude files"""
|
||||
def _ignore_patterns(path, names):
|
||||
ignored_names = []
|
||||
for pattern in patterns:
|
||||
ignored_names.extend(fnmatch.filter(names, pattern))
|
||||
return set(ignored_names)
|
||||
return _ignore_patterns
|
||||
|
||||
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
|
||||
ignore_dangling_symlinks=False):
|
||||
"""Recursively copy a directory tree.
|
||||
|
||||
The destination directory must not already exist.
|
||||
If exception(s) occur, an Error is raised with a list of reasons.
|
||||
|
||||
If the optional symlinks flag is true, symbolic links in the
|
||||
source tree result in symbolic links in the destination tree; if
|
||||
it is false, the contents of the files pointed to by symbolic
|
||||
links are copied. If the file pointed by the symlink doesn't
|
||||
exist, an exception will be added in the list of errors raised in
|
||||
an Error exception at the end of the copy process.
|
||||
|
||||
You can set the optional ignore_dangling_symlinks flag to true if you
|
||||
want to silence this exception. Notice that this has no effect on
|
||||
platforms that don't support os.symlink.
|
||||
|
||||
The optional ignore argument is a callable. If given, it
|
||||
is called with the `src` parameter, which is the directory
|
||||
being visited by copytree(), and `names` which is the list of
|
||||
`src` contents, as returned by os.listdir():
|
||||
|
||||
callable(src, names) -> ignored_names
|
||||
|
||||
Since copytree() is called recursively, the callable will be
|
||||
called once for each directory that is copied. It returns a
|
||||
list of names relative to the `src` directory that should
|
||||
not be copied.
|
||||
|
||||
The optional copy_function argument is a callable that will be used
|
||||
to copy each file. It will be called with the source path and the
|
||||
destination path as arguments. By default, copy2() is used, but any
|
||||
function that supports the same signature (like copy()) can be used.
|
||||
|
||||
"""
|
||||
names = os.listdir(src)
|
||||
if ignore is not None:
|
||||
ignored_names = ignore(src, names)
|
||||
else:
|
||||
ignored_names = set()
|
||||
|
||||
os.makedirs(dst)
|
||||
errors = []
|
||||
for name in names:
|
||||
if name in ignored_names:
|
||||
continue
|
||||
srcname = os.path.join(src, name)
|
||||
dstname = os.path.join(dst, name)
|
||||
try:
|
||||
if os.path.islink(srcname):
|
||||
linkto = os.readlink(srcname)
|
||||
if symlinks:
|
||||
os.symlink(linkto, dstname)
|
||||
else:
|
||||
# ignore dangling symlink if the flag is on
|
||||
if not os.path.exists(linkto) and ignore_dangling_symlinks:
|
||||
continue
|
||||
# otherwise let the copy occurs. copy2 will raise an error
|
||||
copy_function(srcname, dstname)
|
||||
elif os.path.isdir(srcname):
|
||||
copytree(srcname, dstname, symlinks, ignore, copy_function)
|
||||
else:
|
||||
# Will raise a SpecialFileError for unsupported file types
|
||||
copy_function(srcname, dstname)
|
||||
# catch the Error from the recursive copytree so that we can
|
||||
# continue with other files
|
||||
except Error as err:
|
||||
errors.extend(err.args[0])
|
||||
except EnvironmentError as why:
|
||||
errors.append((srcname, dstname, str(why)))
|
||||
try:
|
||||
copystat(src, dst)
|
||||
except OSError as why:
|
||||
if WindowsError is not None and isinstance(why, WindowsError):
|
||||
# Copying file access times may fail on Windows
|
||||
pass
|
||||
else:
|
||||
errors.extend((src, dst, str(why)))
|
||||
if errors:
|
||||
raise Error(errors)
|
||||
|
||||
def rmtree(path, ignore_errors=False, onerror=None):
|
||||
"""Recursively delete a directory tree.
|
||||
|
||||
If ignore_errors is set, errors are ignored; otherwise, if onerror
|
||||
is set, it is called to handle the error with arguments (func,
|
||||
path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
|
||||
path is the argument to that function that caused it to fail; and
|
||||
exc_info is a tuple returned by sys.exc_info(). If ignore_errors
|
||||
is false and onerror is None, an exception is raised.
|
||||
|
||||
"""
|
||||
if ignore_errors:
|
||||
def onerror(*args):
|
||||
pass
|
||||
elif onerror is None:
|
||||
def onerror(*args):
|
||||
raise
|
||||
try:
|
||||
if os.path.islink(path):
|
||||
# symlinks to directories are forbidden, see bug #1669
|
||||
raise OSError("Cannot call rmtree on a symbolic link")
|
||||
except OSError:
|
||||
onerror(os.path.islink, path, sys.exc_info())
|
||||
# can't continue even if onerror hook returns
|
||||
return
|
||||
names = []
|
||||
try:
|
||||
names = os.listdir(path)
|
||||
except os.error:
|
||||
onerror(os.listdir, path, sys.exc_info())
|
||||
for name in names:
|
||||
fullname = os.path.join(path, name)
|
||||
try:
|
||||
mode = os.lstat(fullname).st_mode
|
||||
except os.error:
|
||||
mode = 0
|
||||
if stat.S_ISDIR(mode):
|
||||
rmtree(fullname, ignore_errors, onerror)
|
||||
else:
|
||||
try:
|
||||
os.remove(fullname)
|
||||
except os.error:
|
||||
onerror(os.remove, fullname, sys.exc_info())
|
||||
try:
|
||||
os.rmdir(path)
|
||||
except os.error:
|
||||
onerror(os.rmdir, path, sys.exc_info())
|
||||
|
||||
|
||||
def _basename(path):
|
||||
# A basename() variant which first strips the trailing slash, if present.
|
||||
# Thus we always get the last component of the path, even for directories.
|
||||
return os.path.basename(path.rstrip(os.path.sep))
|
||||
|
||||
def move(src, dst):
|
||||
"""Recursively move a file or directory to another location. This is
|
||||
similar to the Unix "mv" command.
|
||||
|
||||
If the destination is a directory or a symlink to a directory, the source
|
||||
is moved inside the directory. The destination path must not already
|
||||
exist.
|
||||
|
||||
If the destination already exists but is not a directory, it may be
|
||||
overwritten depending on os.rename() semantics.
|
||||
|
||||
If the destination is on our current filesystem, then rename() is used.
|
||||
Otherwise, src is copied to the destination and then removed.
|
||||
A lot more could be done here... A look at a mv.c shows a lot of
|
||||
the issues this implementation glosses over.
|
||||
|
||||
"""
|
||||
real_dst = dst
|
||||
if os.path.isdir(dst):
|
||||
if _samefile(src, dst):
|
||||
# We might be on a case insensitive filesystem,
|
||||
# perform the rename anyway.
|
||||
os.rename(src, dst)
|
||||
return
|
||||
|
||||
real_dst = os.path.join(dst, _basename(src))
|
||||
if os.path.exists(real_dst):
|
||||
raise Error("Destination path '%s' already exists" % real_dst)
|
||||
try:
|
||||
os.rename(src, real_dst)
|
||||
except OSError:
|
||||
if os.path.isdir(src):
|
||||
if _destinsrc(src, dst):
|
||||
raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
|
||||
copytree(src, real_dst, symlinks=True)
|
||||
rmtree(src)
|
||||
else:
|
||||
copy2(src, real_dst)
|
||||
os.unlink(src)
|
||||
|
||||
def _destinsrc(src, dst):
|
||||
src = abspath(src)
|
||||
dst = abspath(dst)
|
||||
if not src.endswith(os.path.sep):
|
||||
src += os.path.sep
|
||||
if not dst.endswith(os.path.sep):
|
||||
dst += os.path.sep
|
||||
return dst.startswith(src)
|
||||
|
||||
def _get_gid(name):
|
||||
"""Returns a gid, given a group name."""
|
||||
if getgrnam is None or name is None:
|
||||
return None
|
||||
try:
|
||||
result = getgrnam(name)
|
||||
except KeyError:
|
||||
result = None
|
||||
if result is not None:
|
||||
return result[2]
|
||||
return None
|
||||
|
||||
def _get_uid(name):
|
||||
"""Returns an uid, given a user name."""
|
||||
if getpwnam is None or name is None:
|
||||
return None
|
||||
try:
|
||||
result = getpwnam(name)
|
||||
except KeyError:
|
||||
result = None
|
||||
if result is not None:
|
||||
return result[2]
|
||||
return None
|
||||
|
||||
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
|
||||
owner=None, group=None, logger=None):
|
||||
"""Create a (possibly compressed) tar file from all the files under
|
||||
'base_dir'.
|
||||
|
||||
'compress' must be "gzip" (the default), "bzip2", or None.
|
||||
|
||||
'owner' and 'group' can be used to define an owner and a group for the
|
||||
archive that is being built. If not provided, the current owner and group
|
||||
will be used.
|
||||
|
||||
The output tar file will be named 'base_name' + ".tar", possibly plus
|
||||
the appropriate compression extension (".gz", or ".bz2").
|
||||
|
||||
Returns the output filename.
|
||||
"""
|
||||
tar_compression = {'gzip': 'gz', None: ''}
|
||||
compress_ext = {'gzip': '.gz'}
|
||||
|
||||
if _BZ2_SUPPORTED:
|
||||
tar_compression['bzip2'] = 'bz2'
|
||||
compress_ext['bzip2'] = '.bz2'
|
||||
|
||||
# flags for compression program, each element of list will be an argument
|
||||
if compress is not None and compress not in compress_ext:
|
||||
raise ValueError("bad value for 'compress', or compression format not "
|
||||
"supported : {0}".format(compress))
|
||||
|
||||
archive_name = base_name + '.tar' + compress_ext.get(compress, '')
|
||||
archive_dir = os.path.dirname(archive_name)
|
||||
|
||||
if not os.path.exists(archive_dir):
|
||||
if logger is not None:
|
||||
logger.info("creating %s", archive_dir)
|
||||
if not dry_run:
|
||||
os.makedirs(archive_dir)
|
||||
|
||||
# creating the tarball
|
||||
if logger is not None:
|
||||
logger.info('Creating tar archive')
|
||||
|
||||
uid = _get_uid(owner)
|
||||
gid = _get_gid(group)
|
||||
|
||||
def _set_uid_gid(tarinfo):
|
||||
if gid is not None:
|
||||
tarinfo.gid = gid
|
||||
tarinfo.gname = group
|
||||
if uid is not None:
|
||||
tarinfo.uid = uid
|
||||
tarinfo.uname = owner
|
||||
return tarinfo
|
||||
|
||||
if not dry_run:
|
||||
tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
|
||||
try:
|
||||
tar.add(base_dir, filter=_set_uid_gid)
|
||||
finally:
|
||||
tar.close()
|
||||
|
||||
return archive_name
|
||||
|
||||
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
|
||||
# XXX see if we want to keep an external call here
|
||||
if verbose:
|
||||
zipoptions = "-r"
|
||||
else:
|
||||
zipoptions = "-rq"
|
||||
from distutils.errors import DistutilsExecError
|
||||
from distutils.spawn import spawn
|
||||
try:
|
||||
spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
|
||||
except DistutilsExecError:
|
||||
# XXX really should distinguish between "couldn't find
|
||||
# external 'zip' command" and "zip failed".
|
||||
raise ExecError("unable to create zip file '%s': "
|
||||
"could neither import the 'zipfile' module nor "
|
||||
"find a standalone zip utility") % zip_filename
|
||||
|
||||
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
|
||||
"""Create a zip file from all the files under 'base_dir'.
|
||||
|
||||
The output zip file will be named 'base_name' + ".zip". Uses either the
|
||||
"zipfile" Python module (if available) or the InfoZIP "zip" utility
|
||||
(if installed and found on the default search path). If neither tool is
|
||||
available, raises ExecError. Returns the name of the output zip
|
||||
file.
|
||||
"""
|
||||
zip_filename = base_name + ".zip"
|
||||
archive_dir = os.path.dirname(base_name)
|
||||
|
||||
if not os.path.exists(archive_dir):
|
||||
if logger is not None:
|
||||
logger.info("creating %s", archive_dir)
|
||||
if not dry_run:
|
||||
os.makedirs(archive_dir)
|
||||
|
||||
# If zipfile module is not available, try spawning an external 'zip'
|
||||
# command.
|
||||
try:
|
||||
import zipfile
|
||||
except ImportError:
|
||||
zipfile = None
|
||||
|
||||
if zipfile is None:
|
||||
_call_external_zip(base_dir, zip_filename, verbose, dry_run)
|
||||
else:
|
||||
if logger is not None:
|
||||
logger.info("creating '%s' and adding '%s' to it",
|
||||
zip_filename, base_dir)
|
||||
|
||||
if not dry_run:
|
||||
zip = zipfile.ZipFile(zip_filename, "w",
|
||||
compression=zipfile.ZIP_DEFLATED)
|
||||
|
||||
for dirpath, dirnames, filenames in os.walk(base_dir):
|
||||
for name in filenames:
|
||||
path = os.path.normpath(os.path.join(dirpath, name))
|
||||
if os.path.isfile(path):
|
||||
zip.write(path, path)
|
||||
if logger is not None:
|
||||
logger.info("adding '%s'", path)
|
||||
zip.close()
|
||||
|
||||
return zip_filename
|
||||
|
||||
_ARCHIVE_FORMATS = {
|
||||
'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
|
||||
'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
|
||||
'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
|
||||
'zip': (_make_zipfile, [], "ZIP file"),
|
||||
}
|
||||
|
||||
if _BZ2_SUPPORTED:
|
||||
_ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
|
||||
"bzip2'ed tar-file")
|
||||
|
||||
def get_archive_formats():
|
||||
"""Returns a list of supported formats for archiving and unarchiving.
|
||||
|
||||
Each element of the returned sequence is a tuple (name, description)
|
||||
"""
|
||||
formats = [(name, registry[2]) for name, registry in
|
||||
_ARCHIVE_FORMATS.items()]
|
||||
formats.sort()
|
||||
return formats
|
||||
|
||||
def register_archive_format(name, function, extra_args=None, description=''):
|
||||
"""Registers an archive format.
|
||||
|
||||
name is the name of the format. function is the callable that will be
|
||||
used to create archives. If provided, extra_args is a sequence of
|
||||
(name, value) tuples that will be passed as arguments to the callable.
|
||||
description can be provided to describe the format, and will be returned
|
||||
by the get_archive_formats() function.
|
||||
"""
|
||||
if extra_args is None:
|
||||
extra_args = []
|
||||
if not isinstance(function, Callable):
|
||||
raise TypeError('The %s object is not callable' % function)
|
||||
if not isinstance(extra_args, (tuple, list)):
|
||||
raise TypeError('extra_args needs to be a sequence')
|
||||
for element in extra_args:
|
||||
if not isinstance(element, (tuple, list)) or len(element) !=2:
|
||||
raise TypeError('extra_args elements are : (arg_name, value)')
|
||||
|
||||
_ARCHIVE_FORMATS[name] = (function, extra_args, description)
|
||||
|
||||
def unregister_archive_format(name):
|
||||
del _ARCHIVE_FORMATS[name]
|
||||
|
||||
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
|
||||
dry_run=0, owner=None, group=None, logger=None):
|
||||
"""Create an archive file (eg. zip or tar).
|
||||
|
||||
'base_name' is the name of the file to create, minus any format-specific
|
||||
extension; 'format' is the archive format: one of "zip", "tar", "bztar"
|
||||
or "gztar".
|
||||
|
||||
'root_dir' is a directory that will be the root directory of the
|
||||
archive; ie. we typically chdir into 'root_dir' before creating the
|
||||
archive. 'base_dir' is the directory where we start archiving from;
|
||||
ie. 'base_dir' will be the common prefix of all files and
|
||||
directories in the archive. 'root_dir' and 'base_dir' both default
|
||||
to the current directory. Returns the name of the archive file.
|
||||
|
||||
'owner' and 'group' are used when creating a tar archive. By default,
|
||||
uses the current owner and group.
|
||||
"""
|
||||
save_cwd = os.getcwd()
|
||||
if root_dir is not None:
|
||||
if logger is not None:
|
||||
logger.debug("changing into '%s'", root_dir)
|
||||
base_name = os.path.abspath(base_name)
|
||||
if not dry_run:
|
||||
os.chdir(root_dir)
|
||||
|
||||
if base_dir is None:
|
||||
base_dir = os.curdir
|
||||
|
||||
kwargs = {'dry_run': dry_run, 'logger': logger}
|
||||
|
||||
try:
|
||||
format_info = _ARCHIVE_FORMATS[format]
|
||||
except KeyError:
|
||||
raise ValueError("unknown archive format '%s'" % format)
|
||||
|
||||
func = format_info[0]
|
||||
for arg, val in format_info[1]:
|
||||
kwargs[arg] = val
|
||||
|
||||
if format != 'zip':
|
||||
kwargs['owner'] = owner
|
||||
kwargs['group'] = group
|
||||
|
||||
try:
|
||||
filename = func(base_name, base_dir, **kwargs)
|
||||
finally:
|
||||
if root_dir is not None:
|
||||
if logger is not None:
|
||||
logger.debug("changing back to '%s'", save_cwd)
|
||||
os.chdir(save_cwd)
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
def get_unpack_formats():
|
||||
"""Returns a list of supported formats for unpacking.
|
||||
|
||||
Each element of the returned sequence is a tuple
|
||||
(name, extensions, description)
|
||||
"""
|
||||
formats = [(name, info[0], info[3]) for name, info in
|
||||
_UNPACK_FORMATS.items()]
|
||||
formats.sort()
|
||||
return formats
|
||||
|
||||
def _check_unpack_options(extensions, function, extra_args):
|
||||
"""Checks what gets registered as an unpacker."""
|
||||
# first make sure no other unpacker is registered for this extension
|
||||
existing_extensions = {}
|
||||
for name, info in _UNPACK_FORMATS.items():
|
||||
for ext in info[0]:
|
||||
existing_extensions[ext] = name
|
||||
|
||||
for extension in extensions:
|
||||
if extension in existing_extensions:
|
||||
msg = '%s is already registered for "%s"'
|
||||
raise RegistryError(msg % (extension,
|
||||
existing_extensions[extension]))
|
||||
|
||||
if not isinstance(function, Callable):
|
||||
raise TypeError('The registered function must be a callable')
|
||||
|
||||
|
||||
def register_unpack_format(name, extensions, function, extra_args=None,
|
||||
description=''):
|
||||
"""Registers an unpack format.
|
||||
|
||||
`name` is the name of the format. `extensions` is a list of extensions
|
||||
corresponding to the format.
|
||||
|
||||
`function` is the callable that will be
|
||||
used to unpack archives. The callable will receive archives to unpack.
|
||||
If it's unable to handle an archive, it needs to raise a ReadError
|
||||
exception.
|
||||
|
||||
If provided, `extra_args` is a sequence of
|
||||
(name, value) tuples that will be passed as arguments to the callable.
|
||||
description can be provided to describe the format, and will be returned
|
||||
by the get_unpack_formats() function.
|
||||
"""
|
||||
if extra_args is None:
|
||||
extra_args = []
|
||||
_check_unpack_options(extensions, function, extra_args)
|
||||
_UNPACK_FORMATS[name] = extensions, function, extra_args, description
|
||||
|
||||
def unregister_unpack_format(name):
|
||||
"""Removes the pack format from the registry."""
|
||||
del _UNPACK_FORMATS[name]
|
||||
|
||||
def _ensure_directory(path):
|
||||
"""Ensure that the parent directory of `path` exists"""
|
||||
dirname = os.path.dirname(path)
|
||||
if not os.path.isdir(dirname):
|
||||
os.makedirs(dirname)
|
||||
|
||||
def _unpack_zipfile(filename, extract_dir):
|
||||
"""Unpack zip `filename` to `extract_dir`
|
||||
"""
|
||||
try:
|
||||
import zipfile
|
||||
except ImportError:
|
||||
raise ReadError('zlib not supported, cannot unpack this archive.')
|
||||
|
||||
if not zipfile.is_zipfile(filename):
|
||||
raise ReadError("%s is not a zip file" % filename)
|
||||
|
||||
zip = zipfile.ZipFile(filename)
|
||||
try:
|
||||
for info in zip.infolist():
|
||||
name = info.filename
|
||||
|
||||
# don't extract absolute paths or ones with .. in them
|
||||
if name.startswith('/') or '..' in name:
|
||||
continue
|
||||
|
||||
target = os.path.join(extract_dir, *name.split('/'))
|
||||
if not target:
|
||||
continue
|
||||
|
||||
_ensure_directory(target)
|
||||
if not name.endswith('/'):
|
||||
# file
|
||||
data = zip.read(info.filename)
|
||||
f = open(target, 'wb')
|
||||
try:
|
||||
f.write(data)
|
||||
finally:
|
||||
f.close()
|
||||
del data
|
||||
finally:
|
||||
zip.close()
|
||||
|
||||
def _unpack_tarfile(filename, extract_dir):
|
||||
"""Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
|
||||
"""
|
||||
try:
|
||||
tarobj = tarfile.open(filename)
|
||||
except tarfile.TarError:
|
||||
raise ReadError(
|
||||
"%s is not a compressed or uncompressed tar file" % filename)
|
||||
try:
|
||||
tarobj.extractall(extract_dir)
|
||||
finally:
|
||||
tarobj.close()
|
||||
|
||||
_UNPACK_FORMATS = {
|
||||
'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
|
||||
'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
|
||||
'zip': (['.zip'], _unpack_zipfile, [], "ZIP file")
|
||||
}
|
||||
|
||||
if _BZ2_SUPPORTED:
|
||||
_UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [],
|
||||
"bzip2'ed tar-file")
|
||||
|
||||
def _find_unpack_format(filename):
|
||||
for name, info in _UNPACK_FORMATS.items():
|
||||
for extension in info[0]:
|
||||
if filename.endswith(extension):
|
||||
return name
|
||||
return None
|
||||
|
||||
def unpack_archive(filename, extract_dir=None, format=None):
|
||||
"""Unpack an archive.
|
||||
|
||||
`filename` is the name of the archive.
|
||||
|
||||
`extract_dir` is the name of the target directory, where the archive
|
||||
is unpacked. If not provided, the current working directory is used.
|
||||
|
||||
`format` is the archive format: one of "zip", "tar", or "gztar". Or any
|
||||
other registered format. If not provided, unpack_archive will use the
|
||||
filename extension and see if an unpacker was registered for that
|
||||
extension.
|
||||
|
||||
In case none is found, a ValueError is raised.
|
||||
"""
|
||||
if extract_dir is None:
|
||||
extract_dir = os.getcwd()
|
||||
|
||||
if format is not None:
|
||||
try:
|
||||
format_info = _UNPACK_FORMATS[format]
|
||||
except KeyError:
|
||||
raise ValueError("Unknown unpack format '{0}'".format(format))
|
||||
|
||||
func = format_info[1]
|
||||
func(filename, extract_dir, **dict(format_info[2]))
|
||||
else:
|
||||
# we need to look at the registered unpackers supported extensions
|
||||
format = _find_unpack_format(filename)
|
||||
if format is None:
|
||||
raise ReadError("Unknown archive format '{0}'".format(filename))
|
||||
|
||||
func = _UNPACK_FORMATS[format][1]
|
||||
kwargs = dict(_UNPACK_FORMATS[format][2])
|
||||
func(filename, extract_dir, **kwargs)
|
||||
@@ -1,84 +0,0 @@
|
||||
[posix_prefix]
|
||||
# Configuration directories. Some of these come straight out of the
|
||||
# configure script. They are for implementing the other variables, not to
|
||||
# be used directly in [resource_locations].
|
||||
confdir = /etc
|
||||
datadir = /usr/share
|
||||
libdir = /usr/lib
|
||||
statedir = /var
|
||||
# User resource directory
|
||||
local = ~/.local/{distribution.name}
|
||||
|
||||
stdlib = {base}/lib/python{py_version_short}
|
||||
platstdlib = {platbase}/lib/python{py_version_short}
|
||||
purelib = {base}/lib/python{py_version_short}/site-packages
|
||||
platlib = {platbase}/lib/python{py_version_short}/site-packages
|
||||
include = {base}/include/python{py_version_short}{abiflags}
|
||||
platinclude = {platbase}/include/python{py_version_short}{abiflags}
|
||||
data = {base}
|
||||
|
||||
[posix_home]
|
||||
stdlib = {base}/lib/python
|
||||
platstdlib = {base}/lib/python
|
||||
purelib = {base}/lib/python
|
||||
platlib = {base}/lib/python
|
||||
include = {base}/include/python
|
||||
platinclude = {base}/include/python
|
||||
scripts = {base}/bin
|
||||
data = {base}
|
||||
|
||||
[nt]
|
||||
stdlib = {base}/Lib
|
||||
platstdlib = {base}/Lib
|
||||
purelib = {base}/Lib/site-packages
|
||||
platlib = {base}/Lib/site-packages
|
||||
include = {base}/Include
|
||||
platinclude = {base}/Include
|
||||
scripts = {base}/Scripts
|
||||
data = {base}
|
||||
|
||||
[os2]
|
||||
stdlib = {base}/Lib
|
||||
platstdlib = {base}/Lib
|
||||
purelib = {base}/Lib/site-packages
|
||||
platlib = {base}/Lib/site-packages
|
||||
include = {base}/Include
|
||||
platinclude = {base}/Include
|
||||
scripts = {base}/Scripts
|
||||
data = {base}
|
||||
|
||||
[os2_home]
|
||||
stdlib = {userbase}/lib/python{py_version_short}
|
||||
platstdlib = {userbase}/lib/python{py_version_short}
|
||||
purelib = {userbase}/lib/python{py_version_short}/site-packages
|
||||
platlib = {userbase}/lib/python{py_version_short}/site-packages
|
||||
include = {userbase}/include/python{py_version_short}
|
||||
scripts = {userbase}/bin
|
||||
data = {userbase}
|
||||
|
||||
[nt_user]
|
||||
stdlib = {userbase}/Python{py_version_nodot}
|
||||
platstdlib = {userbase}/Python{py_version_nodot}
|
||||
purelib = {userbase}/Python{py_version_nodot}/site-packages
|
||||
platlib = {userbase}/Python{py_version_nodot}/site-packages
|
||||
include = {userbase}/Python{py_version_nodot}/Include
|
||||
scripts = {userbase}/Scripts
|
||||
data = {userbase}
|
||||
|
||||
[posix_user]
|
||||
stdlib = {userbase}/lib/python{py_version_short}
|
||||
platstdlib = {userbase}/lib/python{py_version_short}
|
||||
purelib = {userbase}/lib/python{py_version_short}/site-packages
|
||||
platlib = {userbase}/lib/python{py_version_short}/site-packages
|
||||
include = {userbase}/include/python{py_version_short}
|
||||
scripts = {userbase}/bin
|
||||
data = {userbase}
|
||||
|
||||
[osx_framework_user]
|
||||
stdlib = {userbase}/lib/python
|
||||
platstdlib = {userbase}/lib/python
|
||||
purelib = {userbase}/lib/python/site-packages
|
||||
platlib = {userbase}/lib/python/site-packages
|
||||
include = {userbase}/include
|
||||
scripts = {userbase}/bin
|
||||
data = {userbase}
|
||||
@@ -1,786 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012 The Python Software Foundation.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""Access to Python's configuration information."""
|
||||
|
||||
import codecs
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from os.path import pardir, realpath
|
||||
try:
|
||||
import configparser
|
||||
except ImportError:
|
||||
import ConfigParser as configparser
|
||||
|
||||
|
||||
__all__ = [
|
||||
'get_config_h_filename',
|
||||
'get_config_var',
|
||||
'get_config_vars',
|
||||
'get_makefile_filename',
|
||||
'get_path',
|
||||
'get_path_names',
|
||||
'get_paths',
|
||||
'get_platform',
|
||||
'get_python_version',
|
||||
'get_scheme_names',
|
||||
'parse_config_h',
|
||||
]
|
||||
|
||||
|
||||
def _safe_realpath(path):
|
||||
try:
|
||||
return realpath(path)
|
||||
except OSError:
|
||||
return path
|
||||
|
||||
|
||||
if sys.executable:
|
||||
_PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable))
|
||||
else:
|
||||
# sys.executable can be empty if argv[0] has been changed and Python is
|
||||
# unable to retrieve the real program name
|
||||
_PROJECT_BASE = _safe_realpath(os.getcwd())
|
||||
|
||||
if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower():
|
||||
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir))
|
||||
# PC/VS7.1
|
||||
if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower():
|
||||
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
|
||||
# PC/AMD64
|
||||
if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower():
|
||||
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
|
||||
|
||||
|
||||
def is_python_build():
|
||||
for fn in ("Setup.dist", "Setup.local"):
|
||||
if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)):
|
||||
return True
|
||||
return False
|
||||
|
||||
_PYTHON_BUILD = is_python_build()
|
||||
|
||||
_cfg_read = False
|
||||
|
||||
def _ensure_cfg_read():
|
||||
global _cfg_read
|
||||
if not _cfg_read:
|
||||
from ..resources import finder
|
||||
backport_package = __name__.rsplit('.', 1)[0]
|
||||
_finder = finder(backport_package)
|
||||
_cfgfile = _finder.find('sysconfig.cfg')
|
||||
assert _cfgfile, 'sysconfig.cfg exists'
|
||||
with _cfgfile.as_stream() as s:
|
||||
_SCHEMES.readfp(s)
|
||||
if _PYTHON_BUILD:
|
||||
for scheme in ('posix_prefix', 'posix_home'):
|
||||
_SCHEMES.set(scheme, 'include', '{srcdir}/Include')
|
||||
_SCHEMES.set(scheme, 'platinclude', '{projectbase}/.')
|
||||
|
||||
_cfg_read = True
|
||||
|
||||
|
||||
_SCHEMES = configparser.RawConfigParser()
|
||||
_VAR_REPL = re.compile(r'\{([^{]*?)\}')
|
||||
|
||||
def _expand_globals(config):
|
||||
_ensure_cfg_read()
|
||||
if config.has_section('globals'):
|
||||
globals = config.items('globals')
|
||||
else:
|
||||
globals = tuple()
|
||||
|
||||
sections = config.sections()
|
||||
for section in sections:
|
||||
if section == 'globals':
|
||||
continue
|
||||
for option, value in globals:
|
||||
if config.has_option(section, option):
|
||||
continue
|
||||
config.set(section, option, value)
|
||||
config.remove_section('globals')
|
||||
|
||||
# now expanding local variables defined in the cfg file
|
||||
#
|
||||
for section in config.sections():
|
||||
variables = dict(config.items(section))
|
||||
|
||||
def _replacer(matchobj):
|
||||
name = matchobj.group(1)
|
||||
if name in variables:
|
||||
return variables[name]
|
||||
return matchobj.group(0)
|
||||
|
||||
for option, value in config.items(section):
|
||||
config.set(section, option, _VAR_REPL.sub(_replacer, value))
|
||||
|
||||
#_expand_globals(_SCHEMES)
|
||||
|
||||
_PY_VERSION = '%s.%s.%s' % sys.version_info[:3]
|
||||
_PY_VERSION_SHORT = '%s.%s' % sys.version_info[:2]
|
||||
_PY_VERSION_SHORT_NO_DOT = '%s%s' % sys.version_info[:2]
|
||||
_PREFIX = os.path.normpath(sys.prefix)
|
||||
_EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
|
||||
_CONFIG_VARS = None
|
||||
_USER_BASE = None
|
||||
|
||||
|
||||
def _subst_vars(path, local_vars):
|
||||
"""In the string `path`, replace tokens like {some.thing} with the
|
||||
corresponding value from the map `local_vars`.
|
||||
|
||||
If there is no corresponding value, leave the token unchanged.
|
||||
"""
|
||||
def _replacer(matchobj):
|
||||
name = matchobj.group(1)
|
||||
if name in local_vars:
|
||||
return local_vars[name]
|
||||
elif name in os.environ:
|
||||
return os.environ[name]
|
||||
return matchobj.group(0)
|
||||
return _VAR_REPL.sub(_replacer, path)
|
||||
|
||||
|
||||
def _extend_dict(target_dict, other_dict):
|
||||
target_keys = target_dict.keys()
|
||||
for key, value in other_dict.items():
|
||||
if key in target_keys:
|
||||
continue
|
||||
target_dict[key] = value
|
||||
|
||||
|
||||
def _expand_vars(scheme, vars):
|
||||
res = {}
|
||||
if vars is None:
|
||||
vars = {}
|
||||
_extend_dict(vars, get_config_vars())
|
||||
|
||||
for key, value in _SCHEMES.items(scheme):
|
||||
if os.name in ('posix', 'nt'):
|
||||
value = os.path.expanduser(value)
|
||||
res[key] = os.path.normpath(_subst_vars(value, vars))
|
||||
return res
|
||||
|
||||
|
||||
def format_value(value, vars):
|
||||
def _replacer(matchobj):
|
||||
name = matchobj.group(1)
|
||||
if name in vars:
|
||||
return vars[name]
|
||||
return matchobj.group(0)
|
||||
return _VAR_REPL.sub(_replacer, value)
|
||||
|
||||
|
||||
def _get_default_scheme():
|
||||
if os.name == 'posix':
|
||||
# the default scheme for posix is posix_prefix
|
||||
return 'posix_prefix'
|
||||
return os.name
|
||||
|
||||
|
||||
def _getuserbase():
|
||||
env_base = os.environ.get("PYTHONUSERBASE", None)
|
||||
|
||||
def joinuser(*args):
|
||||
return os.path.expanduser(os.path.join(*args))
|
||||
|
||||
# what about 'os2emx', 'riscos' ?
|
||||
if os.name == "nt":
|
||||
base = os.environ.get("APPDATA") or "~"
|
||||
if env_base:
|
||||
return env_base
|
||||
else:
|
||||
return joinuser(base, "Python")
|
||||
|
||||
if sys.platform == "darwin":
|
||||
framework = get_config_var("PYTHONFRAMEWORK")
|
||||
if framework:
|
||||
if env_base:
|
||||
return env_base
|
||||
else:
|
||||
return joinuser("~", "Library", framework, "%d.%d" %
|
||||
sys.version_info[:2])
|
||||
|
||||
if env_base:
|
||||
return env_base
|
||||
else:
|
||||
return joinuser("~", ".local")
|
||||
|
||||
|
||||
def _parse_makefile(filename, vars=None):
|
||||
"""Parse a Makefile-style file.
|
||||
|
||||
A dictionary containing name/value pairs is returned. If an
|
||||
optional dictionary is passed in as the second argument, it is
|
||||
used instead of a new dictionary.
|
||||
"""
|
||||
# Regexes needed for parsing Makefile (and similar syntaxes,
|
||||
# like old-style Setup files).
|
||||
_variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
|
||||
_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)")
|
||||
_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}")
|
||||
|
||||
if vars is None:
|
||||
vars = {}
|
||||
done = {}
|
||||
notdone = {}
|
||||
|
||||
with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f:
|
||||
lines = f.readlines()
|
||||
|
||||
for line in lines:
|
||||
if line.startswith('#') or line.strip() == '':
|
||||
continue
|
||||
m = _variable_rx.match(line)
|
||||
if m:
|
||||
n, v = m.group(1, 2)
|
||||
v = v.strip()
|
||||
# `$$' is a literal `$' in make
|
||||
tmpv = v.replace('$$', '')
|
||||
|
||||
if "$" in tmpv:
|
||||
notdone[n] = v
|
||||
else:
|
||||
try:
|
||||
v = int(v)
|
||||
except ValueError:
|
||||
# insert literal `$'
|
||||
done[n] = v.replace('$$', '$')
|
||||
else:
|
||||
done[n] = v
|
||||
|
||||
# do variable interpolation here
|
||||
variables = list(notdone.keys())
|
||||
|
||||
# Variables with a 'PY_' prefix in the makefile. These need to
|
||||
# be made available without that prefix through sysconfig.
|
||||
# Special care is needed to ensure that variable expansion works, even
|
||||
# if the expansion uses the name without a prefix.
|
||||
renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS')
|
||||
|
||||
while len(variables) > 0:
|
||||
for name in tuple(variables):
|
||||
value = notdone[name]
|
||||
m = _findvar1_rx.search(value) or _findvar2_rx.search(value)
|
||||
if m is not None:
|
||||
n = m.group(1)
|
||||
found = True
|
||||
if n in done:
|
||||
item = str(done[n])
|
||||
elif n in notdone:
|
||||
# get it on a subsequent round
|
||||
found = False
|
||||
elif n in os.environ:
|
||||
# do it like make: fall back to environment
|
||||
item = os.environ[n]
|
||||
|
||||
elif n in renamed_variables:
|
||||
if (name.startswith('PY_') and
|
||||
name[3:] in renamed_variables):
|
||||
item = ""
|
||||
|
||||
elif 'PY_' + n in notdone:
|
||||
found = False
|
||||
|
||||
else:
|
||||
item = str(done['PY_' + n])
|
||||
|
||||
else:
|
||||
done[n] = item = ""
|
||||
|
||||
if found:
|
||||
after = value[m.end():]
|
||||
value = value[:m.start()] + item + after
|
||||
if "$" in after:
|
||||
notdone[name] = value
|
||||
else:
|
||||
try:
|
||||
value = int(value)
|
||||
except ValueError:
|
||||
done[name] = value.strip()
|
||||
else:
|
||||
done[name] = value
|
||||
variables.remove(name)
|
||||
|
||||
if (name.startswith('PY_') and
|
||||
name[3:] in renamed_variables):
|
||||
|
||||
name = name[3:]
|
||||
if name not in done:
|
||||
done[name] = value
|
||||
|
||||
else:
|
||||
# bogus variable reference (e.g. "prefix=$/opt/python");
|
||||
# just drop it since we can't deal
|
||||
done[name] = value
|
||||
variables.remove(name)
|
||||
|
||||
# strip spurious spaces
|
||||
for k, v in done.items():
|
||||
if isinstance(v, str):
|
||||
done[k] = v.strip()
|
||||
|
||||
# save the results in the global dictionary
|
||||
vars.update(done)
|
||||
return vars
|
||||
|
||||
|
||||
def get_makefile_filename():
|
||||
"""Return the path of the Makefile."""
|
||||
if _PYTHON_BUILD:
|
||||
return os.path.join(_PROJECT_BASE, "Makefile")
|
||||
if hasattr(sys, 'abiflags'):
|
||||
config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags)
|
||||
else:
|
||||
config_dir_name = 'config'
|
||||
return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile')
|
||||
|
||||
|
||||
def _init_posix(vars):
|
||||
"""Initialize the module as appropriate for POSIX systems."""
|
||||
# load the installed Makefile:
|
||||
makefile = get_makefile_filename()
|
||||
try:
|
||||
_parse_makefile(makefile, vars)
|
||||
except IOError as e:
|
||||
msg = "invalid Python installation: unable to open %s" % makefile
|
||||
if hasattr(e, "strerror"):
|
||||
msg = msg + " (%s)" % e.strerror
|
||||
raise IOError(msg)
|
||||
# load the installed pyconfig.h:
|
||||
config_h = get_config_h_filename()
|
||||
try:
|
||||
with open(config_h) as f:
|
||||
parse_config_h(f, vars)
|
||||
except IOError as e:
|
||||
msg = "invalid Python installation: unable to open %s" % config_h
|
||||
if hasattr(e, "strerror"):
|
||||
msg = msg + " (%s)" % e.strerror
|
||||
raise IOError(msg)
|
||||
# On AIX, there are wrong paths to the linker scripts in the Makefile
|
||||
# -- these paths are relative to the Python source, but when installed
|
||||
# the scripts are in another directory.
|
||||
if _PYTHON_BUILD:
|
||||
vars['LDSHARED'] = vars['BLDSHARED']
|
||||
|
||||
|
||||
def _init_non_posix(vars):
|
||||
"""Initialize the module as appropriate for NT"""
|
||||
# set basic install directories
|
||||
vars['LIBDEST'] = get_path('stdlib')
|
||||
vars['BINLIBDEST'] = get_path('platstdlib')
|
||||
vars['INCLUDEPY'] = get_path('include')
|
||||
vars['SO'] = '.pyd'
|
||||
vars['EXE'] = '.exe'
|
||||
vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT
|
||||
vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable))
|
||||
|
||||
#
|
||||
# public APIs
|
||||
#
|
||||
|
||||
|
||||
def parse_config_h(fp, vars=None):
|
||||
"""Parse a config.h-style file.
|
||||
|
||||
A dictionary containing name/value pairs is returned. If an
|
||||
optional dictionary is passed in as the second argument, it is
|
||||
used instead of a new dictionary.
|
||||
"""
|
||||
if vars is None:
|
||||
vars = {}
|
||||
define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n")
|
||||
undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n")
|
||||
|
||||
while True:
|
||||
line = fp.readline()
|
||||
if not line:
|
||||
break
|
||||
m = define_rx.match(line)
|
||||
if m:
|
||||
n, v = m.group(1, 2)
|
||||
try:
|
||||
v = int(v)
|
||||
except ValueError:
|
||||
pass
|
||||
vars[n] = v
|
||||
else:
|
||||
m = undef_rx.match(line)
|
||||
if m:
|
||||
vars[m.group(1)] = 0
|
||||
return vars
|
||||
|
||||
|
||||
def get_config_h_filename():
|
||||
"""Return the path of pyconfig.h."""
|
||||
if _PYTHON_BUILD:
|
||||
if os.name == "nt":
|
||||
inc_dir = os.path.join(_PROJECT_BASE, "PC")
|
||||
else:
|
||||
inc_dir = _PROJECT_BASE
|
||||
else:
|
||||
inc_dir = get_path('platinclude')
|
||||
return os.path.join(inc_dir, 'pyconfig.h')
|
||||
|
||||
|
||||
def get_scheme_names():
|
||||
"""Return a tuple containing the schemes names."""
|
||||
return tuple(sorted(_SCHEMES.sections()))
|
||||
|
||||
|
||||
def get_path_names():
|
||||
"""Return a tuple containing the paths names."""
|
||||
# xxx see if we want a static list
|
||||
return _SCHEMES.options('posix_prefix')
|
||||
|
||||
|
||||
def get_paths(scheme=_get_default_scheme(), vars=None, expand=True):
|
||||
"""Return a mapping containing an install scheme.
|
||||
|
||||
``scheme`` is the install scheme name. If not provided, it will
|
||||
return the default scheme for the current platform.
|
||||
"""
|
||||
_ensure_cfg_read()
|
||||
if expand:
|
||||
return _expand_vars(scheme, vars)
|
||||
else:
|
||||
return dict(_SCHEMES.items(scheme))
|
||||
|
||||
|
||||
def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True):
|
||||
"""Return a path corresponding to the scheme.
|
||||
|
||||
``scheme`` is the install scheme name.
|
||||
"""
|
||||
return get_paths(scheme, vars, expand)[name]
|
||||
|
||||
|
||||
def get_config_vars(*args):
|
||||
"""With no arguments, return a dictionary of all configuration
|
||||
variables relevant for the current platform.
|
||||
|
||||
On Unix, this means every variable defined in Python's installed Makefile;
|
||||
On Windows and Mac OS it's a much smaller set.
|
||||
|
||||
With arguments, return a list of values that result from looking up
|
||||
each argument in the configuration variable dictionary.
|
||||
"""
|
||||
global _CONFIG_VARS
|
||||
if _CONFIG_VARS is None:
|
||||
_CONFIG_VARS = {}
|
||||
# Normalized versions of prefix and exec_prefix are handy to have;
|
||||
# in fact, these are the standard versions used most places in the
|
||||
# distutils2 module.
|
||||
_CONFIG_VARS['prefix'] = _PREFIX
|
||||
_CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX
|
||||
_CONFIG_VARS['py_version'] = _PY_VERSION
|
||||
_CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT
|
||||
_CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2]
|
||||
_CONFIG_VARS['base'] = _PREFIX
|
||||
_CONFIG_VARS['platbase'] = _EXEC_PREFIX
|
||||
_CONFIG_VARS['projectbase'] = _PROJECT_BASE
|
||||
try:
|
||||
_CONFIG_VARS['abiflags'] = sys.abiflags
|
||||
except AttributeError:
|
||||
# sys.abiflags may not be defined on all platforms.
|
||||
_CONFIG_VARS['abiflags'] = ''
|
||||
|
||||
if os.name in ('nt', 'os2'):
|
||||
_init_non_posix(_CONFIG_VARS)
|
||||
if os.name == 'posix':
|
||||
_init_posix(_CONFIG_VARS)
|
||||
# Setting 'userbase' is done below the call to the
|
||||
# init function to enable using 'get_config_var' in
|
||||
# the init-function.
|
||||
if sys.version >= '2.6':
|
||||
_CONFIG_VARS['userbase'] = _getuserbase()
|
||||
|
||||
if 'srcdir' not in _CONFIG_VARS:
|
||||
_CONFIG_VARS['srcdir'] = _PROJECT_BASE
|
||||
else:
|
||||
_CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir'])
|
||||
|
||||
# Convert srcdir into an absolute path if it appears necessary.
|
||||
# Normally it is relative to the build directory. However, during
|
||||
# testing, for example, we might be running a non-installed python
|
||||
# from a different directory.
|
||||
if _PYTHON_BUILD and os.name == "posix":
|
||||
base = _PROJECT_BASE
|
||||
try:
|
||||
cwd = os.getcwd()
|
||||
except OSError:
|
||||
cwd = None
|
||||
if (not os.path.isabs(_CONFIG_VARS['srcdir']) and
|
||||
base != cwd):
|
||||
# srcdir is relative and we are not in the same directory
|
||||
# as the executable. Assume executable is in the build
|
||||
# directory and make srcdir absolute.
|
||||
srcdir = os.path.join(base, _CONFIG_VARS['srcdir'])
|
||||
_CONFIG_VARS['srcdir'] = os.path.normpath(srcdir)
|
||||
|
||||
if sys.platform == 'darwin':
|
||||
kernel_version = os.uname()[2] # Kernel version (8.4.3)
|
||||
major_version = int(kernel_version.split('.')[0])
|
||||
|
||||
if major_version < 8:
|
||||
# On Mac OS X before 10.4, check if -arch and -isysroot
|
||||
# are in CFLAGS or LDFLAGS and remove them if they are.
|
||||
# This is needed when building extensions on a 10.3 system
|
||||
# using a universal build of python.
|
||||
for key in ('LDFLAGS', 'BASECFLAGS',
|
||||
# a number of derived variables. These need to be
|
||||
# patched up as well.
|
||||
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
|
||||
flags = _CONFIG_VARS[key]
|
||||
flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
|
||||
flags = re.sub('-isysroot [^ \t]*', ' ', flags)
|
||||
_CONFIG_VARS[key] = flags
|
||||
else:
|
||||
# Allow the user to override the architecture flags using
|
||||
# an environment variable.
|
||||
# NOTE: This name was introduced by Apple in OSX 10.5 and
|
||||
# is used by several scripting languages distributed with
|
||||
# that OS release.
|
||||
if 'ARCHFLAGS' in os.environ:
|
||||
arch = os.environ['ARCHFLAGS']
|
||||
for key in ('LDFLAGS', 'BASECFLAGS',
|
||||
# a number of derived variables. These need to be
|
||||
# patched up as well.
|
||||
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
|
||||
|
||||
flags = _CONFIG_VARS[key]
|
||||
flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
|
||||
flags = flags + ' ' + arch
|
||||
_CONFIG_VARS[key] = flags
|
||||
|
||||
# If we're on OSX 10.5 or later and the user tries to
|
||||
# compiles an extension using an SDK that is not present
|
||||
# on the current machine it is better to not use an SDK
|
||||
# than to fail.
|
||||
#
|
||||
# The major usecase for this is users using a Python.org
|
||||
# binary installer on OSX 10.6: that installer uses
|
||||
# the 10.4u SDK, but that SDK is not installed by default
|
||||
# when you install Xcode.
|
||||
#
|
||||
CFLAGS = _CONFIG_VARS.get('CFLAGS', '')
|
||||
m = re.search(r'-isysroot\s+(\S+)', CFLAGS)
|
||||
if m is not None:
|
||||
sdk = m.group(1)
|
||||
if not os.path.exists(sdk):
|
||||
for key in ('LDFLAGS', 'BASECFLAGS',
|
||||
# a number of derived variables. These need to be
|
||||
# patched up as well.
|
||||
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
|
||||
|
||||
flags = _CONFIG_VARS[key]
|
||||
flags = re.sub(r'-isysroot\s+\S+(\s|$)', ' ', flags)
|
||||
_CONFIG_VARS[key] = flags
|
||||
|
||||
if args:
|
||||
vals = []
|
||||
for name in args:
|
||||
vals.append(_CONFIG_VARS.get(name))
|
||||
return vals
|
||||
else:
|
||||
return _CONFIG_VARS
|
||||
|
||||
|
||||
def get_config_var(name):
|
||||
"""Return the value of a single variable using the dictionary returned by
|
||||
'get_config_vars()'.
|
||||
|
||||
Equivalent to get_config_vars().get(name)
|
||||
"""
|
||||
return get_config_vars().get(name)
|
||||
|
||||
|
||||
def get_platform():
|
||||
"""Return a string that identifies the current platform.
|
||||
|
||||
This is used mainly to distinguish platform-specific build directories and
|
||||
platform-specific built distributions. Typically includes the OS name
|
||||
and version and the architecture (as supplied by 'os.uname()'),
|
||||
although the exact information included depends on the OS; eg. for IRIX
|
||||
the architecture isn't particularly important (IRIX only runs on SGI
|
||||
hardware), but for Linux the kernel version isn't particularly
|
||||
important.
|
||||
|
||||
Examples of returned values:
|
||||
linux-i586
|
||||
linux-alpha (?)
|
||||
solaris-2.6-sun4u
|
||||
irix-5.3
|
||||
irix64-6.2
|
||||
|
||||
Windows will return one of:
|
||||
win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
|
||||
win-ia64 (64bit Windows on Itanium)
|
||||
win32 (all others - specifically, sys.platform is returned)
|
||||
|
||||
For other non-POSIX platforms, currently just returns 'sys.platform'.
|
||||
"""
|
||||
if os.name == 'nt':
|
||||
# sniff sys.version for architecture.
|
||||
prefix = " bit ("
|
||||
i = sys.version.find(prefix)
|
||||
if i == -1:
|
||||
return sys.platform
|
||||
j = sys.version.find(")", i)
|
||||
look = sys.version[i+len(prefix):j].lower()
|
||||
if look == 'amd64':
|
||||
return 'win-amd64'
|
||||
if look == 'itanium':
|
||||
return 'win-ia64'
|
||||
return sys.platform
|
||||
|
||||
if os.name != "posix" or not hasattr(os, 'uname'):
|
||||
# XXX what about the architecture? NT is Intel or Alpha,
|
||||
# Mac OS is M68k or PPC, etc.
|
||||
return sys.platform
|
||||
|
||||
# Try to distinguish various flavours of Unix
|
||||
osname, host, release, version, machine = os.uname()
|
||||
|
||||
# Convert the OS name to lowercase, remove '/' characters
|
||||
# (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
|
||||
osname = osname.lower().replace('/', '')
|
||||
machine = machine.replace(' ', '_')
|
||||
machine = machine.replace('/', '-')
|
||||
|
||||
if osname[:5] == "linux":
|
||||
# At least on Linux/Intel, 'machine' is the processor --
|
||||
# i386, etc.
|
||||
# XXX what about Alpha, SPARC, etc?
|
||||
return "%s-%s" % (osname, machine)
|
||||
elif osname[:5] == "sunos":
|
||||
if release[0] >= "5": # SunOS 5 == Solaris 2
|
||||
osname = "solaris"
|
||||
release = "%d.%s" % (int(release[0]) - 3, release[2:])
|
||||
# fall through to standard osname-release-machine representation
|
||||
elif osname[:4] == "irix": # could be "irix64"!
|
||||
return "%s-%s" % (osname, release)
|
||||
elif osname[:3] == "aix":
|
||||
return "%s-%s.%s" % (osname, version, release)
|
||||
elif osname[:6] == "cygwin":
|
||||
osname = "cygwin"
|
||||
rel_re = re.compile(r'[\d.]+')
|
||||
m = rel_re.match(release)
|
||||
if m:
|
||||
release = m.group()
|
||||
elif osname[:6] == "darwin":
|
||||
#
|
||||
# For our purposes, we'll assume that the system version from
|
||||
# distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
|
||||
# to. This makes the compatibility story a bit more sane because the
|
||||
# machine is going to compile and link as if it were
|
||||
# MACOSX_DEPLOYMENT_TARGET.
|
||||
cfgvars = get_config_vars()
|
||||
macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
|
||||
|
||||
if True:
|
||||
# Always calculate the release of the running machine,
|
||||
# needed to determine if we can build fat binaries or not.
|
||||
|
||||
macrelease = macver
|
||||
# Get the system version. Reading this plist is a documented
|
||||
# way to get the system version (see the documentation for
|
||||
# the Gestalt Manager)
|
||||
try:
|
||||
f = open('/System/Library/CoreServices/SystemVersion.plist')
|
||||
except IOError:
|
||||
# We're on a plain darwin box, fall back to the default
|
||||
# behaviour.
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
|
||||
r'<string>(.*?)</string>', f.read())
|
||||
finally:
|
||||
f.close()
|
||||
if m is not None:
|
||||
macrelease = '.'.join(m.group(1).split('.')[:2])
|
||||
# else: fall back to the default behaviour
|
||||
|
||||
if not macver:
|
||||
macver = macrelease
|
||||
|
||||
if macver:
|
||||
release = macver
|
||||
osname = "macosx"
|
||||
|
||||
if ((macrelease + '.') >= '10.4.' and
|
||||
'-arch' in get_config_vars().get('CFLAGS', '').strip()):
|
||||
# The universal build will build fat binaries, but not on
|
||||
# systems before 10.4
|
||||
#
|
||||
# Try to detect 4-way universal builds, those have machine-type
|
||||
# 'universal' instead of 'fat'.
|
||||
|
||||
machine = 'fat'
|
||||
cflags = get_config_vars().get('CFLAGS')
|
||||
|
||||
archs = re.findall(r'-arch\s+(\S+)', cflags)
|
||||
archs = tuple(sorted(set(archs)))
|
||||
|
||||
if len(archs) == 1:
|
||||
machine = archs[0]
|
||||
elif archs == ('i386', 'ppc'):
|
||||
machine = 'fat'
|
||||
elif archs == ('i386', 'x86_64'):
|
||||
machine = 'intel'
|
||||
elif archs == ('i386', 'ppc', 'x86_64'):
|
||||
machine = 'fat3'
|
||||
elif archs == ('ppc64', 'x86_64'):
|
||||
machine = 'fat64'
|
||||
elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
|
||||
machine = 'universal'
|
||||
else:
|
||||
raise ValueError(
|
||||
"Don't know machine value for archs=%r" % (archs,))
|
||||
|
||||
elif machine == 'i386':
|
||||
# On OSX the machine type returned by uname is always the
|
||||
# 32-bit variant, even if the executable architecture is
|
||||
# the 64-bit variant
|
||||
if sys.maxsize >= 2**32:
|
||||
machine = 'x86_64'
|
||||
|
||||
elif machine in ('PowerPC', 'Power_Macintosh'):
|
||||
# Pick a sane name for the PPC architecture.
|
||||
# See 'i386' case
|
||||
if sys.maxsize >= 2**32:
|
||||
machine = 'ppc64'
|
||||
else:
|
||||
machine = 'ppc'
|
||||
|
||||
return "%s-%s-%s" % (osname, release, machine)
|
||||
|
||||
|
||||
def get_python_version():
|
||||
return _PY_VERSION_SHORT
|
||||
|
||||
|
||||
def _print_dict(title, data):
|
||||
for index, (key, value) in enumerate(sorted(data.items())):
|
||||
if index == 0:
|
||||
print('%s: ' % (title))
|
||||
print('\t%s = "%s"' % (key, value))
|
||||
|
||||
|
||||
def _main():
|
||||
"""Display all information sysconfig detains."""
|
||||
print('Platform: "%s"' % get_platform())
|
||||
print('Python version: "%s"' % get_python_version())
|
||||
print('Current installation scheme: "%s"' % _get_default_scheme())
|
||||
print()
|
||||
_print_dict('Paths', get_paths())
|
||||
print()
|
||||
_print_dict('Variables', get_config_vars())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
_main()
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,20 +0,0 @@
|
||||
Copyright (c) 2006-2013 James Graham and other contributors
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
@@ -1,35 +0,0 @@
|
||||
"""
|
||||
HTML parsing library based on the `WHATWG HTML specification
|
||||
<https://whatwg.org/html>`_. The parser is designed to be compatible with
|
||||
existing HTML found in the wild and implements well-defined error recovery that
|
||||
is largely compatible with modern desktop web browsers.
|
||||
|
||||
Example usage::
|
||||
|
||||
from pipenv.patched.notpip._vendor import html5lib
|
||||
with open("my_document.html", "rb") as f:
|
||||
tree = html5lib.parse(f)
|
||||
|
||||
For convenience, this module re-exports the following names:
|
||||
|
||||
* :func:`~.html5parser.parse`
|
||||
* :func:`~.html5parser.parseFragment`
|
||||
* :class:`~.html5parser.HTMLParser`
|
||||
* :func:`~.treebuilders.getTreeBuilder`
|
||||
* :func:`~.treewalkers.getTreeWalker`
|
||||
* :func:`~.serializer.serialize`
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .html5parser import HTMLParser, parse, parseFragment
|
||||
from .treebuilders import getTreeBuilder
|
||||
from .treewalkers import getTreeWalker
|
||||
from .serializer import serialize
|
||||
|
||||
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
||||
"getTreeWalker", "serialize"]
|
||||
|
||||
# this has to be at the top level, see how setup.py parses this
|
||||
#: Distribution version number.
|
||||
__version__ = "1.1"
|
||||
@@ -1,289 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
import warnings
|
||||
|
||||
from .constants import DataLossWarning
|
||||
|
||||
baseChar = """
|
||||
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
|
||||
[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
|
||||
[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
|
||||
[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
|
||||
[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
|
||||
[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
|
||||
[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
|
||||
[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
|
||||
[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
|
||||
[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
|
||||
[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
|
||||
[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
|
||||
[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
|
||||
[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
|
||||
[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
|
||||
[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
|
||||
[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
|
||||
[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
|
||||
[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
|
||||
[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
|
||||
[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
|
||||
[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
|
||||
[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
|
||||
[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
|
||||
[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
|
||||
[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
|
||||
[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
|
||||
[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
|
||||
[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
|
||||
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
|
||||
#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
|
||||
#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
|
||||
#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
|
||||
[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
|
||||
[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
|
||||
#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
|
||||
[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
|
||||
[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
|
||||
[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
|
||||
[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
|
||||
[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
|
||||
#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
|
||||
[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
|
||||
[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
|
||||
[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
|
||||
[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
|
||||
|
||||
ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
|
||||
|
||||
combiningCharacter = """
|
||||
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
|
||||
[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
|
||||
[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
|
||||
[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
|
||||
#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
|
||||
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
|
||||
[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
|
||||
#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
|
||||
[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
|
||||
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
|
||||
#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
|
||||
[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
|
||||
[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
|
||||
[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
|
||||
[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
|
||||
[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
|
||||
#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
|
||||
[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
|
||||
#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
|
||||
[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
|
||||
[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
|
||||
#x3099 | #x309A"""
|
||||
|
||||
digit = """
|
||||
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
|
||||
[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
|
||||
[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
|
||||
[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
|
||||
|
||||
extender = """
|
||||
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
|
||||
#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
|
||||
|
||||
letter = " | ".join([baseChar, ideographic])
|
||||
|
||||
# Without the
|
||||
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
|
||||
extender])
|
||||
nameFirst = " | ".join([letter, "_"])
|
||||
|
||||
reChar = re.compile(r"#x([\d|A-F]{4,4})")
|
||||
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
|
||||
|
||||
|
||||
def charStringToList(chars):
|
||||
charRanges = [item.strip() for item in chars.split(" | ")]
|
||||
rv = []
|
||||
for item in charRanges:
|
||||
foundMatch = False
|
||||
for regexp in (reChar, reCharRange):
|
||||
match = regexp.match(item)
|
||||
if match is not None:
|
||||
rv.append([hexToInt(item) for item in match.groups()])
|
||||
if len(rv[-1]) == 1:
|
||||
rv[-1] = rv[-1] * 2
|
||||
foundMatch = True
|
||||
break
|
||||
if not foundMatch:
|
||||
assert len(item) == 1
|
||||
|
||||
rv.append([ord(item)] * 2)
|
||||
rv = normaliseCharList(rv)
|
||||
return rv
|
||||
|
||||
|
||||
def normaliseCharList(charList):
|
||||
charList = sorted(charList)
|
||||
for item in charList:
|
||||
assert item[1] >= item[0]
|
||||
rv = []
|
||||
i = 0
|
||||
while i < len(charList):
|
||||
j = 1
|
||||
rv.append(charList[i])
|
||||
while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
|
||||
rv[-1][1] = charList[i + j][1]
|
||||
j += 1
|
||||
i += j
|
||||
return rv
|
||||
|
||||
|
||||
# We don't really support characters above the BMP :(
|
||||
max_unicode = int("FFFF", 16)
|
||||
|
||||
|
||||
def missingRanges(charList):
|
||||
rv = []
|
||||
if charList[0] != 0:
|
||||
rv.append([0, charList[0][0] - 1])
|
||||
for i, item in enumerate(charList[:-1]):
|
||||
rv.append([item[1] + 1, charList[i + 1][0] - 1])
|
||||
if charList[-1][1] != max_unicode:
|
||||
rv.append([charList[-1][1] + 1, max_unicode])
|
||||
return rv
|
||||
|
||||
|
||||
def listToRegexpStr(charList):
|
||||
rv = []
|
||||
for item in charList:
|
||||
if item[0] == item[1]:
|
||||
rv.append(escapeRegexp(chr(item[0])))
|
||||
else:
|
||||
rv.append(escapeRegexp(chr(item[0])) + "-" +
|
||||
escapeRegexp(chr(item[1])))
|
||||
return "[%s]" % "".join(rv)
|
||||
|
||||
|
||||
def hexToInt(hex_str):
|
||||
return int(hex_str, 16)
|
||||
|
||||
|
||||
def escapeRegexp(string):
|
||||
specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
|
||||
"[", "]", "|", "(", ")", "-")
|
||||
for char in specialCharacters:
|
||||
string = string.replace(char, "\\" + char)
|
||||
|
||||
return string
|
||||
|
||||
# output from the above
|
||||
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
||||
|
||||
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
||||
|
||||
# Simpler things
|
||||
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
|
||||
|
||||
|
||||
class InfosetFilter(object):
|
||||
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
|
||||
|
||||
def __init__(self,
|
||||
dropXmlnsLocalName=False,
|
||||
dropXmlnsAttrNs=False,
|
||||
preventDoubleDashComments=False,
|
||||
preventDashAtCommentEnd=False,
|
||||
replaceFormFeedCharacters=True,
|
||||
preventSingleQuotePubid=False):
|
||||
|
||||
self.dropXmlnsLocalName = dropXmlnsLocalName
|
||||
self.dropXmlnsAttrNs = dropXmlnsAttrNs
|
||||
|
||||
self.preventDoubleDashComments = preventDoubleDashComments
|
||||
self.preventDashAtCommentEnd = preventDashAtCommentEnd
|
||||
|
||||
self.replaceFormFeedCharacters = replaceFormFeedCharacters
|
||||
|
||||
self.preventSingleQuotePubid = preventSingleQuotePubid
|
||||
|
||||
self.replaceCache = {}
|
||||
|
||||
def coerceAttribute(self, name, namespace=None):
|
||||
if self.dropXmlnsLocalName and name.startswith("xmlns:"):
|
||||
warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
|
||||
return None
|
||||
elif (self.dropXmlnsAttrNs and
|
||||
namespace == "http://www.w3.org/2000/xmlns/"):
|
||||
warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
|
||||
return None
|
||||
else:
|
||||
return self.toXmlName(name)
|
||||
|
||||
def coerceElement(self, name):
|
||||
return self.toXmlName(name)
|
||||
|
||||
def coerceComment(self, data):
|
||||
if self.preventDoubleDashComments:
|
||||
while "--" in data:
|
||||
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
|
||||
data = data.replace("--", "- -")
|
||||
if data.endswith("-"):
|
||||
warnings.warn("Comments cannot end in a dash", DataLossWarning)
|
||||
data += " "
|
||||
return data
|
||||
|
||||
def coerceCharacters(self, data):
|
||||
if self.replaceFormFeedCharacters:
|
||||
for _ in range(data.count("\x0C")):
|
||||
warnings.warn("Text cannot contain U+000C", DataLossWarning)
|
||||
data = data.replace("\x0C", " ")
|
||||
# Other non-xml characters
|
||||
return data
|
||||
|
||||
def coercePubid(self, data):
|
||||
dataOutput = data
|
||||
for char in nonPubidCharRegexp.findall(data):
|
||||
warnings.warn("Coercing non-XML pubid", DataLossWarning)
|
||||
replacement = self.getReplacementCharacter(char)
|
||||
dataOutput = dataOutput.replace(char, replacement)
|
||||
if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
|
||||
warnings.warn("Pubid cannot contain single quote", DataLossWarning)
|
||||
dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
|
||||
return dataOutput
|
||||
|
||||
def toXmlName(self, name):
|
||||
nameFirst = name[0]
|
||||
nameRest = name[1:]
|
||||
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
|
||||
if m:
|
||||
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||
nameFirstOutput = self.getReplacementCharacter(nameFirst)
|
||||
else:
|
||||
nameFirstOutput = nameFirst
|
||||
|
||||
nameRestOutput = nameRest
|
||||
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
|
||||
for char in replaceChars:
|
||||
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||
replacement = self.getReplacementCharacter(char)
|
||||
nameRestOutput = nameRestOutput.replace(char, replacement)
|
||||
return nameFirstOutput + nameRestOutput
|
||||
|
||||
def getReplacementCharacter(self, char):
|
||||
if char in self.replaceCache:
|
||||
replacement = self.replaceCache[char]
|
||||
else:
|
||||
replacement = self.escapeChar(char)
|
||||
return replacement
|
||||
|
||||
def fromXmlName(self, name):
|
||||
for item in set(self.replacementRegexp.findall(name)):
|
||||
name = name.replace(item, self.unescapeChar(item))
|
||||
return name
|
||||
|
||||
def escapeChar(self, char):
|
||||
replacement = "U%05X" % ord(char)
|
||||
self.replaceCache[char] = replacement
|
||||
return replacement
|
||||
|
||||
def unescapeChar(self, charcode):
|
||||
return chr(int(charcode[1:], 16))
|
||||
@@ -1,918 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from pipenv.patched.notpip._vendor.six import text_type
|
||||
from pipenv.patched.notpip._vendor.six.moves import http_client, urllib
|
||||
|
||||
import codecs
|
||||
import re
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
from pipenv.patched.notpip._vendor import webencodings
|
||||
|
||||
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
||||
from .constants import _ReparseException
|
||||
from . import _utils
|
||||
|
||||
# Non-unicode versions of constants for use in the pre-parser
|
||||
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
||||
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
||||
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
|
||||
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
|
||||
|
||||
|
||||
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
|
||||
|
||||
if _utils.supports_lone_surrogates:
|
||||
# Use one extra step of indirection and create surrogates with
|
||||
# eval. Not using this indirection would introduce an illegal
|
||||
# unicode literal on platforms not supporting such lone
|
||||
# surrogates.
|
||||
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
|
||||
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
|
||||
"]")
|
||||
else:
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
||||
|
||||
non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
||||
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
||||
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||
0x10FFFE, 0x10FFFF}
|
||||
|
||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
||||
|
||||
# Cache for charsUntil()
|
||||
charsUntilRegEx = {}
|
||||
|
||||
|
||||
class BufferedStream(object):
|
||||
"""Buffering for streams that do not have buffering of their own
|
||||
|
||||
The buffer is implemented as a list of chunks on the assumption that
|
||||
joining many strings will be slow since it is O(n**2)
|
||||
"""
|
||||
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
self.buffer = []
|
||||
self.position = [-1, 0] # chunk number, offset
|
||||
|
||||
def tell(self):
|
||||
pos = 0
|
||||
for chunk in self.buffer[:self.position[0]]:
|
||||
pos += len(chunk)
|
||||
pos += self.position[1]
|
||||
return pos
|
||||
|
||||
def seek(self, pos):
|
||||
assert pos <= self._bufferedBytes()
|
||||
offset = pos
|
||||
i = 0
|
||||
while len(self.buffer[i]) < offset:
|
||||
offset -= len(self.buffer[i])
|
||||
i += 1
|
||||
self.position = [i, offset]
|
||||
|
||||
def read(self, bytes):
|
||||
if not self.buffer:
|
||||
return self._readStream(bytes)
|
||||
elif (self.position[0] == len(self.buffer) and
|
||||
self.position[1] == len(self.buffer[-1])):
|
||||
return self._readStream(bytes)
|
||||
else:
|
||||
return self._readFromBuffer(bytes)
|
||||
|
||||
def _bufferedBytes(self):
|
||||
return sum([len(item) for item in self.buffer])
|
||||
|
||||
def _readStream(self, bytes):
|
||||
data = self.stream.read(bytes)
|
||||
self.buffer.append(data)
|
||||
self.position[0] += 1
|
||||
self.position[1] = len(data)
|
||||
return data
|
||||
|
||||
def _readFromBuffer(self, bytes):
|
||||
remainingBytes = bytes
|
||||
rv = []
|
||||
bufferIndex = self.position[0]
|
||||
bufferOffset = self.position[1]
|
||||
while bufferIndex < len(self.buffer) and remainingBytes != 0:
|
||||
assert remainingBytes > 0
|
||||
bufferedData = self.buffer[bufferIndex]
|
||||
|
||||
if remainingBytes <= len(bufferedData) - bufferOffset:
|
||||
bytesToRead = remainingBytes
|
||||
self.position = [bufferIndex, bufferOffset + bytesToRead]
|
||||
else:
|
||||
bytesToRead = len(bufferedData) - bufferOffset
|
||||
self.position = [bufferIndex, len(bufferedData)]
|
||||
bufferIndex += 1
|
||||
rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
|
||||
remainingBytes -= bytesToRead
|
||||
|
||||
bufferOffset = 0
|
||||
|
||||
if remainingBytes:
|
||||
rv.append(self._readStream(remainingBytes))
|
||||
|
||||
return b"".join(rv)
|
||||
|
||||
|
||||
def HTMLInputStream(source, **kwargs):
|
||||
# Work around Python bug #20007: read(0) closes the connection.
|
||||
# http://bugs.python.org/issue20007
|
||||
if (isinstance(source, http_client.HTTPResponse) or
|
||||
# Also check for addinfourl wrapping HTTPResponse
|
||||
(isinstance(source, urllib.response.addbase) and
|
||||
isinstance(source.fp, http_client.HTTPResponse))):
|
||||
isUnicode = False
|
||||
elif hasattr(source, "read"):
|
||||
isUnicode = isinstance(source.read(0), text_type)
|
||||
else:
|
||||
isUnicode = isinstance(source, text_type)
|
||||
|
||||
if isUnicode:
|
||||
encodings = [x for x in kwargs if x.endswith("_encoding")]
|
||||
if encodings:
|
||||
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
|
||||
|
||||
return HTMLUnicodeInputStream(source, **kwargs)
|
||||
else:
|
||||
return HTMLBinaryInputStream(source, **kwargs)
|
||||
|
||||
|
||||
class HTMLUnicodeInputStream(object):
|
||||
"""Provides a unicode stream of characters to the HTMLTokenizer.
|
||||
|
||||
This class takes care of character encoding and removing or replacing
|
||||
incorrect byte-sequences and also provides column and line tracking.
|
||||
|
||||
"""
|
||||
|
||||
_defaultChunkSize = 10240
|
||||
|
||||
def __init__(self, source):
|
||||
"""Initialises the HTMLInputStream.
|
||||
|
||||
HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
||||
for use by html5lib.
|
||||
|
||||
source can be either a file-object, local filename or a string.
|
||||
|
||||
The optional encoding parameter must be a string that indicates
|
||||
the encoding. If specified, that encoding will be used,
|
||||
regardless of any BOM or later declaration (such as in a meta
|
||||
element)
|
||||
|
||||
"""
|
||||
|
||||
if not _utils.supports_lone_surrogates:
|
||||
# Such platforms will have already checked for such
|
||||
# surrogate errors, so no need to do this checking.
|
||||
self.reportCharacterErrors = None
|
||||
elif len("\U0010FFFF") == 1:
|
||||
self.reportCharacterErrors = self.characterErrorsUCS4
|
||||
else:
|
||||
self.reportCharacterErrors = self.characterErrorsUCS2
|
||||
|
||||
# List of where new lines occur
|
||||
self.newLines = [0]
|
||||
|
||||
self.charEncoding = (lookupEncoding("utf-8"), "certain")
|
||||
self.dataStream = self.openStream(source)
|
||||
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.chunk = ""
|
||||
self.chunkSize = 0
|
||||
self.chunkOffset = 0
|
||||
self.errors = []
|
||||
|
||||
# number of (complete) lines in previous chunks
|
||||
self.prevNumLines = 0
|
||||
# number of columns in the last line of the previous chunk
|
||||
self.prevNumCols = 0
|
||||
|
||||
# Deal with CR LF and surrogates split over chunk boundaries
|
||||
self._bufferedCharacter = None
|
||||
|
||||
def openStream(self, source):
|
||||
"""Produces a file object from source.
|
||||
|
||||
source can be either a file object, local filename or a string.
|
||||
|
||||
"""
|
||||
# Already a file object
|
||||
if hasattr(source, 'read'):
|
||||
stream = source
|
||||
else:
|
||||
stream = StringIO(source)
|
||||
|
||||
return stream
|
||||
|
||||
def _position(self, offset):
|
||||
chunk = self.chunk
|
||||
nLines = chunk.count('\n', 0, offset)
|
||||
positionLine = self.prevNumLines + nLines
|
||||
lastLinePos = chunk.rfind('\n', 0, offset)
|
||||
if lastLinePos == -1:
|
||||
positionColumn = self.prevNumCols + offset
|
||||
else:
|
||||
positionColumn = offset - (lastLinePos + 1)
|
||||
return (positionLine, positionColumn)
|
||||
|
||||
def position(self):
|
||||
"""Returns (line, col) of the current position in the stream."""
|
||||
line, col = self._position(self.chunkOffset)
|
||||
return (line + 1, col)
|
||||
|
||||
def char(self):
|
||||
""" Read one character from the stream or queue if available. Return
|
||||
EOF when EOF is reached.
|
||||
"""
|
||||
# Read a new chunk from the input stream if necessary
|
||||
if self.chunkOffset >= self.chunkSize:
|
||||
if not self.readChunk():
|
||||
return EOF
|
||||
|
||||
chunkOffset = self.chunkOffset
|
||||
char = self.chunk[chunkOffset]
|
||||
self.chunkOffset = chunkOffset + 1
|
||||
|
||||
return char
|
||||
|
||||
def readChunk(self, chunkSize=None):
|
||||
if chunkSize is None:
|
||||
chunkSize = self._defaultChunkSize
|
||||
|
||||
self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
|
||||
|
||||
self.chunk = ""
|
||||
self.chunkSize = 0
|
||||
self.chunkOffset = 0
|
||||
|
||||
data = self.dataStream.read(chunkSize)
|
||||
|
||||
# Deal with CR LF and surrogates broken across chunks
|
||||
if self._bufferedCharacter:
|
||||
data = self._bufferedCharacter + data
|
||||
self._bufferedCharacter = None
|
||||
elif not data:
|
||||
# We have no more data, bye-bye stream
|
||||
return False
|
||||
|
||||
if len(data) > 1:
|
||||
lastv = ord(data[-1])
|
||||
if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
|
||||
self._bufferedCharacter = data[-1]
|
||||
data = data[:-1]
|
||||
|
||||
if self.reportCharacterErrors:
|
||||
self.reportCharacterErrors(data)
|
||||
|
||||
# Replace invalid characters
|
||||
data = data.replace("\r\n", "\n")
|
||||
data = data.replace("\r", "\n")
|
||||
|
||||
self.chunk = data
|
||||
self.chunkSize = len(data)
|
||||
|
||||
return True
|
||||
|
||||
def characterErrorsUCS4(self, data):
|
||||
for _ in range(len(invalid_unicode_re.findall(data))):
|
||||
self.errors.append("invalid-codepoint")
|
||||
|
||||
def characterErrorsUCS2(self, data):
|
||||
# Someone picked the wrong compile option
|
||||
# You lose
|
||||
skip = False
|
||||
for match in invalid_unicode_re.finditer(data):
|
||||
if skip:
|
||||
continue
|
||||
codepoint = ord(match.group())
|
||||
pos = match.start()
|
||||
# Pretty sure there should be endianness issues here
|
||||
if _utils.isSurrogatePair(data[pos:pos + 2]):
|
||||
# We have a surrogate pair!
|
||||
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
|
||||
if char_val in non_bmp_invalid_codepoints:
|
||||
self.errors.append("invalid-codepoint")
|
||||
skip = True
|
||||
elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
|
||||
pos == len(data) - 1):
|
||||
self.errors.append("invalid-codepoint")
|
||||
else:
|
||||
skip = False
|
||||
self.errors.append("invalid-codepoint")
|
||||
|
||||
def charsUntil(self, characters, opposite=False):
|
||||
""" Returns a string of characters from the stream up to but not
|
||||
including any character in 'characters' or EOF. 'characters' must be
|
||||
a container that supports the 'in' method and iteration over its
|
||||
characters.
|
||||
"""
|
||||
|
||||
# Use a cache of regexps to find the required characters
|
||||
try:
|
||||
chars = charsUntilRegEx[(characters, opposite)]
|
||||
except KeyError:
|
||||
if __debug__:
|
||||
for c in characters:
|
||||
assert(ord(c) < 128)
|
||||
regex = "".join(["\\x%02x" % ord(c) for c in characters])
|
||||
if not opposite:
|
||||
regex = "^%s" % regex
|
||||
chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
|
||||
|
||||
rv = []
|
||||
|
||||
while True:
|
||||
# Find the longest matching prefix
|
||||
m = chars.match(self.chunk, self.chunkOffset)
|
||||
if m is None:
|
||||
# If nothing matched, and it wasn't because we ran out of chunk,
|
||||
# then stop
|
||||
if self.chunkOffset != self.chunkSize:
|
||||
break
|
||||
else:
|
||||
end = m.end()
|
||||
# If not the whole chunk matched, return everything
|
||||
# up to the part that didn't match
|
||||
if end != self.chunkSize:
|
||||
rv.append(self.chunk[self.chunkOffset:end])
|
||||
self.chunkOffset = end
|
||||
break
|
||||
# If the whole remainder of the chunk matched,
|
||||
# use it all and read the next chunk
|
||||
rv.append(self.chunk[self.chunkOffset:])
|
||||
if not self.readChunk():
|
||||
# Reached EOF
|
||||
break
|
||||
|
||||
r = "".join(rv)
|
||||
return r
|
||||
|
||||
def unget(self, char):
|
||||
# Only one character is allowed to be ungotten at once - it must
|
||||
# be consumed again before any further call to unget
|
||||
if char is not EOF:
|
||||
if self.chunkOffset == 0:
|
||||
# unget is called quite rarely, so it's a good idea to do
|
||||
# more work here if it saves a bit of work in the frequently
|
||||
# called char and charsUntil.
|
||||
# So, just prepend the ungotten character onto the current
|
||||
# chunk:
|
||||
self.chunk = char + self.chunk
|
||||
self.chunkSize += 1
|
||||
else:
|
||||
self.chunkOffset -= 1
|
||||
assert self.chunk[self.chunkOffset] == char
|
||||
|
||||
|
||||
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
"""Provides a unicode stream of characters to the HTMLTokenizer.
|
||||
|
||||
This class takes care of character encoding and removing or replacing
|
||||
incorrect byte-sequences and also provides column and line tracking.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, source, override_encoding=None, transport_encoding=None,
|
||||
same_origin_parent_encoding=None, likely_encoding=None,
|
||||
default_encoding="windows-1252", useChardet=True):
|
||||
"""Initialises the HTMLInputStream.
|
||||
|
||||
HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
||||
for use by html5lib.
|
||||
|
||||
source can be either a file-object, local filename or a string.
|
||||
|
||||
The optional encoding parameter must be a string that indicates
|
||||
the encoding. If specified, that encoding will be used,
|
||||
regardless of any BOM or later declaration (such as in a meta
|
||||
element)
|
||||
|
||||
"""
|
||||
# Raw Stream - for unicode objects this will encode to utf-8 and set
|
||||
# self.charEncoding as appropriate
|
||||
self.rawStream = self.openStream(source)
|
||||
|
||||
HTMLUnicodeInputStream.__init__(self, self.rawStream)
|
||||
|
||||
# Encoding Information
|
||||
# Number of bytes to use when looking for a meta element with
|
||||
# encoding information
|
||||
self.numBytesMeta = 1024
|
||||
# Number of bytes to use when using detecting encoding using chardet
|
||||
self.numBytesChardet = 100
|
||||
# Things from args
|
||||
self.override_encoding = override_encoding
|
||||
self.transport_encoding = transport_encoding
|
||||
self.same_origin_parent_encoding = same_origin_parent_encoding
|
||||
self.likely_encoding = likely_encoding
|
||||
self.default_encoding = default_encoding
|
||||
|
||||
# Determine encoding
|
||||
self.charEncoding = self.determineEncoding(useChardet)
|
||||
assert self.charEncoding[0] is not None
|
||||
|
||||
# Call superclass
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
|
||||
HTMLUnicodeInputStream.reset(self)
|
||||
|
||||
def openStream(self, source):
|
||||
"""Produces a file object from source.
|
||||
|
||||
source can be either a file object, local filename or a string.
|
||||
|
||||
"""
|
||||
# Already a file object
|
||||
if hasattr(source, 'read'):
|
||||
stream = source
|
||||
else:
|
||||
stream = BytesIO(source)
|
||||
|
||||
try:
|
||||
stream.seek(stream.tell())
|
||||
except Exception:
|
||||
stream = BufferedStream(stream)
|
||||
|
||||
return stream
|
||||
|
||||
def determineEncoding(self, chardet=True):
|
||||
# BOMs take precedence over everything
|
||||
# This will also read past the BOM if present
|
||||
charEncoding = self.detectBOM(), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# If we've been overridden, we've been overridden
|
||||
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Now check the transport layer
|
||||
charEncoding = lookupEncoding(self.transport_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Look for meta elements with encoding information
|
||||
charEncoding = self.detectEncodingMeta(), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Parent document encoding
|
||||
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
|
||||
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
|
||||
return charEncoding
|
||||
|
||||
# "likely" encoding
|
||||
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Guess with chardet, if available
|
||||
if chardet:
|
||||
try:
|
||||
from pipenv.patched.notpip._vendor.chardet.universaldetector import UniversalDetector
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
buffers = []
|
||||
detector = UniversalDetector()
|
||||
while not detector.done:
|
||||
buffer = self.rawStream.read(self.numBytesChardet)
|
||||
assert isinstance(buffer, bytes)
|
||||
if not buffer:
|
||||
break
|
||||
buffers.append(buffer)
|
||||
detector.feed(buffer)
|
||||
detector.close()
|
||||
encoding = lookupEncoding(detector.result['encoding'])
|
||||
self.rawStream.seek(0)
|
||||
if encoding is not None:
|
||||
return encoding, "tentative"
|
||||
|
||||
# Try the default encoding
|
||||
charEncoding = lookupEncoding(self.default_encoding), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Fallback to html5lib's default if even that hasn't worked
|
||||
return lookupEncoding("windows-1252"), "tentative"
|
||||
|
||||
def changeEncoding(self, newEncoding):
|
||||
assert self.charEncoding[1] != "certain"
|
||||
newEncoding = lookupEncoding(newEncoding)
|
||||
if newEncoding is None:
|
||||
return
|
||||
if newEncoding.name in ("utf-16be", "utf-16le"):
|
||||
newEncoding = lookupEncoding("utf-8")
|
||||
assert newEncoding is not None
|
||||
elif newEncoding == self.charEncoding[0]:
|
||||
self.charEncoding = (self.charEncoding[0], "certain")
|
||||
else:
|
||||
self.rawStream.seek(0)
|
||||
self.charEncoding = (newEncoding, "certain")
|
||||
self.reset()
|
||||
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
|
||||
|
||||
def detectBOM(self):
|
||||
"""Attempts to detect at BOM at the start of the stream. If
|
||||
an encoding can be determined from the BOM return the name of the
|
||||
encoding otherwise return None"""
|
||||
bomDict = {
|
||||
codecs.BOM_UTF8: 'utf-8',
|
||||
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
|
||||
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
|
||||
}
|
||||
|
||||
# Go to beginning of file and read in 4 bytes
|
||||
string = self.rawStream.read(4)
|
||||
assert isinstance(string, bytes)
|
||||
|
||||
# Try detecting the BOM using bytes from the string
|
||||
encoding = bomDict.get(string[:3]) # UTF-8
|
||||
seek = 3
|
||||
if not encoding:
|
||||
# Need to detect UTF-32 before UTF-16
|
||||
encoding = bomDict.get(string) # UTF-32
|
||||
seek = 4
|
||||
if not encoding:
|
||||
encoding = bomDict.get(string[:2]) # UTF-16
|
||||
seek = 2
|
||||
|
||||
# Set the read position past the BOM if one was found, otherwise
|
||||
# set it to the start of the stream
|
||||
if encoding:
|
||||
self.rawStream.seek(seek)
|
||||
return lookupEncoding(encoding)
|
||||
else:
|
||||
self.rawStream.seek(0)
|
||||
return None
|
||||
|
||||
def detectEncodingMeta(self):
|
||||
"""Report the encoding declared by the meta element
|
||||
"""
|
||||
buffer = self.rawStream.read(self.numBytesMeta)
|
||||
assert isinstance(buffer, bytes)
|
||||
parser = EncodingParser(buffer)
|
||||
self.rawStream.seek(0)
|
||||
encoding = parser.getEncoding()
|
||||
|
||||
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
|
||||
encoding = lookupEncoding("utf-8")
|
||||
|
||||
return encoding
|
||||
|
||||
|
||||
class EncodingBytes(bytes):
|
||||
"""String-like object with an associated position and various extra methods
|
||||
If the position is ever greater than the string length then an exception is
|
||||
raised"""
|
||||
def __new__(self, value):
|
||||
assert isinstance(value, bytes)
|
||||
return bytes.__new__(self, value.lower())
|
||||
|
||||
def __init__(self, value):
|
||||
# pylint:disable=unused-argument
|
||||
self._position = -1
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
p = self._position = self._position + 1
|
||||
if p >= len(self):
|
||||
raise StopIteration
|
||||
elif p < 0:
|
||||
raise TypeError
|
||||
return self[p:p + 1]
|
||||
|
||||
def next(self):
|
||||
# Py2 compat
|
||||
return self.__next__()
|
||||
|
||||
def previous(self):
|
||||
p = self._position
|
||||
if p >= len(self):
|
||||
raise StopIteration
|
||||
elif p < 0:
|
||||
raise TypeError
|
||||
self._position = p = p - 1
|
||||
return self[p:p + 1]
|
||||
|
||||
def setPosition(self, position):
|
||||
if self._position >= len(self):
|
||||
raise StopIteration
|
||||
self._position = position
|
||||
|
||||
def getPosition(self):
|
||||
if self._position >= len(self):
|
||||
raise StopIteration
|
||||
if self._position >= 0:
|
||||
return self._position
|
||||
else:
|
||||
return None
|
||||
|
||||
position = property(getPosition, setPosition)
|
||||
|
||||
def getCurrentByte(self):
|
||||
return self[self.position:self.position + 1]
|
||||
|
||||
currentByte = property(getCurrentByte)
|
||||
|
||||
def skip(self, chars=spaceCharactersBytes):
|
||||
"""Skip past a list of characters"""
|
||||
p = self.position # use property for the error-checking
|
||||
while p < len(self):
|
||||
c = self[p:p + 1]
|
||||
if c not in chars:
|
||||
self._position = p
|
||||
return c
|
||||
p += 1
|
||||
self._position = p
|
||||
return None
|
||||
|
||||
def skipUntil(self, chars):
|
||||
p = self.position
|
||||
while p < len(self):
|
||||
c = self[p:p + 1]
|
||||
if c in chars:
|
||||
self._position = p
|
||||
return c
|
||||
p += 1
|
||||
self._position = p
|
||||
return None
|
||||
|
||||
def matchBytes(self, bytes):
|
||||
"""Look for a sequence of bytes at the start of a string. If the bytes
|
||||
are found return True and advance the position to the byte after the
|
||||
match. Otherwise return False and leave the position alone"""
|
||||
rv = self.startswith(bytes, self.position)
|
||||
if rv:
|
||||
self.position += len(bytes)
|
||||
return rv
|
||||
|
||||
def jumpTo(self, bytes):
|
||||
"""Look for the next sequence of bytes matching a given sequence. If
|
||||
a match is found advance the position to the last byte of the match"""
|
||||
try:
|
||||
self._position = self.index(bytes, self.position) + len(bytes) - 1
|
||||
except ValueError:
|
||||
raise StopIteration
|
||||
return True
|
||||
|
||||
|
||||
class EncodingParser(object):
|
||||
"""Mini parser for detecting character encoding from meta elements"""
|
||||
|
||||
def __init__(self, data):
|
||||
"""string - the data to work on for encoding detection"""
|
||||
self.data = EncodingBytes(data)
|
||||
self.encoding = None
|
||||
|
||||
def getEncoding(self):
|
||||
if b"<meta" not in self.data:
|
||||
return None
|
||||
|
||||
methodDispatch = (
|
||||
(b"<!--", self.handleComment),
|
||||
(b"<meta", self.handleMeta),
|
||||
(b"</", self.handlePossibleEndTag),
|
||||
(b"<!", self.handleOther),
|
||||
(b"<?", self.handleOther),
|
||||
(b"<", self.handlePossibleStartTag))
|
||||
for _ in self.data:
|
||||
keepParsing = True
|
||||
try:
|
||||
self.data.jumpTo(b"<")
|
||||
except StopIteration:
|
||||
break
|
||||
for key, method in methodDispatch:
|
||||
if self.data.matchBytes(key):
|
||||
try:
|
||||
keepParsing = method()
|
||||
break
|
||||
except StopIteration:
|
||||
keepParsing = False
|
||||
break
|
||||
if not keepParsing:
|
||||
break
|
||||
|
||||
return self.encoding
|
||||
|
||||
def handleComment(self):
|
||||
"""Skip over comments"""
|
||||
return self.data.jumpTo(b"-->")
|
||||
|
||||
def handleMeta(self):
|
||||
if self.data.currentByte not in spaceCharactersBytes:
|
||||
# if we have <meta not followed by a space so just keep going
|
||||
return True
|
||||
# We have a valid meta element we want to search for attributes
|
||||
hasPragma = False
|
||||
pendingEncoding = None
|
||||
while True:
|
||||
# Try to find the next attribute after the current position
|
||||
attr = self.getAttribute()
|
||||
if attr is None:
|
||||
return True
|
||||
else:
|
||||
if attr[0] == b"http-equiv":
|
||||
hasPragma = attr[1] == b"content-type"
|
||||
if hasPragma and pendingEncoding is not None:
|
||||
self.encoding = pendingEncoding
|
||||
return False
|
||||
elif attr[0] == b"charset":
|
||||
tentativeEncoding = attr[1]
|
||||
codec = lookupEncoding(tentativeEncoding)
|
||||
if codec is not None:
|
||||
self.encoding = codec
|
||||
return False
|
||||
elif attr[0] == b"content":
|
||||
contentParser = ContentAttrParser(EncodingBytes(attr[1]))
|
||||
tentativeEncoding = contentParser.parse()
|
||||
if tentativeEncoding is not None:
|
||||
codec = lookupEncoding(tentativeEncoding)
|
||||
if codec is not None:
|
||||
if hasPragma:
|
||||
self.encoding = codec
|
||||
return False
|
||||
else:
|
||||
pendingEncoding = codec
|
||||
|
||||
def handlePossibleStartTag(self):
|
||||
return self.handlePossibleTag(False)
|
||||
|
||||
def handlePossibleEndTag(self):
|
||||
next(self.data)
|
||||
return self.handlePossibleTag(True)
|
||||
|
||||
def handlePossibleTag(self, endTag):
|
||||
data = self.data
|
||||
if data.currentByte not in asciiLettersBytes:
|
||||
# If the next byte is not an ascii letter either ignore this
|
||||
# fragment (possible start tag case) or treat it according to
|
||||
# handleOther
|
||||
if endTag:
|
||||
data.previous()
|
||||
self.handleOther()
|
||||
return True
|
||||
|
||||
c = data.skipUntil(spacesAngleBrackets)
|
||||
if c == b"<":
|
||||
# return to the first step in the overall "two step" algorithm
|
||||
# reprocessing the < byte
|
||||
data.previous()
|
||||
else:
|
||||
# Read all attributes
|
||||
attr = self.getAttribute()
|
||||
while attr is not None:
|
||||
attr = self.getAttribute()
|
||||
return True
|
||||
|
||||
def handleOther(self):
|
||||
return self.data.jumpTo(b">")
|
||||
|
||||
def getAttribute(self):
|
||||
"""Return a name,value pair for the next attribute in the stream,
|
||||
if one is found, or None"""
|
||||
data = self.data
|
||||
# Step 1 (skip chars)
|
||||
c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
|
||||
assert c is None or len(c) == 1
|
||||
# Step 2
|
||||
if c in (b">", None):
|
||||
return None
|
||||
# Step 3
|
||||
attrName = []
|
||||
attrValue = []
|
||||
# Step 4 attribute name
|
||||
while True:
|
||||
if c == b"=" and attrName:
|
||||
break
|
||||
elif c in spaceCharactersBytes:
|
||||
# Step 6!
|
||||
c = data.skip()
|
||||
break
|
||||
elif c in (b"/", b">"):
|
||||
return b"".join(attrName), b""
|
||||
elif c in asciiUppercaseBytes:
|
||||
attrName.append(c.lower())
|
||||
elif c is None:
|
||||
return None
|
||||
else:
|
||||
attrName.append(c)
|
||||
# Step 5
|
||||
c = next(data)
|
||||
# Step 7
|
||||
if c != b"=":
|
||||
data.previous()
|
||||
return b"".join(attrName), b""
|
||||
# Step 8
|
||||
next(data)
|
||||
# Step 9
|
||||
c = data.skip()
|
||||
# Step 10
|
||||
if c in (b"'", b'"'):
|
||||
# 10.1
|
||||
quoteChar = c
|
||||
while True:
|
||||
# 10.2
|
||||
c = next(data)
|
||||
# 10.3
|
||||
if c == quoteChar:
|
||||
next(data)
|
||||
return b"".join(attrName), b"".join(attrValue)
|
||||
# 10.4
|
||||
elif c in asciiUppercaseBytes:
|
||||
attrValue.append(c.lower())
|
||||
# 10.5
|
||||
else:
|
||||
attrValue.append(c)
|
||||
elif c == b">":
|
||||
return b"".join(attrName), b""
|
||||
elif c in asciiUppercaseBytes:
|
||||
attrValue.append(c.lower())
|
||||
elif c is None:
|
||||
return None
|
||||
else:
|
||||
attrValue.append(c)
|
||||
# Step 11
|
||||
while True:
|
||||
c = next(data)
|
||||
if c in spacesAngleBrackets:
|
||||
return b"".join(attrName), b"".join(attrValue)
|
||||
elif c in asciiUppercaseBytes:
|
||||
attrValue.append(c.lower())
|
||||
elif c is None:
|
||||
return None
|
||||
else:
|
||||
attrValue.append(c)
|
||||
|
||||
|
||||
class ContentAttrParser(object):
|
||||
def __init__(self, data):
|
||||
assert isinstance(data, bytes)
|
||||
self.data = data
|
||||
|
||||
def parse(self):
|
||||
try:
|
||||
# Check if the attr name is charset
|
||||
# otherwise return
|
||||
self.data.jumpTo(b"charset")
|
||||
self.data.position += 1
|
||||
self.data.skip()
|
||||
if not self.data.currentByte == b"=":
|
||||
# If there is no = sign keep looking for attrs
|
||||
return None
|
||||
self.data.position += 1
|
||||
self.data.skip()
|
||||
# Look for an encoding between matching quote marks
|
||||
if self.data.currentByte in (b'"', b"'"):
|
||||
quoteMark = self.data.currentByte
|
||||
self.data.position += 1
|
||||
oldPosition = self.data.position
|
||||
if self.data.jumpTo(quoteMark):
|
||||
return self.data[oldPosition:self.data.position]
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
# Unquoted value
|
||||
oldPosition = self.data.position
|
||||
try:
|
||||
self.data.skipUntil(spaceCharactersBytes)
|
||||
return self.data[oldPosition:self.data.position]
|
||||
except StopIteration:
|
||||
# Return the whole remaining value
|
||||
return self.data[oldPosition:]
|
||||
except StopIteration:
|
||||
return None
|
||||
|
||||
|
||||
def lookupEncoding(encoding):
|
||||
"""Return the python codec name corresponding to an encoding or None if the
|
||||
string doesn't correspond to a valid encoding."""
|
||||
if isinstance(encoding, bytes):
|
||||
try:
|
||||
encoding = encoding.decode("ascii")
|
||||
except UnicodeDecodeError:
|
||||
return None
|
||||
|
||||
if encoding is not None:
|
||||
try:
|
||||
return webencodings.lookup(encoding)
|
||||
except AttributeError:
|
||||
return None
|
||||
else:
|
||||
return None
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .py import Trie
|
||||
|
||||
__all__ = ["Trie"]
|
||||
@@ -1,40 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
try:
|
||||
from collections.abc import Mapping
|
||||
except ImportError: # Python 2.7
|
||||
from collections import Mapping
|
||||
|
||||
|
||||
class Trie(Mapping):
|
||||
"""Abstract base class for tries"""
|
||||
|
||||
def keys(self, prefix=None):
|
||||
# pylint:disable=arguments-differ
|
||||
keys = super(Trie, self).keys()
|
||||
|
||||
if prefix is None:
|
||||
return set(keys)
|
||||
|
||||
return {x for x in keys if x.startswith(prefix)}
|
||||
|
||||
def has_keys_with_prefix(self, prefix):
|
||||
for key in self.keys():
|
||||
if key.startswith(prefix):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def longest_prefix(self, prefix):
|
||||
if prefix in self:
|
||||
return prefix
|
||||
|
||||
for i in range(1, len(prefix) + 1):
|
||||
if prefix[:-i] in self:
|
||||
return prefix[:-i]
|
||||
|
||||
raise KeyError(prefix)
|
||||
|
||||
def longest_prefix_item(self, prefix):
|
||||
lprefix = self.longest_prefix(prefix)
|
||||
return (lprefix, self[lprefix])
|
||||
@@ -1,67 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from pipenv.patched.notpip._vendor.six import text_type
|
||||
|
||||
from bisect import bisect_left
|
||||
|
||||
from ._base import Trie as ABCTrie
|
||||
|
||||
|
||||
class Trie(ABCTrie):
|
||||
def __init__(self, data):
|
||||
if not all(isinstance(x, text_type) for x in data.keys()):
|
||||
raise TypeError("All keys must be strings")
|
||||
|
||||
self._data = data
|
||||
self._keys = sorted(data.keys())
|
||||
self._cachestr = ""
|
||||
self._cachepoints = (0, len(data))
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self._data
|
||||
|
||||
def __len__(self):
|
||||
return len(self._data)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._data)
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._data[key]
|
||||
|
||||
def keys(self, prefix=None):
|
||||
if prefix is None or prefix == "" or not self._keys:
|
||||
return set(self._keys)
|
||||
|
||||
if prefix.startswith(self._cachestr):
|
||||
lo, hi = self._cachepoints
|
||||
start = i = bisect_left(self._keys, prefix, lo, hi)
|
||||
else:
|
||||
start = i = bisect_left(self._keys, prefix)
|
||||
|
||||
keys = set()
|
||||
if start == len(self._keys):
|
||||
return keys
|
||||
|
||||
while self._keys[i].startswith(prefix):
|
||||
keys.add(self._keys[i])
|
||||
i += 1
|
||||
|
||||
self._cachestr = prefix
|
||||
self._cachepoints = (start, i)
|
||||
|
||||
return keys
|
||||
|
||||
def has_keys_with_prefix(self, prefix):
|
||||
if prefix in self._data:
|
||||
return True
|
||||
|
||||
if prefix.startswith(self._cachestr):
|
||||
lo, hi = self._cachepoints
|
||||
i = bisect_left(self._keys, prefix, lo, hi)
|
||||
else:
|
||||
i = bisect_left(self._keys, prefix)
|
||||
|
||||
if i == len(self._keys):
|
||||
return False
|
||||
|
||||
return self._keys[i].startswith(prefix)
|
||||
@@ -1,159 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from types import ModuleType
|
||||
|
||||
try:
|
||||
from collections.abc import Mapping
|
||||
except ImportError:
|
||||
from collections import Mapping
|
||||
|
||||
from pipenv.patched.notpip._vendor.six import text_type, PY3
|
||||
|
||||
if PY3:
|
||||
import xml.etree.ElementTree as default_etree
|
||||
else:
|
||||
try:
|
||||
import xml.etree.cElementTree as default_etree
|
||||
except ImportError:
|
||||
import xml.etree.ElementTree as default_etree
|
||||
|
||||
|
||||
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
|
||||
"surrogatePairToCodepoint", "moduleFactoryFactory",
|
||||
"supports_lone_surrogates"]
|
||||
|
||||
|
||||
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
|
||||
# caught by the below test. In general this would be any platform
|
||||
# using UTF-16 as its encoding of unicode strings, such as
|
||||
# Jython. This is because UTF-16 itself is based on the use of such
|
||||
# surrogates, and there is no mechanism to further escape such
|
||||
# escapes.
|
||||
try:
|
||||
_x = eval('"\\uD800"') # pylint:disable=eval-used
|
||||
if not isinstance(_x, text_type):
|
||||
# We need this with u"" because of http://bugs.jython.org/issue2039
|
||||
_x = eval('u"\\uD800"') # pylint:disable=eval-used
|
||||
assert isinstance(_x, text_type)
|
||||
except Exception:
|
||||
supports_lone_surrogates = False
|
||||
else:
|
||||
supports_lone_surrogates = True
|
||||
|
||||
|
||||
class MethodDispatcher(dict):
|
||||
"""Dict with 2 special properties:
|
||||
|
||||
On initiation, keys that are lists, sets or tuples are converted to
|
||||
multiple keys so accessing any one of the items in the original
|
||||
list-like object returns the matching value
|
||||
|
||||
md = MethodDispatcher({("foo", "bar"):"baz"})
|
||||
md["foo"] == "baz"
|
||||
|
||||
A default value which can be set through the default attribute.
|
||||
"""
|
||||
|
||||
def __init__(self, items=()):
|
||||
_dictEntries = []
|
||||
for name, value in items:
|
||||
if isinstance(name, (list, tuple, frozenset, set)):
|
||||
for item in name:
|
||||
_dictEntries.append((item, value))
|
||||
else:
|
||||
_dictEntries.append((name, value))
|
||||
dict.__init__(self, _dictEntries)
|
||||
assert len(self) == len(_dictEntries)
|
||||
self.default = None
|
||||
|
||||
def __getitem__(self, key):
|
||||
return dict.get(self, key, self.default)
|
||||
|
||||
def __get__(self, instance, owner=None):
|
||||
return BoundMethodDispatcher(instance, self)
|
||||
|
||||
|
||||
class BoundMethodDispatcher(Mapping):
|
||||
"""Wraps a MethodDispatcher, binding its return values to `instance`"""
|
||||
def __init__(self, instance, dispatcher):
|
||||
self.instance = instance
|
||||
self.dispatcher = dispatcher
|
||||
|
||||
def __getitem__(self, key):
|
||||
# see https://docs.python.org/3/reference/datamodel.html#object.__get__
|
||||
# on a function, __get__ is used to bind a function to an instance as a bound method
|
||||
return self.dispatcher[key].__get__(self.instance)
|
||||
|
||||
def get(self, key, default):
|
||||
if key in self.dispatcher:
|
||||
return self[key]
|
||||
else:
|
||||
return default
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.dispatcher)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.dispatcher)
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.dispatcher
|
||||
|
||||
|
||||
# Some utility functions to deal with weirdness around UCS2 vs UCS4
|
||||
# python builds
|
||||
|
||||
def isSurrogatePair(data):
|
||||
return (len(data) == 2 and
|
||||
ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
|
||||
ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
|
||||
|
||||
|
||||
def surrogatePairToCodepoint(data):
|
||||
char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
|
||||
(ord(data[1]) - 0xDC00))
|
||||
return char_val
|
||||
|
||||
# Module Factory Factory (no, this isn't Java, I know)
|
||||
# Here to stop this being duplicated all over the place.
|
||||
|
||||
|
||||
def moduleFactoryFactory(factory):
|
||||
moduleCache = {}
|
||||
|
||||
def moduleFactory(baseModule, *args, **kwargs):
|
||||
if isinstance(ModuleType.__name__, type("")):
|
||||
name = "_%s_factory" % baseModule.__name__
|
||||
else:
|
||||
name = b"_%s_factory" % baseModule.__name__
|
||||
|
||||
kwargs_tuple = tuple(kwargs.items())
|
||||
|
||||
try:
|
||||
return moduleCache[name][args][kwargs_tuple]
|
||||
except KeyError:
|
||||
mod = ModuleType(name)
|
||||
objs = factory(baseModule, *args, **kwargs)
|
||||
mod.__dict__.update(objs)
|
||||
if "name" not in moduleCache:
|
||||
moduleCache[name] = {}
|
||||
if "args" not in moduleCache[name]:
|
||||
moduleCache[name][args] = {}
|
||||
if "kwargs" not in moduleCache[name][args]:
|
||||
moduleCache[name][args][kwargs_tuple] = {}
|
||||
moduleCache[name][args][kwargs_tuple] = mod
|
||||
return mod
|
||||
|
||||
return moduleFactory
|
||||
|
||||
|
||||
def memoize(func):
|
||||
cache = {}
|
||||
|
||||
def wrapped(*args, **kwargs):
|
||||
key = (tuple(args), tuple(kwargs.items()))
|
||||
if key not in cache:
|
||||
cache[key] = func(*args, **kwargs)
|
||||
return cache[key]
|
||||
|
||||
return wrapped
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,29 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import base
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
def _attr_key(attr):
|
||||
"""Return an appropriate key for an attribute for sorting
|
||||
|
||||
Attributes have a namespace that can be either ``None`` or a string. We
|
||||
can't compare the two because they're different types, so we convert
|
||||
``None`` to an empty string first.
|
||||
|
||||
"""
|
||||
return (attr[0][0] or ''), attr[0][1]
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Alphabetizes attributes for elements"""
|
||||
def __iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
if token["type"] in ("StartTag", "EmptyTag"):
|
||||
attrs = OrderedDict()
|
||||
for name, value in sorted(token["data"].items(),
|
||||
key=_attr_key):
|
||||
attrs[name] = value
|
||||
token["data"] = attrs
|
||||
yield token
|
||||
@@ -1,12 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
class Filter(object):
|
||||
def __init__(self, source):
|
||||
self.source = source
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.source)
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self.source, name)
|
||||
@@ -1,73 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import base
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Injects ``<meta charset=ENCODING>`` tag into head of document"""
|
||||
def __init__(self, source, encoding):
|
||||
"""Creates a Filter
|
||||
|
||||
:arg source: the source token stream
|
||||
|
||||
:arg encoding: the encoding to set
|
||||
|
||||
"""
|
||||
base.Filter.__init__(self, source)
|
||||
self.encoding = encoding
|
||||
|
||||
def __iter__(self):
|
||||
state = "pre_head"
|
||||
meta_found = (self.encoding is None)
|
||||
pending = []
|
||||
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type == "StartTag":
|
||||
if token["name"].lower() == "head":
|
||||
state = "in_head"
|
||||
|
||||
elif type == "EmptyTag":
|
||||
if token["name"].lower() == "meta":
|
||||
# replace charset with actual encoding
|
||||
has_http_equiv_content_type = False
|
||||
for (namespace, name), value in token["data"].items():
|
||||
if namespace is not None:
|
||||
continue
|
||||
elif name.lower() == 'charset':
|
||||
token["data"][(namespace, name)] = self.encoding
|
||||
meta_found = True
|
||||
break
|
||||
elif name == 'http-equiv' and value.lower() == 'content-type':
|
||||
has_http_equiv_content_type = True
|
||||
else:
|
||||
if has_http_equiv_content_type and (None, "content") in token["data"]:
|
||||
token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
|
||||
meta_found = True
|
||||
|
||||
elif token["name"].lower() == "head" and not meta_found:
|
||||
# insert meta into empty head
|
||||
yield {"type": "StartTag", "name": "head",
|
||||
"data": token["data"]}
|
||||
yield {"type": "EmptyTag", "name": "meta",
|
||||
"data": {(None, "charset"): self.encoding}}
|
||||
yield {"type": "EndTag", "name": "head"}
|
||||
meta_found = True
|
||||
continue
|
||||
|
||||
elif type == "EndTag":
|
||||
if token["name"].lower() == "head" and pending:
|
||||
# insert meta into head (if necessary) and flush pending queue
|
||||
yield pending.pop(0)
|
||||
if not meta_found:
|
||||
yield {"type": "EmptyTag", "name": "meta",
|
||||
"data": {(None, "charset"): self.encoding}}
|
||||
while pending:
|
||||
yield pending.pop(0)
|
||||
meta_found = True
|
||||
state = "post_head"
|
||||
|
||||
if state == "in_head":
|
||||
pending.append(token)
|
||||
else:
|
||||
yield token
|
||||
@@ -1,93 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from pipenv.patched.notpip._vendor.six import text_type
|
||||
|
||||
from . import base
|
||||
from ..constants import namespaces, voidElements
|
||||
|
||||
from ..constants import spaceCharacters
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Lints the token stream for errors
|
||||
|
||||
If it finds any errors, it'll raise an ``AssertionError``.
|
||||
|
||||
"""
|
||||
def __init__(self, source, require_matching_tags=True):
|
||||
"""Creates a Filter
|
||||
|
||||
:arg source: the source token stream
|
||||
|
||||
:arg require_matching_tags: whether or not to require matching tags
|
||||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
self.require_matching_tags = require_matching_tags
|
||||
|
||||
def __iter__(self):
|
||||
open_elements = []
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
namespace = token["namespace"]
|
||||
name = token["name"]
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
assert isinstance(token["data"], dict)
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
assert type == "EmptyTag"
|
||||
else:
|
||||
assert type == "StartTag"
|
||||
if type == "StartTag" and self.require_matching_tags:
|
||||
open_elements.append((namespace, name))
|
||||
for (namespace, name), value in token["data"].items():
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
assert isinstance(value, text_type)
|
||||
|
||||
elif type == "EndTag":
|
||||
namespace = token["namespace"]
|
||||
name = token["name"]
|
||||
assert namespace is None or isinstance(namespace, text_type)
|
||||
assert namespace != ""
|
||||
assert isinstance(name, text_type)
|
||||
assert name != ""
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
|
||||
elif self.require_matching_tags:
|
||||
start = open_elements.pop()
|
||||
assert start == (namespace, name)
|
||||
|
||||
elif type == "Comment":
|
||||
data = token["data"]
|
||||
assert isinstance(data, text_type)
|
||||
|
||||
elif type in ("Characters", "SpaceCharacters"):
|
||||
data = token["data"]
|
||||
assert isinstance(data, text_type)
|
||||
assert data != ""
|
||||
if type == "SpaceCharacters":
|
||||
assert data.strip(spaceCharacters) == ""
|
||||
|
||||
elif type == "Doctype":
|
||||
name = token["name"]
|
||||
assert name is None or isinstance(name, text_type)
|
||||
assert token["publicId"] is None or isinstance(name, text_type)
|
||||
assert token["systemId"] is None or isinstance(name, text_type)
|
||||
|
||||
elif type == "Entity":
|
||||
assert isinstance(token["name"], text_type)
|
||||
|
||||
elif type == "SerializerError":
|
||||
assert isinstance(token["data"], text_type)
|
||||
|
||||
else:
|
||||
assert False, "Unknown token type: %(type)s" % {"type": type}
|
||||
|
||||
yield token
|
||||
@@ -1,207 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import base
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Removes optional tags from the token stream"""
|
||||
def slider(self):
|
||||
previous1 = previous2 = None
|
||||
for token in self.source:
|
||||
if previous1 is not None:
|
||||
yield previous2, previous1, token
|
||||
previous2 = previous1
|
||||
previous1 = token
|
||||
if previous1 is not None:
|
||||
yield previous2, previous1, None
|
||||
|
||||
def __iter__(self):
|
||||
for previous, token, next in self.slider():
|
||||
type = token["type"]
|
||||
if type == "StartTag":
|
||||
if (token["data"] or
|
||||
not self.is_optional_start(token["name"], previous, next)):
|
||||
yield token
|
||||
elif type == "EndTag":
|
||||
if not self.is_optional_end(token["name"], next):
|
||||
yield token
|
||||
else:
|
||||
yield token
|
||||
|
||||
def is_optional_start(self, tagname, previous, next):
|
||||
type = next and next["type"] or None
|
||||
if tagname in 'html':
|
||||
# An html element's start tag may be omitted if the first thing
|
||||
# inside the html element is not a space character or a comment.
|
||||
return type not in ("Comment", "SpaceCharacters")
|
||||
elif tagname == 'head':
|
||||
# A head element's start tag may be omitted if the first thing
|
||||
# inside the head element is an element.
|
||||
# XXX: we also omit the start tag if the head element is empty
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
return True
|
||||
elif type == "EndTag":
|
||||
return next["name"] == "head"
|
||||
elif tagname == 'body':
|
||||
# A body element's start tag may be omitted if the first thing
|
||||
# inside the body element is not a space character or a comment,
|
||||
# except if the first thing inside the body element is a script
|
||||
# or style element and the node immediately preceding the body
|
||||
# element is a head element whose end tag has been omitted.
|
||||
if type in ("Comment", "SpaceCharacters"):
|
||||
return False
|
||||
elif type == "StartTag":
|
||||
# XXX: we do not look at the preceding event, so we never omit
|
||||
# the body element's start tag if it's followed by a script or
|
||||
# a style element.
|
||||
return next["name"] not in ('script', 'style')
|
||||
else:
|
||||
return True
|
||||
elif tagname == 'colgroup':
|
||||
# A colgroup element's start tag may be omitted if the first thing
|
||||
# inside the colgroup element is a col element, and if the element
|
||||
# is not immediately preceded by another colgroup element whose
|
||||
# end tag has been omitted.
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
# XXX: we do not look at the preceding event, so instead we never
|
||||
# omit the colgroup element's end tag when it is immediately
|
||||
# followed by another colgroup element. See is_optional_end.
|
||||
return next["name"] == "col"
|
||||
else:
|
||||
return False
|
||||
elif tagname == 'tbody':
|
||||
# A tbody element's start tag may be omitted if the first thing
|
||||
# inside the tbody element is a tr element, and if the element is
|
||||
# not immediately preceded by a tbody, thead, or tfoot element
|
||||
# whose end tag has been omitted.
|
||||
if type == "StartTag":
|
||||
# omit the thead and tfoot elements' end tag when they are
|
||||
# immediately followed by a tbody element. See is_optional_end.
|
||||
if previous and previous['type'] == 'EndTag' and \
|
||||
previous['name'] in ('tbody', 'thead', 'tfoot'):
|
||||
return False
|
||||
return next["name"] == 'tr'
|
||||
else:
|
||||
return False
|
||||
return False
|
||||
|
||||
def is_optional_end(self, tagname, next):
|
||||
type = next and next["type"] or None
|
||||
if tagname in ('html', 'head', 'body'):
|
||||
# An html element's end tag may be omitted if the html element
|
||||
# is not immediately followed by a space character or a comment.
|
||||
return type not in ("Comment", "SpaceCharacters")
|
||||
elif tagname in ('li', 'optgroup', 'tr'):
|
||||
# A li element's end tag may be omitted if the li element is
|
||||
# immediately followed by another li element or if there is
|
||||
# no more content in the parent element.
|
||||
# An optgroup element's end tag may be omitted if the optgroup
|
||||
# element is immediately followed by another optgroup element,
|
||||
# or if there is no more content in the parent element.
|
||||
# A tr element's end tag may be omitted if the tr element is
|
||||
# immediately followed by another tr element, or if there is
|
||||
# no more content in the parent element.
|
||||
if type == "StartTag":
|
||||
return next["name"] == tagname
|
||||
else:
|
||||
return type == "EndTag" or type is None
|
||||
elif tagname in ('dt', 'dd'):
|
||||
# A dt element's end tag may be omitted if the dt element is
|
||||
# immediately followed by another dt element or a dd element.
|
||||
# A dd element's end tag may be omitted if the dd element is
|
||||
# immediately followed by another dd element or a dt element,
|
||||
# or if there is no more content in the parent element.
|
||||
if type == "StartTag":
|
||||
return next["name"] in ('dt', 'dd')
|
||||
elif tagname == 'dd':
|
||||
return type == "EndTag" or type is None
|
||||
else:
|
||||
return False
|
||||
elif tagname == 'p':
|
||||
# A p element's end tag may be omitted if the p element is
|
||||
# immediately followed by an address, article, aside,
|
||||
# blockquote, datagrid, dialog, dir, div, dl, fieldset,
|
||||
# footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
|
||||
# nav, ol, p, pre, section, table, or ul, element, or if
|
||||
# there is no more content in the parent element.
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
return next["name"] in ('address', 'article', 'aside',
|
||||
'blockquote', 'datagrid', 'dialog',
|
||||
'dir', 'div', 'dl', 'fieldset', 'footer',
|
||||
'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
||||
'header', 'hr', 'menu', 'nav', 'ol',
|
||||
'p', 'pre', 'section', 'table', 'ul')
|
||||
else:
|
||||
return type == "EndTag" or type is None
|
||||
elif tagname == 'option':
|
||||
# An option element's end tag may be omitted if the option
|
||||
# element is immediately followed by another option element,
|
||||
# or if it is immediately followed by an <code>optgroup</code>
|
||||
# element, or if there is no more content in the parent
|
||||
# element.
|
||||
if type == "StartTag":
|
||||
return next["name"] in ('option', 'optgroup')
|
||||
else:
|
||||
return type == "EndTag" or type is None
|
||||
elif tagname in ('rt', 'rp'):
|
||||
# An rt element's end tag may be omitted if the rt element is
|
||||
# immediately followed by an rt or rp element, or if there is
|
||||
# no more content in the parent element.
|
||||
# An rp element's end tag may be omitted if the rp element is
|
||||
# immediately followed by an rt or rp element, or if there is
|
||||
# no more content in the parent element.
|
||||
if type == "StartTag":
|
||||
return next["name"] in ('rt', 'rp')
|
||||
else:
|
||||
return type == "EndTag" or type is None
|
||||
elif tagname == 'colgroup':
|
||||
# A colgroup element's end tag may be omitted if the colgroup
|
||||
# element is not immediately followed by a space character or
|
||||
# a comment.
|
||||
if type in ("Comment", "SpaceCharacters"):
|
||||
return False
|
||||
elif type == "StartTag":
|
||||
# XXX: we also look for an immediately following colgroup
|
||||
# element. See is_optional_start.
|
||||
return next["name"] != 'colgroup'
|
||||
else:
|
||||
return True
|
||||
elif tagname in ('thead', 'tbody'):
|
||||
# A thead element's end tag may be omitted if the thead element
|
||||
# is immediately followed by a tbody or tfoot element.
|
||||
# A tbody element's end tag may be omitted if the tbody element
|
||||
# is immediately followed by a tbody or tfoot element, or if
|
||||
# there is no more content in the parent element.
|
||||
# A tfoot element's end tag may be omitted if the tfoot element
|
||||
# is immediately followed by a tbody element, or if there is no
|
||||
# more content in the parent element.
|
||||
# XXX: we never omit the end tag when the following element is
|
||||
# a tbody. See is_optional_start.
|
||||
if type == "StartTag":
|
||||
return next["name"] in ['tbody', 'tfoot']
|
||||
elif tagname == 'tbody':
|
||||
return type == "EndTag" or type is None
|
||||
else:
|
||||
return False
|
||||
elif tagname == 'tfoot':
|
||||
# A tfoot element's end tag may be omitted if the tfoot element
|
||||
# is immediately followed by a tbody element, or if there is no
|
||||
# more content in the parent element.
|
||||
# XXX: we never omit the end tag when the following element is
|
||||
# a tbody. See is_optional_start.
|
||||
if type == "StartTag":
|
||||
return next["name"] == 'tbody'
|
||||
else:
|
||||
return type == "EndTag" or type is None
|
||||
elif tagname in ('td', 'th'):
|
||||
# A td element's end tag may be omitted if the td element is
|
||||
# immediately followed by a td or th element, or if there is
|
||||
# no more content in the parent element.
|
||||
# A th element's end tag may be omitted if the th element is
|
||||
# immediately followed by a td or th element, or if there is
|
||||
# no more content in the parent element.
|
||||
if type == "StartTag":
|
||||
return next["name"] in ('td', 'th')
|
||||
else:
|
||||
return type == "EndTag" or type is None
|
||||
return False
|
||||
@@ -1,916 +0,0 @@
|
||||
"""Deprecated from html5lib 1.1.
|
||||
|
||||
See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
|
||||
information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
|
||||
is recommended as a replacement. Please let us know in the aforementioned issue
|
||||
if Bleach is unsuitable for your needs.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
import warnings
|
||||
from xml.sax.saxutils import escape, unescape
|
||||
|
||||
from pipenv.patched.notpip._vendor.six.moves import urllib_parse as urlparse
|
||||
|
||||
from . import base
|
||||
from ..constants import namespaces, prefixes
|
||||
|
||||
__all__ = ["Filter"]
|
||||
|
||||
|
||||
_deprecation_msg = (
|
||||
"html5lib's sanitizer is deprecated; see " +
|
||||
"https://github.com/html5lib/html5lib-python/issues/443 and please let " +
|
||||
"us know if Bleach is unsuitable for your needs"
|
||||
)
|
||||
|
||||
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||
|
||||
allowed_elements = frozenset((
|
||||
(namespaces['html'], 'a'),
|
||||
(namespaces['html'], 'abbr'),
|
||||
(namespaces['html'], 'acronym'),
|
||||
(namespaces['html'], 'address'),
|
||||
(namespaces['html'], 'area'),
|
||||
(namespaces['html'], 'article'),
|
||||
(namespaces['html'], 'aside'),
|
||||
(namespaces['html'], 'audio'),
|
||||
(namespaces['html'], 'b'),
|
||||
(namespaces['html'], 'big'),
|
||||
(namespaces['html'], 'blockquote'),
|
||||
(namespaces['html'], 'br'),
|
||||
(namespaces['html'], 'button'),
|
||||
(namespaces['html'], 'canvas'),
|
||||
(namespaces['html'], 'caption'),
|
||||
(namespaces['html'], 'center'),
|
||||
(namespaces['html'], 'cite'),
|
||||
(namespaces['html'], 'code'),
|
||||
(namespaces['html'], 'col'),
|
||||
(namespaces['html'], 'colgroup'),
|
||||
(namespaces['html'], 'command'),
|
||||
(namespaces['html'], 'datagrid'),
|
||||
(namespaces['html'], 'datalist'),
|
||||
(namespaces['html'], 'dd'),
|
||||
(namespaces['html'], 'del'),
|
||||
(namespaces['html'], 'details'),
|
||||
(namespaces['html'], 'dfn'),
|
||||
(namespaces['html'], 'dialog'),
|
||||
(namespaces['html'], 'dir'),
|
||||
(namespaces['html'], 'div'),
|
||||
(namespaces['html'], 'dl'),
|
||||
(namespaces['html'], 'dt'),
|
||||
(namespaces['html'], 'em'),
|
||||
(namespaces['html'], 'event-source'),
|
||||
(namespaces['html'], 'fieldset'),
|
||||
(namespaces['html'], 'figcaption'),
|
||||
(namespaces['html'], 'figure'),
|
||||
(namespaces['html'], 'footer'),
|
||||
(namespaces['html'], 'font'),
|
||||
(namespaces['html'], 'form'),
|
||||
(namespaces['html'], 'header'),
|
||||
(namespaces['html'], 'h1'),
|
||||
(namespaces['html'], 'h2'),
|
||||
(namespaces['html'], 'h3'),
|
||||
(namespaces['html'], 'h4'),
|
||||
(namespaces['html'], 'h5'),
|
||||
(namespaces['html'], 'h6'),
|
||||
(namespaces['html'], 'hr'),
|
||||
(namespaces['html'], 'i'),
|
||||
(namespaces['html'], 'img'),
|
||||
(namespaces['html'], 'input'),
|
||||
(namespaces['html'], 'ins'),
|
||||
(namespaces['html'], 'keygen'),
|
||||
(namespaces['html'], 'kbd'),
|
||||
(namespaces['html'], 'label'),
|
||||
(namespaces['html'], 'legend'),
|
||||
(namespaces['html'], 'li'),
|
||||
(namespaces['html'], 'm'),
|
||||
(namespaces['html'], 'map'),
|
||||
(namespaces['html'], 'menu'),
|
||||
(namespaces['html'], 'meter'),
|
||||
(namespaces['html'], 'multicol'),
|
||||
(namespaces['html'], 'nav'),
|
||||
(namespaces['html'], 'nextid'),
|
||||
(namespaces['html'], 'ol'),
|
||||
(namespaces['html'], 'output'),
|
||||
(namespaces['html'], 'optgroup'),
|
||||
(namespaces['html'], 'option'),
|
||||
(namespaces['html'], 'p'),
|
||||
(namespaces['html'], 'pre'),
|
||||
(namespaces['html'], 'progress'),
|
||||
(namespaces['html'], 'q'),
|
||||
(namespaces['html'], 's'),
|
||||
(namespaces['html'], 'samp'),
|
||||
(namespaces['html'], 'section'),
|
||||
(namespaces['html'], 'select'),
|
||||
(namespaces['html'], 'small'),
|
||||
(namespaces['html'], 'sound'),
|
||||
(namespaces['html'], 'source'),
|
||||
(namespaces['html'], 'spacer'),
|
||||
(namespaces['html'], 'span'),
|
||||
(namespaces['html'], 'strike'),
|
||||
(namespaces['html'], 'strong'),
|
||||
(namespaces['html'], 'sub'),
|
||||
(namespaces['html'], 'sup'),
|
||||
(namespaces['html'], 'table'),
|
||||
(namespaces['html'], 'tbody'),
|
||||
(namespaces['html'], 'td'),
|
||||
(namespaces['html'], 'textarea'),
|
||||
(namespaces['html'], 'time'),
|
||||
(namespaces['html'], 'tfoot'),
|
||||
(namespaces['html'], 'th'),
|
||||
(namespaces['html'], 'thead'),
|
||||
(namespaces['html'], 'tr'),
|
||||
(namespaces['html'], 'tt'),
|
||||
(namespaces['html'], 'u'),
|
||||
(namespaces['html'], 'ul'),
|
||||
(namespaces['html'], 'var'),
|
||||
(namespaces['html'], 'video'),
|
||||
(namespaces['mathml'], 'maction'),
|
||||
(namespaces['mathml'], 'math'),
|
||||
(namespaces['mathml'], 'merror'),
|
||||
(namespaces['mathml'], 'mfrac'),
|
||||
(namespaces['mathml'], 'mi'),
|
||||
(namespaces['mathml'], 'mmultiscripts'),
|
||||
(namespaces['mathml'], 'mn'),
|
||||
(namespaces['mathml'], 'mo'),
|
||||
(namespaces['mathml'], 'mover'),
|
||||
(namespaces['mathml'], 'mpadded'),
|
||||
(namespaces['mathml'], 'mphantom'),
|
||||
(namespaces['mathml'], 'mprescripts'),
|
||||
(namespaces['mathml'], 'mroot'),
|
||||
(namespaces['mathml'], 'mrow'),
|
||||
(namespaces['mathml'], 'mspace'),
|
||||
(namespaces['mathml'], 'msqrt'),
|
||||
(namespaces['mathml'], 'mstyle'),
|
||||
(namespaces['mathml'], 'msub'),
|
||||
(namespaces['mathml'], 'msubsup'),
|
||||
(namespaces['mathml'], 'msup'),
|
||||
(namespaces['mathml'], 'mtable'),
|
||||
(namespaces['mathml'], 'mtd'),
|
||||
(namespaces['mathml'], 'mtext'),
|
||||
(namespaces['mathml'], 'mtr'),
|
||||
(namespaces['mathml'], 'munder'),
|
||||
(namespaces['mathml'], 'munderover'),
|
||||
(namespaces['mathml'], 'none'),
|
||||
(namespaces['svg'], 'a'),
|
||||
(namespaces['svg'], 'animate'),
|
||||
(namespaces['svg'], 'animateColor'),
|
||||
(namespaces['svg'], 'animateMotion'),
|
||||
(namespaces['svg'], 'animateTransform'),
|
||||
(namespaces['svg'], 'clipPath'),
|
||||
(namespaces['svg'], 'circle'),
|
||||
(namespaces['svg'], 'defs'),
|
||||
(namespaces['svg'], 'desc'),
|
||||
(namespaces['svg'], 'ellipse'),
|
||||
(namespaces['svg'], 'font-face'),
|
||||
(namespaces['svg'], 'font-face-name'),
|
||||
(namespaces['svg'], 'font-face-src'),
|
||||
(namespaces['svg'], 'g'),
|
||||
(namespaces['svg'], 'glyph'),
|
||||
(namespaces['svg'], 'hkern'),
|
||||
(namespaces['svg'], 'linearGradient'),
|
||||
(namespaces['svg'], 'line'),
|
||||
(namespaces['svg'], 'marker'),
|
||||
(namespaces['svg'], 'metadata'),
|
||||
(namespaces['svg'], 'missing-glyph'),
|
||||
(namespaces['svg'], 'mpath'),
|
||||
(namespaces['svg'], 'path'),
|
||||
(namespaces['svg'], 'polygon'),
|
||||
(namespaces['svg'], 'polyline'),
|
||||
(namespaces['svg'], 'radialGradient'),
|
||||
(namespaces['svg'], 'rect'),
|
||||
(namespaces['svg'], 'set'),
|
||||
(namespaces['svg'], 'stop'),
|
||||
(namespaces['svg'], 'svg'),
|
||||
(namespaces['svg'], 'switch'),
|
||||
(namespaces['svg'], 'text'),
|
||||
(namespaces['svg'], 'title'),
|
||||
(namespaces['svg'], 'tspan'),
|
||||
(namespaces['svg'], 'use'),
|
||||
))
|
||||
|
||||
allowed_attributes = frozenset((
|
||||
# HTML attributes
|
||||
(None, 'abbr'),
|
||||
(None, 'accept'),
|
||||
(None, 'accept-charset'),
|
||||
(None, 'accesskey'),
|
||||
(None, 'action'),
|
||||
(None, 'align'),
|
||||
(None, 'alt'),
|
||||
(None, 'autocomplete'),
|
||||
(None, 'autofocus'),
|
||||
(None, 'axis'),
|
||||
(None, 'background'),
|
||||
(None, 'balance'),
|
||||
(None, 'bgcolor'),
|
||||
(None, 'bgproperties'),
|
||||
(None, 'border'),
|
||||
(None, 'bordercolor'),
|
||||
(None, 'bordercolordark'),
|
||||
(None, 'bordercolorlight'),
|
||||
(None, 'bottompadding'),
|
||||
(None, 'cellpadding'),
|
||||
(None, 'cellspacing'),
|
||||
(None, 'ch'),
|
||||
(None, 'challenge'),
|
||||
(None, 'char'),
|
||||
(None, 'charoff'),
|
||||
(None, 'choff'),
|
||||
(None, 'charset'),
|
||||
(None, 'checked'),
|
||||
(None, 'cite'),
|
||||
(None, 'class'),
|
||||
(None, 'clear'),
|
||||
(None, 'color'),
|
||||
(None, 'cols'),
|
||||
(None, 'colspan'),
|
||||
(None, 'compact'),
|
||||
(None, 'contenteditable'),
|
||||
(None, 'controls'),
|
||||
(None, 'coords'),
|
||||
(None, 'data'),
|
||||
(None, 'datafld'),
|
||||
(None, 'datapagesize'),
|
||||
(None, 'datasrc'),
|
||||
(None, 'datetime'),
|
||||
(None, 'default'),
|
||||
(None, 'delay'),
|
||||
(None, 'dir'),
|
||||
(None, 'disabled'),
|
||||
(None, 'draggable'),
|
||||
(None, 'dynsrc'),
|
||||
(None, 'enctype'),
|
||||
(None, 'end'),
|
||||
(None, 'face'),
|
||||
(None, 'for'),
|
||||
(None, 'form'),
|
||||
(None, 'frame'),
|
||||
(None, 'galleryimg'),
|
||||
(None, 'gutter'),
|
||||
(None, 'headers'),
|
||||
(None, 'height'),
|
||||
(None, 'hidefocus'),
|
||||
(None, 'hidden'),
|
||||
(None, 'high'),
|
||||
(None, 'href'),
|
||||
(None, 'hreflang'),
|
||||
(None, 'hspace'),
|
||||
(None, 'icon'),
|
||||
(None, 'id'),
|
||||
(None, 'inputmode'),
|
||||
(None, 'ismap'),
|
||||
(None, 'keytype'),
|
||||
(None, 'label'),
|
||||
(None, 'leftspacing'),
|
||||
(None, 'lang'),
|
||||
(None, 'list'),
|
||||
(None, 'longdesc'),
|
||||
(None, 'loop'),
|
||||
(None, 'loopcount'),
|
||||
(None, 'loopend'),
|
||||
(None, 'loopstart'),
|
||||
(None, 'low'),
|
||||
(None, 'lowsrc'),
|
||||
(None, 'max'),
|
||||
(None, 'maxlength'),
|
||||
(None, 'media'),
|
||||
(None, 'method'),
|
||||
(None, 'min'),
|
||||
(None, 'multiple'),
|
||||
(None, 'name'),
|
||||
(None, 'nohref'),
|
||||
(None, 'noshade'),
|
||||
(None, 'nowrap'),
|
||||
(None, 'open'),
|
||||
(None, 'optimum'),
|
||||
(None, 'pattern'),
|
||||
(None, 'ping'),
|
||||
(None, 'point-size'),
|
||||
(None, 'poster'),
|
||||
(None, 'pqg'),
|
||||
(None, 'preload'),
|
||||
(None, 'prompt'),
|
||||
(None, 'radiogroup'),
|
||||
(None, 'readonly'),
|
||||
(None, 'rel'),
|
||||
(None, 'repeat-max'),
|
||||
(None, 'repeat-min'),
|
||||
(None, 'replace'),
|
||||
(None, 'required'),
|
||||
(None, 'rev'),
|
||||
(None, 'rightspacing'),
|
||||
(None, 'rows'),
|
||||
(None, 'rowspan'),
|
||||
(None, 'rules'),
|
||||
(None, 'scope'),
|
||||
(None, 'selected'),
|
||||
(None, 'shape'),
|
||||
(None, 'size'),
|
||||
(None, 'span'),
|
||||
(None, 'src'),
|
||||
(None, 'start'),
|
||||
(None, 'step'),
|
||||
(None, 'style'),
|
||||
(None, 'summary'),
|
||||
(None, 'suppress'),
|
||||
(None, 'tabindex'),
|
||||
(None, 'target'),
|
||||
(None, 'template'),
|
||||
(None, 'title'),
|
||||
(None, 'toppadding'),
|
||||
(None, 'type'),
|
||||
(None, 'unselectable'),
|
||||
(None, 'usemap'),
|
||||
(None, 'urn'),
|
||||
(None, 'valign'),
|
||||
(None, 'value'),
|
||||
(None, 'variable'),
|
||||
(None, 'volume'),
|
||||
(None, 'vspace'),
|
||||
(None, 'vrml'),
|
||||
(None, 'width'),
|
||||
(None, 'wrap'),
|
||||
(namespaces['xml'], 'lang'),
|
||||
# MathML attributes
|
||||
(None, 'actiontype'),
|
||||
(None, 'align'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnalign'),
|
||||
(None, 'columnlines'),
|
||||
(None, 'columnspacing'),
|
||||
(None, 'columnspan'),
|
||||
(None, 'depth'),
|
||||
(None, 'display'),
|
||||
(None, 'displaystyle'),
|
||||
(None, 'equalcolumns'),
|
||||
(None, 'equalrows'),
|
||||
(None, 'fence'),
|
||||
(None, 'fontstyle'),
|
||||
(None, 'fontweight'),
|
||||
(None, 'frame'),
|
||||
(None, 'height'),
|
||||
(None, 'linethickness'),
|
||||
(None, 'lspace'),
|
||||
(None, 'mathbackground'),
|
||||
(None, 'mathcolor'),
|
||||
(None, 'mathvariant'),
|
||||
(None, 'mathvariant'),
|
||||
(None, 'maxsize'),
|
||||
(None, 'minsize'),
|
||||
(None, 'other'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowalign'),
|
||||
(None, 'rowlines'),
|
||||
(None, 'rowspacing'),
|
||||
(None, 'rowspan'),
|
||||
(None, 'rspace'),
|
||||
(None, 'scriptlevel'),
|
||||
(None, 'selection'),
|
||||
(None, 'separator'),
|
||||
(None, 'stretchy'),
|
||||
(None, 'width'),
|
||||
(None, 'width'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xlink'], 'show'),
|
||||
(namespaces['xlink'], 'type'),
|
||||
# SVG attributes
|
||||
(None, 'accent-height'),
|
||||
(None, 'accumulate'),
|
||||
(None, 'additive'),
|
||||
(None, 'alphabetic'),
|
||||
(None, 'arabic-form'),
|
||||
(None, 'ascent'),
|
||||
(None, 'attributeName'),
|
||||
(None, 'attributeType'),
|
||||
(None, 'baseProfile'),
|
||||
(None, 'bbox'),
|
||||
(None, 'begin'),
|
||||
(None, 'by'),
|
||||
(None, 'calcMode'),
|
||||
(None, 'cap-height'),
|
||||
(None, 'class'),
|
||||
(None, 'clip-path'),
|
||||
(None, 'color'),
|
||||
(None, 'color-rendering'),
|
||||
(None, 'content'),
|
||||
(None, 'cx'),
|
||||
(None, 'cy'),
|
||||
(None, 'd'),
|
||||
(None, 'dx'),
|
||||
(None, 'dy'),
|
||||
(None, 'descent'),
|
||||
(None, 'display'),
|
||||
(None, 'dur'),
|
||||
(None, 'end'),
|
||||
(None, 'fill'),
|
||||
(None, 'fill-opacity'),
|
||||
(None, 'fill-rule'),
|
||||
(None, 'font-family'),
|
||||
(None, 'font-size'),
|
||||
(None, 'font-stretch'),
|
||||
(None, 'font-style'),
|
||||
(None, 'font-variant'),
|
||||
(None, 'font-weight'),
|
||||
(None, 'from'),
|
||||
(None, 'fx'),
|
||||
(None, 'fy'),
|
||||
(None, 'g1'),
|
||||
(None, 'g2'),
|
||||
(None, 'glyph-name'),
|
||||
(None, 'gradientUnits'),
|
||||
(None, 'hanging'),
|
||||
(None, 'height'),
|
||||
(None, 'horiz-adv-x'),
|
||||
(None, 'horiz-origin-x'),
|
||||
(None, 'id'),
|
||||
(None, 'ideographic'),
|
||||
(None, 'k'),
|
||||
(None, 'keyPoints'),
|
||||
(None, 'keySplines'),
|
||||
(None, 'keyTimes'),
|
||||
(None, 'lang'),
|
||||
(None, 'marker-end'),
|
||||
(None, 'marker-mid'),
|
||||
(None, 'marker-start'),
|
||||
(None, 'markerHeight'),
|
||||
(None, 'markerUnits'),
|
||||
(None, 'markerWidth'),
|
||||
(None, 'mathematical'),
|
||||
(None, 'max'),
|
||||
(None, 'min'),
|
||||
(None, 'name'),
|
||||
(None, 'offset'),
|
||||
(None, 'opacity'),
|
||||
(None, 'orient'),
|
||||
(None, 'origin'),
|
||||
(None, 'overline-position'),
|
||||
(None, 'overline-thickness'),
|
||||
(None, 'panose-1'),
|
||||
(None, 'path'),
|
||||
(None, 'pathLength'),
|
||||
(None, 'points'),
|
||||
(None, 'preserveAspectRatio'),
|
||||
(None, 'r'),
|
||||
(None, 'refX'),
|
||||
(None, 'refY'),
|
||||
(None, 'repeatCount'),
|
||||
(None, 'repeatDur'),
|
||||
(None, 'requiredExtensions'),
|
||||
(None, 'requiredFeatures'),
|
||||
(None, 'restart'),
|
||||
(None, 'rotate'),
|
||||
(None, 'rx'),
|
||||
(None, 'ry'),
|
||||
(None, 'slope'),
|
||||
(None, 'stemh'),
|
||||
(None, 'stemv'),
|
||||
(None, 'stop-color'),
|
||||
(None, 'stop-opacity'),
|
||||
(None, 'strikethrough-position'),
|
||||
(None, 'strikethrough-thickness'),
|
||||
(None, 'stroke'),
|
||||
(None, 'stroke-dasharray'),
|
||||
(None, 'stroke-dashoffset'),
|
||||
(None, 'stroke-linecap'),
|
||||
(None, 'stroke-linejoin'),
|
||||
(None, 'stroke-miterlimit'),
|
||||
(None, 'stroke-opacity'),
|
||||
(None, 'stroke-width'),
|
||||
(None, 'systemLanguage'),
|
||||
(None, 'target'),
|
||||
(None, 'text-anchor'),
|
||||
(None, 'to'),
|
||||
(None, 'transform'),
|
||||
(None, 'type'),
|
||||
(None, 'u1'),
|
||||
(None, 'u2'),
|
||||
(None, 'underline-position'),
|
||||
(None, 'underline-thickness'),
|
||||
(None, 'unicode'),
|
||||
(None, 'unicode-range'),
|
||||
(None, 'units-per-em'),
|
||||
(None, 'values'),
|
||||
(None, 'version'),
|
||||
(None, 'viewBox'),
|
||||
(None, 'visibility'),
|
||||
(None, 'width'),
|
||||
(None, 'widths'),
|
||||
(None, 'x'),
|
||||
(None, 'x-height'),
|
||||
(None, 'x1'),
|
||||
(None, 'x2'),
|
||||
(namespaces['xlink'], 'actuate'),
|
||||
(namespaces['xlink'], 'arcrole'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xlink'], 'role'),
|
||||
(namespaces['xlink'], 'show'),
|
||||
(namespaces['xlink'], 'title'),
|
||||
(namespaces['xlink'], 'type'),
|
||||
(namespaces['xml'], 'base'),
|
||||
(namespaces['xml'], 'lang'),
|
||||
(namespaces['xml'], 'space'),
|
||||
(None, 'y'),
|
||||
(None, 'y1'),
|
||||
(None, 'y2'),
|
||||
(None, 'zoomAndPan'),
|
||||
))
|
||||
|
||||
attr_val_is_uri = frozenset((
|
||||
(None, 'href'),
|
||||
(None, 'src'),
|
||||
(None, 'cite'),
|
||||
(None, 'action'),
|
||||
(None, 'longdesc'),
|
||||
(None, 'poster'),
|
||||
(None, 'background'),
|
||||
(None, 'datasrc'),
|
||||
(None, 'dynsrc'),
|
||||
(None, 'lowsrc'),
|
||||
(None, 'ping'),
|
||||
(namespaces['xlink'], 'href'),
|
||||
(namespaces['xml'], 'base'),
|
||||
))
|
||||
|
||||
svg_attr_val_allows_ref = frozenset((
|
||||
(None, 'clip-path'),
|
||||
(None, 'color-profile'),
|
||||
(None, 'cursor'),
|
||||
(None, 'fill'),
|
||||
(None, 'filter'),
|
||||
(None, 'marker'),
|
||||
(None, 'marker-start'),
|
||||
(None, 'marker-mid'),
|
||||
(None, 'marker-end'),
|
||||
(None, 'mask'),
|
||||
(None, 'stroke'),
|
||||
))
|
||||
|
||||
svg_allow_local_href = frozenset((
|
||||
(None, 'altGlyph'),
|
||||
(None, 'animate'),
|
||||
(None, 'animateColor'),
|
||||
(None, 'animateMotion'),
|
||||
(None, 'animateTransform'),
|
||||
(None, 'cursor'),
|
||||
(None, 'feImage'),
|
||||
(None, 'filter'),
|
||||
(None, 'linearGradient'),
|
||||
(None, 'pattern'),
|
||||
(None, 'radialGradient'),
|
||||
(None, 'textpath'),
|
||||
(None, 'tref'),
|
||||
(None, 'set'),
|
||||
(None, 'use')
|
||||
))
|
||||
|
||||
allowed_css_properties = frozenset((
|
||||
'azimuth',
|
||||
'background-color',
|
||||
'border-bottom-color',
|
||||
'border-collapse',
|
||||
'border-color',
|
||||
'border-left-color',
|
||||
'border-right-color',
|
||||
'border-top-color',
|
||||
'clear',
|
||||
'color',
|
||||
'cursor',
|
||||
'direction',
|
||||
'display',
|
||||
'elevation',
|
||||
'float',
|
||||
'font',
|
||||
'font-family',
|
||||
'font-size',
|
||||
'font-style',
|
||||
'font-variant',
|
||||
'font-weight',
|
||||
'height',
|
||||
'letter-spacing',
|
||||
'line-height',
|
||||
'overflow',
|
||||
'pause',
|
||||
'pause-after',
|
||||
'pause-before',
|
||||
'pitch',
|
||||
'pitch-range',
|
||||
'richness',
|
||||
'speak',
|
||||
'speak-header',
|
||||
'speak-numeral',
|
||||
'speak-punctuation',
|
||||
'speech-rate',
|
||||
'stress',
|
||||
'text-align',
|
||||
'text-decoration',
|
||||
'text-indent',
|
||||
'unicode-bidi',
|
||||
'vertical-align',
|
||||
'voice-family',
|
||||
'volume',
|
||||
'white-space',
|
||||
'width',
|
||||
))
|
||||
|
||||
allowed_css_keywords = frozenset((
|
||||
'auto',
|
||||
'aqua',
|
||||
'black',
|
||||
'block',
|
||||
'blue',
|
||||
'bold',
|
||||
'both',
|
||||
'bottom',
|
||||
'brown',
|
||||
'center',
|
||||
'collapse',
|
||||
'dashed',
|
||||
'dotted',
|
||||
'fuchsia',
|
||||
'gray',
|
||||
'green',
|
||||
'!important',
|
||||
'italic',
|
||||
'left',
|
||||
'lime',
|
||||
'maroon',
|
||||
'medium',
|
||||
'none',
|
||||
'navy',
|
||||
'normal',
|
||||
'nowrap',
|
||||
'olive',
|
||||
'pointer',
|
||||
'purple',
|
||||
'red',
|
||||
'right',
|
||||
'solid',
|
||||
'silver',
|
||||
'teal',
|
||||
'top',
|
||||
'transparent',
|
||||
'underline',
|
||||
'white',
|
||||
'yellow',
|
||||
))
|
||||
|
||||
allowed_svg_properties = frozenset((
|
||||
'fill',
|
||||
'fill-opacity',
|
||||
'fill-rule',
|
||||
'stroke',
|
||||
'stroke-width',
|
||||
'stroke-linecap',
|
||||
'stroke-linejoin',
|
||||
'stroke-opacity',
|
||||
))
|
||||
|
||||
allowed_protocols = frozenset((
|
||||
'ed2k',
|
||||
'ftp',
|
||||
'http',
|
||||
'https',
|
||||
'irc',
|
||||
'mailto',
|
||||
'news',
|
||||
'gopher',
|
||||
'nntp',
|
||||
'telnet',
|
||||
'webcal',
|
||||
'xmpp',
|
||||
'callto',
|
||||
'feed',
|
||||
'urn',
|
||||
'aim',
|
||||
'rsync',
|
||||
'tag',
|
||||
'ssh',
|
||||
'sftp',
|
||||
'rtsp',
|
||||
'afs',
|
||||
'data',
|
||||
))
|
||||
|
||||
allowed_content_types = frozenset((
|
||||
'image/png',
|
||||
'image/jpeg',
|
||||
'image/gif',
|
||||
'image/webp',
|
||||
'image/bmp',
|
||||
'text/plain',
|
||||
))
|
||||
|
||||
|
||||
data_content_type = re.compile(r'''
|
||||
^
|
||||
# Match a content type <application>/<type>
|
||||
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
|
||||
# Match any character set and encoding
|
||||
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|
||||
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
|
||||
# Assume the rest is data
|
||||
,.*
|
||||
$
|
||||
''',
|
||||
re.VERBOSE)
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
|
||||
def __init__(self,
|
||||
source,
|
||||
allowed_elements=allowed_elements,
|
||||
allowed_attributes=allowed_attributes,
|
||||
allowed_css_properties=allowed_css_properties,
|
||||
allowed_css_keywords=allowed_css_keywords,
|
||||
allowed_svg_properties=allowed_svg_properties,
|
||||
allowed_protocols=allowed_protocols,
|
||||
allowed_content_types=allowed_content_types,
|
||||
attr_val_is_uri=attr_val_is_uri,
|
||||
svg_attr_val_allows_ref=svg_attr_val_allows_ref,
|
||||
svg_allow_local_href=svg_allow_local_href):
|
||||
"""Creates a Filter
|
||||
|
||||
:arg allowed_elements: set of elements to allow--everything else will
|
||||
be escaped
|
||||
|
||||
:arg allowed_attributes: set of attributes to allow in
|
||||
elements--everything else will be stripped
|
||||
|
||||
:arg allowed_css_properties: set of CSS properties to allow--everything
|
||||
else will be stripped
|
||||
|
||||
:arg allowed_css_keywords: set of CSS keywords to allow--everything
|
||||
else will be stripped
|
||||
|
||||
:arg allowed_svg_properties: set of SVG properties to allow--everything
|
||||
else will be removed
|
||||
|
||||
:arg allowed_protocols: set of allowed protocols for URIs
|
||||
|
||||
:arg allowed_content_types: set of allowed content types for ``data`` URIs.
|
||||
|
||||
:arg attr_val_is_uri: set of attributes that have URI values--values
|
||||
that have a scheme not listed in ``allowed_protocols`` are removed
|
||||
|
||||
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
|
||||
references
|
||||
|
||||
:arg svg_allow_local_href: set of SVG elements that can have local
|
||||
hrefs--these are removed
|
||||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
|
||||
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||
|
||||
self.allowed_elements = allowed_elements
|
||||
self.allowed_attributes = allowed_attributes
|
||||
self.allowed_css_properties = allowed_css_properties
|
||||
self.allowed_css_keywords = allowed_css_keywords
|
||||
self.allowed_svg_properties = allowed_svg_properties
|
||||
self.allowed_protocols = allowed_protocols
|
||||
self.allowed_content_types = allowed_content_types
|
||||
self.attr_val_is_uri = attr_val_is_uri
|
||||
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
|
||||
self.svg_allow_local_href = svg_allow_local_href
|
||||
|
||||
def __iter__(self):
|
||||
for token in base.Filter.__iter__(self):
|
||||
token = self.sanitize_token(token)
|
||||
if token:
|
||||
yield token
|
||||
|
||||
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
|
||||
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
|
||||
# are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
|
||||
# ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
|
||||
# are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
|
||||
# allowed.
|
||||
#
|
||||
# sanitize_html('<script> do_nasty_stuff() </script>')
|
||||
# => <script> do_nasty_stuff() </script>
|
||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
||||
# => <a>Click here for $100</a>
|
||||
def sanitize_token(self, token):
|
||||
|
||||
# accommodate filters which use token_type differently
|
||||
token_type = token["type"]
|
||||
if token_type in ("StartTag", "EndTag", "EmptyTag"):
|
||||
name = token["name"]
|
||||
namespace = token["namespace"]
|
||||
if ((namespace, name) in self.allowed_elements or
|
||||
(namespace is None and
|
||||
(namespaces["html"], name) in self.allowed_elements)):
|
||||
return self.allowed_token(token)
|
||||
else:
|
||||
return self.disallowed_token(token)
|
||||
elif token_type == "Comment":
|
||||
pass
|
||||
else:
|
||||
return token
|
||||
|
||||
def allowed_token(self, token):
|
||||
if "data" in token:
|
||||
attrs = token["data"]
|
||||
attr_names = set(attrs.keys())
|
||||
|
||||
# Remove forbidden attributes
|
||||
for to_remove in (attr_names - self.allowed_attributes):
|
||||
del token["data"][to_remove]
|
||||
attr_names.remove(to_remove)
|
||||
|
||||
# Remove attributes with disallowed URL values
|
||||
for attr in (attr_names & self.attr_val_is_uri):
|
||||
assert attr in attrs
|
||||
# I don't have a clue where this regexp comes from or why it matches those
|
||||
# characters, nor why we call unescape. I just know it's always been here.
|
||||
# Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
|
||||
# this will do is remove *more* than it otherwise would.
|
||||
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
|
||||
unescape(attrs[attr])).lower()
|
||||
# remove replacement characters from unescaped characters
|
||||
val_unescaped = val_unescaped.replace("\ufffd", "")
|
||||
try:
|
||||
uri = urlparse.urlparse(val_unescaped)
|
||||
except ValueError:
|
||||
uri = None
|
||||
del attrs[attr]
|
||||
if uri and uri.scheme:
|
||||
if uri.scheme not in self.allowed_protocols:
|
||||
del attrs[attr]
|
||||
if uri.scheme == 'data':
|
||||
m = data_content_type.match(uri.path)
|
||||
if not m:
|
||||
del attrs[attr]
|
||||
elif m.group('content_type') not in self.allowed_content_types:
|
||||
del attrs[attr]
|
||||
|
||||
for attr in self.svg_attr_val_allows_ref:
|
||||
if attr in attrs:
|
||||
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
|
||||
' ',
|
||||
unescape(attrs[attr]))
|
||||
if (token["name"] in self.svg_allow_local_href and
|
||||
(namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
|
||||
attrs[(namespaces['xlink'], 'href')])):
|
||||
del attrs[(namespaces['xlink'], 'href')]
|
||||
if (None, 'style') in attrs:
|
||||
attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
|
||||
token["data"] = attrs
|
||||
return token
|
||||
|
||||
def disallowed_token(self, token):
|
||||
token_type = token["type"]
|
||||
if token_type == "EndTag":
|
||||
token["data"] = "</%s>" % token["name"]
|
||||
elif token["data"]:
|
||||
assert token_type in ("StartTag", "EmptyTag")
|
||||
attrs = []
|
||||
for (ns, name), v in token["data"].items():
|
||||
attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
|
||||
token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
|
||||
else:
|
||||
token["data"] = "<%s>" % token["name"]
|
||||
if token.get("selfClosing"):
|
||||
token["data"] = token["data"][:-1] + "/>"
|
||||
|
||||
token["type"] = "Characters"
|
||||
|
||||
del token["name"]
|
||||
return token
|
||||
|
||||
def sanitize_css(self, style):
|
||||
# disallow urls
|
||||
style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
|
||||
|
||||
# gauntlet
|
||||
if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
|
||||
return ''
|
||||
if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
|
||||
return ''
|
||||
|
||||
clean = []
|
||||
for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
|
||||
if not value:
|
||||
continue
|
||||
if prop.lower() in self.allowed_css_properties:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
|
||||
'padding']:
|
||||
for keyword in value.split():
|
||||
if keyword not in self.allowed_css_keywords and \
|
||||
not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
|
||||
break
|
||||
else:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
elif prop.lower() in self.allowed_svg_properties:
|
||||
clean.append(prop + ': ' + value + ';')
|
||||
|
||||
return ' '.join(clean)
|
||||
@@ -1,38 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from . import base
|
||||
from ..constants import rcdataElements, spaceCharacters
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
|
||||
|
||||
|
||||
class Filter(base.Filter):
|
||||
"""Collapses whitespace except in pre, textarea, and script elements"""
|
||||
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
|
||||
|
||||
def __iter__(self):
|
||||
preserve = 0
|
||||
for token in base.Filter.__iter__(self):
|
||||
type = token["type"]
|
||||
if type == "StartTag" \
|
||||
and (preserve or token["name"] in self.spacePreserveElements):
|
||||
preserve += 1
|
||||
|
||||
elif type == "EndTag" and preserve:
|
||||
preserve -= 1
|
||||
|
||||
elif not preserve and type == "SpaceCharacters" and token["data"]:
|
||||
# Test on token["data"] above to not introduce spaces where there were not
|
||||
token["data"] = " "
|
||||
|
||||
elif not preserve and type == "Characters":
|
||||
token["data"] = collapse_spaces(token["data"])
|
||||
|
||||
yield token
|
||||
|
||||
|
||||
def collapse_spaces(text):
|
||||
return SPACES_REGEX.sub(' ', text)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,409 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from pipenv.patched.notpip._vendor.six import text_type
|
||||
|
||||
import re
|
||||
|
||||
from codecs import register_error, xmlcharrefreplace_errors
|
||||
|
||||
from .constants import voidElements, booleanAttributes, spaceCharacters
|
||||
from .constants import rcdataElements, entities, xmlEntities
|
||||
from . import treewalkers, _utils
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
|
||||
_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
|
||||
_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
|
||||
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
|
||||
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
|
||||
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
|
||||
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
|
||||
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
|
||||
"\u3000]")
|
||||
|
||||
|
||||
_encode_entity_map = {}
|
||||
_is_ucs4 = len("\U0010FFFF") == 1
|
||||
for k, v in list(entities.items()):
|
||||
# skip multi-character entities
|
||||
if ((_is_ucs4 and len(v) > 1) or
|
||||
(not _is_ucs4 and len(v) > 2)):
|
||||
continue
|
||||
if v != "&":
|
||||
if len(v) == 2:
|
||||
v = _utils.surrogatePairToCodepoint(v)
|
||||
else:
|
||||
v = ord(v)
|
||||
if v not in _encode_entity_map or k.islower():
|
||||
# prefer < over < and similarly for &, >, etc.
|
||||
_encode_entity_map[v] = k
|
||||
|
||||
|
||||
def htmlentityreplace_errors(exc):
|
||||
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
|
||||
res = []
|
||||
codepoints = []
|
||||
skip = False
|
||||
for i, c in enumerate(exc.object[exc.start:exc.end]):
|
||||
if skip:
|
||||
skip = False
|
||||
continue
|
||||
index = i + exc.start
|
||||
if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
|
||||
codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
|
||||
skip = True
|
||||
else:
|
||||
codepoint = ord(c)
|
||||
codepoints.append(codepoint)
|
||||
for cp in codepoints:
|
||||
e = _encode_entity_map.get(cp)
|
||||
if e:
|
||||
res.append("&")
|
||||
res.append(e)
|
||||
if not e.endswith(";"):
|
||||
res.append(";")
|
||||
else:
|
||||
res.append("&#x%s;" % (hex(cp)[2:]))
|
||||
return ("".join(res), exc.end)
|
||||
else:
|
||||
return xmlcharrefreplace_errors(exc)
|
||||
|
||||
|
||||
register_error("htmlentityreplace", htmlentityreplace_errors)
|
||||
|
||||
|
||||
def serialize(input, tree="etree", encoding=None, **serializer_opts):
|
||||
"""Serializes the input token stream using the specified treewalker
|
||||
|
||||
:arg input: the token stream to serialize
|
||||
|
||||
:arg tree: the treewalker to use
|
||||
|
||||
:arg encoding: the encoding to use
|
||||
|
||||
:arg serializer_opts: any options to pass to the
|
||||
:py:class:`html5lib.serializer.HTMLSerializer` that gets created
|
||||
|
||||
:returns: the tree serialized as a string
|
||||
|
||||
Example:
|
||||
|
||||
>>> from html5lib.html5parser import parse
|
||||
>>> from html5lib.serializer import serialize
|
||||
>>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
|
||||
>>> serialize(token_stream, omit_optional_tags=False)
|
||||
'<html><head></head><body><p>Hi!</p></body></html>'
|
||||
|
||||
"""
|
||||
# XXX: Should we cache this?
|
||||
walker = treewalkers.getTreeWalker(tree)
|
||||
s = HTMLSerializer(**serializer_opts)
|
||||
return s.render(walker(input), encoding)
|
||||
|
||||
|
||||
class HTMLSerializer(object):
|
||||
|
||||
# attribute quoting options
|
||||
quote_attr_values = "legacy" # be secure by default
|
||||
quote_char = '"'
|
||||
use_best_quote_char = True
|
||||
|
||||
# tag syntax options
|
||||
omit_optional_tags = True
|
||||
minimize_boolean_attributes = True
|
||||
use_trailing_solidus = False
|
||||
space_before_trailing_solidus = True
|
||||
|
||||
# escaping options
|
||||
escape_lt_in_attrs = False
|
||||
escape_rcdata = False
|
||||
resolve_entities = True
|
||||
|
||||
# miscellaneous options
|
||||
alphabetical_attributes = False
|
||||
inject_meta_charset = True
|
||||
strip_whitespace = False
|
||||
sanitize = False
|
||||
|
||||
options = ("quote_attr_values", "quote_char", "use_best_quote_char",
|
||||
"omit_optional_tags", "minimize_boolean_attributes",
|
||||
"use_trailing_solidus", "space_before_trailing_solidus",
|
||||
"escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
|
||||
"alphabetical_attributes", "inject_meta_charset",
|
||||
"strip_whitespace", "sanitize")
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize HTMLSerializer
|
||||
|
||||
:arg inject_meta_charset: Whether or not to inject the meta charset.
|
||||
|
||||
Defaults to ``True``.
|
||||
|
||||
:arg quote_attr_values: Whether to quote attribute values that don't
|
||||
require quoting per legacy browser behavior (``"legacy"``), when
|
||||
required by the standard (``"spec"``), or always (``"always"``).
|
||||
|
||||
Defaults to ``"legacy"``.
|
||||
|
||||
:arg quote_char: Use given quote character for attribute quoting.
|
||||
|
||||
Defaults to ``"`` which will use double quotes unless attribute
|
||||
value contains a double quote, in which case single quotes are
|
||||
used.
|
||||
|
||||
:arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
|
||||
values.
|
||||
|
||||
Defaults to ``False``.
|
||||
|
||||
:arg escape_rcdata: Whether to escape characters that need to be
|
||||
escaped within normal elements within rcdata elements such as
|
||||
style.
|
||||
|
||||
Defaults to ``False``.
|
||||
|
||||
:arg resolve_entities: Whether to resolve named character entities that
|
||||
appear in the source tree. The XML predefined entities < >
|
||||
& " ' are unaffected by this setting.
|
||||
|
||||
Defaults to ``True``.
|
||||
|
||||
:arg strip_whitespace: Whether to remove semantically meaningless
|
||||
whitespace. (This compresses all whitespace to a single space
|
||||
except within ``pre``.)
|
||||
|
||||
Defaults to ``False``.
|
||||
|
||||
:arg minimize_boolean_attributes: Shortens boolean attributes to give
|
||||
just the attribute value, for example::
|
||||
|
||||
<input disabled="disabled">
|
||||
|
||||
becomes::
|
||||
|
||||
<input disabled>
|
||||
|
||||
Defaults to ``True``.
|
||||
|
||||
:arg use_trailing_solidus: Includes a close-tag slash at the end of the
|
||||
start tag of void elements (empty elements whose end tag is
|
||||
forbidden). E.g. ``<hr/>``.
|
||||
|
||||
Defaults to ``False``.
|
||||
|
||||
:arg space_before_trailing_solidus: Places a space immediately before
|
||||
the closing slash in a tag using a trailing solidus. E.g.
|
||||
``<hr />``. Requires ``use_trailing_solidus=True``.
|
||||
|
||||
Defaults to ``True``.
|
||||
|
||||
:arg sanitize: Strip all unsafe or unknown constructs from output.
|
||||
See :py:class:`html5lib.filters.sanitizer.Filter`.
|
||||
|
||||
Defaults to ``False``.
|
||||
|
||||
:arg omit_optional_tags: Omit start/end tags that are optional.
|
||||
|
||||
Defaults to ``True``.
|
||||
|
||||
:arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
|
||||
|
||||
Defaults to ``False``.
|
||||
|
||||
"""
|
||||
unexpected_args = frozenset(kwargs) - frozenset(self.options)
|
||||
if len(unexpected_args) > 0:
|
||||
raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
|
||||
if 'quote_char' in kwargs:
|
||||
self.use_best_quote_char = False
|
||||
for attr in self.options:
|
||||
setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
|
||||
self.errors = []
|
||||
self.strict = False
|
||||
|
||||
def encode(self, string):
|
||||
assert(isinstance(string, text_type))
|
||||
if self.encoding:
|
||||
return string.encode(self.encoding, "htmlentityreplace")
|
||||
else:
|
||||
return string
|
||||
|
||||
def encodeStrict(self, string):
|
||||
assert(isinstance(string, text_type))
|
||||
if self.encoding:
|
||||
return string.encode(self.encoding, "strict")
|
||||
else:
|
||||
return string
|
||||
|
||||
def serialize(self, treewalker, encoding=None):
|
||||
# pylint:disable=too-many-nested-blocks
|
||||
self.encoding = encoding
|
||||
in_cdata = False
|
||||
self.errors = []
|
||||
|
||||
if encoding and self.inject_meta_charset:
|
||||
from .filters.inject_meta_charset import Filter
|
||||
treewalker = Filter(treewalker, encoding)
|
||||
# Alphabetical attributes is here under the assumption that none of
|
||||
# the later filters add or change order of attributes; it needs to be
|
||||
# before the sanitizer so escaped elements come out correctly
|
||||
if self.alphabetical_attributes:
|
||||
from .filters.alphabeticalattributes import Filter
|
||||
treewalker = Filter(treewalker)
|
||||
# WhitespaceFilter should be used before OptionalTagFilter
|
||||
# for maximum efficiently of this latter filter
|
||||
if self.strip_whitespace:
|
||||
from .filters.whitespace import Filter
|
||||
treewalker = Filter(treewalker)
|
||||
if self.sanitize:
|
||||
from .filters.sanitizer import Filter
|
||||
treewalker = Filter(treewalker)
|
||||
if self.omit_optional_tags:
|
||||
from .filters.optionaltags import Filter
|
||||
treewalker = Filter(treewalker)
|
||||
|
||||
for token in treewalker:
|
||||
type = token["type"]
|
||||
if type == "Doctype":
|
||||
doctype = "<!DOCTYPE %s" % token["name"]
|
||||
|
||||
if token["publicId"]:
|
||||
doctype += ' PUBLIC "%s"' % token["publicId"]
|
||||
elif token["systemId"]:
|
||||
doctype += " SYSTEM"
|
||||
if token["systemId"]:
|
||||
if token["systemId"].find('"') >= 0:
|
||||
if token["systemId"].find("'") >= 0:
|
||||
self.serializeError("System identifier contains both single and double quote characters")
|
||||
quote_char = "'"
|
||||
else:
|
||||
quote_char = '"'
|
||||
doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
|
||||
|
||||
doctype += ">"
|
||||
yield self.encodeStrict(doctype)
|
||||
|
||||
elif type in ("Characters", "SpaceCharacters"):
|
||||
if type == "SpaceCharacters" or in_cdata:
|
||||
if in_cdata and token["data"].find("</") >= 0:
|
||||
self.serializeError("Unexpected </ in CDATA")
|
||||
yield self.encode(token["data"])
|
||||
else:
|
||||
yield self.encode(escape(token["data"]))
|
||||
|
||||
elif type in ("StartTag", "EmptyTag"):
|
||||
name = token["name"]
|
||||
yield self.encodeStrict("<%s" % name)
|
||||
if name in rcdataElements and not self.escape_rcdata:
|
||||
in_cdata = True
|
||||
elif in_cdata:
|
||||
self.serializeError("Unexpected child element of a CDATA element")
|
||||
for (_, attr_name), attr_value in token["data"].items():
|
||||
# TODO: Add namespace support here
|
||||
k = attr_name
|
||||
v = attr_value
|
||||
yield self.encodeStrict(' ')
|
||||
|
||||
yield self.encodeStrict(k)
|
||||
if not self.minimize_boolean_attributes or \
|
||||
(k not in booleanAttributes.get(name, tuple()) and
|
||||
k not in booleanAttributes.get("", tuple())):
|
||||
yield self.encodeStrict("=")
|
||||
if self.quote_attr_values == "always" or len(v) == 0:
|
||||
quote_attr = True
|
||||
elif self.quote_attr_values == "spec":
|
||||
quote_attr = _quoteAttributeSpec.search(v) is not None
|
||||
elif self.quote_attr_values == "legacy":
|
||||
quote_attr = _quoteAttributeLegacy.search(v) is not None
|
||||
else:
|
||||
raise ValueError("quote_attr_values must be one of: "
|
||||
"'always', 'spec', or 'legacy'")
|
||||
v = v.replace("&", "&")
|
||||
if self.escape_lt_in_attrs:
|
||||
v = v.replace("<", "<")
|
||||
if quote_attr:
|
||||
quote_char = self.quote_char
|
||||
if self.use_best_quote_char:
|
||||
if "'" in v and '"' not in v:
|
||||
quote_char = '"'
|
||||
elif '"' in v and "'" not in v:
|
||||
quote_char = "'"
|
||||
if quote_char == "'":
|
||||
v = v.replace("'", "'")
|
||||
else:
|
||||
v = v.replace('"', """)
|
||||
yield self.encodeStrict(quote_char)
|
||||
yield self.encode(v)
|
||||
yield self.encodeStrict(quote_char)
|
||||
else:
|
||||
yield self.encode(v)
|
||||
if name in voidElements and self.use_trailing_solidus:
|
||||
if self.space_before_trailing_solidus:
|
||||
yield self.encodeStrict(" /")
|
||||
else:
|
||||
yield self.encodeStrict("/")
|
||||
yield self.encode(">")
|
||||
|
||||
elif type == "EndTag":
|
||||
name = token["name"]
|
||||
if name in rcdataElements:
|
||||
in_cdata = False
|
||||
elif in_cdata:
|
||||
self.serializeError("Unexpected child element of a CDATA element")
|
||||
yield self.encodeStrict("</%s>" % name)
|
||||
|
||||
elif type == "Comment":
|
||||
data = token["data"]
|
||||
if data.find("--") >= 0:
|
||||
self.serializeError("Comment contains --")
|
||||
yield self.encodeStrict("<!--%s-->" % token["data"])
|
||||
|
||||
elif type == "Entity":
|
||||
name = token["name"]
|
||||
key = name + ";"
|
||||
if key not in entities:
|
||||
self.serializeError("Entity %s not recognized" % name)
|
||||
if self.resolve_entities and key not in xmlEntities:
|
||||
data = entities[key]
|
||||
else:
|
||||
data = "&%s;" % name
|
||||
yield self.encodeStrict(data)
|
||||
|
||||
else:
|
||||
self.serializeError(token["data"])
|
||||
|
||||
def render(self, treewalker, encoding=None):
|
||||
"""Serializes the stream from the treewalker into a string
|
||||
|
||||
:arg treewalker: the treewalker to serialize
|
||||
|
||||
:arg encoding: the string encoding to use
|
||||
|
||||
:returns: the serialized tree
|
||||
|
||||
Example:
|
||||
|
||||
>>> from html5lib import parse, getTreeWalker
|
||||
>>> from html5lib.serializer import HTMLSerializer
|
||||
>>> token_stream = parse('<html><body>Hi!</body></html>')
|
||||
>>> walker = getTreeWalker('etree')
|
||||
>>> serializer = HTMLSerializer(omit_optional_tags=False)
|
||||
>>> serializer.render(walker(token_stream))
|
||||
'<html><head></head><body>Hi!</body></html>'
|
||||
|
||||
"""
|
||||
if encoding:
|
||||
return b"".join(list(self.serialize(treewalker, encoding)))
|
||||
else:
|
||||
return "".join(list(self.serialize(treewalker)))
|
||||
|
||||
def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
|
||||
# XXX The idea is to make data mandatory.
|
||||
self.errors.append(data)
|
||||
if self.strict:
|
||||
raise SerializeError
|
||||
|
||||
|
||||
class SerializeError(Exception):
|
||||
"""Error in serialized tree"""
|
||||
pass
|
||||
@@ -1,30 +0,0 @@
|
||||
"""Tree adapters let you convert from one tree structure to another
|
||||
|
||||
Example:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
from pipenv.patched.notpip._vendor import html5lib
|
||||
from pipenv.patched.notpip._vendor.html5lib.treeadapters import genshi
|
||||
|
||||
doc = '<html><body>Hi!</body></html>'
|
||||
treebuilder = html5lib.getTreeBuilder('etree')
|
||||
parser = html5lib.HTMLParser(tree=treebuilder)
|
||||
tree = parser.parse(doc)
|
||||
TreeWalker = html5lib.getTreeWalker('etree')
|
||||
|
||||
genshi_tree = genshi.to_genshi(TreeWalker(tree))
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from . import sax
|
||||
|
||||
__all__ = ["sax"]
|
||||
|
||||
try:
|
||||
from . import genshi # noqa
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
__all__.append("genshi")
|
||||
@@ -1,54 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from genshi.core import QName, Attrs
|
||||
from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
|
||||
|
||||
|
||||
def to_genshi(walker):
|
||||
"""Convert a tree to a genshi tree
|
||||
|
||||
:arg walker: the treewalker to use to walk the tree to convert it
|
||||
|
||||
:returns: generator of genshi nodes
|
||||
|
||||
"""
|
||||
text = []
|
||||
for token in walker:
|
||||
type = token["type"]
|
||||
if type in ("Characters", "SpaceCharacters"):
|
||||
text.append(token["data"])
|
||||
elif text:
|
||||
yield TEXT, "".join(text), (None, -1, -1)
|
||||
text = []
|
||||
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
if token["namespace"]:
|
||||
name = "{%s}%s" % (token["namespace"], token["name"])
|
||||
else:
|
||||
name = token["name"]
|
||||
attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
|
||||
for attr, value in token["data"].items()])
|
||||
yield (START, (QName(name), attrs), (None, -1, -1))
|
||||
if type == "EmptyTag":
|
||||
type = "EndTag"
|
||||
|
||||
if type == "EndTag":
|
||||
if token["namespace"]:
|
||||
name = "{%s}%s" % (token["namespace"], token["name"])
|
||||
else:
|
||||
name = token["name"]
|
||||
|
||||
yield END, QName(name), (None, -1, -1)
|
||||
|
||||
elif type == "Comment":
|
||||
yield COMMENT, token["data"], (None, -1, -1)
|
||||
|
||||
elif type == "Doctype":
|
||||
yield DOCTYPE, (token["name"], token["publicId"],
|
||||
token["systemId"]), (None, -1, -1)
|
||||
|
||||
else:
|
||||
pass # FIXME: What to do?
|
||||
|
||||
if text:
|
||||
yield TEXT, "".join(text), (None, -1, -1)
|
||||
@@ -1,50 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from xml.sax.xmlreader import AttributesNSImpl
|
||||
|
||||
from ..constants import adjustForeignAttributes, unadjustForeignAttributes
|
||||
|
||||
prefix_mapping = {}
|
||||
for prefix, localName, namespace in adjustForeignAttributes.values():
|
||||
if prefix is not None:
|
||||
prefix_mapping[prefix] = namespace
|
||||
|
||||
|
||||
def to_sax(walker, handler):
|
||||
"""Call SAX-like content handler based on treewalker walker
|
||||
|
||||
:arg walker: the treewalker to use to walk the tree to convert it
|
||||
|
||||
:arg handler: SAX handler to use
|
||||
|
||||
"""
|
||||
handler.startDocument()
|
||||
for prefix, namespace in prefix_mapping.items():
|
||||
handler.startPrefixMapping(prefix, namespace)
|
||||
|
||||
for token in walker:
|
||||
type = token["type"]
|
||||
if type == "Doctype":
|
||||
continue
|
||||
elif type in ("StartTag", "EmptyTag"):
|
||||
attrs = AttributesNSImpl(token["data"],
|
||||
unadjustForeignAttributes)
|
||||
handler.startElementNS((token["namespace"], token["name"]),
|
||||
token["name"],
|
||||
attrs)
|
||||
if type == "EmptyTag":
|
||||
handler.endElementNS((token["namespace"], token["name"]),
|
||||
token["name"])
|
||||
elif type == "EndTag":
|
||||
handler.endElementNS((token["namespace"], token["name"]),
|
||||
token["name"])
|
||||
elif type in ("Characters", "SpaceCharacters"):
|
||||
handler.characters(token["data"])
|
||||
elif type == "Comment":
|
||||
pass
|
||||
else:
|
||||
assert False, "Unknown token type"
|
||||
|
||||
for prefix, namespace in prefix_mapping.items():
|
||||
handler.endPrefixMapping(prefix)
|
||||
handler.endDocument()
|
||||
@@ -1,88 +0,0 @@
|
||||
"""A collection of modules for building different kinds of trees from HTML
|
||||
documents.
|
||||
|
||||
To create a treebuilder for a new type of tree, you need to do
|
||||
implement several things:
|
||||
|
||||
1. A set of classes for various types of elements: Document, Doctype, Comment,
|
||||
Element. These must implement the interface of ``base.treebuilders.Node``
|
||||
(although comment nodes have a different signature for their constructor,
|
||||
see ``treebuilders.etree.Comment``) Textual content may also be implemented
|
||||
as another node type, or not, as your tree implementation requires.
|
||||
|
||||
2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits
|
||||
from ``treebuilders.base.TreeBuilder``. This has 4 required attributes:
|
||||
|
||||
* ``documentClass`` - the class to use for the bottommost node of a document
|
||||
* ``elementClass`` - the class to use for HTML Elements
|
||||
* ``commentClass`` - the class to use for comments
|
||||
* ``doctypeClass`` - the class to use for doctypes
|
||||
|
||||
It also has one required method:
|
||||
|
||||
* ``getDocument`` - Returns the root node of the complete document tree
|
||||
|
||||
3. If you wish to run the unit tests, you must also create a ``testSerializer``
|
||||
method on your treebuilder which accepts a node and returns a string
|
||||
containing Node and its children serialized according to the format used in
|
||||
the unittests
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .._utils import default_etree
|
||||
|
||||
treeBuilderCache = {}
|
||||
|
||||
|
||||
def getTreeBuilder(treeType, implementation=None, **kwargs):
|
||||
"""Get a TreeBuilder class for various types of trees with built-in support
|
||||
|
||||
:arg treeType: the name of the tree type required (case-insensitive). Supported
|
||||
values are:
|
||||
|
||||
* "dom" - A generic builder for DOM implementations, defaulting to a
|
||||
xml.dom.minidom based implementation.
|
||||
* "etree" - A generic builder for tree implementations exposing an
|
||||
ElementTree-like interface, defaulting to xml.etree.cElementTree if
|
||||
available and xml.etree.ElementTree if not.
|
||||
* "lxml" - A etree-based builder for lxml.etree, handling limitations
|
||||
of lxml's implementation.
|
||||
|
||||
:arg implementation: (Currently applies to the "etree" and "dom" tree
|
||||
types). A module implementing the tree type e.g. xml.etree.ElementTree
|
||||
or xml.etree.cElementTree.
|
||||
|
||||
:arg kwargs: Any additional options to pass to the TreeBuilder when
|
||||
creating it.
|
||||
|
||||
Example:
|
||||
|
||||
>>> from html5lib.treebuilders import getTreeBuilder
|
||||
>>> builder = getTreeBuilder('etree')
|
||||
|
||||
"""
|
||||
|
||||
treeType = treeType.lower()
|
||||
if treeType not in treeBuilderCache:
|
||||
if treeType == "dom":
|
||||
from . import dom
|
||||
# Come up with a sane default (pref. from the stdlib)
|
||||
if implementation is None:
|
||||
from xml.dom import minidom
|
||||
implementation = minidom
|
||||
# NEVER cache here, caching is done in the dom submodule
|
||||
return dom.getDomModule(implementation, **kwargs).TreeBuilder
|
||||
elif treeType == "lxml":
|
||||
from . import etree_lxml
|
||||
treeBuilderCache[treeType] = etree_lxml.TreeBuilder
|
||||
elif treeType == "etree":
|
||||
from . import etree
|
||||
if implementation is None:
|
||||
implementation = default_etree
|
||||
# NEVER cache here, caching is done in the etree submodule
|
||||
return etree.getETreeModule(implementation, **kwargs).TreeBuilder
|
||||
else:
|
||||
raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
|
||||
return treeBuilderCache.get(treeType)
|
||||
@@ -1,417 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from pipenv.patched.notpip._vendor.six import text_type
|
||||
|
||||
from ..constants import scopingElements, tableInsertModeElements, namespaces
|
||||
|
||||
# The scope markers are inserted when entering object elements,
|
||||
# marquees, table cells, and table captions, and are used to prevent formatting
|
||||
# from "leaking" into tables, object elements, and marquees.
|
||||
Marker = None
|
||||
|
||||
listElementsMap = {
|
||||
None: (frozenset(scopingElements), False),
|
||||
"button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
|
||||
"list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
|
||||
(namespaces["html"], "ul")}), False),
|
||||
"table": (frozenset([(namespaces["html"], "html"),
|
||||
(namespaces["html"], "table")]), False),
|
||||
"select": (frozenset([(namespaces["html"], "optgroup"),
|
||||
(namespaces["html"], "option")]), True)
|
||||
}
|
||||
|
||||
|
||||
class Node(object):
|
||||
"""Represents an item in the tree"""
|
||||
def __init__(self, name):
|
||||
"""Creates a Node
|
||||
|
||||
:arg name: The tag name associated with the node
|
||||
|
||||
"""
|
||||
# The tag name associated with the node
|
||||
self.name = name
|
||||
# The parent of the current node (or None for the document node)
|
||||
self.parent = None
|
||||
# The value of the current node (applies to text nodes and comments)
|
||||
self.value = None
|
||||
# A dict holding name -> value pairs for attributes of the node
|
||||
self.attributes = {}
|
||||
# A list of child nodes of the current node. This must include all
|
||||
# elements but not necessarily other node types.
|
||||
self.childNodes = []
|
||||
# A list of miscellaneous flags that can be set on the node.
|
||||
self._flags = []
|
||||
|
||||
def __str__(self):
|
||||
attributesStr = " ".join(["%s=\"%s\"" % (name, value)
|
||||
for name, value in
|
||||
self.attributes.items()])
|
||||
if attributesStr:
|
||||
return "<%s %s>" % (self.name, attributesStr)
|
||||
else:
|
||||
return "<%s>" % (self.name)
|
||||
|
||||
def __repr__(self):
|
||||
return "<%s>" % (self.name)
|
||||
|
||||
def appendChild(self, node):
|
||||
"""Insert node as a child of the current node
|
||||
|
||||
:arg node: the node to insert
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
"""Insert data as text in the current node, positioned before the
|
||||
start of node insertBefore or to the end of the node's text.
|
||||
|
||||
:arg data: the data to insert
|
||||
|
||||
:arg insertBefore: True if you want to insert the text before the node
|
||||
and False if you want to insert it after the node
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
"""Insert node as a child of the current node, before refNode in the
|
||||
list of child nodes. Raises ValueError if refNode is not a child of
|
||||
the current node
|
||||
|
||||
:arg node: the node to insert
|
||||
|
||||
:arg refNode: the child node to insert the node before
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def removeChild(self, node):
|
||||
"""Remove node from the children of the current node
|
||||
|
||||
:arg node: the child node to remove
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def reparentChildren(self, newParent):
|
||||
"""Move all the children of the current node to newParent.
|
||||
This is needed so that trees that don't store text as nodes move the
|
||||
text in the correct way
|
||||
|
||||
:arg newParent: the node to move all this node's children to
|
||||
|
||||
"""
|
||||
# XXX - should this method be made more general?
|
||||
for child in self.childNodes:
|
||||
newParent.appendChild(child)
|
||||
self.childNodes = []
|
||||
|
||||
def cloneNode(self):
|
||||
"""Return a shallow copy of the current node i.e. a node with the same
|
||||
name and attributes but with no parent or child nodes
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def hasContent(self):
|
||||
"""Return true if the node has children or text, false otherwise
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class ActiveFormattingElements(list):
|
||||
def append(self, node):
|
||||
equalCount = 0
|
||||
if node != Marker:
|
||||
for element in self[::-1]:
|
||||
if element == Marker:
|
||||
break
|
||||
if self.nodesEqual(element, node):
|
||||
equalCount += 1
|
||||
if equalCount == 3:
|
||||
self.remove(element)
|
||||
break
|
||||
list.append(self, node)
|
||||
|
||||
def nodesEqual(self, node1, node2):
|
||||
if not node1.nameTuple == node2.nameTuple:
|
||||
return False
|
||||
|
||||
if not node1.attributes == node2.attributes:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
class TreeBuilder(object):
|
||||
"""Base treebuilder implementation
|
||||
|
||||
* documentClass - the class to use for the bottommost node of a document
|
||||
* elementClass - the class to use for HTML Elements
|
||||
* commentClass - the class to use for comments
|
||||
* doctypeClass - the class to use for doctypes
|
||||
|
||||
"""
|
||||
# pylint:disable=not-callable
|
||||
|
||||
# Document class
|
||||
documentClass = None
|
||||
|
||||
# The class to use for creating a node
|
||||
elementClass = None
|
||||
|
||||
# The class to use for creating comments
|
||||
commentClass = None
|
||||
|
||||
# The class to use for creating doctypes
|
||||
doctypeClass = None
|
||||
|
||||
# Fragment class
|
||||
fragmentClass = None
|
||||
|
||||
def __init__(self, namespaceHTMLElements):
|
||||
"""Create a TreeBuilder
|
||||
|
||||
:arg namespaceHTMLElements: whether or not to namespace HTML elements
|
||||
|
||||
"""
|
||||
if namespaceHTMLElements:
|
||||
self.defaultNamespace = "http://www.w3.org/1999/xhtml"
|
||||
else:
|
||||
self.defaultNamespace = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.openElements = []
|
||||
self.activeFormattingElements = ActiveFormattingElements()
|
||||
|
||||
# XXX - rename these to headElement, formElement
|
||||
self.headPointer = None
|
||||
self.formPointer = None
|
||||
|
||||
self.insertFromTable = False
|
||||
|
||||
self.document = self.documentClass()
|
||||
|
||||
def elementInScope(self, target, variant=None):
|
||||
|
||||
# If we pass a node in we match that. if we pass a string
|
||||
# match any node with that name
|
||||
exactNode = hasattr(target, "nameTuple")
|
||||
if not exactNode:
|
||||
if isinstance(target, text_type):
|
||||
target = (namespaces["html"], target)
|
||||
assert isinstance(target, tuple)
|
||||
|
||||
listElements, invert = listElementsMap[variant]
|
||||
|
||||
for node in reversed(self.openElements):
|
||||
if exactNode and node == target:
|
||||
return True
|
||||
elif not exactNode and node.nameTuple == target:
|
||||
return True
|
||||
elif (invert ^ (node.nameTuple in listElements)):
|
||||
return False
|
||||
|
||||
assert False # We should never reach this point
|
||||
|
||||
def reconstructActiveFormattingElements(self):
|
||||
# Within this algorithm the order of steps described in the
|
||||
# specification is not quite the same as the order of steps in the
|
||||
# code. It should still do the same though.
|
||||
|
||||
# Step 1: stop the algorithm when there's nothing to do.
|
||||
if not self.activeFormattingElements:
|
||||
return
|
||||
|
||||
# Step 2 and step 3: we start with the last element. So i is -1.
|
||||
i = len(self.activeFormattingElements) - 1
|
||||
entry = self.activeFormattingElements[i]
|
||||
if entry == Marker or entry in self.openElements:
|
||||
return
|
||||
|
||||
# Step 6
|
||||
while entry != Marker and entry not in self.openElements:
|
||||
if i == 0:
|
||||
# This will be reset to 0 below
|
||||
i = -1
|
||||
break
|
||||
i -= 1
|
||||
# Step 5: let entry be one earlier in the list.
|
||||
entry = self.activeFormattingElements[i]
|
||||
|
||||
while True:
|
||||
# Step 7
|
||||
i += 1
|
||||
|
||||
# Step 8
|
||||
entry = self.activeFormattingElements[i]
|
||||
clone = entry.cloneNode() # Mainly to get a new copy of the attributes
|
||||
|
||||
# Step 9
|
||||
element = self.insertElement({"type": "StartTag",
|
||||
"name": clone.name,
|
||||
"namespace": clone.namespace,
|
||||
"data": clone.attributes})
|
||||
|
||||
# Step 10
|
||||
self.activeFormattingElements[i] = element
|
||||
|
||||
# Step 11
|
||||
if element == self.activeFormattingElements[-1]:
|
||||
break
|
||||
|
||||
def clearActiveFormattingElements(self):
|
||||
entry = self.activeFormattingElements.pop()
|
||||
while self.activeFormattingElements and entry != Marker:
|
||||
entry = self.activeFormattingElements.pop()
|
||||
|
||||
def elementInActiveFormattingElements(self, name):
|
||||
"""Check if an element exists between the end of the active
|
||||
formatting elements and the last marker. If it does, return it, else
|
||||
return false"""
|
||||
|
||||
for item in self.activeFormattingElements[::-1]:
|
||||
# Check for Marker first because if it's a Marker it doesn't have a
|
||||
# name attribute.
|
||||
if item == Marker:
|
||||
break
|
||||
elif item.name == name:
|
||||
return item
|
||||
return False
|
||||
|
||||
def insertRoot(self, token):
|
||||
element = self.createElement(token)
|
||||
self.openElements.append(element)
|
||||
self.document.appendChild(element)
|
||||
|
||||
def insertDoctype(self, token):
|
||||
name = token["name"]
|
||||
publicId = token["publicId"]
|
||||
systemId = token["systemId"]
|
||||
|
||||
doctype = self.doctypeClass(name, publicId, systemId)
|
||||
self.document.appendChild(doctype)
|
||||
|
||||
def insertComment(self, token, parent=None):
|
||||
if parent is None:
|
||||
parent = self.openElements[-1]
|
||||
parent.appendChild(self.commentClass(token["data"]))
|
||||
|
||||
def createElement(self, token):
|
||||
"""Create an element but don't insert it anywhere"""
|
||||
name = token["name"]
|
||||
namespace = token.get("namespace", self.defaultNamespace)
|
||||
element = self.elementClass(name, namespace)
|
||||
element.attributes = token["data"]
|
||||
return element
|
||||
|
||||
def _getInsertFromTable(self):
|
||||
return self._insertFromTable
|
||||
|
||||
def _setInsertFromTable(self, value):
|
||||
"""Switch the function used to insert an element from the
|
||||
normal one to the misnested table one and back again"""
|
||||
self._insertFromTable = value
|
||||
if value:
|
||||
self.insertElement = self.insertElementTable
|
||||
else:
|
||||
self.insertElement = self.insertElementNormal
|
||||
|
||||
insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
|
||||
|
||||
def insertElementNormal(self, token):
|
||||
name = token["name"]
|
||||
assert isinstance(name, text_type), "Element %s not unicode" % name
|
||||
namespace = token.get("namespace", self.defaultNamespace)
|
||||
element = self.elementClass(name, namespace)
|
||||
element.attributes = token["data"]
|
||||
self.openElements[-1].appendChild(element)
|
||||
self.openElements.append(element)
|
||||
return element
|
||||
|
||||
def insertElementTable(self, token):
|
||||
"""Create an element and insert it into the tree"""
|
||||
element = self.createElement(token)
|
||||
if self.openElements[-1].name not in tableInsertModeElements:
|
||||
return self.insertElementNormal(token)
|
||||
else:
|
||||
# We should be in the InTable mode. This means we want to do
|
||||
# special magic element rearranging
|
||||
parent, insertBefore = self.getTableMisnestedNodePosition()
|
||||
if insertBefore is None:
|
||||
parent.appendChild(element)
|
||||
else:
|
||||
parent.insertBefore(element, insertBefore)
|
||||
self.openElements.append(element)
|
||||
return element
|
||||
|
||||
def insertText(self, data, parent=None):
|
||||
"""Insert text data."""
|
||||
if parent is None:
|
||||
parent = self.openElements[-1]
|
||||
|
||||
if (not self.insertFromTable or (self.insertFromTable and
|
||||
self.openElements[-1].name
|
||||
not in tableInsertModeElements)):
|
||||
parent.insertText(data)
|
||||
else:
|
||||
# We should be in the InTable mode. This means we want to do
|
||||
# special magic element rearranging
|
||||
parent, insertBefore = self.getTableMisnestedNodePosition()
|
||||
parent.insertText(data, insertBefore)
|
||||
|
||||
def getTableMisnestedNodePosition(self):
|
||||
"""Get the foster parent element, and sibling to insert before
|
||||
(or None) when inserting a misnested table node"""
|
||||
# The foster parent element is the one which comes before the most
|
||||
# recently opened table element
|
||||
# XXX - this is really inelegant
|
||||
lastTable = None
|
||||
fosterParent = None
|
||||
insertBefore = None
|
||||
for elm in self.openElements[::-1]:
|
||||
if elm.name == "table":
|
||||
lastTable = elm
|
||||
break
|
||||
if lastTable:
|
||||
# XXX - we should really check that this parent is actually a
|
||||
# node here
|
||||
if lastTable.parent:
|
||||
fosterParent = lastTable.parent
|
||||
insertBefore = lastTable
|
||||
else:
|
||||
fosterParent = self.openElements[
|
||||
self.openElements.index(lastTable) - 1]
|
||||
else:
|
||||
fosterParent = self.openElements[0]
|
||||
return fosterParent, insertBefore
|
||||
|
||||
def generateImpliedEndTags(self, exclude=None):
|
||||
name = self.openElements[-1].name
|
||||
# XXX td, th and tr are not actually needed
|
||||
if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and
|
||||
name != exclude):
|
||||
self.openElements.pop()
|
||||
# XXX This is not entirely what the specification says. We should
|
||||
# investigate it more closely.
|
||||
self.generateImpliedEndTags(exclude)
|
||||
|
||||
def getDocument(self):
|
||||
"""Return the final tree"""
|
||||
return self.document
|
||||
|
||||
def getFragment(self):
|
||||
"""Return the final fragment"""
|
||||
# assert self.innerHTML
|
||||
fragment = self.fragmentClass()
|
||||
self.openElements[0].reparentChildren(fragment)
|
||||
return fragment
|
||||
|
||||
def testSerializer(self, node):
|
||||
"""Serialize the subtree of node in the format required by unit tests
|
||||
|
||||
:arg node: the node from which to start serializing
|
||||
|
||||
"""
|
||||
raise NotImplementedError
|
||||
@@ -1,239 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
except ImportError: # Python 2.7
|
||||
from collections import MutableMapping
|
||||
from xml.dom import minidom, Node
|
||||
import weakref
|
||||
|
||||
from . import base
|
||||
from .. import constants
|
||||
from ..constants import namespaces
|
||||
from .._utils import moduleFactoryFactory
|
||||
|
||||
|
||||
def getDomBuilder(DomImplementation):
|
||||
Dom = DomImplementation
|
||||
|
||||
class AttrList(MutableMapping):
|
||||
def __init__(self, element):
|
||||
self.element = element
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.element.attributes.keys())
|
||||
|
||||
def __setitem__(self, name, value):
|
||||
if isinstance(name, tuple):
|
||||
raise NotImplementedError
|
||||
else:
|
||||
attr = self.element.ownerDocument.createAttribute(name)
|
||||
attr.value = value
|
||||
self.element.attributes[name] = attr
|
||||
|
||||
def __len__(self):
|
||||
return len(self.element.attributes)
|
||||
|
||||
def items(self):
|
||||
return list(self.element.attributes.items())
|
||||
|
||||
def values(self):
|
||||
return list(self.element.attributes.values())
|
||||
|
||||
def __getitem__(self, name):
|
||||
if isinstance(name, tuple):
|
||||
raise NotImplementedError
|
||||
else:
|
||||
return self.element.attributes[name].value
|
||||
|
||||
def __delitem__(self, name):
|
||||
if isinstance(name, tuple):
|
||||
raise NotImplementedError
|
||||
else:
|
||||
del self.element.attributes[name]
|
||||
|
||||
class NodeBuilder(base.Node):
|
||||
def __init__(self, element):
|
||||
base.Node.__init__(self, element.nodeName)
|
||||
self.element = element
|
||||
|
||||
namespace = property(lambda self: hasattr(self.element, "namespaceURI") and
|
||||
self.element.namespaceURI or None)
|
||||
|
||||
def appendChild(self, node):
|
||||
node.parent = self
|
||||
self.element.appendChild(node.element)
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
text = self.element.ownerDocument.createTextNode(data)
|
||||
if insertBefore:
|
||||
self.element.insertBefore(text, insertBefore.element)
|
||||
else:
|
||||
self.element.appendChild(text)
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
self.element.insertBefore(node.element, refNode.element)
|
||||
node.parent = self
|
||||
|
||||
def removeChild(self, node):
|
||||
if node.element.parentNode == self.element:
|
||||
self.element.removeChild(node.element)
|
||||
node.parent = None
|
||||
|
||||
def reparentChildren(self, newParent):
|
||||
while self.element.hasChildNodes():
|
||||
child = self.element.firstChild
|
||||
self.element.removeChild(child)
|
||||
newParent.element.appendChild(child)
|
||||
self.childNodes = []
|
||||
|
||||
def getAttributes(self):
|
||||
return AttrList(self.element)
|
||||
|
||||
def setAttributes(self, attributes):
|
||||
if attributes:
|
||||
for name, value in list(attributes.items()):
|
||||
if isinstance(name, tuple):
|
||||
if name[0] is not None:
|
||||
qualifiedName = (name[0] + ":" + name[1])
|
||||
else:
|
||||
qualifiedName = name[1]
|
||||
self.element.setAttributeNS(name[2], qualifiedName,
|
||||
value)
|
||||
else:
|
||||
self.element.setAttribute(
|
||||
name, value)
|
||||
attributes = property(getAttributes, setAttributes)
|
||||
|
||||
def cloneNode(self):
|
||||
return NodeBuilder(self.element.cloneNode(False))
|
||||
|
||||
def hasContent(self):
|
||||
return self.element.hasChildNodes()
|
||||
|
||||
def getNameTuple(self):
|
||||
if self.namespace is None:
|
||||
return namespaces["html"], self.name
|
||||
else:
|
||||
return self.namespace, self.name
|
||||
|
||||
nameTuple = property(getNameTuple)
|
||||
|
||||
class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable
|
||||
def documentClass(self):
|
||||
self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
|
||||
return weakref.proxy(self)
|
||||
|
||||
def insertDoctype(self, token):
|
||||
name = token["name"]
|
||||
publicId = token["publicId"]
|
||||
systemId = token["systemId"]
|
||||
|
||||
domimpl = Dom.getDOMImplementation()
|
||||
doctype = domimpl.createDocumentType(name, publicId, systemId)
|
||||
self.document.appendChild(NodeBuilder(doctype))
|
||||
if Dom == minidom:
|
||||
doctype.ownerDocument = self.dom
|
||||
|
||||
def elementClass(self, name, namespace=None):
|
||||
if namespace is None and self.defaultNamespace is None:
|
||||
node = self.dom.createElement(name)
|
||||
else:
|
||||
node = self.dom.createElementNS(namespace, name)
|
||||
|
||||
return NodeBuilder(node)
|
||||
|
||||
def commentClass(self, data):
|
||||
return NodeBuilder(self.dom.createComment(data))
|
||||
|
||||
def fragmentClass(self):
|
||||
return NodeBuilder(self.dom.createDocumentFragment())
|
||||
|
||||
def appendChild(self, node):
|
||||
self.dom.appendChild(node.element)
|
||||
|
||||
def testSerializer(self, element):
|
||||
return testSerializer(element)
|
||||
|
||||
def getDocument(self):
|
||||
return self.dom
|
||||
|
||||
def getFragment(self):
|
||||
return base.TreeBuilder.getFragment(self).element
|
||||
|
||||
def insertText(self, data, parent=None):
|
||||
data = data
|
||||
if parent != self:
|
||||
base.TreeBuilder.insertText(self, data, parent)
|
||||
else:
|
||||
# HACK: allow text nodes as children of the document node
|
||||
if hasattr(self.dom, '_child_node_types'):
|
||||
# pylint:disable=protected-access
|
||||
if Node.TEXT_NODE not in self.dom._child_node_types:
|
||||
self.dom._child_node_types = list(self.dom._child_node_types)
|
||||
self.dom._child_node_types.append(Node.TEXT_NODE)
|
||||
self.dom.appendChild(self.dom.createTextNode(data))
|
||||
|
||||
implementation = DomImplementation
|
||||
name = None
|
||||
|
||||
def testSerializer(element):
|
||||
element.normalize()
|
||||
rv = []
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if element.nodeType == Node.DOCUMENT_TYPE_NODE:
|
||||
if element.name:
|
||||
if element.publicId or element.systemId:
|
||||
publicId = element.publicId or ""
|
||||
systemId = element.systemId or ""
|
||||
rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
|
||||
(' ' * indent, element.name, publicId, systemId))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
|
||||
else:
|
||||
rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
|
||||
elif element.nodeType == Node.DOCUMENT_NODE:
|
||||
rv.append("#document")
|
||||
elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
|
||||
rv.append("#document-fragment")
|
||||
elif element.nodeType == Node.COMMENT_NODE:
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
|
||||
elif element.nodeType == Node.TEXT_NODE:
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
|
||||
else:
|
||||
if (hasattr(element, "namespaceURI") and
|
||||
element.namespaceURI is not None):
|
||||
name = "%s %s" % (constants.prefixes[element.namespaceURI],
|
||||
element.nodeName)
|
||||
else:
|
||||
name = element.nodeName
|
||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
||||
if element.hasAttributes():
|
||||
attributes = []
|
||||
for i in range(len(element.attributes)):
|
||||
attr = element.attributes.item(i)
|
||||
name = attr.nodeName
|
||||
value = attr.value
|
||||
ns = attr.namespaceURI
|
||||
if ns:
|
||||
name = "%s %s" % (constants.prefixes[ns], attr.localName)
|
||||
else:
|
||||
name = attr.nodeName
|
||||
attributes.append((name, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
indent += 2
|
||||
for child in element.childNodes:
|
||||
serializeElement(child, indent)
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
# The actual means to get a module!
|
||||
getDomModule = moduleFactoryFactory(getDomBuilder)
|
||||
@@ -1,343 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
# pylint:disable=protected-access
|
||||
|
||||
from pipenv.patched.notpip._vendor.six import text_type
|
||||
|
||||
import re
|
||||
|
||||
from copy import copy
|
||||
|
||||
from . import base
|
||||
from .. import _ihatexml
|
||||
from .. import constants
|
||||
from ..constants import namespaces
|
||||
from .._utils import moduleFactoryFactory
|
||||
|
||||
tag_regexp = re.compile("{([^}]*)}(.*)")
|
||||
|
||||
|
||||
def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
||||
ElementTree = ElementTreeImplementation
|
||||
ElementTreeCommentType = ElementTree.Comment("asd").tag
|
||||
|
||||
class Element(base.Node):
|
||||
def __init__(self, name, namespace=None):
|
||||
self._name = name
|
||||
self._namespace = namespace
|
||||
self._element = ElementTree.Element(self._getETreeTag(name,
|
||||
namespace))
|
||||
if namespace is None:
|
||||
self.nameTuple = namespaces["html"], self._name
|
||||
else:
|
||||
self.nameTuple = self._namespace, self._name
|
||||
self.parent = None
|
||||
self._childNodes = []
|
||||
self._flags = []
|
||||
|
||||
def _getETreeTag(self, name, namespace):
|
||||
if namespace is None:
|
||||
etree_tag = name
|
||||
else:
|
||||
etree_tag = "{%s}%s" % (namespace, name)
|
||||
return etree_tag
|
||||
|
||||
def _setName(self, name):
|
||||
self._name = name
|
||||
self._element.tag = self._getETreeTag(self._name, self._namespace)
|
||||
|
||||
def _getName(self):
|
||||
return self._name
|
||||
|
||||
name = property(_getName, _setName)
|
||||
|
||||
def _setNamespace(self, namespace):
|
||||
self._namespace = namespace
|
||||
self._element.tag = self._getETreeTag(self._name, self._namespace)
|
||||
|
||||
def _getNamespace(self):
|
||||
return self._namespace
|
||||
|
||||
namespace = property(_getNamespace, _setNamespace)
|
||||
|
||||
def _getAttributes(self):
|
||||
return self._element.attrib
|
||||
|
||||
def _setAttributes(self, attributes):
|
||||
el_attrib = self._element.attrib
|
||||
el_attrib.clear()
|
||||
if attributes:
|
||||
# calling .items _always_ allocates, and the above truthy check is cheaper than the
|
||||
# allocation on average
|
||||
for key, value in attributes.items():
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], key[1])
|
||||
else:
|
||||
name = key
|
||||
el_attrib[name] = value
|
||||
|
||||
attributes = property(_getAttributes, _setAttributes)
|
||||
|
||||
def _getChildNodes(self):
|
||||
return self._childNodes
|
||||
|
||||
def _setChildNodes(self, value):
|
||||
del self._element[:]
|
||||
self._childNodes = []
|
||||
for element in value:
|
||||
self.insertChild(element)
|
||||
|
||||
childNodes = property(_getChildNodes, _setChildNodes)
|
||||
|
||||
def hasContent(self):
|
||||
"""Return true if the node has children or text"""
|
||||
return bool(self._element.text or len(self._element))
|
||||
|
||||
def appendChild(self, node):
|
||||
self._childNodes.append(node)
|
||||
self._element.append(node._element)
|
||||
node.parent = self
|
||||
|
||||
def insertBefore(self, node, refNode):
|
||||
index = list(self._element).index(refNode._element)
|
||||
self._element.insert(index, node._element)
|
||||
node.parent = self
|
||||
|
||||
def removeChild(self, node):
|
||||
self._childNodes.remove(node)
|
||||
self._element.remove(node._element)
|
||||
node.parent = None
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
if not(len(self._element)):
|
||||
if not self._element.text:
|
||||
self._element.text = ""
|
||||
self._element.text += data
|
||||
elif insertBefore is None:
|
||||
# Insert the text as the tail of the last child element
|
||||
if not self._element[-1].tail:
|
||||
self._element[-1].tail = ""
|
||||
self._element[-1].tail += data
|
||||
else:
|
||||
# Insert the text before the specified node
|
||||
children = list(self._element)
|
||||
index = children.index(insertBefore._element)
|
||||
if index > 0:
|
||||
if not self._element[index - 1].tail:
|
||||
self._element[index - 1].tail = ""
|
||||
self._element[index - 1].tail += data
|
||||
else:
|
||||
if not self._element.text:
|
||||
self._element.text = ""
|
||||
self._element.text += data
|
||||
|
||||
def cloneNode(self):
|
||||
element = type(self)(self.name, self.namespace)
|
||||
if self._element.attrib:
|
||||
element._element.attrib = copy(self._element.attrib)
|
||||
return element
|
||||
|
||||
def reparentChildren(self, newParent):
|
||||
if newParent.childNodes:
|
||||
newParent.childNodes[-1]._element.tail += self._element.text
|
||||
else:
|
||||
if not newParent._element.text:
|
||||
newParent._element.text = ""
|
||||
if self._element.text is not None:
|
||||
newParent._element.text += self._element.text
|
||||
self._element.text = ""
|
||||
base.Node.reparentChildren(self, newParent)
|
||||
|
||||
class Comment(Element):
|
||||
def __init__(self, data):
|
||||
# Use the superclass constructor to set all properties on the
|
||||
# wrapper element
|
||||
self._element = ElementTree.Comment(data)
|
||||
self.parent = None
|
||||
self._childNodes = []
|
||||
self._flags = []
|
||||
|
||||
def _getData(self):
|
||||
return self._element.text
|
||||
|
||||
def _setData(self, value):
|
||||
self._element.text = value
|
||||
|
||||
data = property(_getData, _setData)
|
||||
|
||||
class DocumentType(Element):
|
||||
def __init__(self, name, publicId, systemId):
|
||||
Element.__init__(self, "<!DOCTYPE>")
|
||||
self._element.text = name
|
||||
self.publicId = publicId
|
||||
self.systemId = systemId
|
||||
|
||||
def _getPublicId(self):
|
||||
return self._element.get("publicId", "")
|
||||
|
||||
def _setPublicId(self, value):
|
||||
if value is not None:
|
||||
self._element.set("publicId", value)
|
||||
|
||||
publicId = property(_getPublicId, _setPublicId)
|
||||
|
||||
def _getSystemId(self):
|
||||
return self._element.get("systemId", "")
|
||||
|
||||
def _setSystemId(self, value):
|
||||
if value is not None:
|
||||
self._element.set("systemId", value)
|
||||
|
||||
systemId = property(_getSystemId, _setSystemId)
|
||||
|
||||
class Document(Element):
|
||||
def __init__(self):
|
||||
Element.__init__(self, "DOCUMENT_ROOT")
|
||||
|
||||
class DocumentFragment(Element):
|
||||
def __init__(self):
|
||||
Element.__init__(self, "DOCUMENT_FRAGMENT")
|
||||
|
||||
def testSerializer(element):
|
||||
rv = []
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if not(hasattr(element, "tag")):
|
||||
element = element.getroot()
|
||||
if element.tag == "<!DOCTYPE>":
|
||||
if element.get("publicId") or element.get("systemId"):
|
||||
publicId = element.get("publicId") or ""
|
||||
systemId = element.get("systemId") or ""
|
||||
rv.append("""<!DOCTYPE %s "%s" "%s">""" %
|
||||
(element.text, publicId, systemId))
|
||||
else:
|
||||
rv.append("<!DOCTYPE %s>" % (element.text,))
|
||||
elif element.tag == "DOCUMENT_ROOT":
|
||||
rv.append("#document")
|
||||
if element.text is not None:
|
||||
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
|
||||
if element.tail is not None:
|
||||
raise TypeError("Document node cannot have tail")
|
||||
if hasattr(element, "attrib") and len(element.attrib):
|
||||
raise TypeError("Document node cannot have attributes")
|
||||
elif element.tag == ElementTreeCommentType:
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
|
||||
else:
|
||||
assert isinstance(element.tag, text_type), \
|
||||
"Expected unicode, got %s, %s" % (type(element.tag), element.tag)
|
||||
nsmatch = tag_regexp.match(element.tag)
|
||||
|
||||
if nsmatch is None:
|
||||
name = element.tag
|
||||
else:
|
||||
ns, name = nsmatch.groups()
|
||||
prefix = constants.prefixes[ns]
|
||||
name = "%s %s" % (prefix, name)
|
||||
rv.append("|%s<%s>" % (' ' * indent, name))
|
||||
|
||||
if hasattr(element, "attrib"):
|
||||
attributes = []
|
||||
for name, value in element.attrib.items():
|
||||
nsmatch = tag_regexp.match(name)
|
||||
if nsmatch is not None:
|
||||
ns, name = nsmatch.groups()
|
||||
prefix = constants.prefixes[ns]
|
||||
attr_string = "%s %s" % (prefix, name)
|
||||
else:
|
||||
attr_string = name
|
||||
attributes.append((attr_string, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
if element.text:
|
||||
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
|
||||
indent += 2
|
||||
for child in element:
|
||||
serializeElement(child, indent)
|
||||
if element.tail:
|
||||
rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
def tostring(element): # pylint:disable=unused-variable
|
||||
"""Serialize an element and its child nodes to a string"""
|
||||
rv = []
|
||||
filter = _ihatexml.InfosetFilter()
|
||||
|
||||
def serializeElement(element):
|
||||
if isinstance(element, ElementTree.ElementTree):
|
||||
element = element.getroot()
|
||||
|
||||
if element.tag == "<!DOCTYPE>":
|
||||
if element.get("publicId") or element.get("systemId"):
|
||||
publicId = element.get("publicId") or ""
|
||||
systemId = element.get("systemId") or ""
|
||||
rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
|
||||
(element.text, publicId, systemId))
|
||||
else:
|
||||
rv.append("<!DOCTYPE %s>" % (element.text,))
|
||||
elif element.tag == "DOCUMENT_ROOT":
|
||||
if element.text is not None:
|
||||
rv.append(element.text)
|
||||
if element.tail is not None:
|
||||
raise TypeError("Document node cannot have tail")
|
||||
if hasattr(element, "attrib") and len(element.attrib):
|
||||
raise TypeError("Document node cannot have attributes")
|
||||
|
||||
for child in element:
|
||||
serializeElement(child)
|
||||
|
||||
elif element.tag == ElementTreeCommentType:
|
||||
rv.append("<!--%s-->" % (element.text,))
|
||||
else:
|
||||
# This is assumed to be an ordinary element
|
||||
if not element.attrib:
|
||||
rv.append("<%s>" % (filter.fromXmlName(element.tag),))
|
||||
else:
|
||||
attr = " ".join(["%s=\"%s\"" % (
|
||||
filter.fromXmlName(name), value)
|
||||
for name, value in element.attrib.items()])
|
||||
rv.append("<%s %s>" % (element.tag, attr))
|
||||
if element.text:
|
||||
rv.append(element.text)
|
||||
|
||||
for child in element:
|
||||
serializeElement(child)
|
||||
|
||||
rv.append("</%s>" % (element.tag,))
|
||||
|
||||
if element.tail:
|
||||
rv.append(element.tail)
|
||||
|
||||
serializeElement(element)
|
||||
|
||||
return "".join(rv)
|
||||
|
||||
class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable
|
||||
documentClass = Document
|
||||
doctypeClass = DocumentType
|
||||
elementClass = Element
|
||||
commentClass = Comment
|
||||
fragmentClass = DocumentFragment
|
||||
implementation = ElementTreeImplementation
|
||||
|
||||
def testSerializer(self, element):
|
||||
return testSerializer(element)
|
||||
|
||||
def getDocument(self):
|
||||
if fullTree:
|
||||
return self.document._element
|
||||
else:
|
||||
if self.defaultNamespace is not None:
|
||||
return self.document._element.find(
|
||||
"{%s}html" % self.defaultNamespace)
|
||||
else:
|
||||
return self.document._element.find("html")
|
||||
|
||||
def getFragment(self):
|
||||
return base.TreeBuilder.getFragment(self)._element
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
getETreeModule = moduleFactoryFactory(getETreeBuilder)
|
||||
@@ -1,392 +0,0 @@
|
||||
"""Module for supporting the lxml.etree library. The idea here is to use as much
|
||||
of the native library as possible, without using fragile hacks like custom element
|
||||
names that break between releases. The downside of this is that we cannot represent
|
||||
all possible trees; specifically the following are known to cause problems:
|
||||
|
||||
Text or comments as siblings of the root element
|
||||
Docypes with no name
|
||||
|
||||
When any of these things occur, we emit a DataLossWarning
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
# pylint:disable=protected-access
|
||||
|
||||
import warnings
|
||||
import re
|
||||
import sys
|
||||
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
except ImportError:
|
||||
from collections import MutableMapping
|
||||
|
||||
from . import base
|
||||
from ..constants import DataLossWarning
|
||||
from .. import constants
|
||||
from . import etree as etree_builders
|
||||
from .. import _ihatexml
|
||||
|
||||
import lxml.etree as etree
|
||||
from pipenv.patched.notpip._vendor.six import PY3, binary_type
|
||||
|
||||
|
||||
fullTree = True
|
||||
tag_regexp = re.compile("{([^}]*)}(.*)")
|
||||
|
||||
comment_type = etree.Comment("asd").tag
|
||||
|
||||
|
||||
class DocumentType(object):
|
||||
def __init__(self, name, publicId, systemId):
|
||||
self.name = name
|
||||
self.publicId = publicId
|
||||
self.systemId = systemId
|
||||
|
||||
|
||||
class Document(object):
|
||||
def __init__(self):
|
||||
self._elementTree = None
|
||||
self._childNodes = []
|
||||
|
||||
def appendChild(self, element):
|
||||
last = self._elementTree.getroot()
|
||||
for last in self._elementTree.getroot().itersiblings():
|
||||
pass
|
||||
|
||||
last.addnext(element._element)
|
||||
|
||||
def _getChildNodes(self):
|
||||
return self._childNodes
|
||||
|
||||
childNodes = property(_getChildNodes)
|
||||
|
||||
|
||||
def testSerializer(element):
|
||||
rv = []
|
||||
infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
||||
|
||||
def serializeElement(element, indent=0):
|
||||
if not hasattr(element, "tag"):
|
||||
if hasattr(element, "getroot"):
|
||||
# Full tree case
|
||||
rv.append("#document")
|
||||
if element.docinfo.internalDTD:
|
||||
if not (element.docinfo.public_id or
|
||||
element.docinfo.system_url):
|
||||
dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
|
||||
else:
|
||||
dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
|
||||
element.docinfo.root_name,
|
||||
element.docinfo.public_id,
|
||||
element.docinfo.system_url)
|
||||
rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
|
||||
next_element = element.getroot()
|
||||
while next_element.getprevious() is not None:
|
||||
next_element = next_element.getprevious()
|
||||
while next_element is not None:
|
||||
serializeElement(next_element, indent + 2)
|
||||
next_element = next_element.getnext()
|
||||
elif isinstance(element, str) or isinstance(element, bytes):
|
||||
# Text in a fragment
|
||||
assert isinstance(element, str) or sys.version_info[0] == 2
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element))
|
||||
else:
|
||||
# Fragment case
|
||||
rv.append("#document-fragment")
|
||||
for next_element in element:
|
||||
serializeElement(next_element, indent + 2)
|
||||
elif element.tag == comment_type:
|
||||
rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
|
||||
if hasattr(element, "tail") and element.tail:
|
||||
rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
|
||||
else:
|
||||
assert isinstance(element, etree._Element)
|
||||
nsmatch = etree_builders.tag_regexp.match(element.tag)
|
||||
if nsmatch is not None:
|
||||
ns = nsmatch.group(1)
|
||||
tag = nsmatch.group(2)
|
||||
prefix = constants.prefixes[ns]
|
||||
rv.append("|%s<%s %s>" % (' ' * indent, prefix,
|
||||
infosetFilter.fromXmlName(tag)))
|
||||
else:
|
||||
rv.append("|%s<%s>" % (' ' * indent,
|
||||
infosetFilter.fromXmlName(element.tag)))
|
||||
|
||||
if hasattr(element, "attrib"):
|
||||
attributes = []
|
||||
for name, value in element.attrib.items():
|
||||
nsmatch = tag_regexp.match(name)
|
||||
if nsmatch is not None:
|
||||
ns, name = nsmatch.groups()
|
||||
name = infosetFilter.fromXmlName(name)
|
||||
prefix = constants.prefixes[ns]
|
||||
attr_string = "%s %s" % (prefix, name)
|
||||
else:
|
||||
attr_string = infosetFilter.fromXmlName(name)
|
||||
attributes.append((attr_string, value))
|
||||
|
||||
for name, value in sorted(attributes):
|
||||
rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
|
||||
|
||||
if element.text:
|
||||
rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
|
||||
indent += 2
|
||||
for child in element:
|
||||
serializeElement(child, indent)
|
||||
if hasattr(element, "tail") and element.tail:
|
||||
rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
|
||||
serializeElement(element, 0)
|
||||
|
||||
return "\n".join(rv)
|
||||
|
||||
|
||||
def tostring(element):
|
||||
"""Serialize an element and its child nodes to a string"""
|
||||
rv = []
|
||||
|
||||
def serializeElement(element):
|
||||
if not hasattr(element, "tag"):
|
||||
if element.docinfo.internalDTD:
|
||||
if element.docinfo.doctype:
|
||||
dtd_str = element.docinfo.doctype
|
||||
else:
|
||||
dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
|
||||
rv.append(dtd_str)
|
||||
serializeElement(element.getroot())
|
||||
|
||||
elif element.tag == comment_type:
|
||||
rv.append("<!--%s-->" % (element.text,))
|
||||
|
||||
else:
|
||||
# This is assumed to be an ordinary element
|
||||
if not element.attrib:
|
||||
rv.append("<%s>" % (element.tag,))
|
||||
else:
|
||||
attr = " ".join(["%s=\"%s\"" % (name, value)
|
||||
for name, value in element.attrib.items()])
|
||||
rv.append("<%s %s>" % (element.tag, attr))
|
||||
if element.text:
|
||||
rv.append(element.text)
|
||||
|
||||
for child in element:
|
||||
serializeElement(child)
|
||||
|
||||
rv.append("</%s>" % (element.tag,))
|
||||
|
||||
if hasattr(element, "tail") and element.tail:
|
||||
rv.append(element.tail)
|
||||
|
||||
serializeElement(element)
|
||||
|
||||
return "".join(rv)
|
||||
|
||||
|
||||
class TreeBuilder(base.TreeBuilder):
|
||||
documentClass = Document
|
||||
doctypeClass = DocumentType
|
||||
elementClass = None
|
||||
commentClass = None
|
||||
fragmentClass = Document
|
||||
implementation = etree
|
||||
|
||||
def __init__(self, namespaceHTMLElements, fullTree=False):
|
||||
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
|
||||
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
||||
self.namespaceHTMLElements = namespaceHTMLElements
|
||||
|
||||
class Attributes(MutableMapping):
|
||||
def __init__(self, element):
|
||||
self._element = element
|
||||
|
||||
def _coerceKey(self, key):
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
|
||||
else:
|
||||
name = infosetFilter.coerceAttribute(key)
|
||||
return name
|
||||
|
||||
def __getitem__(self, key):
|
||||
value = self._element._element.attrib[self._coerceKey(key)]
|
||||
if not PY3 and isinstance(value, binary_type):
|
||||
value = value.decode("ascii")
|
||||
return value
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self._element._element.attrib[self._coerceKey(key)] = value
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self._element._element.attrib[self._coerceKey(key)]
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._element._element.attrib)
|
||||
|
||||
def __len__(self):
|
||||
return len(self._element._element.attrib)
|
||||
|
||||
def clear(self):
|
||||
return self._element._element.attrib.clear()
|
||||
|
||||
class Element(builder.Element):
|
||||
def __init__(self, name, namespace):
|
||||
name = infosetFilter.coerceElement(name)
|
||||
builder.Element.__init__(self, name, namespace=namespace)
|
||||
self._attributes = Attributes(self)
|
||||
|
||||
def _setName(self, name):
|
||||
self._name = infosetFilter.coerceElement(name)
|
||||
self._element.tag = self._getETreeTag(
|
||||
self._name, self._namespace)
|
||||
|
||||
def _getName(self):
|
||||
return infosetFilter.fromXmlName(self._name)
|
||||
|
||||
name = property(_getName, _setName)
|
||||
|
||||
def _getAttributes(self):
|
||||
return self._attributes
|
||||
|
||||
def _setAttributes(self, value):
|
||||
attributes = self.attributes
|
||||
attributes.clear()
|
||||
attributes.update(value)
|
||||
|
||||
attributes = property(_getAttributes, _setAttributes)
|
||||
|
||||
def insertText(self, data, insertBefore=None):
|
||||
data = infosetFilter.coerceCharacters(data)
|
||||
builder.Element.insertText(self, data, insertBefore)
|
||||
|
||||
def cloneNode(self):
|
||||
element = type(self)(self.name, self.namespace)
|
||||
if self._element.attrib:
|
||||
element._element.attrib.update(self._element.attrib)
|
||||
return element
|
||||
|
||||
class Comment(builder.Comment):
|
||||
def __init__(self, data):
|
||||
data = infosetFilter.coerceComment(data)
|
||||
builder.Comment.__init__(self, data)
|
||||
|
||||
def _setData(self, data):
|
||||
data = infosetFilter.coerceComment(data)
|
||||
self._element.text = data
|
||||
|
||||
def _getData(self):
|
||||
return self._element.text
|
||||
|
||||
data = property(_getData, _setData)
|
||||
|
||||
self.elementClass = Element
|
||||
self.commentClass = Comment
|
||||
# self.fragmentClass = builder.DocumentFragment
|
||||
base.TreeBuilder.__init__(self, namespaceHTMLElements)
|
||||
|
||||
def reset(self):
|
||||
base.TreeBuilder.reset(self)
|
||||
self.insertComment = self.insertCommentInitial
|
||||
self.initial_comments = []
|
||||
self.doctype = None
|
||||
|
||||
def testSerializer(self, element):
|
||||
return testSerializer(element)
|
||||
|
||||
def getDocument(self):
|
||||
if fullTree:
|
||||
return self.document._elementTree
|
||||
else:
|
||||
return self.document._elementTree.getroot()
|
||||
|
||||
def getFragment(self):
|
||||
fragment = []
|
||||
element = self.openElements[0]._element
|
||||
if element.text:
|
||||
fragment.append(element.text)
|
||||
fragment.extend(list(element))
|
||||
if element.tail:
|
||||
fragment.append(element.tail)
|
||||
return fragment
|
||||
|
||||
def insertDoctype(self, token):
|
||||
name = token["name"]
|
||||
publicId = token["publicId"]
|
||||
systemId = token["systemId"]
|
||||
|
||||
if not name:
|
||||
warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
|
||||
self.doctype = None
|
||||
else:
|
||||
coercedName = self.infosetFilter.coerceElement(name)
|
||||
if coercedName != name:
|
||||
warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)
|
||||
|
||||
doctype = self.doctypeClass(coercedName, publicId, systemId)
|
||||
self.doctype = doctype
|
||||
|
||||
def insertCommentInitial(self, data, parent=None):
|
||||
assert parent is None or parent is self.document
|
||||
assert self.document._elementTree is None
|
||||
self.initial_comments.append(data)
|
||||
|
||||
def insertCommentMain(self, data, parent=None):
|
||||
if (parent == self.document and
|
||||
self.document._elementTree.getroot()[-1].tag == comment_type):
|
||||
warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
|
||||
super(TreeBuilder, self).insertComment(data, parent)
|
||||
|
||||
def insertRoot(self, token):
|
||||
# Because of the way libxml2 works, it doesn't seem to be possible to
|
||||
# alter information like the doctype after the tree has been parsed.
|
||||
# Therefore we need to use the built-in parser to create our initial
|
||||
# tree, after which we can add elements like normal
|
||||
docStr = ""
|
||||
if self.doctype:
|
||||
assert self.doctype.name
|
||||
docStr += "<!DOCTYPE %s" % self.doctype.name
|
||||
if (self.doctype.publicId is not None or
|
||||
self.doctype.systemId is not None):
|
||||
docStr += (' PUBLIC "%s" ' %
|
||||
(self.infosetFilter.coercePubid(self.doctype.publicId or "")))
|
||||
if self.doctype.systemId:
|
||||
sysid = self.doctype.systemId
|
||||
if sysid.find("'") >= 0 and sysid.find('"') >= 0:
|
||||
warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
|
||||
sysid = sysid.replace("'", 'U00027')
|
||||
if sysid.find("'") >= 0:
|
||||
docStr += '"%s"' % sysid
|
||||
else:
|
||||
docStr += "'%s'" % sysid
|
||||
else:
|
||||
docStr += "''"
|
||||
docStr += ">"
|
||||
if self.doctype.name != token["name"]:
|
||||
warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
|
||||
docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
|
||||
root = etree.fromstring(docStr)
|
||||
|
||||
# Append the initial comments:
|
||||
for comment_token in self.initial_comments:
|
||||
comment = self.commentClass(comment_token["data"])
|
||||
root.addprevious(comment._element)
|
||||
|
||||
# Create the root document and add the ElementTree to it
|
||||
self.document = self.documentClass()
|
||||
self.document._elementTree = root.getroottree()
|
||||
|
||||
# Give the root element the right name
|
||||
name = token["name"]
|
||||
namespace = token.get("namespace", self.defaultNamespace)
|
||||
if namespace is None:
|
||||
etree_tag = name
|
||||
else:
|
||||
etree_tag = "{%s}%s" % (namespace, name)
|
||||
root.tag = etree_tag
|
||||
|
||||
# Add the root element to the internal child/open data structures
|
||||
root_element = self.elementClass(name, namespace)
|
||||
root_element._element = root
|
||||
self.document._childNodes.append(root_element)
|
||||
self.openElements.append(root_element)
|
||||
|
||||
# Reset to the default insert comment function
|
||||
self.insertComment = self.insertCommentMain
|
||||
@@ -1,154 +0,0 @@
|
||||
"""A collection of modules for iterating through different kinds of
|
||||
tree, generating tokens identical to those produced by the tokenizer
|
||||
module.
|
||||
|
||||
To create a tree walker for a new type of tree, you need to
|
||||
implement a tree walker object (called TreeWalker by convention) that
|
||||
implements a 'serialize' method which takes a tree as sole argument and
|
||||
returns an iterator which generates tokens.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .. import constants
|
||||
from .._utils import default_etree
|
||||
|
||||
__all__ = ["getTreeWalker", "pprint"]
|
||||
|
||||
treeWalkerCache = {}
|
||||
|
||||
|
||||
def getTreeWalker(treeType, implementation=None, **kwargs):
|
||||
"""Get a TreeWalker class for various types of tree with built-in support
|
||||
|
||||
:arg str treeType: the name of the tree type required (case-insensitive).
|
||||
Supported values are:
|
||||
|
||||
* "dom": The xml.dom.minidom DOM implementation
|
||||
* "etree": A generic walker for tree implementations exposing an
|
||||
elementtree-like interface (known to work with ElementTree,
|
||||
cElementTree and lxml.etree).
|
||||
* "lxml": Optimized walker for lxml.etree
|
||||
* "genshi": a Genshi stream
|
||||
|
||||
:arg implementation: A module implementing the tree type e.g.
|
||||
xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
|
||||
tree type only).
|
||||
|
||||
:arg kwargs: keyword arguments passed to the etree walker--for other
|
||||
walkers, this has no effect
|
||||
|
||||
:returns: a TreeWalker class
|
||||
|
||||
"""
|
||||
|
||||
treeType = treeType.lower()
|
||||
if treeType not in treeWalkerCache:
|
||||
if treeType == "dom":
|
||||
from . import dom
|
||||
treeWalkerCache[treeType] = dom.TreeWalker
|
||||
elif treeType == "genshi":
|
||||
from . import genshi
|
||||
treeWalkerCache[treeType] = genshi.TreeWalker
|
||||
elif treeType == "lxml":
|
||||
from . import etree_lxml
|
||||
treeWalkerCache[treeType] = etree_lxml.TreeWalker
|
||||
elif treeType == "etree":
|
||||
from . import etree
|
||||
if implementation is None:
|
||||
implementation = default_etree
|
||||
# XXX: NEVER cache here, caching is done in the etree submodule
|
||||
return etree.getETreeModule(implementation, **kwargs).TreeWalker
|
||||
return treeWalkerCache.get(treeType)
|
||||
|
||||
|
||||
def concatenateCharacterTokens(tokens):
|
||||
pendingCharacters = []
|
||||
for token in tokens:
|
||||
type = token["type"]
|
||||
if type in ("Characters", "SpaceCharacters"):
|
||||
pendingCharacters.append(token["data"])
|
||||
else:
|
||||
if pendingCharacters:
|
||||
yield {"type": "Characters", "data": "".join(pendingCharacters)}
|
||||
pendingCharacters = []
|
||||
yield token
|
||||
if pendingCharacters:
|
||||
yield {"type": "Characters", "data": "".join(pendingCharacters)}
|
||||
|
||||
|
||||
def pprint(walker):
|
||||
"""Pretty printer for tree walkers
|
||||
|
||||
Takes a TreeWalker instance and pretty prints the output of walking the tree.
|
||||
|
||||
:arg walker: a TreeWalker instance
|
||||
|
||||
"""
|
||||
output = []
|
||||
indent = 0
|
||||
for token in concatenateCharacterTokens(walker):
|
||||
type = token["type"]
|
||||
if type in ("StartTag", "EmptyTag"):
|
||||
# tag name
|
||||
if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
|
||||
if token["namespace"] in constants.prefixes:
|
||||
ns = constants.prefixes[token["namespace"]]
|
||||
else:
|
||||
ns = token["namespace"]
|
||||
name = "%s %s" % (ns, token["name"])
|
||||
else:
|
||||
name = token["name"]
|
||||
output.append("%s<%s>" % (" " * indent, name))
|
||||
indent += 2
|
||||
# attributes (sorted for consistent ordering)
|
||||
attrs = token["data"]
|
||||
for (namespace, localname), value in sorted(attrs.items()):
|
||||
if namespace:
|
||||
if namespace in constants.prefixes:
|
||||
ns = constants.prefixes[namespace]
|
||||
else:
|
||||
ns = namespace
|
||||
name = "%s %s" % (ns, localname)
|
||||
else:
|
||||
name = localname
|
||||
output.append("%s%s=\"%s\"" % (" " * indent, name, value))
|
||||
# self-closing
|
||||
if type == "EmptyTag":
|
||||
indent -= 2
|
||||
|
||||
elif type == "EndTag":
|
||||
indent -= 2
|
||||
|
||||
elif type == "Comment":
|
||||
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
|
||||
|
||||
elif type == "Doctype":
|
||||
if token["name"]:
|
||||
if token["publicId"]:
|
||||
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
|
||||
(" " * indent,
|
||||
token["name"],
|
||||
token["publicId"],
|
||||
token["systemId"] if token["systemId"] else ""))
|
||||
elif token["systemId"]:
|
||||
output.append("""%s<!DOCTYPE %s "" "%s">""" %
|
||||
(" " * indent,
|
||||
token["name"],
|
||||
token["systemId"]))
|
||||
else:
|
||||
output.append("%s<!DOCTYPE %s>" % (" " * indent,
|
||||
token["name"]))
|
||||
else:
|
||||
output.append("%s<!DOCTYPE >" % (" " * indent,))
|
||||
|
||||
elif type == "Characters":
|
||||
output.append("%s\"%s\"" % (" " * indent, token["data"]))
|
||||
|
||||
elif type == "SpaceCharacters":
|
||||
assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
|
||||
|
||||
else:
|
||||
raise ValueError("Unknown token type, %s" % type)
|
||||
|
||||
return "\n".join(output)
|
||||
@@ -1,252 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from xml.dom import Node
|
||||
from ..constants import namespaces, voidElements, spaceCharacters
|
||||
|
||||
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
|
||||
"TreeWalker", "NonRecursiveTreeWalker"]
|
||||
|
||||
DOCUMENT = Node.DOCUMENT_NODE
|
||||
DOCTYPE = Node.DOCUMENT_TYPE_NODE
|
||||
TEXT = Node.TEXT_NODE
|
||||
ELEMENT = Node.ELEMENT_NODE
|
||||
COMMENT = Node.COMMENT_NODE
|
||||
ENTITY = Node.ENTITY_NODE
|
||||
UNKNOWN = "<#UNKNOWN#>"
|
||||
|
||||
spaceCharacters = "".join(spaceCharacters)
|
||||
|
||||
|
||||
class TreeWalker(object):
|
||||
"""Walks a tree yielding tokens
|
||||
|
||||
Tokens are dicts that all have a ``type`` field specifying the type of the
|
||||
token.
|
||||
|
||||
"""
|
||||
def __init__(self, tree):
|
||||
"""Creates a TreeWalker
|
||||
|
||||
:arg tree: the tree to walk
|
||||
|
||||
"""
|
||||
self.tree = tree
|
||||
|
||||
def __iter__(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def error(self, msg):
|
||||
"""Generates an error token with the given message
|
||||
|
||||
:arg msg: the error message
|
||||
|
||||
:returns: SerializeError token
|
||||
|
||||
"""
|
||||
return {"type": "SerializeError", "data": msg}
|
||||
|
||||
def emptyTag(self, namespace, name, attrs, hasChildren=False):
|
||||
"""Generates an EmptyTag token
|
||||
|
||||
:arg namespace: the namespace of the token--can be ``None``
|
||||
|
||||
:arg name: the name of the element
|
||||
|
||||
:arg attrs: the attributes of the element as a dict
|
||||
|
||||
:arg hasChildren: whether or not to yield a SerializationError because
|
||||
this tag shouldn't have children
|
||||
|
||||
:returns: EmptyTag token
|
||||
|
||||
"""
|
||||
yield {"type": "EmptyTag", "name": name,
|
||||
"namespace": namespace,
|
||||
"data": attrs}
|
||||
if hasChildren:
|
||||
yield self.error("Void element has children")
|
||||
|
||||
def startTag(self, namespace, name, attrs):
|
||||
"""Generates a StartTag token
|
||||
|
||||
:arg namespace: the namespace of the token--can be ``None``
|
||||
|
||||
:arg name: the name of the element
|
||||
|
||||
:arg attrs: the attributes of the element as a dict
|
||||
|
||||
:returns: StartTag token
|
||||
|
||||
"""
|
||||
return {"type": "StartTag",
|
||||
"name": name,
|
||||
"namespace": namespace,
|
||||
"data": attrs}
|
||||
|
||||
def endTag(self, namespace, name):
|
||||
"""Generates an EndTag token
|
||||
|
||||
:arg namespace: the namespace of the token--can be ``None``
|
||||
|
||||
:arg name: the name of the element
|
||||
|
||||
:returns: EndTag token
|
||||
|
||||
"""
|
||||
return {"type": "EndTag",
|
||||
"name": name,
|
||||
"namespace": namespace}
|
||||
|
||||
def text(self, data):
|
||||
"""Generates SpaceCharacters and Characters tokens
|
||||
|
||||
Depending on what's in the data, this generates one or more
|
||||
``SpaceCharacters`` and ``Characters`` tokens.
|
||||
|
||||
For example:
|
||||
|
||||
>>> from html5lib.treewalkers.base import TreeWalker
|
||||
>>> # Give it an empty tree just so it instantiates
|
||||
>>> walker = TreeWalker([])
|
||||
>>> list(walker.text(''))
|
||||
[]
|
||||
>>> list(walker.text(' '))
|
||||
[{u'data': ' ', u'type': u'SpaceCharacters'}]
|
||||
>>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE
|
||||
[{u'data': ' ', u'type': u'SpaceCharacters'},
|
||||
{u'data': u'abc', u'type': u'Characters'},
|
||||
{u'data': u' ', u'type': u'SpaceCharacters'}]
|
||||
|
||||
:arg data: the text data
|
||||
|
||||
:returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
|
||||
|
||||
"""
|
||||
data = data
|
||||
middle = data.lstrip(spaceCharacters)
|
||||
left = data[:len(data) - len(middle)]
|
||||
if left:
|
||||
yield {"type": "SpaceCharacters", "data": left}
|
||||
data = middle
|
||||
middle = data.rstrip(spaceCharacters)
|
||||
right = data[len(middle):]
|
||||
if middle:
|
||||
yield {"type": "Characters", "data": middle}
|
||||
if right:
|
||||
yield {"type": "SpaceCharacters", "data": right}
|
||||
|
||||
def comment(self, data):
|
||||
"""Generates a Comment token
|
||||
|
||||
:arg data: the comment
|
||||
|
||||
:returns: Comment token
|
||||
|
||||
"""
|
||||
return {"type": "Comment", "data": data}
|
||||
|
||||
def doctype(self, name, publicId=None, systemId=None):
|
||||
"""Generates a Doctype token
|
||||
|
||||
:arg name:
|
||||
|
||||
:arg publicId:
|
||||
|
||||
:arg systemId:
|
||||
|
||||
:returns: the Doctype token
|
||||
|
||||
"""
|
||||
return {"type": "Doctype",
|
||||
"name": name,
|
||||
"publicId": publicId,
|
||||
"systemId": systemId}
|
||||
|
||||
def entity(self, name):
|
||||
"""Generates an Entity token
|
||||
|
||||
:arg name: the entity name
|
||||
|
||||
:returns: an Entity token
|
||||
|
||||
"""
|
||||
return {"type": "Entity", "name": name}
|
||||
|
||||
def unknown(self, nodeType):
|
||||
"""Handles unknown node types"""
|
||||
return self.error("Unknown node type: " + nodeType)
|
||||
|
||||
|
||||
class NonRecursiveTreeWalker(TreeWalker):
|
||||
def getNodeDetails(self, node):
|
||||
raise NotImplementedError
|
||||
|
||||
def getFirstChild(self, node):
|
||||
raise NotImplementedError
|
||||
|
||||
def getNextSibling(self, node):
|
||||
raise NotImplementedError
|
||||
|
||||
def getParentNode(self, node):
|
||||
raise NotImplementedError
|
||||
|
||||
def __iter__(self):
|
||||
currentNode = self.tree
|
||||
while currentNode is not None:
|
||||
details = self.getNodeDetails(currentNode)
|
||||
type, details = details[0], details[1:]
|
||||
hasChildren = False
|
||||
|
||||
if type == DOCTYPE:
|
||||
yield self.doctype(*details)
|
||||
|
||||
elif type == TEXT:
|
||||
for token in self.text(*details):
|
||||
yield token
|
||||
|
||||
elif type == ELEMENT:
|
||||
namespace, name, attributes, hasChildren = details
|
||||
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
|
||||
for token in self.emptyTag(namespace, name, attributes,
|
||||
hasChildren):
|
||||
yield token
|
||||
hasChildren = False
|
||||
else:
|
||||
yield self.startTag(namespace, name, attributes)
|
||||
|
||||
elif type == COMMENT:
|
||||
yield self.comment(details[0])
|
||||
|
||||
elif type == ENTITY:
|
||||
yield self.entity(details[0])
|
||||
|
||||
elif type == DOCUMENT:
|
||||
hasChildren = True
|
||||
|
||||
else:
|
||||
yield self.unknown(details[0])
|
||||
|
||||
if hasChildren:
|
||||
firstChild = self.getFirstChild(currentNode)
|
||||
else:
|
||||
firstChild = None
|
||||
|
||||
if firstChild is not None:
|
||||
currentNode = firstChild
|
||||
else:
|
||||
while currentNode is not None:
|
||||
details = self.getNodeDetails(currentNode)
|
||||
type, details = details[0], details[1:]
|
||||
if type == ELEMENT:
|
||||
namespace, name, attributes, hasChildren = details
|
||||
if (namespace and namespace != namespaces["html"]) or name not in voidElements:
|
||||
yield self.endTag(namespace, name)
|
||||
if self.tree is currentNode:
|
||||
currentNode = None
|
||||
break
|
||||
nextSibling = self.getNextSibling(currentNode)
|
||||
if nextSibling is not None:
|
||||
currentNode = nextSibling
|
||||
break
|
||||
else:
|
||||
currentNode = self.getParentNode(currentNode)
|
||||
@@ -1,43 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from xml.dom import Node
|
||||
|
||||
from . import base
|
||||
|
||||
|
||||
class TreeWalker(base.NonRecursiveTreeWalker):
|
||||
def getNodeDetails(self, node):
|
||||
if node.nodeType == Node.DOCUMENT_TYPE_NODE:
|
||||
return base.DOCTYPE, node.name, node.publicId, node.systemId
|
||||
|
||||
elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
|
||||
return base.TEXT, node.nodeValue
|
||||
|
||||
elif node.nodeType == Node.ELEMENT_NODE:
|
||||
attrs = {}
|
||||
for attr in list(node.attributes.keys()):
|
||||
attr = node.getAttributeNode(attr)
|
||||
if attr.namespaceURI:
|
||||
attrs[(attr.namespaceURI, attr.localName)] = attr.value
|
||||
else:
|
||||
attrs[(None, attr.name)] = attr.value
|
||||
return (base.ELEMENT, node.namespaceURI, node.nodeName,
|
||||
attrs, node.hasChildNodes())
|
||||
|
||||
elif node.nodeType == Node.COMMENT_NODE:
|
||||
return base.COMMENT, node.nodeValue
|
||||
|
||||
elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
|
||||
return (base.DOCUMENT,)
|
||||
|
||||
else:
|
||||
return base.UNKNOWN, node.nodeType
|
||||
|
||||
def getFirstChild(self, node):
|
||||
return node.firstChild
|
||||
|
||||
def getNextSibling(self, node):
|
||||
return node.nextSibling
|
||||
|
||||
def getParentNode(self, node):
|
||||
return node.parentNode
|
||||
@@ -1,131 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from collections import OrderedDict
|
||||
import re
|
||||
|
||||
from pipenv.patched.notpip._vendor.six import string_types
|
||||
|
||||
from . import base
|
||||
from .._utils import moduleFactoryFactory
|
||||
|
||||
tag_regexp = re.compile("{([^}]*)}(.*)")
|
||||
|
||||
|
||||
def getETreeBuilder(ElementTreeImplementation):
|
||||
ElementTree = ElementTreeImplementation
|
||||
ElementTreeCommentType = ElementTree.Comment("asd").tag
|
||||
|
||||
class TreeWalker(base.NonRecursiveTreeWalker): # pylint:disable=unused-variable
|
||||
"""Given the particular ElementTree representation, this implementation,
|
||||
to avoid using recursion, returns "nodes" as tuples with the following
|
||||
content:
|
||||
|
||||
1. The current element
|
||||
|
||||
2. The index of the element relative to its parent
|
||||
|
||||
3. A stack of ancestor elements
|
||||
|
||||
4. A flag "text", "tail" or None to indicate if the current node is a
|
||||
text node; either the text or tail of the current element (1)
|
||||
"""
|
||||
def getNodeDetails(self, node):
|
||||
if isinstance(node, tuple): # It might be the root Element
|
||||
elt, _, _, flag = node
|
||||
if flag in ("text", "tail"):
|
||||
return base.TEXT, getattr(elt, flag)
|
||||
else:
|
||||
node = elt
|
||||
|
||||
if not(hasattr(node, "tag")):
|
||||
node = node.getroot()
|
||||
|
||||
if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
|
||||
return (base.DOCUMENT,)
|
||||
|
||||
elif node.tag == "<!DOCTYPE>":
|
||||
return (base.DOCTYPE, node.text,
|
||||
node.get("publicId"), node.get("systemId"))
|
||||
|
||||
elif node.tag == ElementTreeCommentType:
|
||||
return base.COMMENT, node.text
|
||||
|
||||
else:
|
||||
assert isinstance(node.tag, string_types), type(node.tag)
|
||||
# This is assumed to be an ordinary element
|
||||
match = tag_regexp.match(node.tag)
|
||||
if match:
|
||||
namespace, tag = match.groups()
|
||||
else:
|
||||
namespace = None
|
||||
tag = node.tag
|
||||
attrs = OrderedDict()
|
||||
for name, value in list(node.attrib.items()):
|
||||
match = tag_regexp.match(name)
|
||||
if match:
|
||||
attrs[(match.group(1), match.group(2))] = value
|
||||
else:
|
||||
attrs[(None, name)] = value
|
||||
return (base.ELEMENT, namespace, tag,
|
||||
attrs, len(node) or node.text)
|
||||
|
||||
def getFirstChild(self, node):
|
||||
if isinstance(node, tuple):
|
||||
element, key, parents, flag = node
|
||||
else:
|
||||
element, key, parents, flag = node, None, [], None
|
||||
|
||||
if flag in ("text", "tail"):
|
||||
return None
|
||||
else:
|
||||
if element.text:
|
||||
return element, key, parents, "text"
|
||||
elif len(element):
|
||||
parents.append(element)
|
||||
return element[0], 0, parents, None
|
||||
else:
|
||||
return None
|
||||
|
||||
def getNextSibling(self, node):
|
||||
if isinstance(node, tuple):
|
||||
element, key, parents, flag = node
|
||||
else:
|
||||
return None
|
||||
|
||||
if flag == "text":
|
||||
if len(element):
|
||||
parents.append(element)
|
||||
return element[0], 0, parents, None
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
if element.tail and flag != "tail":
|
||||
return element, key, parents, "tail"
|
||||
elif key < len(parents[-1]) - 1:
|
||||
return parents[-1][key + 1], key + 1, parents, None
|
||||
else:
|
||||
return None
|
||||
|
||||
def getParentNode(self, node):
|
||||
if isinstance(node, tuple):
|
||||
element, key, parents, flag = node
|
||||
else:
|
||||
return None
|
||||
|
||||
if flag == "text":
|
||||
if not parents:
|
||||
return element
|
||||
else:
|
||||
return element, key, parents, None
|
||||
else:
|
||||
parent = parents.pop()
|
||||
if not parents:
|
||||
return parent
|
||||
else:
|
||||
assert list(parents[-1]).count(parent) == 1
|
||||
return parent, list(parents[-1]).index(parent), parents, None
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
getETreeModule = moduleFactoryFactory(getETreeBuilder)
|
||||
@@ -1,215 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from pipenv.patched.notpip._vendor.six import text_type
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from lxml import etree
|
||||
from ..treebuilders.etree import tag_regexp
|
||||
|
||||
from . import base
|
||||
|
||||
from .. import _ihatexml
|
||||
|
||||
|
||||
def ensure_str(s):
|
||||
if s is None:
|
||||
return None
|
||||
elif isinstance(s, text_type):
|
||||
return s
|
||||
else:
|
||||
return s.decode("ascii", "strict")
|
||||
|
||||
|
||||
class Root(object):
|
||||
def __init__(self, et):
|
||||
self.elementtree = et
|
||||
self.children = []
|
||||
|
||||
try:
|
||||
if et.docinfo.internalDTD:
|
||||
self.children.append(Doctype(self,
|
||||
ensure_str(et.docinfo.root_name),
|
||||
ensure_str(et.docinfo.public_id),
|
||||
ensure_str(et.docinfo.system_url)))
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
node = et.getroot()
|
||||
except AttributeError:
|
||||
node = et
|
||||
|
||||
while node.getprevious() is not None:
|
||||
node = node.getprevious()
|
||||
while node is not None:
|
||||
self.children.append(node)
|
||||
node = node.getnext()
|
||||
|
||||
self.text = None
|
||||
self.tail = None
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self.children[key]
|
||||
|
||||
def getnext(self):
|
||||
return None
|
||||
|
||||
def __len__(self):
|
||||
return 1
|
||||
|
||||
|
||||
class Doctype(object):
|
||||
def __init__(self, root_node, name, public_id, system_id):
|
||||
self.root_node = root_node
|
||||
self.name = name
|
||||
self.public_id = public_id
|
||||
self.system_id = system_id
|
||||
|
||||
self.text = None
|
||||
self.tail = None
|
||||
|
||||
def getnext(self):
|
||||
return self.root_node.children[1]
|
||||
|
||||
|
||||
class FragmentRoot(Root):
|
||||
def __init__(self, children):
|
||||
self.children = [FragmentWrapper(self, child) for child in children]
|
||||
self.text = self.tail = None
|
||||
|
||||
def getnext(self):
|
||||
return None
|
||||
|
||||
|
||||
class FragmentWrapper(object):
|
||||
def __init__(self, fragment_root, obj):
|
||||
self.root_node = fragment_root
|
||||
self.obj = obj
|
||||
if hasattr(self.obj, 'text'):
|
||||
self.text = ensure_str(self.obj.text)
|
||||
else:
|
||||
self.text = None
|
||||
if hasattr(self.obj, 'tail'):
|
||||
self.tail = ensure_str(self.obj.tail)
|
||||
else:
|
||||
self.tail = None
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self.obj, name)
|
||||
|
||||
def getnext(self):
|
||||
siblings = self.root_node.children
|
||||
idx = siblings.index(self)
|
||||
if idx < len(siblings) - 1:
|
||||
return siblings[idx + 1]
|
||||
else:
|
||||
return None
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self.obj[key]
|
||||
|
||||
def __bool__(self):
|
||||
return bool(self.obj)
|
||||
|
||||
def getparent(self):
|
||||
return None
|
||||
|
||||
def __str__(self):
|
||||
return str(self.obj)
|
||||
|
||||
def __unicode__(self):
|
||||
return str(self.obj)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.obj)
|
||||
|
||||
|
||||
class TreeWalker(base.NonRecursiveTreeWalker):
|
||||
def __init__(self, tree):
|
||||
# pylint:disable=redefined-variable-type
|
||||
if isinstance(tree, list):
|
||||
self.fragmentChildren = set(tree)
|
||||
tree = FragmentRoot(tree)
|
||||
else:
|
||||
self.fragmentChildren = set()
|
||||
tree = Root(tree)
|
||||
base.NonRecursiveTreeWalker.__init__(self, tree)
|
||||
self.filter = _ihatexml.InfosetFilter()
|
||||
|
||||
def getNodeDetails(self, node):
|
||||
if isinstance(node, tuple): # Text node
|
||||
node, key = node
|
||||
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
|
||||
return base.TEXT, ensure_str(getattr(node, key))
|
||||
|
||||
elif isinstance(node, Root):
|
||||
return (base.DOCUMENT,)
|
||||
|
||||
elif isinstance(node, Doctype):
|
||||
return base.DOCTYPE, node.name, node.public_id, node.system_id
|
||||
|
||||
elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"):
|
||||
return base.TEXT, ensure_str(node.obj)
|
||||
|
||||
elif node.tag == etree.Comment:
|
||||
return base.COMMENT, ensure_str(node.text)
|
||||
|
||||
elif node.tag == etree.Entity:
|
||||
return base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
|
||||
|
||||
else:
|
||||
# This is assumed to be an ordinary element
|
||||
match = tag_regexp.match(ensure_str(node.tag))
|
||||
if match:
|
||||
namespace, tag = match.groups()
|
||||
else:
|
||||
namespace = None
|
||||
tag = ensure_str(node.tag)
|
||||
attrs = OrderedDict()
|
||||
for name, value in list(node.attrib.items()):
|
||||
name = ensure_str(name)
|
||||
value = ensure_str(value)
|
||||
match = tag_regexp.match(name)
|
||||
if match:
|
||||
attrs[(match.group(1), match.group(2))] = value
|
||||
else:
|
||||
attrs[(None, name)] = value
|
||||
return (base.ELEMENT, namespace, self.filter.fromXmlName(tag),
|
||||
attrs, len(node) > 0 or node.text)
|
||||
|
||||
def getFirstChild(self, node):
|
||||
assert not isinstance(node, tuple), "Text nodes have no children"
|
||||
|
||||
assert len(node) or node.text, "Node has no children"
|
||||
if node.text:
|
||||
return (node, "text")
|
||||
else:
|
||||
return node[0]
|
||||
|
||||
def getNextSibling(self, node):
|
||||
if isinstance(node, tuple): # Text node
|
||||
node, key = node
|
||||
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
|
||||
if key == "text":
|
||||
# XXX: we cannot use a "bool(node) and node[0] or None" construct here
|
||||
# because node[0] might evaluate to False if it has no child element
|
||||
if len(node):
|
||||
return node[0]
|
||||
else:
|
||||
return None
|
||||
else: # tail
|
||||
return node.getnext()
|
||||
|
||||
return (node, "tail") if node.tail else node.getnext()
|
||||
|
||||
def getParentNode(self, node):
|
||||
if isinstance(node, tuple): # Text node
|
||||
node, key = node
|
||||
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
|
||||
if key == "text":
|
||||
return node
|
||||
# else: fallback to "normal" processing
|
||||
elif node in self.fragmentChildren:
|
||||
return None
|
||||
|
||||
return node.getparent()
|
||||
@@ -1,69 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from genshi.core import QName
|
||||
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
|
||||
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
|
||||
|
||||
from . import base
|
||||
|
||||
from ..constants import voidElements, namespaces
|
||||
|
||||
|
||||
class TreeWalker(base.TreeWalker):
|
||||
def __iter__(self):
|
||||
# Buffer the events so we can pass in the following one
|
||||
previous = None
|
||||
for event in self.tree:
|
||||
if previous is not None:
|
||||
for token in self.tokens(previous, event):
|
||||
yield token
|
||||
previous = event
|
||||
|
||||
# Don't forget the final event!
|
||||
if previous is not None:
|
||||
for token in self.tokens(previous, None):
|
||||
yield token
|
||||
|
||||
def tokens(self, event, next):
|
||||
kind, data, _ = event
|
||||
if kind == START:
|
||||
tag, attribs = data
|
||||
name = tag.localname
|
||||
namespace = tag.namespace
|
||||
converted_attribs = {}
|
||||
for k, v in attribs:
|
||||
if isinstance(k, QName):
|
||||
converted_attribs[(k.namespace, k.localname)] = v
|
||||
else:
|
||||
converted_attribs[(None, k)] = v
|
||||
|
||||
if namespace == namespaces["html"] and name in voidElements:
|
||||
for token in self.emptyTag(namespace, name, converted_attribs,
|
||||
not next or next[0] != END or
|
||||
next[1] != tag):
|
||||
yield token
|
||||
else:
|
||||
yield self.startTag(namespace, name, converted_attribs)
|
||||
|
||||
elif kind == END:
|
||||
name = data.localname
|
||||
namespace = data.namespace
|
||||
if namespace != namespaces["html"] or name not in voidElements:
|
||||
yield self.endTag(namespace, name)
|
||||
|
||||
elif kind == COMMENT:
|
||||
yield self.comment(data)
|
||||
|
||||
elif kind == TEXT:
|
||||
for token in self.text(data):
|
||||
yield token
|
||||
|
||||
elif kind == DOCTYPE:
|
||||
yield self.doctype(*data)
|
||||
|
||||
elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
|
||||
START_CDATA, END_CDATA, PI):
|
||||
pass
|
||||
|
||||
else:
|
||||
yield self.unknown(kind)
|
||||
@@ -1 +0,0 @@
|
||||
version = (1, 0, 3)
|
||||
@@ -1,580 +0,0 @@
|
||||
"""
|
||||
pygments.lexers._mapping
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Lexer mapping definitions. This file is generated by itself. Everytime
|
||||
you change something on a builtin lexer definition, run this script from
|
||||
the lexers folder to update it.
|
||||
|
||||
Do not alter the LEXERS dictionary by hand.
|
||||
|
||||
:copyright: Copyright 2006-2014, 2016 by the Pygments team, see AUTHORS.
|
||||
:license: BSD, see LICENSE for details.
|
||||
"""
|
||||
|
||||
LEXERS = {
|
||||
'ABAPLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.business', 'ABAP', ('abap',), ('*.abap', '*.ABAP'), ('text/x-abap',)),
|
||||
'AMDGPULexer': ('pipenv.patched.notpip._vendor.pygments.lexers.amdgpu', 'AMDGPU', ('amdgpu',), ('*.isa',), ()),
|
||||
'APLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.apl', 'APL', ('apl',), ('*.apl', '*.aplf', '*.aplo', '*.apln', '*.aplc', '*.apli', '*.dyalog'), ()),
|
||||
'AbnfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.grammar_notation', 'ABNF', ('abnf',), ('*.abnf',), ('text/x-abnf',)),
|
||||
'ActionScript3Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.actionscript', 'ActionScript 3', ('actionscript3', 'as3'), ('*.as',), ('application/x-actionscript3', 'text/x-actionscript3', 'text/actionscript3')),
|
||||
'ActionScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.actionscript', 'ActionScript', ('actionscript', 'as'), ('*.as',), ('application/x-actionscript', 'text/x-actionscript', 'text/actionscript')),
|
||||
'AdaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.pascal', 'Ada', ('ada', 'ada95', 'ada2005'), ('*.adb', '*.ads', '*.ada'), ('text/x-ada',)),
|
||||
'AdlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.archetype', 'ADL', ('adl',), ('*.adl', '*.adls', '*.adlf', '*.adlx'), ()),
|
||||
'AgdaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Agda', ('agda',), ('*.agda',), ('text/x-agda',)),
|
||||
'AheuiLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.esoteric', 'Aheui', ('aheui',), ('*.aheui',), ()),
|
||||
'AlloyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Alloy', ('alloy',), ('*.als',), ('text/x-alloy',)),
|
||||
'AmbientTalkLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ambient', 'AmbientTalk', ('ambienttalk', 'ambienttalk/2', 'at'), ('*.at',), ('text/x-ambienttalk',)),
|
||||
'AmplLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ampl', 'Ampl', ('ampl',), ('*.run',), ()),
|
||||
'Angular2HtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML + Angular2', ('html+ng2',), ('*.ng2',), ()),
|
||||
'Angular2Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Angular2', ('ng2',), (), ()),
|
||||
'AntlrActionScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR With ActionScript Target', ('antlr-actionscript', 'antlr-as'), ('*.G', '*.g'), ()),
|
||||
'AntlrCSharpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR With C# Target', ('antlr-csharp', 'antlr-c#'), ('*.G', '*.g'), ()),
|
||||
'AntlrCppLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR With CPP Target', ('antlr-cpp',), ('*.G', '*.g'), ()),
|
||||
'AntlrJavaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR With Java Target', ('antlr-java',), ('*.G', '*.g'), ()),
|
||||
'AntlrLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR', ('antlr',), (), ()),
|
||||
'AntlrObjectiveCLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR With ObjectiveC Target', ('antlr-objc',), ('*.G', '*.g'), ()),
|
||||
'AntlrPerlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR With Perl Target', ('antlr-perl',), ('*.G', '*.g'), ()),
|
||||
'AntlrPythonLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR With Python Target', ('antlr-python',), ('*.G', '*.g'), ()),
|
||||
'AntlrRubyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'ANTLR With Ruby Target', ('antlr-ruby', 'antlr-rb'), ('*.G', '*.g'), ()),
|
||||
'ApacheConfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'ApacheConf', ('apacheconf', 'aconf', 'apache'), ('.htaccess', 'apache.conf', 'apache2.conf'), ('text/x-apacheconf',)),
|
||||
'AppleScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'AppleScript', ('applescript',), ('*.applescript',), ()),
|
||||
'ArduinoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'Arduino', ('arduino',), ('*.ino',), ('text/x-arduino',)),
|
||||
'ArrowLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.arrow', 'Arrow', ('arrow',), ('*.arw',), ()),
|
||||
'AscLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asc', 'ASCII armored', ('asc', 'pem'), ('*.asc', '*.pem', 'id_dsa', 'id_ecdsa', 'id_ecdsa_sk', 'id_ed25519', 'id_ed25519_sk', 'id_rsa'), ('application/pgp-keys', 'application/pgp-encrypted', 'application/pgp-signature')),
|
||||
'AspectJLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'AspectJ', ('aspectj',), ('*.aj',), ('text/x-aspectj',)),
|
||||
'AsymptoteLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.graphics', 'Asymptote', ('asymptote', 'asy'), ('*.asy',), ('text/x-asymptote',)),
|
||||
'AugeasLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Augeas', ('augeas',), ('*.aug',), ()),
|
||||
'AutoItLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.automation', 'AutoIt', ('autoit',), ('*.au3',), ('text/x-autoit',)),
|
||||
'AutohotkeyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.automation', 'autohotkey', ('autohotkey', 'ahk'), ('*.ahk', '*.ahkl'), ('text/x-autohotkey',)),
|
||||
'AwkLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textedit', 'Awk', ('awk', 'gawk', 'mawk', 'nawk'), ('*.awk',), ('application/x-awk',)),
|
||||
'BBCBasicLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.basic', 'BBC Basic', ('bbcbasic',), ('*.bbc',), ()),
|
||||
'BBCodeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'BBCode', ('bbcode',), (), ('text/x-bbcode',)),
|
||||
'BCLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.algebra', 'BC', ('bc',), ('*.bc',), ()),
|
||||
'BSTLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.bibtex', 'BST', ('bst', 'bst-pybtex'), ('*.bst',), ()),
|
||||
'BareLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.bare', 'BARE', ('bare',), ('*.bare',), ()),
|
||||
'BaseMakefileLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.make', 'Base Makefile', ('basemake',), (), ()),
|
||||
'BashLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'Bash', ('bash', 'sh', 'ksh', 'zsh', 'shell'), ('*.sh', '*.ksh', '*.bash', '*.ebuild', '*.eclass', '*.exheres-0', '*.exlib', '*.zsh', '.bashrc', 'bashrc', '.bash_*', 'bash_*', 'zshrc', '.zshrc', '.kshrc', 'kshrc', 'PKGBUILD'), ('application/x-sh', 'application/x-shellscript', 'text/x-shellscript')),
|
||||
'BashSessionLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'Bash Session', ('console', 'shell-session'), ('*.sh-session', '*.shell-session'), ('application/x-shell-session', 'application/x-sh-session')),
|
||||
'BatchLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'Batchfile', ('batch', 'bat', 'dosbatch', 'winbatch'), ('*.bat', '*.cmd'), ('application/x-dos-batch',)),
|
||||
'BddLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.bdd', 'Bdd', ('bdd',), ('*.feature',), ('text/x-bdd',)),
|
||||
'BefungeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.esoteric', 'Befunge', ('befunge',), ('*.befunge',), ('application/x-befunge',)),
|
||||
'BibTeXLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.bibtex', 'BibTeX', ('bibtex', 'bib'), ('*.bib',), ('text/x-bibtex',)),
|
||||
'BlitzBasicLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.basic', 'BlitzBasic', ('blitzbasic', 'b3d', 'bplus'), ('*.bb', '*.decls'), ('text/x-bb',)),
|
||||
'BlitzMaxLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.basic', 'BlitzMax', ('blitzmax', 'bmax'), ('*.bmx',), ('text/x-bmx',)),
|
||||
'BnfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.grammar_notation', 'BNF', ('bnf',), ('*.bnf',), ('text/x-bnf',)),
|
||||
'BoaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.boa', 'Boa', ('boa',), ('*.boa',), ()),
|
||||
'BooLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dotnet', 'Boo', ('boo',), ('*.boo',), ('text/x-boo',)),
|
||||
'BoogieLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.verification', 'Boogie', ('boogie',), ('*.bpl',), ()),
|
||||
'BrainfuckLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.esoteric', 'Brainfuck', ('brainfuck', 'bf'), ('*.bf', '*.b'), ('application/x-brainfuck',)),
|
||||
'BugsLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.modeling', 'BUGS', ('bugs', 'winbugs', 'openbugs'), ('*.bug',), ()),
|
||||
'CAmkESLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.esoteric', 'CAmkES', ('camkes', 'idl4'), ('*.camkes', '*.idl4'), ()),
|
||||
'CLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_cpp', 'C', ('c',), ('*.c', '*.h', '*.idc', '*.x[bp]m'), ('text/x-chdr', 'text/x-csrc', 'image/x-xbitmap', 'image/x-xpixmap')),
|
||||
'CMakeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.make', 'CMake', ('cmake',), ('*.cmake', 'CMakeLists.txt'), ('text/x-cmake',)),
|
||||
'CObjdumpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'c-objdump', ('c-objdump',), ('*.c-objdump',), ('text/x-c-objdump',)),
|
||||
'CPSALexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'CPSA', ('cpsa',), ('*.cpsa',), ()),
|
||||
'CSharpAspxLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dotnet', 'aspx-cs', ('aspx-cs',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()),
|
||||
'CSharpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dotnet', 'C#', ('csharp', 'c#', 'cs'), ('*.cs',), ('text/x-csharp',)),
|
||||
'Ca65Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'ca65 assembler', ('ca65',), ('*.s',), ()),
|
||||
'CadlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.archetype', 'cADL', ('cadl',), ('*.cadl',), ()),
|
||||
'CapDLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.esoteric', 'CapDL', ('capdl',), ('*.cdl',), ()),
|
||||
'CapnProtoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.capnproto', "Cap'n Proto", ('capnp',), ('*.capnp',), ()),
|
||||
'CbmBasicV2Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.basic', 'CBM BASIC V2', ('cbmbas',), ('*.bas',), ()),
|
||||
'CddlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.cddl', 'CDDL', ('cddl',), ('*.cddl',), ('text/x-cddl',)),
|
||||
'CeylonLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Ceylon', ('ceylon',), ('*.ceylon',), ('text/x-ceylon',)),
|
||||
'Cfengine3Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'CFEngine3', ('cfengine3', 'cf3'), ('*.cf',), ()),
|
||||
'ChaiscriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'ChaiScript', ('chaiscript', 'chai'), ('*.chai',), ('text/x-chaiscript', 'application/x-chaiscript')),
|
||||
'ChapelLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.chapel', 'Chapel', ('chapel', 'chpl'), ('*.chpl',), ()),
|
||||
'CharmciLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'Charmci', ('charmci',), ('*.ci',), ()),
|
||||
'CheetahHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Cheetah', ('html+cheetah', 'html+spitfire', 'htmlcheetah'), (), ('text/html+cheetah', 'text/html+spitfire')),
|
||||
'CheetahJavascriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+Cheetah', ('javascript+cheetah', 'js+cheetah', 'javascript+spitfire', 'js+spitfire'), (), ('application/x-javascript+cheetah', 'text/x-javascript+cheetah', 'text/javascript+cheetah', 'application/x-javascript+spitfire', 'text/x-javascript+spitfire', 'text/javascript+spitfire')),
|
||||
'CheetahLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Cheetah', ('cheetah', 'spitfire'), ('*.tmpl', '*.spt'), ('application/x-cheetah', 'application/x-spitfire')),
|
||||
'CheetahXmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Cheetah', ('xml+cheetah', 'xml+spitfire'), (), ('application/xml+cheetah', 'application/xml+spitfire')),
|
||||
'CirruLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.webmisc', 'Cirru', ('cirru',), ('*.cirru',), ('text/x-cirru',)),
|
||||
'ClayLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'Clay', ('clay',), ('*.clay',), ('text/x-clay',)),
|
||||
'CleanLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.clean', 'Clean', ('clean',), ('*.icl', '*.dcl'), ()),
|
||||
'ClojureLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Clojure', ('clojure', 'clj'), ('*.clj',), ('text/x-clojure', 'application/x-clojure')),
|
||||
'ClojureScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'ClojureScript', ('clojurescript', 'cljs'), ('*.cljs',), ('text/x-clojurescript', 'application/x-clojurescript')),
|
||||
'CobolFreeformatLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.business', 'COBOLFree', ('cobolfree',), ('*.cbl', '*.CBL'), ()),
|
||||
'CobolLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.business', 'COBOL', ('cobol',), ('*.cob', '*.COB', '*.cpy', '*.CPY'), ('text/x-cobol',)),
|
||||
'CoffeeScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'CoffeeScript', ('coffeescript', 'coffee-script', 'coffee'), ('*.coffee',), ('text/coffeescript',)),
|
||||
'ColdfusionCFCLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Coldfusion CFC', ('cfc',), ('*.cfc',), ()),
|
||||
'ColdfusionHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Coldfusion HTML', ('cfm',), ('*.cfm', '*.cfml'), ('application/x-coldfusion',)),
|
||||
'ColdfusionLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'cfstatement', ('cfs',), (), ()),
|
||||
'CommonLispLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'Common Lisp', ('common-lisp', 'cl', 'lisp'), ('*.cl', '*.lisp'), ('text/x-common-lisp',)),
|
||||
'ComponentPascalLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.oberon', 'Component Pascal', ('componentpascal', 'cp'), ('*.cp', '*.cps'), ('text/x-component-pascal',)),
|
||||
'CoqLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.theorem', 'Coq', ('coq',), ('*.v',), ('text/x-coq',)),
|
||||
'CppLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_cpp', 'C++', ('cpp', 'c++'), ('*.cpp', '*.hpp', '*.c++', '*.h++', '*.cc', '*.hh', '*.cxx', '*.hxx', '*.C', '*.H', '*.cp', '*.CPP'), ('text/x-c++hdr', 'text/x-c++src')),
|
||||
'CppObjdumpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'cpp-objdump', ('cpp-objdump', 'c++-objdumb', 'cxx-objdump'), ('*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump'), ('text/x-cpp-objdump',)),
|
||||
'CrmshLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Crmsh', ('crmsh', 'pcmk'), ('*.crmsh', '*.pcmk'), ()),
|
||||
'CrocLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.d', 'Croc', ('croc',), ('*.croc',), ('text/x-crocsrc',)),
|
||||
'CryptolLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Cryptol', ('cryptol', 'cry'), ('*.cry',), ('text/x-cryptol',)),
|
||||
'CrystalLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.crystal', 'Crystal', ('cr', 'crystal'), ('*.cr',), ('text/x-crystal',)),
|
||||
'CsoundDocumentLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.csound', 'Csound Document', ('csound-document', 'csound-csd'), ('*.csd',), ()),
|
||||
'CsoundOrchestraLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.csound', 'Csound Orchestra', ('csound', 'csound-orc'), ('*.orc', '*.udo'), ()),
|
||||
'CsoundScoreLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.csound', 'Csound Score', ('csound-score', 'csound-sco'), ('*.sco',), ()),
|
||||
'CssDjangoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'CSS+Django/Jinja', ('css+django', 'css+jinja'), (), ('text/css+django', 'text/css+jinja')),
|
||||
'CssErbLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'CSS+Ruby', ('css+ruby', 'css+erb'), (), ('text/css+ruby',)),
|
||||
'CssGenshiLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'CSS+Genshi Text', ('css+genshitext', 'css+genshi'), (), ('text/css+genshi',)),
|
||||
'CssLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.css', 'CSS', ('css',), ('*.css',), ('text/css',)),
|
||||
'CssPhpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'CSS+PHP', ('css+php',), (), ('text/css+php',)),
|
||||
'CssSmartyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'CSS+Smarty', ('css+smarty',), (), ('text/css+smarty',)),
|
||||
'CudaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'CUDA', ('cuda', 'cu'), ('*.cu', '*.cuh'), ('text/x-cuda',)),
|
||||
'CypherLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.graph', 'Cypher', ('cypher',), ('*.cyp', '*.cypher'), ()),
|
||||
'CythonLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.python', 'Cython', ('cython', 'pyx', 'pyrex'), ('*.pyx', '*.pxd', '*.pxi'), ('text/x-cython', 'application/x-cython')),
|
||||
'DLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.d', 'D', ('d',), ('*.d', '*.di'), ('text/x-dsrc',)),
|
||||
'DObjdumpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'd-objdump', ('d-objdump',), ('*.d-objdump',), ('text/x-d-objdump',)),
|
||||
'DarcsPatchLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.diff', 'Darcs Patch', ('dpatch',), ('*.dpatch', '*.darcspatch'), ()),
|
||||
'DartLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'Dart', ('dart',), ('*.dart',), ('text/x-dart',)),
|
||||
'Dasm16Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'DASM16', ('dasm16',), ('*.dasm16', '*.dasm'), ('text/x-dasm16',)),
|
||||
'DebianControlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.installers', 'Debian Control file', ('debcontrol', 'control'), ('control',), ()),
|
||||
'DelphiLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.pascal', 'Delphi', ('delphi', 'pas', 'pascal', 'objectpascal'), ('*.pas', '*.dpr'), ('text/x-pascal',)),
|
||||
'DevicetreeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.devicetree', 'Devicetree', ('devicetree', 'dts'), ('*.dts', '*.dtsi'), ('text/x-c',)),
|
||||
'DgLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.python', 'dg', ('dg',), ('*.dg',), ('text/x-dg',)),
|
||||
'DiffLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.diff', 'Diff', ('diff', 'udiff'), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch')),
|
||||
'DjangoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Django/Jinja', ('django', 'jinja'), (), ('application/x-django-templating', 'application/x-jinja')),
|
||||
'DockerLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Docker', ('docker', 'dockerfile'), ('Dockerfile', '*.docker'), ('text/x-dockerfile-config',)),
|
||||
'DtdLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.html', 'DTD', ('dtd',), ('*.dtd',), ('application/xml-dtd',)),
|
||||
'DuelLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.webmisc', 'Duel', ('duel', 'jbst', 'jsonml+bst'), ('*.duel', '*.jbst'), ('text/x-duel', 'text/x-jbst')),
|
||||
'DylanConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dylan', 'Dylan session', ('dylan-console', 'dylan-repl'), ('*.dylan-console',), ('text/x-dylan-console',)),
|
||||
'DylanLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dylan', 'Dylan', ('dylan',), ('*.dylan', '*.dyl', '*.intr'), ('text/x-dylan',)),
|
||||
'DylanLidLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dylan', 'DylanLID', ('dylan-lid', 'lid'), ('*.lid', '*.hdp'), ('text/x-dylan-lid',)),
|
||||
'ECLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ecl', 'ECL', ('ecl',), ('*.ecl',), ('application/x-ecl',)),
|
||||
'ECLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'eC', ('ec',), ('*.ec', '*.eh'), ('text/x-echdr', 'text/x-ecsrc')),
|
||||
'EarlGreyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'Earl Grey', ('earl-grey', 'earlgrey', 'eg'), ('*.eg',), ('text/x-earl-grey',)),
|
||||
'EasytrieveLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'Easytrieve', ('easytrieve',), ('*.ezt', '*.mac'), ('text/x-easytrieve',)),
|
||||
'EbnfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'EBNF', ('ebnf',), ('*.ebnf',), ('text/x-ebnf',)),
|
||||
'EiffelLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.eiffel', 'Eiffel', ('eiffel',), ('*.e',), ('text/x-eiffel',)),
|
||||
'ElixirConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.erlang', 'Elixir iex session', ('iex',), (), ('text/x-elixir-shellsession',)),
|
||||
'ElixirLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.erlang', 'Elixir', ('elixir', 'ex', 'exs'), ('*.ex', '*.eex', '*.exs', '*.leex'), ('text/x-elixir',)),
|
||||
'ElmLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.elm', 'Elm', ('elm',), ('*.elm',), ('text/x-elm',)),
|
||||
'ElpiLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.elpi', 'Elpi', ('elpi',), ('*.elpi',), ('text/x-elpi',)),
|
||||
'EmacsLispLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'EmacsLisp', ('emacs-lisp', 'elisp', 'emacs'), ('*.el',), ('text/x-elisp', 'application/x-elisp')),
|
||||
'EmailLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.email', 'E-mail', ('email', 'eml'), ('*.eml',), ('message/rfc822',)),
|
||||
'ErbLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'ERB', ('erb',), (), ('application/x-ruby-templating',)),
|
||||
'ErlangLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.erlang', 'Erlang', ('erlang',), ('*.erl', '*.hrl', '*.es', '*.escript'), ('text/x-erlang',)),
|
||||
'ErlangShellLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.erlang', 'Erlang erl session', ('erl',), ('*.erl-sh',), ('text/x-erl-shellsession',)),
|
||||
'EvoqueHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Evoque', ('html+evoque',), ('*.html',), ('text/html+evoque',)),
|
||||
'EvoqueLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Evoque', ('evoque',), ('*.evoque',), ('application/x-evoque',)),
|
||||
'EvoqueXmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Evoque', ('xml+evoque',), ('*.xml',), ('application/xml+evoque',)),
|
||||
'ExeclineLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'execline', ('execline',), ('*.exec',), ()),
|
||||
'EzhilLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ezhil', 'Ezhil', ('ezhil',), ('*.n',), ('text/x-ezhil',)),
|
||||
'FSharpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dotnet', 'F#', ('fsharp', 'f#'), ('*.fs', '*.fsi'), ('text/x-fsharp',)),
|
||||
'FStarLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ml', 'FStar', ('fstar',), ('*.fst', '*.fsti'), ('text/x-fstar',)),
|
||||
'FactorLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.factor', 'Factor', ('factor',), ('*.factor',), ('text/x-factor',)),
|
||||
'FancyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ruby', 'Fancy', ('fancy', 'fy'), ('*.fy', '*.fancypack'), ('text/x-fancysrc',)),
|
||||
'FantomLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.fantom', 'Fantom', ('fan',), ('*.fan',), ('application/x-fantom',)),
|
||||
'FelixLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.felix', 'Felix', ('felix', 'flx'), ('*.flx', '*.flxh'), ('text/x-felix',)),
|
||||
'FennelLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'Fennel', ('fennel', 'fnl'), ('*.fnl',), ()),
|
||||
'FishShellLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'Fish', ('fish', 'fishshell'), ('*.fish', '*.load'), ('application/x-fish',)),
|
||||
'FlatlineLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Flatline', ('flatline',), (), ('text/x-flatline',)),
|
||||
'FloScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.floscript', 'FloScript', ('floscript', 'flo'), ('*.flo',), ()),
|
||||
'ForthLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.forth', 'Forth', ('forth',), ('*.frt', '*.fs'), ('application/x-forth',)),
|
||||
'FortranFixedLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.fortran', 'FortranFixed', ('fortranfixed',), ('*.f', '*.F'), ()),
|
||||
'FortranLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.fortran', 'Fortran', ('fortran', 'f90'), ('*.f03', '*.f90', '*.F03', '*.F90'), ('text/x-fortran',)),
|
||||
'FoxProLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.foxpro', 'FoxPro', ('foxpro', 'vfp', 'clipper', 'xbase'), ('*.PRG', '*.prg'), ()),
|
||||
'FreeFemLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.freefem', 'Freefem', ('freefem',), ('*.edp',), ('text/x-freefem',)),
|
||||
'FutharkLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.futhark', 'Futhark', ('futhark',), ('*.fut',), ('text/x-futhark',)),
|
||||
'GAPLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.algebra', 'GAP', ('gap',), ('*.g', '*.gd', '*.gi', '*.gap'), ()),
|
||||
'GDScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.gdscript', 'GDScript', ('gdscript', 'gd'), ('*.gd',), ('text/x-gdscript', 'application/x-gdscript')),
|
||||
'GLShaderLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.graphics', 'GLSL', ('glsl',), ('*.vert', '*.frag', '*.geo'), ('text/x-glslsrc',)),
|
||||
'GSQLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.gsql', 'GSQL', ('gsql',), ('*.gsql',), ()),
|
||||
'GasLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'GAS', ('gas', 'asm'), ('*.s', '*.S'), ('text/x-gas',)),
|
||||
'GcodeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.gcodelexer', 'g-code', ('gcode',), ('*.gcode',), ()),
|
||||
'GenshiLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Genshi', ('genshi', 'kid', 'xml+genshi', 'xml+kid'), ('*.kid',), ('application/x-genshi', 'application/x-kid')),
|
||||
'GenshiTextLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Genshi Text', ('genshitext',), (), ('application/x-genshi-text', 'text/x-genshi')),
|
||||
'GettextLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textfmts', 'Gettext Catalog', ('pot', 'po'), ('*.pot', '*.po'), ('application/x-gettext', 'text/x-gettext', 'text/gettext')),
|
||||
'GherkinLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.testing', 'Gherkin', ('gherkin', 'cucumber'), ('*.feature',), ('text/x-gherkin',)),
|
||||
'GnuplotLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.graphics', 'Gnuplot', ('gnuplot',), ('*.plot', '*.plt'), ('text/x-gnuplot',)),
|
||||
'GoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.go', 'Go', ('go', 'golang'), ('*.go',), ('text/x-gosrc',)),
|
||||
'GoloLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Golo', ('golo',), ('*.golo',), ()),
|
||||
'GoodDataCLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.business', 'GoodData-CL', ('gooddata-cl',), ('*.gdc',), ('text/x-gooddata-cl',)),
|
||||
'GosuLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Gosu', ('gosu',), ('*.gs', '*.gsx', '*.gsp', '*.vark'), ('text/x-gosu',)),
|
||||
'GosuTemplateLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Gosu Template', ('gst',), ('*.gst',), ('text/x-gosu-template',)),
|
||||
'GraphvizLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.graphviz', 'Graphviz', ('graphviz', 'dot'), ('*.gv', '*.dot'), ('text/x-graphviz', 'text/vnd.graphviz')),
|
||||
'GroffLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'Groff', ('groff', 'nroff', 'man'), ('*.[1-9]', '*.man', '*.1p', '*.3pm'), ('application/x-troff', 'text/troff')),
|
||||
'GroovyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Groovy', ('groovy',), ('*.groovy', '*.gradle'), ('text/x-groovy',)),
|
||||
'HLSLShaderLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.graphics', 'HLSL', ('hlsl',), ('*.hlsl', '*.hlsli'), ('text/x-hlsl',)),
|
||||
'HamlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.html', 'Haml', ('haml',), ('*.haml',), ('text/x-haml',)),
|
||||
'HandlebarsHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Handlebars', ('html+handlebars',), ('*.handlebars', '*.hbs'), ('text/html+handlebars', 'text/x-handlebars-template')),
|
||||
'HandlebarsLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Handlebars', ('handlebars',), (), ()),
|
||||
'HaskellLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Haskell', ('haskell', 'hs'), ('*.hs',), ('text/x-haskell',)),
|
||||
'HaxeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haxe', 'Haxe', ('haxe', 'hxsl', 'hx'), ('*.hx', '*.hxsl'), ('text/haxe', 'text/x-haxe', 'text/x-hx')),
|
||||
'HexdumpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.hexdump', 'Hexdump', ('hexdump',), (), ()),
|
||||
'HsailLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'HSAIL', ('hsail', 'hsa'), ('*.hsail',), ('text/x-hsail',)),
|
||||
'HspecLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Hspec', ('hspec',), (), ()),
|
||||
'HtmlDjangoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Django/Jinja', ('html+django', 'html+jinja', 'htmldjango'), (), ('text/html+django', 'text/html+jinja')),
|
||||
'HtmlGenshiLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Genshi', ('html+genshi', 'html+kid'), (), ('text/html+genshi',)),
|
||||
'HtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.html', 'HTML', ('html',), ('*.html', '*.htm', '*.xhtml', '*.xslt'), ('text/html', 'application/xhtml+xml')),
|
||||
'HtmlPhpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+PHP', ('html+php',), ('*.phtml',), ('application/x-php', 'application/x-httpd-php', 'application/x-httpd-php3', 'application/x-httpd-php4', 'application/x-httpd-php5')),
|
||||
'HtmlSmartyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Smarty', ('html+smarty',), (), ('text/html+smarty',)),
|
||||
'HttpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textfmts', 'HTTP', ('http',), (), ()),
|
||||
'HxmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haxe', 'Hxml', ('haxeml', 'hxml'), ('*.hxml',), ()),
|
||||
'HyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'Hy', ('hylang',), ('*.hy',), ('text/x-hy', 'application/x-hy')),
|
||||
'HybrisLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'Hybris', ('hybris', 'hy'), ('*.hy', '*.hyb'), ('text/x-hybris', 'application/x-hybris')),
|
||||
'IDLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.idl', 'IDL', ('idl',), ('*.pro',), ('text/idl',)),
|
||||
'IconLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.unicon', 'Icon', ('icon',), ('*.icon', '*.ICON'), ()),
|
||||
'IdrisLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Idris', ('idris', 'idr'), ('*.idr',), ('text/x-idris',)),
|
||||
'IgorLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.igor', 'Igor', ('igor', 'igorpro'), ('*.ipf',), ('text/ipf',)),
|
||||
'Inform6Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.int_fiction', 'Inform 6', ('inform6', 'i6'), ('*.inf',), ()),
|
||||
'Inform6TemplateLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.int_fiction', 'Inform 6 template', ('i6t',), ('*.i6t',), ()),
|
||||
'Inform7Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.int_fiction', 'Inform 7', ('inform7', 'i7'), ('*.ni', '*.i7x'), ()),
|
||||
'IniLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'INI', ('ini', 'cfg', 'dosini'), ('*.ini', '*.cfg', '*.inf', '.editorconfig', '*.service', '*.socket', '*.device', '*.mount', '*.automount', '*.swap', '*.target', '*.path', '*.timer', '*.slice', '*.scope'), ('text/x-ini', 'text/inf')),
|
||||
'IoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.iolang', 'Io', ('io',), ('*.io',), ('text/x-iosrc',)),
|
||||
'IokeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Ioke', ('ioke', 'ik'), ('*.ik',), ('text/x-iokesrc',)),
|
||||
'IrcLogsLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textfmts', 'IRC logs', ('irc',), ('*.weechatlog',), ('text/x-irclog',)),
|
||||
'IsabelleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.theorem', 'Isabelle', ('isabelle',), ('*.thy',), ('text/x-isabelle',)),
|
||||
'JLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.j', 'J', ('j',), ('*.ijs',), ('text/x-j',)),
|
||||
'JSLTLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jslt', 'JSLT', ('jslt',), ('*.jslt',), ('text/x-jslt',)),
|
||||
'JagsLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.modeling', 'JAGS', ('jags',), ('*.jag', '*.bug'), ()),
|
||||
'JasminLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Jasmin', ('jasmin', 'jasminxt'), ('*.j',), ()),
|
||||
'JavaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Java', ('java',), ('*.java',), ('text/x-java',)),
|
||||
'JavascriptDjangoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+Django/Jinja', ('javascript+django', 'js+django', 'javascript+jinja', 'js+jinja'), (), ('application/x-javascript+django', 'application/x-javascript+jinja', 'text/x-javascript+django', 'text/x-javascript+jinja', 'text/javascript+django', 'text/javascript+jinja')),
|
||||
'JavascriptErbLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+Ruby', ('javascript+ruby', 'js+ruby', 'javascript+erb', 'js+erb'), (), ('application/x-javascript+ruby', 'text/x-javascript+ruby', 'text/javascript+ruby')),
|
||||
'JavascriptGenshiLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+Genshi Text', ('js+genshitext', 'js+genshi', 'javascript+genshitext', 'javascript+genshi'), (), ('application/x-javascript+genshi', 'text/x-javascript+genshi', 'text/javascript+genshi')),
|
||||
'JavascriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'JavaScript', ('javascript', 'js'), ('*.js', '*.jsm', '*.mjs', '*.cjs'), ('application/javascript', 'application/x-javascript', 'text/x-javascript', 'text/javascript')),
|
||||
'JavascriptPhpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+PHP', ('javascript+php', 'js+php'), (), ('application/x-javascript+php', 'text/x-javascript+php', 'text/javascript+php')),
|
||||
'JavascriptSmartyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+Smarty', ('javascript+smarty', 'js+smarty'), (), ('application/x-javascript+smarty', 'text/x-javascript+smarty', 'text/javascript+smarty')),
|
||||
'JclLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'JCL', ('jcl',), ('*.jcl',), ('text/x-jcl',)),
|
||||
'JsgfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.grammar_notation', 'JSGF', ('jsgf',), ('*.jsgf',), ('application/jsgf', 'application/x-jsgf', 'text/jsgf')),
|
||||
'JsonBareObjectLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.data', 'JSONBareObject', (), (), ()),
|
||||
'JsonLdLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.data', 'JSON-LD', ('jsonld', 'json-ld'), ('*.jsonld',), ('application/ld+json',)),
|
||||
'JsonLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.data', 'JSON', ('json', 'json-object'), ('*.json', 'Pipfile.lock'), ('application/json', 'application/json-object')),
|
||||
'JspLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Java Server Page', ('jsp',), ('*.jsp',), ('application/x-jsp',)),
|
||||
'JuliaConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.julia', 'Julia console', ('jlcon', 'julia-repl'), (), ()),
|
||||
'JuliaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.julia', 'Julia', ('julia', 'jl'), ('*.jl',), ('text/x-julia', 'application/x-julia')),
|
||||
'JuttleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'Juttle', ('juttle',), ('*.juttle',), ('application/juttle', 'application/x-juttle', 'text/x-juttle', 'text/juttle')),
|
||||
'KalLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'Kal', ('kal',), ('*.kal',), ('text/kal', 'application/kal')),
|
||||
'KconfigLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Kconfig', ('kconfig', 'menuconfig', 'linux-config', 'kernel-config'), ('Kconfig*', '*Config.in*', 'external.in*', 'standard-modules.in'), ('text/x-kconfig',)),
|
||||
'KernelLogLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textfmts', 'Kernel log', ('kmsg', 'dmesg'), ('*.kmsg', '*.dmesg'), ()),
|
||||
'KokaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Koka', ('koka',), ('*.kk', '*.kki'), ('text/x-koka',)),
|
||||
'KotlinLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Kotlin', ('kotlin',), ('*.kt', '*.kts'), ('text/x-kotlin',)),
|
||||
'KuinLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.kuin', 'Kuin', ('kuin',), ('*.kn',), ()),
|
||||
'LSLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'LSL', ('lsl',), ('*.lsl',), ('text/x-lsl',)),
|
||||
'LassoCssLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'CSS+Lasso', ('css+lasso',), (), ('text/css+lasso',)),
|
||||
'LassoHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Lasso', ('html+lasso',), (), ('text/html+lasso', 'application/x-httpd-lasso', 'application/x-httpd-lasso[89]')),
|
||||
'LassoJavascriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+Lasso', ('javascript+lasso', 'js+lasso'), (), ('application/x-javascript+lasso', 'text/x-javascript+lasso', 'text/javascript+lasso')),
|
||||
'LassoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'Lasso', ('lasso', 'lassoscript'), ('*.lasso', '*.lasso[89]'), ('text/x-lasso',)),
|
||||
'LassoXmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Lasso', ('xml+lasso',), (), ('application/xml+lasso',)),
|
||||
'LeanLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.theorem', 'Lean', ('lean',), ('*.lean',), ('text/x-lean',)),
|
||||
'LessCssLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.css', 'LessCss', ('less',), ('*.less',), ('text/x-less-css',)),
|
||||
'LighttpdConfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Lighttpd configuration file', ('lighttpd', 'lighty'), ('lighttpd.conf',), ('text/x-lighttpd-conf',)),
|
||||
'LilyPondLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lilypond', 'LilyPond', ('lilypond',), ('*.ly',), ()),
|
||||
'LimboLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.inferno', 'Limbo', ('limbo',), ('*.b',), ('text/limbo',)),
|
||||
'LiquidLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'liquid', ('liquid',), ('*.liquid',), ()),
|
||||
'LiterateAgdaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Literate Agda', ('literate-agda', 'lagda'), ('*.lagda',), ('text/x-literate-agda',)),
|
||||
'LiterateCryptolLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Literate Cryptol', ('literate-cryptol', 'lcryptol', 'lcry'), ('*.lcry',), ('text/x-literate-cryptol',)),
|
||||
'LiterateHaskellLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Literate Haskell', ('literate-haskell', 'lhaskell', 'lhs'), ('*.lhs',), ('text/x-literate-haskell',)),
|
||||
'LiterateIdrisLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.haskell', 'Literate Idris', ('literate-idris', 'lidris', 'lidr'), ('*.lidr',), ('text/x-literate-idris',)),
|
||||
'LiveScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'LiveScript', ('livescript', 'live-script'), ('*.ls',), ('text/livescript',)),
|
||||
'LlvmLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'LLVM', ('llvm',), ('*.ll',), ('text/x-llvm',)),
|
||||
'LlvmMirBodyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'LLVM-MIR Body', ('llvm-mir-body',), (), ()),
|
||||
'LlvmMirLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'LLVM-MIR', ('llvm-mir',), ('*.mir',), ()),
|
||||
'LogosLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.objective', 'Logos', ('logos',), ('*.x', '*.xi', '*.xm', '*.xmi'), ('text/x-logos',)),
|
||||
'LogtalkLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.prolog', 'Logtalk', ('logtalk',), ('*.lgt', '*.logtalk'), ('text/x-logtalk',)),
|
||||
'LuaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'Lua', ('lua',), ('*.lua', '*.wlua'), ('text/x-lua', 'application/x-lua')),
|
||||
'MIMELexer': ('pipenv.patched.notpip._vendor.pygments.lexers.mime', 'MIME', ('mime',), (), ('multipart/mixed', 'multipart/related', 'multipart/alternative')),
|
||||
'MOOCodeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'MOOCode', ('moocode', 'moo'), ('*.moo',), ('text/x-moocode',)),
|
||||
'MSDOSSessionLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'MSDOS Session', ('doscon',), (), ()),
|
||||
'MakefileLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.make', 'Makefile', ('make', 'makefile', 'mf', 'bsdmake'), ('*.mak', '*.mk', 'Makefile', 'makefile', 'Makefile.*', 'GNUmakefile'), ('text/x-makefile',)),
|
||||
'MakoCssLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'CSS+Mako', ('css+mako',), (), ('text/css+mako',)),
|
||||
'MakoHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Mako', ('html+mako',), (), ('text/html+mako',)),
|
||||
'MakoJavascriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+Mako', ('javascript+mako', 'js+mako'), (), ('application/x-javascript+mako', 'text/x-javascript+mako', 'text/javascript+mako')),
|
||||
'MakoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Mako', ('mako',), ('*.mao',), ('application/x-mako',)),
|
||||
'MakoXmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Mako', ('xml+mako',), (), ('application/xml+mako',)),
|
||||
'MaqlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.business', 'MAQL', ('maql',), ('*.maql',), ('text/x-gooddata-maql', 'application/x-gooddata-maql')),
|
||||
'MarkdownLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'Markdown', ('markdown', 'md'), ('*.md', '*.markdown'), ('text/x-markdown',)),
|
||||
'MaskLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'Mask', ('mask',), ('*.mask',), ('text/x-mask',)),
|
||||
'MasonLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Mason', ('mason',), ('*.m', '*.mhtml', '*.mc', '*.mi', 'autohandler', 'dhandler'), ('application/x-mason',)),
|
||||
'MathematicaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.algebra', 'Mathematica', ('mathematica', 'mma', 'nb'), ('*.nb', '*.cdf', '*.nbp', '*.ma'), ('application/mathematica', 'application/vnd.wolfram.mathematica', 'application/vnd.wolfram.mathematica.package', 'application/vnd.wolfram.cdf')),
|
||||
'MatlabLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.matlab', 'Matlab', ('matlab',), ('*.m',), ('text/matlab',)),
|
||||
'MatlabSessionLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.matlab', 'Matlab session', ('matlabsession',), (), ()),
|
||||
'MaximaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.maxima', 'Maxima', ('maxima', 'macsyma'), ('*.mac', '*.max'), ()),
|
||||
'MesonLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.meson', 'Meson', ('meson', 'meson.build'), ('meson.build', 'meson_options.txt'), ('text/x-meson',)),
|
||||
'MiniDLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.d', 'MiniD', ('minid',), (), ('text/x-minidsrc',)),
|
||||
'MiniScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'MiniScript', ('miniscript', 'ms'), ('*.ms',), ('text/x-minicript', 'application/x-miniscript')),
|
||||
'ModelicaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.modeling', 'Modelica', ('modelica',), ('*.mo',), ('text/x-modelica',)),
|
||||
'Modula2Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.modula2', 'Modula-2', ('modula2', 'm2'), ('*.def', '*.mod'), ('text/x-modula2',)),
|
||||
'MoinWikiLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'MoinMoin/Trac Wiki markup', ('trac-wiki', 'moin'), (), ('text/x-trac-wiki',)),
|
||||
'MonkeyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.basic', 'Monkey', ('monkey',), ('*.monkey',), ('text/x-monkey',)),
|
||||
'MonteLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.monte', 'Monte', ('monte',), ('*.mt',), ()),
|
||||
'MoonScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'MoonScript', ('moonscript', 'moon'), ('*.moon',), ('text/x-moonscript', 'application/x-moonscript')),
|
||||
'MoselLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.mosel', 'Mosel', ('mosel',), ('*.mos',), ()),
|
||||
'MozPreprocCssLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'CSS+mozpreproc', ('css+mozpreproc',), ('*.css.in',), ()),
|
||||
'MozPreprocHashLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'mozhashpreproc', ('mozhashpreproc',), (), ()),
|
||||
'MozPreprocJavascriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'Javascript+mozpreproc', ('javascript+mozpreproc',), ('*.js.in',), ()),
|
||||
'MozPreprocPercentLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'mozpercentpreproc', ('mozpercentpreproc',), (), ()),
|
||||
'MozPreprocXulLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'XUL+mozpreproc', ('xul+mozpreproc',), ('*.xul.in',), ()),
|
||||
'MqlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'MQL', ('mql', 'mq4', 'mq5', 'mql4', 'mql5'), ('*.mq4', '*.mq5', '*.mqh'), ('text/x-mql',)),
|
||||
'MscgenLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Mscgen', ('mscgen', 'msc'), ('*.msc',), ()),
|
||||
'MuPADLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.algebra', 'MuPAD', ('mupad',), ('*.mu',), ()),
|
||||
'MxmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.actionscript', 'MXML', ('mxml',), ('*.mxml',), ()),
|
||||
'MySqlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sql', 'MySQL', ('mysql',), (), ('text/x-mysql',)),
|
||||
'MyghtyCssLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'CSS+Myghty', ('css+myghty',), (), ('text/css+myghty',)),
|
||||
'MyghtyHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Myghty', ('html+myghty',), (), ('text/html+myghty',)),
|
||||
'MyghtyJavascriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'JavaScript+Myghty', ('javascript+myghty', 'js+myghty'), (), ('application/x-javascript+myghty', 'text/x-javascript+myghty', 'text/javascript+mygthy')),
|
||||
'MyghtyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Myghty', ('myghty',), ('*.myt', 'autodelegate'), ('application/x-myghty',)),
|
||||
'MyghtyXmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Myghty', ('xml+myghty',), (), ('application/xml+myghty',)),
|
||||
'NCLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ncl', 'NCL', ('ncl',), ('*.ncl',), ('text/ncl',)),
|
||||
'NSISLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.installers', 'NSIS', ('nsis', 'nsi', 'nsh'), ('*.nsi', '*.nsh'), ('text/x-nsis',)),
|
||||
'NasmLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'NASM', ('nasm',), ('*.asm', '*.ASM'), ('text/x-nasm',)),
|
||||
'NasmObjdumpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'objdump-nasm', ('objdump-nasm',), ('*.objdump-intel',), ('text/x-nasm-objdump',)),
|
||||
'NemerleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dotnet', 'Nemerle', ('nemerle',), ('*.n',), ('text/x-nemerle',)),
|
||||
'NesCLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'nesC', ('nesc',), ('*.nc',), ('text/x-nescsrc',)),
|
||||
'NestedTextLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'NestedText', ('nestedtext', 'nt'), ('*.nt',), ()),
|
||||
'NewLispLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'NewLisp', ('newlisp',), ('*.lsp', '*.nl', '*.kif'), ('text/x-newlisp', 'application/x-newlisp')),
|
||||
'NewspeakLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.smalltalk', 'Newspeak', ('newspeak',), ('*.ns2',), ('text/x-newspeak',)),
|
||||
'NginxConfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Nginx configuration file', ('nginx',), ('nginx.conf',), ('text/x-nginx-conf',)),
|
||||
'NimrodLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.nimrod', 'Nimrod', ('nimrod', 'nim'), ('*.nim', '*.nimrod'), ('text/x-nim',)),
|
||||
'NitLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.nit', 'Nit', ('nit',), ('*.nit',), ()),
|
||||
'NixLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.nix', 'Nix', ('nixos', 'nix'), ('*.nix',), ('text/x-nix',)),
|
||||
'NodeConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'Node.js REPL console session', ('nodejsrepl',), (), ('text/x-nodejsrepl',)),
|
||||
'NotmuchLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textfmts', 'Notmuch', ('notmuch',), (), ()),
|
||||
'NuSMVLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.smv', 'NuSMV', ('nusmv',), ('*.smv',), ()),
|
||||
'NumPyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.python', 'NumPy', ('numpy',), (), ()),
|
||||
'ObjdumpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'objdump', ('objdump',), ('*.objdump',), ('text/x-objdump',)),
|
||||
'ObjectiveCLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.objective', 'Objective-C', ('objective-c', 'objectivec', 'obj-c', 'objc'), ('*.m', '*.h'), ('text/x-objective-c',)),
|
||||
'ObjectiveCppLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.objective', 'Objective-C++', ('objective-c++', 'objectivec++', 'obj-c++', 'objc++'), ('*.mm', '*.hh'), ('text/x-objective-c++',)),
|
||||
'ObjectiveJLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'Objective-J', ('objective-j', 'objectivej', 'obj-j', 'objj'), ('*.j',), ('text/x-objective-j',)),
|
||||
'OcamlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ml', 'OCaml', ('ocaml',), ('*.ml', '*.mli', '*.mll', '*.mly'), ('text/x-ocaml',)),
|
||||
'OctaveLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.matlab', 'Octave', ('octave',), ('*.m',), ('text/octave',)),
|
||||
'OdinLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.archetype', 'ODIN', ('odin',), ('*.odin',), ('text/odin',)),
|
||||
'OmgIdlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'OMG Interface Definition Language', ('omg-idl',), ('*.idl', '*.pidl'), ()),
|
||||
'OocLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ooc', 'Ooc', ('ooc',), ('*.ooc',), ('text/x-ooc',)),
|
||||
'OpaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ml', 'Opa', ('opa',), ('*.opa',), ('text/x-opa',)),
|
||||
'OpenEdgeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.business', 'OpenEdge ABL', ('openedge', 'abl', 'progress'), ('*.p', '*.cls'), ('text/x-openedge', 'application/x-openedge')),
|
||||
'OutputLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.special', 'Text output', ('output',), (), ()),
|
||||
'PacmanConfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'PacmanConf', ('pacmanconf',), ('pacman.conf',), ()),
|
||||
'PanLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Pan', ('pan',), ('*.pan',), ()),
|
||||
'ParaSailLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parasail', 'ParaSail', ('parasail',), ('*.psi', '*.psl'), ('text/x-parasail',)),
|
||||
'PawnLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.pawn', 'Pawn', ('pawn',), ('*.p', '*.pwn', '*.inc'), ('text/x-pawn',)),
|
||||
'PegLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.grammar_notation', 'PEG', ('peg',), ('*.peg',), ('text/x-peg',)),
|
||||
'Perl6Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.perl', 'Perl6', ('perl6', 'pl6', 'raku'), ('*.pl', '*.pm', '*.nqp', '*.p6', '*.6pl', '*.p6l', '*.pl6', '*.6pm', '*.p6m', '*.pm6', '*.t', '*.raku', '*.rakumod', '*.rakutest', '*.rakudoc'), ('text/x-perl6', 'application/x-perl6')),
|
||||
'PerlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.perl', 'Perl', ('perl', 'pl'), ('*.pl', '*.pm', '*.t', '*.perl'), ('text/x-perl', 'application/x-perl')),
|
||||
'PhpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.php', 'PHP', ('php', 'php3', 'php4', 'php5'), ('*.php', '*.php[345]', '*.inc'), ('text/x-php',)),
|
||||
'PigLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Pig', ('pig',), ('*.pig',), ('text/x-pig',)),
|
||||
'PikeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'Pike', ('pike',), ('*.pike', '*.pmod'), ('text/x-pike',)),
|
||||
'PkgConfigLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'PkgConfig', ('pkgconfig',), ('*.pc',), ()),
|
||||
'PlPgsqlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sql', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)),
|
||||
'PointlessLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.pointless', 'Pointless', ('pointless',), ('*.ptls',), ()),
|
||||
'PonyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.pony', 'Pony', ('pony',), ('*.pony',), ()),
|
||||
'PostScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.graphics', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)),
|
||||
'PostgresConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)),
|
||||
'PostgresLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)),
|
||||
'PovrayLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.graphics', 'POVRay', ('pov',), ('*.pov', '*.inc'), ('text/x-povray',)),
|
||||
'PowerShellLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'PowerShell', ('powershell', 'pwsh', 'posh', 'ps1', 'psm1'), ('*.ps1', '*.psm1'), ('text/x-powershell',)),
|
||||
'PowerShellSessionLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'PowerShell Session', ('pwsh-session', 'ps1con'), (), ()),
|
||||
'PraatLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.praat', 'Praat', ('praat',), ('*.praat', '*.proc', '*.psc'), ()),
|
||||
'ProcfileLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.procfile', 'Procfile', ('procfile',), ('Procfile',), ()),
|
||||
'PrologLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.prolog', 'Prolog', ('prolog',), ('*.ecl', '*.prolog', '*.pro', '*.pl'), ('text/x-prolog',)),
|
||||
'PromQLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.promql', 'PromQL', ('promql',), ('*.promql',), ()),
|
||||
'PropertiesLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Properties', ('properties', 'jproperties'), ('*.properties',), ('text/x-java-properties',)),
|
||||
'ProtoBufLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Protocol Buffer', ('protobuf', 'proto'), ('*.proto',), ()),
|
||||
'PsyshConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.php', 'PsySH console session for PHP', ('psysh',), (), ()),
|
||||
'PugLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.html', 'Pug', ('pug', 'jade'), ('*.pug', '*.jade'), ('text/x-pug', 'text/x-jade')),
|
||||
'PuppetLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Puppet', ('puppet',), ('*.pp',), ()),
|
||||
'PyPyLogLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.console', 'PyPy Log', ('pypylog', 'pypy'), ('*.pypylog',), ('application/x-pypylog',)),
|
||||
'Python2Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.python', 'Python 2.x', ('python2', 'py2'), (), ('text/x-python2', 'application/x-python2')),
|
||||
'Python2TracebackLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.python', 'Python 2.x Traceback', ('py2tb',), ('*.py2tb',), ('text/x-python2-traceback',)),
|
||||
'PythonConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.python', 'Python console session', ('pycon',), (), ('text/x-python-doctest',)),
|
||||
'PythonLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.python', 'Python', ('python', 'py', 'sage', 'python3', 'py3'), ('*.py', '*.pyw', '*.jy', '*.sage', '*.sc', 'SConstruct', 'SConscript', '*.bzl', 'BUCK', 'BUILD', 'BUILD.bazel', 'WORKSPACE', '*.tac'), ('text/x-python', 'application/x-python', 'text/x-python3', 'application/x-python3')),
|
||||
'PythonTracebackLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.python', 'Python Traceback', ('pytb', 'py3tb'), ('*.pytb', '*.py3tb'), ('text/x-python-traceback', 'text/x-python3-traceback')),
|
||||
'QBasicLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.basic', 'QBasic', ('qbasic', 'basic'), ('*.BAS', '*.bas'), ('text/basic',)),
|
||||
'QVToLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.qvt', 'QVTO', ('qvto', 'qvt'), ('*.qvto',), ()),
|
||||
'QmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.webmisc', 'QML', ('qml', 'qbs'), ('*.qml', '*.qbs'), ('application/x-qml', 'application/x-qt.qbs+qml')),
|
||||
'RConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.r', 'RConsole', ('rconsole', 'rout'), ('*.Rout',), ()),
|
||||
'RNCCompactLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.rnc', 'Relax-NG Compact', ('rng-compact', 'rnc'), ('*.rnc',), ()),
|
||||
'RPMSpecLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.installers', 'RPMSpec', ('spec',), ('*.spec',), ('text/x-rpm-spec',)),
|
||||
'RacketLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'Racket', ('racket', 'rkt'), ('*.rkt', '*.rktd', '*.rktl'), ('text/x-racket', 'application/x-racket')),
|
||||
'RagelCLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Ragel in C Host', ('ragel-c',), ('*.rl',), ()),
|
||||
'RagelCppLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Ragel in CPP Host', ('ragel-cpp',), ('*.rl',), ()),
|
||||
'RagelDLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Ragel in D Host', ('ragel-d',), ('*.rl',), ()),
|
||||
'RagelEmbeddedLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Embedded Ragel', ('ragel-em',), ('*.rl',), ()),
|
||||
'RagelJavaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Ragel in Java Host', ('ragel-java',), ('*.rl',), ()),
|
||||
'RagelLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Ragel', ('ragel',), (), ()),
|
||||
'RagelObjectiveCLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Ragel in Objective C Host', ('ragel-objc',), ('*.rl',), ()),
|
||||
'RagelRubyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Ragel in Ruby Host', ('ragel-ruby', 'ragel-rb'), ('*.rl',), ()),
|
||||
'RawTokenLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.special', 'Raw token data', (), (), ('application/x-pygments-tokens',)),
|
||||
'RdLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.r', 'Rd', ('rd',), ('*.Rd',), ('text/x-r-doc',)),
|
||||
'ReasonLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ml', 'ReasonML', ('reasonml', 'reason'), ('*.re', '*.rei'), ('text/x-reasonml',)),
|
||||
'RebolLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.rebol', 'REBOL', ('rebol',), ('*.r', '*.r3', '*.reb'), ('text/x-rebol',)),
|
||||
'RedLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.rebol', 'Red', ('red', 'red/system'), ('*.red', '*.reds'), ('text/x-red', 'text/x-red-system')),
|
||||
'RedcodeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.esoteric', 'Redcode', ('redcode',), ('*.cw',), ()),
|
||||
'RegeditLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'reg', ('registry',), ('*.reg',), ('text/x-windows-registry',)),
|
||||
'ResourceLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.resource', 'ResourceBundle', ('resourcebundle', 'resource'), (), ()),
|
||||
'RexxLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scripting', 'Rexx', ('rexx', 'arexx'), ('*.rexx', '*.rex', '*.rx', '*.arexx'), ('text/x-rexx',)),
|
||||
'RhtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'RHTML', ('rhtml', 'html+erb', 'html+ruby'), ('*.rhtml',), ('text/html+ruby',)),
|
||||
'RideLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ride', 'Ride', ('ride',), ('*.ride',), ('text/x-ride',)),
|
||||
'RitaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.rita', 'Rita', ('rita',), ('*.rita',), ('text/rita',)),
|
||||
'RoboconfGraphLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.roboconf', 'Roboconf Graph', ('roboconf-graph',), ('*.graph',), ()),
|
||||
'RoboconfInstancesLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.roboconf', 'Roboconf Instances', ('roboconf-instances',), ('*.instances',), ()),
|
||||
'RobotFrameworkLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.robotframework', 'RobotFramework', ('robotframework',), ('*.robot',), ('text/x-robotframework',)),
|
||||
'RqlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sql', 'RQL', ('rql',), ('*.rql',), ('text/x-rql',)),
|
||||
'RslLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'RSL', ('rsl',), ('*.rsl',), ('text/rsl',)),
|
||||
'RstLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'reStructuredText', ('restructuredtext', 'rst', 'rest'), ('*.rst', '*.rest'), ('text/x-rst', 'text/prs.fallenstein.rst')),
|
||||
'RtsLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.trafficscript', 'TrafficScript', ('trafficscript', 'rts'), ('*.rts',), ()),
|
||||
'RubyConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ruby', 'Ruby irb session', ('rbcon', 'irb'), (), ('text/x-ruby-shellsession',)),
|
||||
'RubyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ruby', 'Ruby', ('ruby', 'rb', 'duby'), ('*.rb', '*.rbw', 'Rakefile', '*.rake', '*.gemspec', '*.rbx', '*.duby', 'Gemfile', 'Vagrantfile'), ('text/x-ruby', 'application/x-ruby')),
|
||||
'RustLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.rust', 'Rust', ('rust', 'rs'), ('*.rs', '*.rs.in'), ('text/rust', 'text/x-rust')),
|
||||
'SASLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sas', 'SAS', ('sas',), ('*.SAS', '*.sas'), ('text/x-sas', 'text/sas', 'application/x-sas')),
|
||||
'SLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.r', 'S', ('splus', 's', 'r'), ('*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron'), ('text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r', 'text/x-R', 'text/x-r-history', 'text/x-r-profile')),
|
||||
'SMLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.ml', 'Standard ML', ('sml',), ('*.sml', '*.sig', '*.fun'), ('text/x-standardml', 'application/x-standardml')),
|
||||
'SarlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'SARL', ('sarl',), ('*.sarl',), ('text/x-sarl',)),
|
||||
'SassLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.css', 'Sass', ('sass',), ('*.sass',), ('text/x-sass',)),
|
||||
'SaviLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.savi', 'Savi', ('savi',), ('*.savi',), ()),
|
||||
'ScalaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Scala', ('scala',), ('*.scala',), ('text/x-scala',)),
|
||||
'ScamlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.html', 'Scaml', ('scaml',), ('*.scaml',), ('text/x-scaml',)),
|
||||
'ScdocLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.scdoc', 'scdoc', ('scdoc', 'scd'), ('*.scd', '*.scdoc'), ()),
|
||||
'SchemeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'Scheme', ('scheme', 'scm'), ('*.scm', '*.ss'), ('text/x-scheme', 'application/x-scheme')),
|
||||
'ScilabLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.matlab', 'Scilab', ('scilab',), ('*.sci', '*.sce', '*.tst'), ('text/scilab',)),
|
||||
'ScssLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.css', 'SCSS', ('scss',), ('*.scss',), ('text/x-scss',)),
|
||||
'SedLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textedit', 'Sed', ('sed', 'gsed', 'ssed'), ('*.sed', '*.[gs]sed'), ('text/x-sed',)),
|
||||
'ShExCLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.rdf', 'ShExC', ('shexc', 'shex'), ('*.shex',), ('text/shex',)),
|
||||
'ShenLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'Shen', ('shen',), ('*.shen',), ('text/x-shen', 'application/x-shen')),
|
||||
'SieveLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sieve', 'Sieve', ('sieve',), ('*.siv', '*.sieve'), ()),
|
||||
'SilverLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.verification', 'Silver', ('silver',), ('*.sil', '*.vpr'), ()),
|
||||
'SingularityLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Singularity', ('singularity',), ('*.def', 'Singularity'), ()),
|
||||
'SlashLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.slash', 'Slash', ('slash',), ('*.sla',), ()),
|
||||
'SlimLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.webmisc', 'Slim', ('slim',), ('*.slim',), ('text/x-slim',)),
|
||||
'SlurmBashLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'Slurm', ('slurm', 'sbatch'), ('*.sl',), ()),
|
||||
'SmaliLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dalvik', 'Smali', ('smali',), ('*.smali',), ('text/smali',)),
|
||||
'SmalltalkLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.smalltalk', 'Smalltalk', ('smalltalk', 'squeak', 'st'), ('*.st',), ('text/x-smalltalk',)),
|
||||
'SmartGameFormatLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sgf', 'SmartGameFormat', ('sgf',), ('*.sgf',), ()),
|
||||
'SmartyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Smarty', ('smarty',), ('*.tpl',), ('application/x-smarty',)),
|
||||
'SmithyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.smithy', 'Smithy', ('smithy',), ('*.smithy',), ()),
|
||||
'SnobolLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.snobol', 'Snobol', ('snobol',), ('*.snobol',), ('text/x-snobol',)),
|
||||
'SnowballLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Snowball', ('snowball',), ('*.sbl',), ()),
|
||||
'SolidityLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.solidity', 'Solidity', ('solidity',), ('*.sol',), ()),
|
||||
'SophiaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sophia', 'Sophia', ('sophia',), ('*.aes',), ()),
|
||||
'SourcePawnLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.pawn', 'SourcePawn', ('sp',), ('*.sp',), ('text/x-sourcepawn',)),
|
||||
'SourcesListLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.installers', 'Debian Sourcelist', ('debsources', 'sourceslist', 'sources.list'), ('sources.list',), ()),
|
||||
'SparqlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.rdf', 'SPARQL', ('sparql',), ('*.rq', '*.sparql'), ('application/sparql-query',)),
|
||||
'SpiceLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.spice', 'Spice', ('spice', 'spicelang'), ('*.spice',), ('text/x-spice',)),
|
||||
'SqlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sql', 'SQL', ('sql',), ('*.sql',), ('text/x-sql',)),
|
||||
'SqliteConsoleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sql', 'sqlite3con', ('sqlite3',), ('*.sqlite3-console',), ('text/x-sqlite3-console',)),
|
||||
'SquidConfLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'SquidConf', ('squidconf', 'squid.conf', 'squid'), ('squid.conf',), ('text/x-squidconf',)),
|
||||
'SrcinfoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.srcinfo', 'Srcinfo', ('srcinfo',), ('.SRCINFO',), ()),
|
||||
'SspLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Scalate Server Page', ('ssp',), ('*.ssp',), ('application/x-ssp',)),
|
||||
'StanLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.modeling', 'Stan', ('stan',), ('*.stan',), ()),
|
||||
'StataLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.stata', 'Stata', ('stata', 'do'), ('*.do', '*.ado'), ('text/x-stata', 'text/stata', 'application/x-stata')),
|
||||
'SuperColliderLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.supercollider', 'SuperCollider', ('supercollider', 'sc'), ('*.sc', '*.scd'), ('application/supercollider', 'text/supercollider')),
|
||||
'SwiftLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.objective', 'Swift', ('swift',), ('*.swift',), ('text/x-swift',)),
|
||||
'SwigLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'SWIG', ('swig',), ('*.swg', '*.i'), ('text/swig',)),
|
||||
'SystemVerilogLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.hdl', 'systemverilog', ('systemverilog', 'sv'), ('*.sv', '*.svh'), ('text/x-systemverilog',)),
|
||||
'TAPLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.testing', 'TAP', ('tap',), ('*.tap',), ()),
|
||||
'TNTLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.tnt', 'Typographic Number Theory', ('tnt',), ('*.tnt',), ()),
|
||||
'TOMLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'TOML', ('toml',), ('*.toml', 'Pipfile', 'poetry.lock'), ()),
|
||||
'Tads3Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.int_fiction', 'TADS 3', ('tads3',), ('*.t',), ()),
|
||||
'TasmLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.asm', 'TASM', ('tasm',), ('*.asm', '*.ASM', '*.tasm'), ('text/x-tasm',)),
|
||||
'TclLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.tcl', 'Tcl', ('tcl',), ('*.tcl', '*.rvt'), ('text/x-tcl', 'text/x-script.tcl', 'application/x-tcl')),
|
||||
'TcshLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'Tcsh', ('tcsh', 'csh'), ('*.tcsh', '*.csh'), ('application/x-csh',)),
|
||||
'TcshSessionLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.shell', 'Tcsh Session', ('tcshcon',), (), ()),
|
||||
'TeaTemplateLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Tea', ('tea',), ('*.tea',), ('text/x-tea',)),
|
||||
'TealLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.teal', 'teal', ('teal',), ('*.teal',), ()),
|
||||
'TeraTermLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.teraterm', 'Tera Term macro', ('teratermmacro', 'teraterm', 'ttl'), ('*.ttl',), ('text/x-teratermmacro',)),
|
||||
'TermcapLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Termcap', ('termcap',), ('termcap', 'termcap.src'), ()),
|
||||
'TerminfoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Terminfo', ('terminfo',), ('terminfo', 'terminfo.src'), ()),
|
||||
'TerraformLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.configs', 'Terraform', ('terraform', 'tf'), ('*.tf',), ('application/x-tf', 'application/x-terraform')),
|
||||
'TexLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'TeX', ('tex', 'latex'), ('*.tex', '*.aux', '*.toc'), ('text/x-tex', 'text/x-latex')),
|
||||
'TextLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.special', 'Text only', ('text',), ('*.txt',), ('text/plain',)),
|
||||
'ThingsDBLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.thingsdb', 'ThingsDB', ('ti', 'thingsdb'), ('*.ti',), ()),
|
||||
'ThriftLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Thrift', ('thrift',), ('*.thrift',), ('application/x-thrift',)),
|
||||
'TiddlyWiki5Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.markup', 'tiddler', ('tid',), ('*.tid',), ('text/vnd.tiddlywiki',)),
|
||||
'TodotxtLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textfmts', 'Todotxt', ('todotxt',), ('todo.txt', '*.todotxt'), ('text/x-todo',)),
|
||||
'TransactSqlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.sql', 'Transact-SQL', ('tsql', 't-sql'), ('*.sql',), ('text/x-tsql',)),
|
||||
'TreetopLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.parsers', 'Treetop', ('treetop',), ('*.treetop', '*.tt'), ()),
|
||||
'TurtleLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.rdf', 'Turtle', ('turtle',), ('*.ttl',), ('text/turtle', 'application/x-turtle')),
|
||||
'TwigHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Twig', ('html+twig',), ('*.twig',), ('text/html+twig',)),
|
||||
'TwigLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Twig', ('twig',), (), ('application/x-twig',)),
|
||||
'TypeScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.javascript', 'TypeScript', ('typescript', 'ts'), ('*.ts',), ('application/x-typescript', 'text/x-typescript')),
|
||||
'TypoScriptCssDataLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.typoscript', 'TypoScriptCssData', ('typoscriptcssdata',), (), ()),
|
||||
'TypoScriptHtmlDataLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.typoscript', 'TypoScriptHtmlData', ('typoscripthtmldata',), (), ()),
|
||||
'TypoScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.typoscript', 'TypoScript', ('typoscript',), ('*.typoscript',), ('text/x-typoscript',)),
|
||||
'UcodeLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.unicon', 'ucode', ('ucode',), ('*.u', '*.u1', '*.u2'), ()),
|
||||
'UniconLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.unicon', 'Unicon', ('unicon',), ('*.icn',), ('text/unicon',)),
|
||||
'UrbiscriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.urbi', 'UrbiScript', ('urbiscript',), ('*.u',), ('application/x-urbiscript',)),
|
||||
'UsdLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.usd', 'USD', ('usd', 'usda'), ('*.usd', '*.usda'), ()),
|
||||
'VBScriptLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.basic', 'VBScript', ('vbscript',), ('*.vbs', '*.VBS'), ()),
|
||||
'VCLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.varnish', 'VCL', ('vcl',), ('*.vcl',), ('text/x-vclsrc',)),
|
||||
'VCLSnippetLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.varnish', 'VCLSnippets', ('vclsnippets', 'vclsnippet'), (), ('text/x-vclsnippet',)),
|
||||
'VCTreeStatusLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.console', 'VCTreeStatus', ('vctreestatus',), (), ()),
|
||||
'VGLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'VGL', ('vgl',), ('*.rpf',), ()),
|
||||
'ValaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.c_like', 'Vala', ('vala', 'vapi'), ('*.vala', '*.vapi'), ('text/x-vala',)),
|
||||
'VbNetAspxLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dotnet', 'aspx-vb', ('aspx-vb',), ('*.aspx', '*.asax', '*.ascx', '*.ashx', '*.asmx', '*.axd'), ()),
|
||||
'VbNetLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dotnet', 'VB.net', ('vb.net', 'vbnet'), ('*.vb', '*.bas'), ('text/x-vbnet', 'text/x-vba')),
|
||||
'VelocityHtmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'HTML+Velocity', ('html+velocity',), (), ('text/html+velocity',)),
|
||||
'VelocityLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'Velocity', ('velocity',), ('*.vm', '*.fhtml'), ()),
|
||||
'VelocityXmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Velocity', ('xml+velocity',), (), ('application/xml+velocity',)),
|
||||
'VerilogLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.hdl', 'verilog', ('verilog', 'v'), ('*.v',), ('text/x-verilog',)),
|
||||
'VhdlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.hdl', 'vhdl', ('vhdl',), ('*.vhdl', '*.vhd'), ('text/x-vhdl',)),
|
||||
'VimLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.textedit', 'VimL', ('vim',), ('*.vim', '.vimrc', '.exrc', '.gvimrc', '_vimrc', '_exrc', '_gvimrc', 'vimrc', 'gvimrc'), ('text/x-vim',)),
|
||||
'WDiffLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.diff', 'WDiff', ('wdiff',), ('*.wdiff',), ()),
|
||||
'WatLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.webassembly', 'WebAssembly', ('wast', 'wat'), ('*.wat', '*.wast'), ()),
|
||||
'WebIDLLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.webidl', 'Web IDL', ('webidl',), ('*.webidl',), ()),
|
||||
'WhileyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.whiley', 'Whiley', ('whiley',), ('*.whiley',), ('text/x-whiley',)),
|
||||
'X10Lexer': ('pipenv.patched.notpip._vendor.pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)),
|
||||
'XQueryLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.webmisc', 'XQuery', ('xquery', 'xqy', 'xq', 'xql', 'xqm'), ('*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'), ('text/xquery', 'application/xquery')),
|
||||
'XmlDjangoLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Django/Jinja', ('xml+django', 'xml+jinja'), (), ('application/xml+django', 'application/xml+jinja')),
|
||||
'XmlErbLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Ruby', ('xml+ruby', 'xml+erb'), (), ('application/xml+ruby',)),
|
||||
'XmlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.html', 'XML', ('xml',), ('*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd', '*.wsdl', '*.wsf'), ('text/xml', 'application/xml', 'image/svg+xml', 'application/rss+xml', 'application/atom+xml')),
|
||||
'XmlPhpLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+PHP', ('xml+php',), (), ('application/xml+php',)),
|
||||
'XmlSmartyLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'XML+Smarty', ('xml+smarty',), (), ('application/xml+smarty',)),
|
||||
'XorgLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.xorg', 'Xorg', ('xorg.conf',), ('xorg.conf',), ()),
|
||||
'XsltLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.html', 'XSLT', ('xslt',), ('*.xsl', '*.xslt', '*.xpl'), ('application/xsl+xml', 'application/xslt+xml')),
|
||||
'XtendLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.jvm', 'Xtend', ('xtend',), ('*.xtend',), ('text/x-xtend',)),
|
||||
'XtlangLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.lisp', 'xtlang', ('extempore',), ('*.xtm',), ()),
|
||||
'YamlJinjaLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.templates', 'YAML+Jinja', ('yaml+jinja', 'salt', 'sls'), ('*.sls',), ('text/x-yaml+jinja', 'text/x-sls')),
|
||||
'YamlLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.data', 'YAML', ('yaml',), ('*.yaml', '*.yml'), ('text/x-yaml',)),
|
||||
'YangLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.yang', 'YANG', ('yang',), ('*.yang',), ('application/yang',)),
|
||||
'ZeekLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.dsls', 'Zeek', ('zeek', 'bro'), ('*.zeek', '*.bro'), ()),
|
||||
'ZephirLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.php', 'Zephir', ('zephir',), ('*.zep',), ()),
|
||||
'ZigLexer': ('pipenv.patched.notpip._vendor.pygments.lexers.zig', 'Zig', ('zig',), ('*.zig',), ('text/zig',)),
|
||||
'apdlexer': ('pipenv.patched.notpip._vendor.pygments.lexers.apdlexer', 'ANSYS parametric design language', ('ansys', 'apdl'), ('*.ans',), ()),
|
||||
}
|
||||
|
||||
if __name__ == '__main__': # pragma: no cover
|
||||
import sys
|
||||
import os
|
||||
|
||||
# lookup lexers
|
||||
found_lexers = []
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
|
||||
for root, dirs, files in os.walk('.'):
|
||||
for filename in files:
|
||||
if filename.endswith('.py') and not filename.startswith('_'):
|
||||
module_name = 'pygments.lexers%s.%s' % (
|
||||
root[1:].replace('/', '.'), filename[:-3])
|
||||
print(module_name)
|
||||
module = __import__(module_name, None, None, [''])
|
||||
for lexer_name in module.__all__:
|
||||
lexer = getattr(module, lexer_name)
|
||||
found_lexers.append(
|
||||
'%r: %r' % (lexer_name,
|
||||
(module_name,
|
||||
lexer.name,
|
||||
tuple(lexer.aliases),
|
||||
tuple(lexer.filenames),
|
||||
tuple(lexer.mimetypes))))
|
||||
# sort them to make the diff minimal
|
||||
found_lexers.sort()
|
||||
|
||||
# extract useful sourcecode from this file
|
||||
with open(__file__) as fp:
|
||||
content = fp.read()
|
||||
# replace crnl to nl for Windows.
|
||||
#
|
||||
# Note that, originally, contributers should keep nl of master
|
||||
# repository, for example by using some kind of automatic
|
||||
# management EOL, like `EolExtension
|
||||
# <https://www.mercurial-scm.org/wiki/EolExtension>`.
|
||||
content = content.replace("\r\n", "\n")
|
||||
header = content[:content.find('LEXERS = {')]
|
||||
footer = content[content.find("if __name__ == '__main__':"):]
|
||||
|
||||
# write new file
|
||||
with open(__file__, 'w') as fp:
|
||||
fp.write(header)
|
||||
fp.write('LEXERS = {\n %s,\n}\n\n' % ',\n '.join(found_lexers))
|
||||
fp.write(footer)
|
||||
|
||||
print ('=== %d lexers processed.' % len(found_lexers))
|
||||
@@ -1,14 +0,0 @@
|
||||
# .-. .-. .-. . . .-. .-. .-. .-.
|
||||
# |( |- |.| | | |- `-. | `-.
|
||||
# ' ' `-' `-`.`-' `-' `-' ' `-'
|
||||
|
||||
__title__ = 'requests'
|
||||
__description__ = 'Python HTTP for Humans.'
|
||||
__url__ = 'https://requests.readthedocs.io'
|
||||
__version__ = '2.27.1'
|
||||
__build__ = 0x022701
|
||||
__author__ = 'Kenneth Reitz'
|
||||
__author_email__ = 'me@kennethreitz.org'
|
||||
__license__ = 'Apache 2.0'
|
||||
__copyright__ = 'Copyright 2022 Kenneth Reitz'
|
||||
__cake__ = u'\u2728 \U0001f370 \u2728'
|
||||
@@ -1,77 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
requests.compat
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
This module handles import compatibility issues between Python 2 and
|
||||
Python 3.
|
||||
"""
|
||||
|
||||
from pipenv.patched.notpip._vendor import chardet
|
||||
|
||||
import sys
|
||||
|
||||
# -------
|
||||
# Pythons
|
||||
# -------
|
||||
|
||||
# Syntax sugar.
|
||||
_ver = sys.version_info
|
||||
|
||||
#: Python 2.x?
|
||||
is_py2 = (_ver[0] == 2)
|
||||
|
||||
#: Python 3.x?
|
||||
is_py3 = (_ver[0] == 3)
|
||||
|
||||
# Note: We've patched out simplejson support in pip because it prevents
|
||||
# upgrading simplejson on Windows.
|
||||
# try:
|
||||
# import simplejson as json
|
||||
# except (ImportError, SyntaxError):
|
||||
# # simplejson does not support Python 3.2, it throws a SyntaxError
|
||||
# # because of u'...' Unicode literals.
|
||||
import json
|
||||
|
||||
# ---------
|
||||
# Specifics
|
||||
# ---------
|
||||
|
||||
if is_py2:
|
||||
from urllib import (
|
||||
quote, unquote, quote_plus, unquote_plus, urlencode, getproxies,
|
||||
proxy_bypass, proxy_bypass_environment, getproxies_environment)
|
||||
from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
|
||||
from urllib2 import parse_http_list
|
||||
import cookielib
|
||||
from Cookie import Morsel
|
||||
from StringIO import StringIO
|
||||
# Keep OrderedDict for backwards compatibility.
|
||||
from collections import Callable, Mapping, MutableMapping, OrderedDict
|
||||
|
||||
builtin_str = str
|
||||
bytes = str
|
||||
str = unicode
|
||||
basestring = basestring
|
||||
numeric_types = (int, long, float)
|
||||
integer_types = (int, long)
|
||||
JSONDecodeError = ValueError
|
||||
|
||||
elif is_py3:
|
||||
from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
|
||||
from urllib.request import parse_http_list, getproxies, proxy_bypass, proxy_bypass_environment, getproxies_environment
|
||||
from http import cookiejar as cookielib
|
||||
from http.cookies import Morsel
|
||||
from io import StringIO
|
||||
# Keep OrderedDict for backwards compatibility.
|
||||
from collections import OrderedDict
|
||||
from collections.abc import Callable, Mapping, MutableMapping
|
||||
from json import JSONDecodeError
|
||||
|
||||
builtin_str = str
|
||||
str = str
|
||||
bytes = bytes
|
||||
basestring = (str, bytes)
|
||||
numeric_types = (int, float)
|
||||
integer_types = (int,)
|
||||
@@ -1,132 +0,0 @@
|
||||
"""Module containing bug report helper(s)."""
|
||||
from __future__ import print_function
|
||||
|
||||
import json
|
||||
import platform
|
||||
import sys
|
||||
import ssl
|
||||
|
||||
from pipenv.patched.notpip._vendor import idna
|
||||
from pipenv.patched.notpip._vendor import urllib3
|
||||
|
||||
from . import __version__ as requests_version
|
||||
|
||||
charset_normalizer = None
|
||||
|
||||
try:
|
||||
from pipenv.patched.notpip._vendor import chardet
|
||||
except ImportError:
|
||||
chardet = None
|
||||
|
||||
try:
|
||||
from pipenv.patched.notpip._vendor.urllib3.contrib import pyopenssl
|
||||
except ImportError:
|
||||
pyopenssl = None
|
||||
OpenSSL = None
|
||||
cryptography = None
|
||||
else:
|
||||
import OpenSSL
|
||||
import cryptography
|
||||
|
||||
|
||||
def _implementation():
|
||||
"""Return a dict with the Python implementation and version.
|
||||
|
||||
Provide both the name and the version of the Python implementation
|
||||
currently running. For example, on CPython 2.7.5 it will return
|
||||
{'name': 'CPython', 'version': '2.7.5'}.
|
||||
|
||||
This function works best on CPython and PyPy: in particular, it probably
|
||||
doesn't work for Jython or IronPython. Future investigation should be done
|
||||
to work out the correct shape of the code for those platforms.
|
||||
"""
|
||||
implementation = platform.python_implementation()
|
||||
|
||||
if implementation == 'CPython':
|
||||
implementation_version = platform.python_version()
|
||||
elif implementation == 'PyPy':
|
||||
implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major,
|
||||
sys.pypy_version_info.minor,
|
||||
sys.pypy_version_info.micro)
|
||||
if sys.pypy_version_info.releaselevel != 'final':
|
||||
implementation_version = ''.join([
|
||||
implementation_version, sys.pypy_version_info.releaselevel
|
||||
])
|
||||
elif implementation == 'Jython':
|
||||
implementation_version = platform.python_version() # Complete Guess
|
||||
elif implementation == 'IronPython':
|
||||
implementation_version = platform.python_version() # Complete Guess
|
||||
else:
|
||||
implementation_version = 'Unknown'
|
||||
|
||||
return {'name': implementation, 'version': implementation_version}
|
||||
|
||||
|
||||
def info():
|
||||
"""Generate information for a bug report."""
|
||||
try:
|
||||
platform_info = {
|
||||
'system': platform.system(),
|
||||
'release': platform.release(),
|
||||
}
|
||||
except IOError:
|
||||
platform_info = {
|
||||
'system': 'Unknown',
|
||||
'release': 'Unknown',
|
||||
}
|
||||
|
||||
implementation_info = _implementation()
|
||||
urllib3_info = {'version': urllib3.__version__}
|
||||
charset_normalizer_info = {'version': None}
|
||||
chardet_info = {'version': None}
|
||||
if charset_normalizer:
|
||||
charset_normalizer_info = {'version': charset_normalizer.__version__}
|
||||
if chardet:
|
||||
chardet_info = {'version': chardet.__version__}
|
||||
|
||||
pyopenssl_info = {
|
||||
'version': None,
|
||||
'openssl_version': '',
|
||||
}
|
||||
if OpenSSL:
|
||||
pyopenssl_info = {
|
||||
'version': OpenSSL.__version__,
|
||||
'openssl_version': '%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER,
|
||||
}
|
||||
cryptography_info = {
|
||||
'version': getattr(cryptography, '__version__', ''),
|
||||
}
|
||||
idna_info = {
|
||||
'version': getattr(idna, '__version__', ''),
|
||||
}
|
||||
|
||||
system_ssl = ssl.OPENSSL_VERSION_NUMBER
|
||||
system_ssl_info = {
|
||||
'version': '%x' % system_ssl if system_ssl is not None else ''
|
||||
}
|
||||
|
||||
return {
|
||||
'platform': platform_info,
|
||||
'implementation': implementation_info,
|
||||
'system_ssl': system_ssl_info,
|
||||
'using_pyopenssl': pyopenssl is not None,
|
||||
'using_charset_normalizer': chardet is None,
|
||||
'pyOpenSSL': pyopenssl_info,
|
||||
'urllib3': urllib3_info,
|
||||
'chardet': chardet_info,
|
||||
'charset_normalizer': charset_normalizer_info,
|
||||
'cryptography': cryptography_info,
|
||||
'idna': idna_info,
|
||||
'requests': {
|
||||
'version': requests_version,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
"""Pretty-print the bug information as JSON."""
|
||||
print(json.dumps(info(), sort_keys=True, indent=2))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,123 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
r"""
|
||||
The ``codes`` object defines a mapping from common names for HTTP statuses
|
||||
to their numerical codes, accessible either as attributes or as dictionary
|
||||
items.
|
||||
|
||||
Example::
|
||||
|
||||
>>> import requests
|
||||
>>> requests.codes['temporary_redirect']
|
||||
307
|
||||
>>> requests.codes.teapot
|
||||
418
|
||||
>>> requests.codes['\o/']
|
||||
200
|
||||
|
||||
Some codes have multiple names, and both upper- and lower-case versions of
|
||||
the names are allowed. For example, ``codes.ok``, ``codes.OK``, and
|
||||
``codes.okay`` all correspond to the HTTP status code 200.
|
||||
"""
|
||||
|
||||
from .structures import LookupDict
|
||||
|
||||
_codes = {
|
||||
|
||||
# Informational.
|
||||
100: ('continue',),
|
||||
101: ('switching_protocols',),
|
||||
102: ('processing',),
|
||||
103: ('checkpoint',),
|
||||
122: ('uri_too_long', 'request_uri_too_long'),
|
||||
200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
|
||||
201: ('created',),
|
||||
202: ('accepted',),
|
||||
203: ('non_authoritative_info', 'non_authoritative_information'),
|
||||
204: ('no_content',),
|
||||
205: ('reset_content', 'reset'),
|
||||
206: ('partial_content', 'partial'),
|
||||
207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
|
||||
208: ('already_reported',),
|
||||
226: ('im_used',),
|
||||
|
||||
# Redirection.
|
||||
300: ('multiple_choices',),
|
||||
301: ('moved_permanently', 'moved', '\\o-'),
|
||||
302: ('found',),
|
||||
303: ('see_other', 'other'),
|
||||
304: ('not_modified',),
|
||||
305: ('use_proxy',),
|
||||
306: ('switch_proxy',),
|
||||
307: ('temporary_redirect', 'temporary_moved', 'temporary'),
|
||||
308: ('permanent_redirect',
|
||||
'resume_incomplete', 'resume',), # These 2 to be removed in 3.0
|
||||
|
||||
# Client Error.
|
||||
400: ('bad_request', 'bad'),
|
||||
401: ('unauthorized',),
|
||||
402: ('payment_required', 'payment'),
|
||||
403: ('forbidden',),
|
||||
404: ('not_found', '-o-'),
|
||||
405: ('method_not_allowed', 'not_allowed'),
|
||||
406: ('not_acceptable',),
|
||||
407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
|
||||
408: ('request_timeout', 'timeout'),
|
||||
409: ('conflict',),
|
||||
410: ('gone',),
|
||||
411: ('length_required',),
|
||||
412: ('precondition_failed', 'precondition'),
|
||||
413: ('request_entity_too_large',),
|
||||
414: ('request_uri_too_large',),
|
||||
415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
|
||||
416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
|
||||
417: ('expectation_failed',),
|
||||
418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
|
||||
421: ('misdirected_request',),
|
||||
422: ('unprocessable_entity', 'unprocessable'),
|
||||
423: ('locked',),
|
||||
424: ('failed_dependency', 'dependency'),
|
||||
425: ('unordered_collection', 'unordered'),
|
||||
426: ('upgrade_required', 'upgrade'),
|
||||
428: ('precondition_required', 'precondition'),
|
||||
429: ('too_many_requests', 'too_many'),
|
||||
431: ('header_fields_too_large', 'fields_too_large'),
|
||||
444: ('no_response', 'none'),
|
||||
449: ('retry_with', 'retry'),
|
||||
450: ('blocked_by_windows_parental_controls', 'parental_controls'),
|
||||
451: ('unavailable_for_legal_reasons', 'legal_reasons'),
|
||||
499: ('client_closed_request',),
|
||||
|
||||
# Server Error.
|
||||
500: ('internal_server_error', 'server_error', '/o\\', '✗'),
|
||||
501: ('not_implemented',),
|
||||
502: ('bad_gateway',),
|
||||
503: ('service_unavailable', 'unavailable'),
|
||||
504: ('gateway_timeout',),
|
||||
505: ('http_version_not_supported', 'http_version'),
|
||||
506: ('variant_also_negotiates',),
|
||||
507: ('insufficient_storage',),
|
||||
509: ('bandwidth_limit_exceeded', 'bandwidth'),
|
||||
510: ('not_extended',),
|
||||
511: ('network_authentication_required', 'network_auth', 'network_authentication'),
|
||||
}
|
||||
|
||||
codes = LookupDict(name='status_codes')
|
||||
|
||||
def _init():
|
||||
for code, titles in _codes.items():
|
||||
for title in titles:
|
||||
setattr(codes, title, code)
|
||||
if not title.startswith(('\\', '/')):
|
||||
setattr(codes, title.upper(), code)
|
||||
|
||||
def doc(code):
|
||||
names = ', '.join('``%s``' % n for n in _codes[code])
|
||||
return '* %d: %s' % (code, names)
|
||||
|
||||
global __doc__
|
||||
__doc__ = (__doc__ + '\n' +
|
||||
'\n'.join(doc(code) for code in sorted(_codes))
|
||||
if __doc__ is not None else None)
|
||||
|
||||
_init()
|
||||
@@ -1,38 +0,0 @@
|
||||
from typing import Dict, Generic, TypeVar, TYPE_CHECKING
|
||||
import sys
|
||||
|
||||
CacheKey = TypeVar("CacheKey")
|
||||
CacheValue = TypeVar("CacheValue")
|
||||
|
||||
if sys.version_info < (3, 9):
|
||||
from pipenv.patched.notpip._vendor.typing_extensions import OrderedDict
|
||||
else:
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
class LRUCache(OrderedDict[CacheKey, CacheValue]):
|
||||
"""
|
||||
A dictionary-like container that stores a given maximum items.
|
||||
|
||||
If an additional item is added when the LRUCache is full, the least
|
||||
recently used key is discarded to make room for the new item.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, cache_size: int) -> None:
|
||||
self.cache_size = cache_size
|
||||
super().__init__()
|
||||
|
||||
def __setitem__(self, key: CacheKey, value: CacheValue) -> None:
|
||||
"""Store a new views, potentially discarding an old value."""
|
||||
if key not in self:
|
||||
if len(self) >= self.cache_size:
|
||||
self.popitem(last=False)
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def __getitem__(self, key: CacheKey) -> CacheValue:
|
||||
"""Gets the item, but also makes it most recent."""
|
||||
value: CacheValue = super().__getitem__(key)
|
||||
super().__delitem__(key)
|
||||
super().__setitem__(key, value)
|
||||
return value
|
||||
@@ -1,3 +1,3 @@
|
||||
pip==22.1.2
|
||||
pip==22.2.1
|
||||
pipfile==0.0.2
|
||||
safety==1.10.3
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import List, Optional
|
||||
|
||||
__version__ = "22.1.2"
|
||||
__version__ = "22.2.1"
|
||||
|
||||
|
||||
def main(args: Optional[List[str]] = None) -> int:
|
||||
@@ -8,6 +8,6 @@ def main(args: Optional[List[str]] = None) -> int:
|
||||
|
||||
For additional details, see https://github.com/pypa/pip/issues/7498.
|
||||
"""
|
||||
from pipenv.patched.notpip._internal.utils.entrypoints import _wrapper
|
||||
from pipenv.patched.pip._internal.utils.entrypoints import _wrapper
|
||||
|
||||
return _wrapper(args)
|
||||
@@ -27,6 +27,6 @@ if __name__ == "__main__":
|
||||
"ignore", category=DeprecationWarning, module=".*packaging\\.version"
|
||||
)
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
|
||||
from pipenv.patched.notpip._internal.cli.main import main as _main
|
||||
from pipenv.patched.pip._internal.cli.main import main as _main
|
||||
|
||||
sys.exit(_main())
|
||||
@@ -0,0 +1,37 @@
|
||||
"""Execute exactly this copy of pip, within a different environment.
|
||||
|
||||
This file is named as it is, to ensure that this module can't be imported via
|
||||
an import statement.
|
||||
"""
|
||||
|
||||
import runpy
|
||||
import sys
|
||||
import types
|
||||
from importlib.machinery import ModuleSpec, PathFinder
|
||||
from os.path import dirname
|
||||
from typing import Optional, Sequence, Union
|
||||
|
||||
PIP_SOURCES_ROOT = dirname(dirname(__file__))
|
||||
|
||||
|
||||
class PipImportRedirectingFinder:
|
||||
@classmethod
|
||||
def find_spec(
|
||||
self,
|
||||
fullname: str,
|
||||
path: Optional[Sequence[Union[bytes, str]]] = None,
|
||||
target: Optional[types.ModuleType] = None,
|
||||
) -> Optional[ModuleSpec]:
|
||||
if fullname != "pip":
|
||||
return None
|
||||
|
||||
spec = PathFinder.find_spec(fullname, [PIP_SOURCES_ROOT], target)
|
||||
assert spec, (PIP_SOURCES_ROOT, fullname)
|
||||
return spec
|
||||
|
||||
|
||||
# TODO https://github.com/pypa/pip/issues/11294
|
||||
sys.meta_path.insert(0, PipImportRedirectingFinder())
|
||||
|
||||
assert __name__ == "__main__", "Cannot run __pip-runner__.py as a non-main module"
|
||||
runpy.run_module("pip", run_name="__main__", alter_sys=True)
|
||||
+3
-3
@@ -1,7 +1,7 @@
|
||||
from typing import List, Optional
|
||||
|
||||
import pipenv.patched.notpip._internal.utils.inject_securetransport # noqa
|
||||
from pipenv.patched.notpip._internal.utils import _log
|
||||
import pipenv.patched.pip._internal.utils.inject_securetransport # noqa
|
||||
from pipenv.patched.pip._internal.utils import _log
|
||||
|
||||
# init_logging() must be called before any call to logging.getLogger()
|
||||
# which happens at import of most modules.
|
||||
@@ -14,6 +14,6 @@ def main(args: (Optional[List[str]]) = None) -> int:
|
||||
|
||||
For additional details, see https://github.com/pypa/pip/issues/7498.
|
||||
"""
|
||||
from pipenv.patched.notpip._internal.utils.entrypoints import _wrapper
|
||||
from pipenv.patched.pip._internal.utils.entrypoints import _wrapper
|
||||
|
||||
return _wrapper(args)
|
||||
+27
-42
@@ -1,31 +1,29 @@
|
||||
"""Build Environment used for isolation during sdist building
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
import textwrap
|
||||
import zipfile
|
||||
from collections import OrderedDict
|
||||
from sysconfig import get_paths
|
||||
from types import TracebackType
|
||||
from typing import TYPE_CHECKING, Generator, Iterable, List, Optional, Set, Tuple, Type
|
||||
from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type
|
||||
|
||||
from pipenv.patched.notpip._vendor.certifi import where
|
||||
from pipenv.patched.notpip._vendor.packaging.requirements import Requirement
|
||||
from pipenv.patched.notpip._vendor.packaging.version import Version
|
||||
from pipenv.patched.pip._vendor.certifi import where
|
||||
from pipenv.patched.pip._vendor.packaging.requirements import Requirement
|
||||
from pipenv.patched.pip._vendor.packaging.version import Version
|
||||
|
||||
from pip import __file__ as pip_location
|
||||
from pipenv.patched.notpip._internal.cli.spinners import open_spinner
|
||||
from pipenv.patched.notpip._internal.locations import get_platlib, get_prefixed_libs, get_purelib
|
||||
from pipenv.patched.notpip._internal.metadata import get_default_environment, get_environment
|
||||
from pipenv.patched.notpip._internal.utils.subprocess import call_subprocess
|
||||
from pipenv.patched.notpip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
|
||||
from pipenv.patched.pip import __file__ as pip_location
|
||||
from pipenv.patched.pip._internal.cli.spinners import open_spinner
|
||||
from pipenv.patched.pip._internal.locations import get_platlib, get_prefixed_libs, get_purelib
|
||||
from pipenv.patched.pip._internal.metadata import get_default_environment, get_environment
|
||||
from pipenv.patched.pip._internal.utils.subprocess import call_subprocess
|
||||
from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipenv.patched.notpip._internal.index.package_finder import PackageFinder
|
||||
from pipenv.patched.pip._internal.index.package_finder import PackageFinder
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -41,30 +39,20 @@ class _Prefix:
|
||||
self.lib_dirs = get_prefixed_libs(path)
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _create_standalone_pip() -> Generator[str, None, None]:
|
||||
"""Create a "standalone pip" zip file.
|
||||
def _get_runnable_pip() -> str:
|
||||
"""Get a file to pass to a Python executable, to run the currently-running pip.
|
||||
|
||||
The zip file's content is identical to the currently-running pip.
|
||||
It will be used to install requirements into the build environment.
|
||||
This is used to run a pip subprocess, for installing requirements into the build
|
||||
environment.
|
||||
"""
|
||||
source = pathlib.Path(pip_location).resolve().parent
|
||||
|
||||
# Return the current instance if `source` is not a directory. We can't build
|
||||
# a zip from this, and it likely means the instance is already standalone.
|
||||
if not source.is_dir():
|
||||
yield str(source)
|
||||
return
|
||||
# This would happen if someone is using pip from inside a zip file. In that
|
||||
# case, we can use that directly.
|
||||
return str(source)
|
||||
|
||||
with TempDirectory(kind="standalone-pip") as tmp_dir:
|
||||
pip_zip = os.path.join(tmp_dir.path, "__env_pip__.zip")
|
||||
kwargs = {}
|
||||
if sys.version_info >= (3, 8):
|
||||
kwargs["strict_timestamps"] = False
|
||||
with zipfile.ZipFile(pip_zip, "w", **kwargs) as zf:
|
||||
for child in source.rglob("*"):
|
||||
zf.write(child, child.relative_to(source.parent).as_posix())
|
||||
yield os.path.join(pip_zip, "pip")
|
||||
return os.fsdecode(source / "__pip-runner__.py")
|
||||
|
||||
|
||||
class BuildEnvironment:
|
||||
@@ -205,15 +193,13 @@ class BuildEnvironment:
|
||||
prefix.setup = True
|
||||
if not requirements:
|
||||
return
|
||||
with contextlib.ExitStack() as ctx:
|
||||
pip_runnable = ctx.enter_context(_create_standalone_pip())
|
||||
self._install_requirements(
|
||||
pip_runnable,
|
||||
finder,
|
||||
requirements,
|
||||
prefix,
|
||||
kind=kind,
|
||||
)
|
||||
self._install_requirements(
|
||||
_get_runnable_pip(),
|
||||
finder,
|
||||
requirements,
|
||||
prefix,
|
||||
kind=kind,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _install_requirements(
|
||||
@@ -224,9 +210,8 @@ class BuildEnvironment:
|
||||
*,
|
||||
kind: str,
|
||||
) -> None:
|
||||
sys_executable = os.environ.get('PIP_PYTHON_PATH', sys.executable)
|
||||
args: List[str] = [
|
||||
sys_executable,
|
||||
sys.executable,
|
||||
pip_runnable,
|
||||
"install",
|
||||
"--ignore-installed",
|
||||
@@ -5,20 +5,24 @@ import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
from pipenv.patched.notpip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
|
||||
from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.patched.pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
|
||||
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from pipenv.patched.notpip._internal.exceptions import InvalidWheelFilename
|
||||
from pipenv.patched.notpip._internal.models.format_control import FormatControl
|
||||
from pipenv.patched.notpip._internal.models.link import Link
|
||||
from pipenv.patched.notpip._internal.models.wheel import Wheel
|
||||
from pipenv.patched.notpip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
|
||||
from pipenv.patched.notpip._internal.utils.urls import path_to_url
|
||||
from pipenv.patched.pip._internal.exceptions import InvalidWheelFilename
|
||||
from pipenv.patched.pip._internal.models.direct_url import DirectUrl
|
||||
from pipenv.patched.pip._internal.models.format_control import FormatControl
|
||||
from pipenv.patched.pip._internal.models.link import Link
|
||||
from pipenv.patched.pip._internal.models.wheel import Wheel
|
||||
from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
|
||||
from pipenv.patched.pip._internal.utils.urls import path_to_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ORIGIN_JSON_NAME = "origin.json"
|
||||
|
||||
|
||||
def _hash_dict(d: Dict[str, str]) -> str:
|
||||
"""Return a stable sha224 of a dictionary."""
|
||||
@@ -204,6 +208,10 @@ class CacheEntry:
|
||||
):
|
||||
self.link = link
|
||||
self.persistent = persistent
|
||||
self.origin: Optional[DirectUrl] = None
|
||||
origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME
|
||||
if origin_direct_url_path.exists():
|
||||
self.origin = DirectUrl.from_json(origin_direct_url_path.read_text())
|
||||
|
||||
|
||||
class WheelCache(Cache):
|
||||
@@ -262,3 +270,20 @@ class WheelCache(Cache):
|
||||
return CacheEntry(retval, persistent=False)
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None:
|
||||
origin_path = Path(cache_dir) / ORIGIN_JSON_NAME
|
||||
if origin_path.is_file():
|
||||
origin = DirectUrl.from_json(origin_path.read_text())
|
||||
# TODO: use DirectUrl.equivalent when https://github.com/pypa/pip/pull/10564
|
||||
# is merged.
|
||||
if origin.url != download_info.url:
|
||||
logger.warning(
|
||||
"Origin URL %s in cache entry %s does not match download URL %s. "
|
||||
"This is likely a pip bug or a cache corruption issue.",
|
||||
origin.url,
|
||||
cache_dir,
|
||||
download_info.url,
|
||||
)
|
||||
origin_path.write_text(download_info.to_json(), encoding="utf-8")
|
||||
+3
-3
@@ -7,9 +7,9 @@ import sys
|
||||
from itertools import chain
|
||||
from typing import Any, Iterable, List, Optional
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.main_parser import create_main_parser
|
||||
from pipenv.patched.notpip._internal.commands import commands_dict, create_command
|
||||
from pipenv.patched.notpip._internal.metadata import get_default_environment
|
||||
from pipenv.patched.pip._internal.cli.main_parser import create_main_parser
|
||||
from pipenv.patched.pip._internal.commands import commands_dict, create_command
|
||||
from pipenv.patched.pip._internal.metadata import get_default_environment
|
||||
|
||||
|
||||
def autocomplete() -> None:
|
||||
+12
-12
@@ -10,18 +10,18 @@ import traceback
|
||||
from optparse import Values
|
||||
from typing import Any, Callable, List, Optional, Tuple
|
||||
|
||||
from pipenv.patched.notpip._vendor.rich import traceback as rich_traceback
|
||||
from pipenv.patched.pip._vendor.rich import traceback as rich_traceback
|
||||
|
||||
from pipenv.patched.notpip._internal.cli import cmdoptions
|
||||
from pipenv.patched.notpip._internal.cli.command_context import CommandContextMixIn
|
||||
from pipenv.patched.notpip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import (
|
||||
from pipenv.patched.pip._internal.cli import cmdoptions
|
||||
from pipenv.patched.pip._internal.cli.command_context import CommandContextMixIn
|
||||
from pipenv.patched.pip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
|
||||
from pipenv.patched.pip._internal.cli.status_codes import (
|
||||
ERROR,
|
||||
PREVIOUS_BUILD_DIR_ERROR,
|
||||
UNKNOWN_ERROR,
|
||||
VIRTUALENV_NOT_FOUND,
|
||||
)
|
||||
from pipenv.patched.notpip._internal.exceptions import (
|
||||
from pipenv.patched.pip._internal.exceptions import (
|
||||
BadCommand,
|
||||
CommandError,
|
||||
DiagnosticPipError,
|
||||
@@ -30,12 +30,12 @@ from pipenv.patched.notpip._internal.exceptions import (
|
||||
PreviousBuildDirError,
|
||||
UninstallationError,
|
||||
)
|
||||
from pipenv.patched.notpip._internal.utils.filesystem import check_path_owner
|
||||
from pipenv.patched.notpip._internal.utils.logging import BrokenStdoutLoggingError, setup_logging
|
||||
from pipenv.patched.notpip._internal.utils.misc import get_prog, normalize_path
|
||||
from pipenv.patched.notpip._internal.utils.temp_dir import TempDirectoryTypeRegistry as TempDirRegistry
|
||||
from pipenv.patched.notpip._internal.utils.temp_dir import global_tempdir_manager, tempdir_registry
|
||||
from pipenv.patched.notpip._internal.utils.virtualenv import running_under_virtualenv
|
||||
from pipenv.patched.pip._internal.utils.filesystem import check_path_owner
|
||||
from pipenv.patched.pip._internal.utils.logging import BrokenStdoutLoggingError, setup_logging
|
||||
from pipenv.patched.pip._internal.utils.misc import get_prog, normalize_path
|
||||
from pipenv.patched.pip._internal.utils.temp_dir import TempDirectoryTypeRegistry as TempDirRegistry
|
||||
from pipenv.patched.pip._internal.utils.temp_dir import global_tempdir_manager, tempdir_registry
|
||||
from pipenv.patched.pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
|
||||
__all__ = ["Command"]
|
||||
|
||||
+10
-12
@@ -19,16 +19,16 @@ from optparse import SUPPRESS_HELP, Option, OptionGroup, OptionParser, Values
|
||||
from textwrap import dedent
|
||||
from typing import Any, Callable, Dict, Optional, Tuple
|
||||
|
||||
from pipenv.patched.notpip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.parser import ConfigOptionParser
|
||||
from pipenv.patched.notpip._internal.exceptions import CommandError
|
||||
from pipenv.patched.notpip._internal.locations import USER_CACHE_DIR, get_src_prefix
|
||||
from pipenv.patched.notpip._internal.models.format_control import FormatControl
|
||||
from pipenv.patched.notpip._internal.models.index import PyPI
|
||||
from pipenv.patched.notpip._internal.models.target_python import TargetPython
|
||||
from pipenv.patched.notpip._internal.utils.hashes import STRONG_HASHES
|
||||
from pipenv.patched.notpip._internal.utils.misc import strtobool
|
||||
from pipenv.patched.pip._internal.cli.parser import ConfigOptionParser
|
||||
from pipenv.patched.pip._internal.exceptions import CommandError
|
||||
from pipenv.patched.pip._internal.locations import USER_CACHE_DIR, get_src_prefix
|
||||
from pipenv.patched.pip._internal.models.format_control import FormatControl
|
||||
from pipenv.patched.pip._internal.models.index import PyPI
|
||||
from pipenv.patched.pip._internal.models.target_python import TargetPython
|
||||
from pipenv.patched.pip._internal.utils.hashes import STRONG_HASHES
|
||||
from pipenv.patched.pip._internal.utils.misc import strtobool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1000,7 +1000,7 @@ use_new_feature: Callable[..., Option] = partial(
|
||||
metavar="feature",
|
||||
action="append",
|
||||
default=[],
|
||||
choices=["2020-resolver", "fast-deps"],
|
||||
choices=["2020-resolver", "fast-deps", "truststore"],
|
||||
help="Enable new functionality, that may be backward incompatible.",
|
||||
)
|
||||
|
||||
@@ -1013,8 +1013,6 @@ use_deprecated_feature: Callable[..., Option] = partial(
|
||||
default=[],
|
||||
choices=[
|
||||
"legacy-resolver",
|
||||
"backtrack-on-build-failures",
|
||||
"html5lib",
|
||||
],
|
||||
help=("Enable deprecated functionality, that will be removed in the future."),
|
||||
)
|
||||
+6
-6
@@ -6,11 +6,11 @@ import os
|
||||
import sys
|
||||
from typing import List, Optional
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.autocompletion import autocomplete
|
||||
from pipenv.patched.notpip._internal.cli.main_parser import parse_command
|
||||
from pipenv.patched.notpip._internal.commands import create_command
|
||||
from pipenv.patched.notpip._internal.exceptions import PipError
|
||||
from pipenv.patched.notpip._internal.utils import deprecation
|
||||
from pipenv.patched.pip._internal.cli.autocompletion import autocomplete
|
||||
from pipenv.patched.pip._internal.cli.main_parser import parse_command
|
||||
from pipenv.patched.pip._internal.commands import create_command
|
||||
from pipenv.patched.pip._internal.exceptions import PipError
|
||||
from pipenv.patched.pip._internal.utils import deprecation
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -59,7 +59,7 @@ def main(args: Optional[List[str]] = None) -> int:
|
||||
sys.exit(1)
|
||||
|
||||
# Needed for locale.getpreferredencoding(False) to work
|
||||
# in pipenv.patched.notpip._internal.utils.encoding.auto_decode
|
||||
# in pipenv.patched.pip._internal.utils.encoding.auto_decode
|
||||
try:
|
||||
locale.setlocale(locale.LC_ALL, "")
|
||||
except locale.Error as e:
|
||||
+5
-5
@@ -5,11 +5,11 @@ import os
|
||||
import sys
|
||||
from typing import List, Tuple
|
||||
|
||||
from pipenv.patched.notpip._internal.cli import cmdoptions
|
||||
from pipenv.patched.notpip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
|
||||
from pipenv.patched.notpip._internal.commands import commands_dict, get_similar_commands
|
||||
from pipenv.patched.notpip._internal.exceptions import CommandError
|
||||
from pipenv.patched.notpip._internal.utils.misc import get_pip_version, get_prog
|
||||
from pipenv.patched.pip._internal.cli import cmdoptions
|
||||
from pipenv.patched.pip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter
|
||||
from pipenv.patched.pip._internal.commands import commands_dict, get_similar_commands
|
||||
from pipenv.patched.pip._internal.exceptions import CommandError
|
||||
from pipenv.patched.pip._internal.utils.misc import get_pip_version, get_prog
|
||||
|
||||
__all__ = ["create_main_parser", "parse_command"]
|
||||
|
||||
+3
-3
@@ -8,9 +8,9 @@ import textwrap
|
||||
from contextlib import suppress
|
||||
from typing import Any, Dict, Generator, List, Tuple
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import UNKNOWN_ERROR
|
||||
from pipenv.patched.notpip._internal.configuration import Configuration, ConfigurationError
|
||||
from pipenv.patched.notpip._internal.utils.misc import redact_auth_from_url, strtobool
|
||||
from pipenv.patched.pip._internal.cli.status_codes import UNKNOWN_ERROR
|
||||
from pipenv.patched.pip._internal.configuration import Configuration, ConfigurationError
|
||||
from pipenv.patched.pip._internal.utils.misc import redact_auth_from_url, strtobool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
+2
-2
@@ -1,7 +1,7 @@
|
||||
import functools
|
||||
from typing import Callable, Generator, Iterable, Iterator, Optional, Tuple
|
||||
|
||||
from pipenv.patched.notpip._vendor.rich.progress import (
|
||||
from pipenv.patched.pip._vendor.rich.progress import (
|
||||
BarColumn,
|
||||
DownloadColumn,
|
||||
FileSizeColumn,
|
||||
@@ -14,7 +14,7 @@ from pipenv.patched.notpip._vendor.rich.progress import (
|
||||
TransferSpeedColumn,
|
||||
)
|
||||
|
||||
from pipenv.patched.notpip._internal.utils.logging import get_indentation
|
||||
from pipenv.patched.pip._internal.utils.logging import get_indentation
|
||||
|
||||
DownloadProgressRenderer = Callable[[Iterable[bytes]], Iterator[bytes]]
|
||||
|
||||
+72
-58
@@ -10,41 +10,64 @@ import os
|
||||
import sys
|
||||
from functools import partial
|
||||
from optparse import Values
|
||||
from typing import Any, List, Optional, Tuple
|
||||
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
|
||||
|
||||
from pipenv.patched.notpip._internal.cache import WheelCache
|
||||
from pipenv.patched.notpip._internal.cli import cmdoptions
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.notpip._internal.cli.command_context import CommandContextMixIn
|
||||
from pipenv.patched.notpip._internal.exceptions import CommandError, PreviousBuildDirError
|
||||
from pipenv.patched.notpip._internal.index.collector import LinkCollector
|
||||
from pipenv.patched.notpip._internal.index.package_finder import PackageFinder
|
||||
from pipenv.patched.notpip._internal.models.selection_prefs import SelectionPreferences
|
||||
from pipenv.patched.notpip._internal.models.target_python import TargetPython
|
||||
from pipenv.patched.notpip._internal.network.session import PipSession
|
||||
from pipenv.patched.notpip._internal.operations.build.build_tracker import BuildTracker
|
||||
from pipenv.patched.notpip._internal.operations.prepare import RequirementPreparer
|
||||
from pipenv.patched.notpip._internal.req.constructors import (
|
||||
from pipenv.patched.pip._internal.cache import WheelCache
|
||||
from pipenv.patched.pip._internal.cli import cmdoptions
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.command_context import CommandContextMixIn
|
||||
from pipenv.patched.pip._internal.exceptions import CommandError, PreviousBuildDirError
|
||||
from pipenv.patched.pip._internal.index.collector import LinkCollector
|
||||
from pipenv.patched.pip._internal.index.package_finder import PackageFinder
|
||||
from pipenv.patched.pip._internal.models.selection_prefs import SelectionPreferences
|
||||
from pipenv.patched.pip._internal.models.target_python import TargetPython
|
||||
from pipenv.patched.pip._internal.network.session import PipSession
|
||||
from pipenv.patched.pip._internal.operations.build.build_tracker import BuildTracker
|
||||
from pipenv.patched.pip._internal.operations.prepare import RequirementPreparer
|
||||
from pipenv.patched.pip._internal.req.constructors import (
|
||||
install_req_from_editable,
|
||||
install_req_from_line,
|
||||
install_req_from_parsed_requirement,
|
||||
install_req_from_req_string,
|
||||
)
|
||||
from pipenv.patched.notpip._internal.req.req_file import parse_requirements
|
||||
from pipenv.patched.notpip._internal.req.req_install import InstallRequirement
|
||||
from pipenv.patched.notpip._internal.resolution.base import BaseResolver
|
||||
from pipenv.patched.notpip._internal.self_outdated_check import pip_self_version_check
|
||||
from pipenv.patched.notpip._internal.utils.deprecation import deprecated
|
||||
from pipenv.patched.notpip._internal.utils.temp_dir import (
|
||||
from pipenv.patched.pip._internal.req.req_file import parse_requirements
|
||||
from pipenv.patched.pip._internal.req.req_install import InstallRequirement
|
||||
from pipenv.patched.pip._internal.resolution.base import BaseResolver
|
||||
from pipenv.patched.pip._internal.self_outdated_check import pip_self_version_check
|
||||
from pipenv.patched.pip._internal.utils.temp_dir import (
|
||||
TempDirectory,
|
||||
TempDirectoryTypeRegistry,
|
||||
tempdir_kinds,
|
||||
)
|
||||
from pipenv.patched.notpip._internal.utils.virtualenv import running_under_virtualenv
|
||||
from pipenv.patched.pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ssl import SSLContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _create_truststore_ssl_context() -> Optional["SSLContext"]:
|
||||
if sys.version_info < (3, 10):
|
||||
raise CommandError("The truststore feature is only available for Python 3.10+")
|
||||
|
||||
try:
|
||||
import ssl
|
||||
except ImportError:
|
||||
logger.warning("Disabling truststore since ssl support is missing")
|
||||
return None
|
||||
|
||||
try:
|
||||
import truststore
|
||||
except ImportError:
|
||||
raise CommandError(
|
||||
"To use the truststore feature, 'truststore' must be installed into "
|
||||
"pip's current environment."
|
||||
)
|
||||
|
||||
return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
|
||||
|
||||
|
||||
class SessionCommandMixin(CommandContextMixIn):
|
||||
|
||||
"""
|
||||
@@ -84,15 +107,27 @@ class SessionCommandMixin(CommandContextMixIn):
|
||||
options: Values,
|
||||
retries: Optional[int] = None,
|
||||
timeout: Optional[int] = None,
|
||||
fallback_to_certifi: bool = False,
|
||||
) -> PipSession:
|
||||
assert not options.cache_dir or os.path.isabs(options.cache_dir)
|
||||
cache_dir = options.cache_dir
|
||||
assert not cache_dir or os.path.isabs(cache_dir)
|
||||
|
||||
if "truststore" in options.features_enabled:
|
||||
try:
|
||||
ssl_context = _create_truststore_ssl_context()
|
||||
except Exception:
|
||||
if not fallback_to_certifi:
|
||||
raise
|
||||
ssl_context = None
|
||||
else:
|
||||
ssl_context = None
|
||||
|
||||
session = PipSession(
|
||||
cache=(
|
||||
os.path.join(options.cache_dir, "http") if options.cache_dir else None
|
||||
),
|
||||
cache=os.path.join(cache_dir, "http") if cache_dir else None,
|
||||
retries=retries if retries is not None else options.retries,
|
||||
trusted_hosts=options.trusted_hosts,
|
||||
index_urls=self._get_index_urls(options),
|
||||
ssl_context=ssl_context,
|
||||
)
|
||||
|
||||
# Handle custom ca-bundles from the user
|
||||
@@ -142,7 +177,14 @@ class IndexGroupCommand(Command, SessionCommandMixin):
|
||||
|
||||
# Otherwise, check if we're using the latest version of pip available.
|
||||
session = self._build_session(
|
||||
options, retries=0, timeout=min(5, options.timeout)
|
||||
options,
|
||||
retries=0,
|
||||
timeout=min(5, options.timeout),
|
||||
# This is set to ensure the function does not fail when truststore is
|
||||
# specified in use-feature but cannot be loaded. This usually raises a
|
||||
# CommandError and shows a nice user-facing error, but this function is not
|
||||
# called in that try-except block.
|
||||
fallback_to_certifi=True,
|
||||
)
|
||||
with session:
|
||||
pip_self_version_check(session, options)
|
||||
@@ -227,31 +269,6 @@ class RequirementCommand(IndexGroupCommand):
|
||||
|
||||
return "2020-resolver"
|
||||
|
||||
@staticmethod
|
||||
def determine_build_failure_suppression(options: Values) -> bool:
|
||||
"""Determines whether build failures should be suppressed and backtracked on."""
|
||||
if "backtrack-on-build-failures" not in options.deprecated_features_enabled:
|
||||
return False
|
||||
|
||||
if "legacy-resolver" in options.deprecated_features_enabled:
|
||||
raise CommandError("Cannot backtrack with legacy resolver.")
|
||||
|
||||
deprecated(
|
||||
reason=(
|
||||
"Backtracking on build failures can mask issues related to how "
|
||||
"a package generates metadata or builds a wheel. This flag will "
|
||||
"be removed in pip 22.2."
|
||||
),
|
||||
gone_in=None,
|
||||
replacement=(
|
||||
"avoiding known-bad versions by explicitly telling pip to ignore them "
|
||||
"(either directly as requirements, or via a constraints file)"
|
||||
),
|
||||
feature_flag=None,
|
||||
issue=10655,
|
||||
)
|
||||
return True
|
||||
|
||||
@classmethod
|
||||
def make_requirement_preparer(
|
||||
cls,
|
||||
@@ -328,15 +345,14 @@ class RequirementCommand(IndexGroupCommand):
|
||||
use_pep517=use_pep517,
|
||||
config_settings=getattr(options, "config_settings", None),
|
||||
)
|
||||
suppress_build_failures = cls.determine_build_failure_suppression(options)
|
||||
resolver_variant = cls.determine_resolver_variant(options)
|
||||
# The long import name and duplicated invocation is needed to convince
|
||||
# Mypy into correctly typechecking. Otherwise it would complain the
|
||||
# "Resolver" class being redefined.
|
||||
if resolver_variant == "2020-resolver":
|
||||
import pipenv.patched.notpip._internal.resolution.resolvelib.resolver
|
||||
import pipenv.patched.pip._internal.resolution.resolvelib.resolver
|
||||
|
||||
return pipenv.patched.notpip._internal.resolution.resolvelib.resolver.Resolver(
|
||||
return pipenv.patched.pip._internal.resolution.resolvelib.resolver.Resolver(
|
||||
preparer=preparer,
|
||||
finder=finder,
|
||||
wheel_cache=wheel_cache,
|
||||
@@ -348,11 +364,10 @@ class RequirementCommand(IndexGroupCommand):
|
||||
force_reinstall=force_reinstall,
|
||||
upgrade_strategy=upgrade_strategy,
|
||||
py_version_info=py_version_info,
|
||||
suppress_build_failures=suppress_build_failures,
|
||||
)
|
||||
import pipenv.patched.notpip._internal.resolution.legacy.resolver
|
||||
import pipenv.patched.pip._internal.resolution.legacy.resolver
|
||||
|
||||
return pipenv.patched.notpip._internal.resolution.legacy.resolver.Resolver(
|
||||
return pipenv.patched.pip._internal.resolution.legacy.resolver.Resolver(
|
||||
preparer=preparer,
|
||||
finder=finder,
|
||||
wheel_cache=wheel_cache,
|
||||
@@ -484,5 +499,4 @@ class RequirementCommand(IndexGroupCommand):
|
||||
link_collector=link_collector,
|
||||
selection_prefs=selection_prefs,
|
||||
target_python=target_python,
|
||||
use_deprecated_html5lib="html5lib" in options.deprecated_features_enabled,
|
||||
)
|
||||
+2
-2
@@ -5,8 +5,8 @@ import sys
|
||||
import time
|
||||
from typing import IO, Generator
|
||||
|
||||
from pipenv.patched.notpip._internal.utils.compat import WINDOWS
|
||||
from pipenv.patched.notpip._internal.utils.logging import get_indentation
|
||||
from pipenv.patched.pip._internal.utils.compat import WINDOWS
|
||||
from pipenv.patched.pip._internal.utils.logging import get_indentation
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
+23
-18
@@ -6,7 +6,7 @@ import importlib
|
||||
from collections import namedtuple
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
|
||||
CommandInfo = namedtuple("CommandInfo", "module_path, class_name, summary")
|
||||
|
||||
@@ -14,87 +14,92 @@ CommandInfo = namedtuple("CommandInfo", "module_path, class_name, summary")
|
||||
# - Enables avoiding additional (costly) imports for presenting `--help`.
|
||||
# - The ordering matters for help display.
|
||||
#
|
||||
# Even though the module path starts with the same "pipenv.patched.notpip._internal.commands"
|
||||
# Even though the module path starts with the same "pipenv.patched.pip._internal.commands"
|
||||
# prefix, the full path makes testing easier (specifically when modifying
|
||||
# `commands_dict` in test setup / teardown).
|
||||
commands_dict: Dict[str, CommandInfo] = {
|
||||
"install": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.install",
|
||||
"pipenv.patched.pip._internal.commands.install",
|
||||
"InstallCommand",
|
||||
"Install packages.",
|
||||
),
|
||||
"download": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.download",
|
||||
"pipenv.patched.pip._internal.commands.download",
|
||||
"DownloadCommand",
|
||||
"Download packages.",
|
||||
),
|
||||
"uninstall": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.uninstall",
|
||||
"pipenv.patched.pip._internal.commands.uninstall",
|
||||
"UninstallCommand",
|
||||
"Uninstall packages.",
|
||||
),
|
||||
"freeze": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.freeze",
|
||||
"pipenv.patched.pip._internal.commands.freeze",
|
||||
"FreezeCommand",
|
||||
"Output installed packages in requirements format.",
|
||||
),
|
||||
"inspect": CommandInfo(
|
||||
"pipenv.patched.pip._internal.commands.inspect",
|
||||
"InspectCommand",
|
||||
"Inspect the python environment.",
|
||||
),
|
||||
"list": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.list",
|
||||
"pipenv.patched.pip._internal.commands.list",
|
||||
"ListCommand",
|
||||
"List installed packages.",
|
||||
),
|
||||
"show": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.show",
|
||||
"pipenv.patched.pip._internal.commands.show",
|
||||
"ShowCommand",
|
||||
"Show information about installed packages.",
|
||||
),
|
||||
"check": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.check",
|
||||
"pipenv.patched.pip._internal.commands.check",
|
||||
"CheckCommand",
|
||||
"Verify installed packages have compatible dependencies.",
|
||||
),
|
||||
"config": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.configuration",
|
||||
"pipenv.patched.pip._internal.commands.configuration",
|
||||
"ConfigurationCommand",
|
||||
"Manage local and global configuration.",
|
||||
),
|
||||
"search": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.search",
|
||||
"pipenv.patched.pip._internal.commands.search",
|
||||
"SearchCommand",
|
||||
"Search PyPI for packages.",
|
||||
),
|
||||
"cache": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.cache",
|
||||
"pipenv.patched.pip._internal.commands.cache",
|
||||
"CacheCommand",
|
||||
"Inspect and manage pip's wheel cache.",
|
||||
),
|
||||
"index": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.index",
|
||||
"pipenv.patched.pip._internal.commands.index",
|
||||
"IndexCommand",
|
||||
"Inspect information available from package indexes.",
|
||||
),
|
||||
"wheel": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.wheel",
|
||||
"pipenv.patched.pip._internal.commands.wheel",
|
||||
"WheelCommand",
|
||||
"Build wheels from your requirements.",
|
||||
),
|
||||
"hash": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.hash",
|
||||
"pipenv.patched.pip._internal.commands.hash",
|
||||
"HashCommand",
|
||||
"Compute hashes of package archives.",
|
||||
),
|
||||
"completion": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.completion",
|
||||
"pipenv.patched.pip._internal.commands.completion",
|
||||
"CompletionCommand",
|
||||
"A helper command used for command completion.",
|
||||
),
|
||||
"debug": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.debug",
|
||||
"pipenv.patched.pip._internal.commands.debug",
|
||||
"DebugCommand",
|
||||
"Show information useful for debugging.",
|
||||
),
|
||||
"help": CommandInfo(
|
||||
"pipenv.patched.notpip._internal.commands.help",
|
||||
"pipenv.patched.pip._internal.commands.help",
|
||||
"HelpCommand",
|
||||
"Show help for commands.",
|
||||
),
|
||||
+9
-9
@@ -3,11 +3,11 @@ import textwrap
|
||||
from optparse import Values
|
||||
from typing import Any, List
|
||||
|
||||
import pipenv.patched.notpip._internal.utils.filesystem as filesystem
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pipenv.patched.notpip._internal.exceptions import CommandError, PipError
|
||||
from pipenv.patched.notpip._internal.utils.logging import getLogger
|
||||
import pipenv.patched.pip._internal.utils.filesystem as filesystem
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pipenv.patched.pip._internal.exceptions import CommandError, PipError
|
||||
from pipenv.patched.pip._internal.utils.logging import getLogger
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
@@ -105,9 +105,9 @@ class CacheCommand(Command):
|
||||
Package index page cache location: {http_cache_location}
|
||||
Package index page cache size: {http_cache_size}
|
||||
Number of HTTP files: {num_http_files}
|
||||
Wheels location: {wheels_cache_location}
|
||||
Wheels size: {wheels_cache_size}
|
||||
Number of wheels: {package_count}
|
||||
Locally built wheels location: {wheels_cache_location}
|
||||
Locally built wheels size: {wheels_cache_size}
|
||||
Number of locally built wheels: {package_count}
|
||||
"""
|
||||
)
|
||||
.format(
|
||||
@@ -140,7 +140,7 @@ class CacheCommand(Command):
|
||||
|
||||
def format_for_human(self, files: List[str]) -> None:
|
||||
if not files:
|
||||
logger.info("Nothing cached.")
|
||||
logger.info("No locally built wheels cached.")
|
||||
return
|
||||
|
||||
results = []
|
||||
+4
-4
@@ -2,13 +2,13 @@ import logging
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pipenv.patched.notpip._internal.operations.check import (
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pipenv.patched.pip._internal.operations.check import (
|
||||
check_package_set,
|
||||
create_package_set_from_installed,
|
||||
)
|
||||
from pipenv.patched.notpip._internal.utils.misc import write_output
|
||||
from pipenv.patched.pip._internal.utils.misc import write_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
+3
-3
@@ -3,9 +3,9 @@ import textwrap
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import SUCCESS
|
||||
from pipenv.patched.notpip._internal.utils.misc import get_prog
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.status_codes import SUCCESS
|
||||
from pipenv.patched.pip._internal.utils.misc import get_prog
|
||||
|
||||
BASE_COMPLETION = """
|
||||
# pip {shell} completion start{script}# pip {shell} completion end
|
||||
+6
-6
@@ -4,17 +4,17 @@ import subprocess
|
||||
from optparse import Values
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pipenv.patched.notpip._internal.configuration import (
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pipenv.patched.pip._internal.configuration import (
|
||||
Configuration,
|
||||
Kind,
|
||||
get_configuration_files,
|
||||
kinds,
|
||||
)
|
||||
from pipenv.patched.notpip._internal.exceptions import PipError
|
||||
from pipenv.patched.notpip._internal.utils.logging import indent_log
|
||||
from pipenv.patched.notpip._internal.utils.misc import get_prog, write_output
|
||||
from pipenv.patched.pip._internal.exceptions import PipError
|
||||
from pipenv.patched.pip._internal.utils.logging import indent_log
|
||||
from pipenv.patched.pip._internal.utils.misc import get_prog, write_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
+17
-21
@@ -1,3 +1,4 @@
|
||||
import importlib.resources
|
||||
import locale
|
||||
import logging
|
||||
import os
|
||||
@@ -6,19 +7,18 @@ from optparse import Values
|
||||
from types import ModuleType
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import pipenv.patched.notpip._vendor
|
||||
from pipenv.patched.notpip._vendor.certifi import where
|
||||
from pipenv.patched.notpip._vendor.packaging.version import parse as parse_version
|
||||
import pipenv.patched.pip._vendor
|
||||
from pipenv.patched.pip._vendor.certifi import where
|
||||
from pipenv.patched.pip._vendor.packaging.version import parse as parse_version
|
||||
|
||||
from pipenv.patched.notpip import __file__ as pip_location
|
||||
from pipenv.patched.notpip._internal.cli import cmdoptions
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.notpip._internal.cli.cmdoptions import make_target_python
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import SUCCESS
|
||||
from pipenv.patched.notpip._internal.configuration import Configuration
|
||||
from pipenv.patched.notpip._internal.metadata import get_environment
|
||||
from pipenv.patched.notpip._internal.utils.logging import indent_log
|
||||
from pipenv.patched.notpip._internal.utils.misc import get_pip_version
|
||||
from pipenv.patched.pip._internal.cli import cmdoptions
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.cmdoptions import make_target_python
|
||||
from pipenv.patched.pip._internal.cli.status_codes import SUCCESS
|
||||
from pipenv.patched.pip._internal.configuration import Configuration
|
||||
from pipenv.patched.pip._internal.metadata import get_environment
|
||||
from pipenv.patched.pip._internal.utils.logging import indent_log
|
||||
from pipenv.patched.pip._internal.utils.misc import get_pip_version
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -35,11 +35,7 @@ def show_sys_implementation() -> None:
|
||||
|
||||
|
||||
def create_vendor_txt_map() -> Dict[str, str]:
|
||||
vendor_txt_path = os.path.join(
|
||||
os.path.dirname(pip_location), "_vendor", "vendor.txt"
|
||||
)
|
||||
|
||||
with open(vendor_txt_path) as f:
|
||||
with importlib.resources.open_text("pipenv.patched.pip._vendor", "vendor.txt") as f:
|
||||
# Purge non version specifying lines.
|
||||
# Also, remove any space prefix or suffixes (including comments).
|
||||
lines = [
|
||||
@@ -57,8 +53,8 @@ def get_module_from_module_name(module_name: str) -> ModuleType:
|
||||
if module_name == "setuptools":
|
||||
module_name = "pkg_resources"
|
||||
|
||||
__import__(f"pipenv.patched.notpip._vendor.{module_name}", globals(), locals(), level=0)
|
||||
return getattr(pipenv.patched.notpip._vendor, module_name)
|
||||
__import__(f"pipenv.patched.pip._vendor.{module_name}", globals(), locals(), level=0)
|
||||
return getattr(pipenv.patched.pip._vendor, module_name)
|
||||
|
||||
|
||||
def get_vendor_version_from_module(module_name: str) -> Optional[str]:
|
||||
@@ -193,8 +189,8 @@ class DebugCommand(Command):
|
||||
show_value("'cert' config value", ca_bundle_info(self.parser.config))
|
||||
show_value("REQUESTS_CA_BUNDLE", os.environ.get("REQUESTS_CA_BUNDLE"))
|
||||
show_value("CURL_CA_BUNDLE", os.environ.get("CURL_CA_BUNDLE"))
|
||||
show_value("pipenv.patched.notpip._vendor.certifi.where()", where())
|
||||
show_value("pipenv.patched.notpip._vendor.DEBUNDLED", pipenv.patched.notpip._vendor.DEBUNDLED)
|
||||
show_value("pipenv.patched.pip._vendor.certifi.where()", where())
|
||||
show_value("pipenv.patched.pip._vendor.DEBUNDLED", pipenv.patched.pip._vendor.DEBUNDLED)
|
||||
|
||||
show_vendor_versions()
|
||||
|
||||
+8
-7
@@ -3,13 +3,13 @@ import os
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pipenv.patched.notpip._internal.cli import cmdoptions
|
||||
from pipenv.patched.notpip._internal.cli.cmdoptions import make_target_python
|
||||
from pipenv.patched.notpip._internal.cli.req_command import RequirementCommand, with_cleanup
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import SUCCESS
|
||||
from pipenv.patched.notpip._internal.operations.build.build_tracker import get_build_tracker
|
||||
from pipenv.patched.notpip._internal.utils.misc import ensure_dir, normalize_path, write_output
|
||||
from pipenv.patched.notpip._internal.utils.temp_dir import TempDirectory
|
||||
from pipenv.patched.pip._internal.cli import cmdoptions
|
||||
from pipenv.patched.pip._internal.cli.cmdoptions import make_target_python
|
||||
from pipenv.patched.pip._internal.cli.req_command import RequirementCommand, with_cleanup
|
||||
from pipenv.patched.pip._internal.cli.status_codes import SUCCESS
|
||||
from pipenv.patched.pip._internal.operations.build.build_tracker import get_build_tracker
|
||||
from pipenv.patched.pip._internal.utils.misc import ensure_dir, normalize_path, write_output
|
||||
from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -122,6 +122,7 @@ class DownloadCommand(RequirementCommand):
|
||||
finder=finder,
|
||||
options=options,
|
||||
ignore_requires_python=options.ignore_requires_python,
|
||||
use_pep517=options.use_pep517,
|
||||
py_version_info=options.python_version,
|
||||
)
|
||||
|
||||
+5
-5
@@ -2,11 +2,11 @@ import sys
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pipenv.patched.notpip._internal.cli import cmdoptions
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import SUCCESS
|
||||
from pipenv.patched.notpip._internal.operations.freeze import freeze
|
||||
from pipenv.patched.notpip._internal.utils.compat import stdlib_pkgs
|
||||
from pipenv.patched.pip._internal.cli import cmdoptions
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.status_codes import SUCCESS
|
||||
from pipenv.patched.pip._internal.operations.freeze import freeze
|
||||
from pipenv.patched.pip._internal.utils.compat import stdlib_pkgs
|
||||
|
||||
DEV_PKGS = {"pip", "setuptools", "distribute", "wheel"}
|
||||
|
||||
+4
-4
@@ -4,10 +4,10 @@ import sys
|
||||
from optparse import Values
|
||||
from typing import List
|
||||
|
||||
from pipenv.patched.notpip._internal.cli.base_command import Command
|
||||
from pipenv.patched.notpip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pipenv.patched.notpip._internal.utils.hashes import FAVORITE_HASH, STRONG_HASHES
|
||||
from pipenv.patched.notpip._internal.utils.misc import read_chunks, write_output
|
||||
from pipenv.patched.pip._internal.cli.base_command import Command
|
||||
from pipenv.patched.pip._internal.cli.status_codes import ERROR, SUCCESS
|
||||
from pipenv.patched.pip._internal.utils.hashes import FAVORITE_HASH, STRONG_HASHES
|
||||
from pipenv.patched.pip._internal.utils.misc import read_chunks, write_output
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user