mirror of
https://github.com/kennethreitz/pipenv.git
synced 2026-06-05 06:46:15 +00:00
Pip 23.0 (#5586)
* update pip to 23.0 in patched.txt * Vendor in pip517 since pip dropped it from its _vendor. * adjust vendoring script. * vendor in pip==23.0 * correct vendoring script. * fix import with vendoring script.
This commit is contained in:
@@ -1,2 +1,2 @@
|
||||
pip==22.3.1
|
||||
pip==23.0
|
||||
safety==2.3.2
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import List, Optional
|
||||
|
||||
__version__ = "22.3.1"
|
||||
__version__ = "23.0"
|
||||
|
||||
|
||||
def main(args: Optional[List[str]] = None) -> int:
|
||||
|
||||
@@ -8,7 +8,6 @@ import site
|
||||
import sys
|
||||
import textwrap
|
||||
from collections import OrderedDict
|
||||
from sysconfig import get_paths
|
||||
from types import TracebackType
|
||||
from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type
|
||||
|
||||
@@ -18,7 +17,12 @@ from pipenv.patched.pip._vendor.packaging.version import Version
|
||||
|
||||
from pipenv.patched.pip import __file__ as pip_location
|
||||
from pipenv.patched.pip._internal.cli.spinners import open_spinner
|
||||
from pipenv.patched.pip._internal.locations import get_platlib, get_prefixed_libs, get_purelib
|
||||
from pipenv.patched.pip._internal.locations import (
|
||||
get_isolated_environment_bin_path,
|
||||
get_isolated_environment_lib_paths,
|
||||
get_platlib,
|
||||
get_purelib,
|
||||
)
|
||||
from pipenv.patched.pip._internal.metadata import get_default_environment, get_environment
|
||||
from pipenv.patched.pip._internal.utils.subprocess import call_subprocess
|
||||
from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
|
||||
@@ -33,11 +37,8 @@ class _Prefix:
|
||||
def __init__(self, path: str) -> None:
|
||||
self.path = path
|
||||
self.setup = False
|
||||
self.bin_dir = get_paths(
|
||||
"nt" if os.name == "nt" else "posix_prefix",
|
||||
vars={"base": path, "platbase": path},
|
||||
)["scripts"]
|
||||
self.lib_dirs = get_prefixed_libs(path)
|
||||
self.bin_dir = get_isolated_environment_bin_path(path)
|
||||
self.lib_dirs = get_isolated_environment_lib_paths(path)
|
||||
|
||||
|
||||
def get_runnable_pip() -> str:
|
||||
|
||||
@@ -825,11 +825,9 @@ install_options: Callable[..., Option] = partial(
|
||||
dest="install_options",
|
||||
action="append",
|
||||
metavar="options",
|
||||
help="Extra arguments to be supplied to the setup.py install "
|
||||
'command (use like --install-option="--install-scripts=/usr/local/'
|
||||
'bin"). Use multiple --install-option options to pass multiple '
|
||||
"options to setup.py install. If you are using an option with a "
|
||||
"directory path, be sure to use absolute path.",
|
||||
help="This option is deprecated. Using this option with location-changing "
|
||||
"options may cause unexpected behavior. "
|
||||
"Use pip-level options like --user, --prefix, --root, and --target.",
|
||||
)
|
||||
|
||||
build_options: Callable[..., Option] = partial(
|
||||
|
||||
@@ -48,7 +48,7 @@ def create_vendor_txt_map() -> Dict[str, str]:
|
||||
|
||||
def get_module_from_module_name(module_name: str) -> ModuleType:
|
||||
# Module name can be uppercase in vendor.txt for some reason...
|
||||
module_name = module_name.lower()
|
||||
module_name = module_name.lower().replace("-", "_")
|
||||
# PATCH: setuptools is actually only pkg_resources.
|
||||
if module_name == "setuptools":
|
||||
module_name = "pkg_resources"
|
||||
|
||||
@@ -46,11 +46,6 @@ class InspectCommand(Command):
|
||||
self.parser.insert_option_group(0, self.cmd_opts)
|
||||
|
||||
def run(self, options: Values, args: List[str]) -> int:
|
||||
logger.warning(
|
||||
"pip inspect is currently an experimental command. "
|
||||
"The output format may change in a future release without prior warning."
|
||||
)
|
||||
|
||||
cmdoptions.check_list_path_option(options)
|
||||
dists = get_environment(options.path).iter_installed_distributions(
|
||||
local_only=options.local,
|
||||
@@ -58,7 +53,7 @@ class InspectCommand(Command):
|
||||
skip=set(stdlib_pkgs),
|
||||
)
|
||||
output = {
|
||||
"version": "0",
|
||||
"version": "1",
|
||||
"pip_version": __version__,
|
||||
"installed": [self._dist_to_dict(dist) for dist in dists],
|
||||
"environment": default_environment(),
|
||||
|
||||
@@ -41,6 +41,7 @@ from pipenv.patched.pip._internal.utils.distutils_args import parse_distutils_ar
|
||||
from pipenv.patched.pip._internal.utils.filesystem import test_writable_dir
|
||||
from pipenv.patched.pip._internal.utils.logging import getLogger
|
||||
from pipenv.patched.pip._internal.utils.misc import (
|
||||
check_externally_managed,
|
||||
ensure_dir,
|
||||
get_pip_version,
|
||||
protect_pip_from_modification_on_windows,
|
||||
@@ -284,6 +285,20 @@ class InstallCommand(RequirementCommand):
|
||||
if options.use_user_site and options.target_dir is not None:
|
||||
raise CommandError("Can not combine '--user' and '--target'")
|
||||
|
||||
# Check whether the environment we're installing into is externally
|
||||
# managed, as specified in PEP 668. Specifying --root, --target, or
|
||||
# --prefix disables the check, since there's no reliable way to locate
|
||||
# the EXTERNALLY-MANAGED file for those cases. An exception is also
|
||||
# made specifically for "--dry-run --report" for convenience.
|
||||
installing_into_current_environment = (
|
||||
not (options.dry_run and options.json_report_file)
|
||||
and options.root_path is None
|
||||
and options.target_dir is None
|
||||
and options.prefix_path is None
|
||||
)
|
||||
if installing_into_current_environment:
|
||||
check_externally_managed()
|
||||
|
||||
upgrade_strategy = "to-satisfy-only"
|
||||
if options.upgrade:
|
||||
upgrade_strategy = options.upgrade_strategy
|
||||
@@ -402,12 +417,6 @@ class InstallCommand(RequirementCommand):
|
||||
)
|
||||
|
||||
if options.json_report_file:
|
||||
logger.warning(
|
||||
"--report is currently an experimental option. "
|
||||
"The output format may change in a future release "
|
||||
"without prior warning."
|
||||
)
|
||||
|
||||
report = InstallationReport(requirement_set.requirements_to_install)
|
||||
if options.json_report_file == "-":
|
||||
print_json(data=report.to_dict())
|
||||
|
||||
@@ -53,6 +53,7 @@ class _PackageInfo(NamedTuple):
|
||||
name: str
|
||||
version: str
|
||||
location: str
|
||||
editable_project_location: Optional[str]
|
||||
requires: List[str]
|
||||
required_by: List[str]
|
||||
installer: str
|
||||
@@ -120,6 +121,7 @@ def search_packages_info(query: List[str]) -> Generator[_PackageInfo, None, None
|
||||
name=dist.raw_name,
|
||||
version=str(dist.version),
|
||||
location=dist.location or "",
|
||||
editable_project_location=dist.editable_project_location,
|
||||
requires=requires,
|
||||
required_by=required_by,
|
||||
installer=dist.installer,
|
||||
@@ -158,6 +160,10 @@ def print_results(
|
||||
write_output("Author-email: %s", dist.author_email)
|
||||
write_output("License: %s", dist.license)
|
||||
write_output("Location: %s", dist.location)
|
||||
if dist.editable_project_location is not None:
|
||||
write_output(
|
||||
"Editable project location: %s", dist.editable_project_location
|
||||
)
|
||||
write_output("Requires: %s", ", ".join(dist.requires))
|
||||
write_output("Required-by: %s", ", ".join(dist.required_by))
|
||||
|
||||
|
||||
@@ -14,7 +14,10 @@ from pipenv.patched.pip._internal.req.constructors import (
|
||||
install_req_from_line,
|
||||
install_req_from_parsed_requirement,
|
||||
)
|
||||
from pipenv.patched.pip._internal.utils.misc import protect_pip_from_modification_on_windows
|
||||
from pipenv.patched.pip._internal.utils.misc import (
|
||||
check_externally_managed,
|
||||
protect_pip_from_modification_on_windows,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -90,6 +93,8 @@ class UninstallCommand(Command, SessionCommandMixin):
|
||||
f'"pip help {self.name}")'
|
||||
)
|
||||
|
||||
check_externally_managed()
|
||||
|
||||
protect_pip_from_modification_on_windows(
|
||||
modifying_pip="pip" in reqs_to_uninstall
|
||||
)
|
||||
|
||||
@@ -6,9 +6,14 @@ subpackage and, thus, should not depend on them.
|
||||
"""
|
||||
|
||||
import configparser
|
||||
import contextlib
|
||||
import locale
|
||||
import logging
|
||||
import pathlib
|
||||
import re
|
||||
import sys
|
||||
from itertools import chain, groupby, repeat
|
||||
from typing import TYPE_CHECKING, Dict, List, Optional, Union
|
||||
from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union
|
||||
|
||||
from pipenv.patched.pip._vendor.requests.models import Request, Response
|
||||
from pipenv.patched.pip._vendor.rich.console import Console, ConsoleOptions, RenderResult
|
||||
@@ -22,6 +27,8 @@ if TYPE_CHECKING:
|
||||
from pipenv.patched.pip._internal.metadata import BaseDistribution
|
||||
from pipenv.patched.pip._internal.req.req_install import InstallRequirement
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
#
|
||||
# Scaffolding
|
||||
@@ -658,3 +665,81 @@ class ConfigurationFileCouldNotBeLoaded(ConfigurationError):
|
||||
assert self.error is not None
|
||||
message_part = f".\n{self.error}\n"
|
||||
return f"Configuration file {self.reason}{message_part}"
|
||||
|
||||
|
||||
_DEFAULT_EXTERNALLY_MANAGED_ERROR = f"""\
|
||||
The Python environment under {sys.prefix} is managed externally, and may not be
|
||||
manipulated by the user. Please use specific tooling from the distributor of
|
||||
the Python installation to interact with this environment instead.
|
||||
"""
|
||||
|
||||
|
||||
class ExternallyManagedEnvironment(DiagnosticPipError):
|
||||
"""The current environment is externally managed.
|
||||
|
||||
This is raised when the current environment is externally managed, as
|
||||
defined by `PEP 668`_. The ``EXTERNALLY-MANAGED`` configuration is checked
|
||||
and displayed when the error is bubbled up to the user.
|
||||
|
||||
:param error: The error message read from ``EXTERNALLY-MANAGED``.
|
||||
"""
|
||||
|
||||
reference = "externally-managed-environment"
|
||||
|
||||
def __init__(self, error: Optional[str]) -> None:
|
||||
if error is None:
|
||||
context = Text(_DEFAULT_EXTERNALLY_MANAGED_ERROR)
|
||||
else:
|
||||
context = Text(error)
|
||||
super().__init__(
|
||||
message="This environment is externally managed",
|
||||
context=context,
|
||||
note_stmt=(
|
||||
"If you believe this is a mistake, please contact your "
|
||||
"Python installation or OS distribution provider."
|
||||
),
|
||||
hint_stmt=Text("See PEP 668 for the detailed specification."),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _iter_externally_managed_error_keys() -> Iterator[str]:
|
||||
# LC_MESSAGES is in POSIX, but not the C standard. The most common
|
||||
# platform that does not implement this category is Windows, where
|
||||
# using other categories for console message localization is equally
|
||||
# unreliable, so we fall back to the locale-less vendor message. This
|
||||
# can always be re-evaluated when a vendor proposes a new alternative.
|
||||
try:
|
||||
category = locale.LC_MESSAGES
|
||||
except AttributeError:
|
||||
lang: Optional[str] = None
|
||||
else:
|
||||
lang, _ = locale.getlocale(category)
|
||||
if lang is not None:
|
||||
yield f"Error-{lang}"
|
||||
for sep in ("-", "_"):
|
||||
before, found, _ = lang.partition(sep)
|
||||
if not found:
|
||||
continue
|
||||
yield f"Error-{before}"
|
||||
yield "Error"
|
||||
|
||||
@classmethod
|
||||
def from_config(
|
||||
cls,
|
||||
config: Union[pathlib.Path, str],
|
||||
) -> "ExternallyManagedEnvironment":
|
||||
parser = configparser.ConfigParser(interpolation=None)
|
||||
try:
|
||||
parser.read(config, encoding="utf-8")
|
||||
section = parser["externally-managed"]
|
||||
for key in cls._iter_externally_managed_error_keys():
|
||||
with contextlib.suppress(KeyError):
|
||||
return cls(section[key])
|
||||
except KeyError:
|
||||
pass
|
||||
except (OSError, UnicodeDecodeError, configparser.ParsingError):
|
||||
from pipenv.patched.pip._internal.utils._log import VERBOSE
|
||||
|
||||
exc_info = logger.isEnabledFor(VERBOSE)
|
||||
logger.warning("Failed to read %s", config, exc_info=exc_info)
|
||||
return cls(None)
|
||||
|
||||
@@ -354,7 +354,7 @@ def _get_index_content(link: Link, *, session: PipSession) -> Optional["IndexCon
|
||||
if not url.endswith("/"):
|
||||
url += "/"
|
||||
# TODO: In the future, it would be nice if pip supported PEP 691
|
||||
# style respones in the file:// URLs, however there's no
|
||||
# style responses in the file:// URLs, however there's no
|
||||
# standard file extension for application/vnd.pypi.simple.v1+json
|
||||
# so we'll need to come up with something on our own.
|
||||
url = urllib.parse.urljoin(url, "index.html")
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
"""Routines related to PyPI, indexes"""
|
||||
|
||||
# The following comment should be removed at some point in the future.
|
||||
# mypy: strict-optional=False
|
||||
|
||||
import enum
|
||||
import functools
|
||||
import itertools
|
||||
import logging
|
||||
import re
|
||||
from typing import FrozenSet, Iterable, List, Optional, Set, Tuple, Union
|
||||
from typing import TYPE_CHECKING, FrozenSet, Iterable, List, Optional, Set, Tuple, Union
|
||||
|
||||
from pipenv.patched.pip._vendor.packaging import specifiers
|
||||
from pipenv.patched.pip._vendor.packaging.tags import Tag
|
||||
@@ -39,6 +36,9 @@ from pipenv.patched.pip._internal.utils.misc import build_netloc
|
||||
from pipenv.patched.pip._internal.utils.packaging import check_requires_python
|
||||
from pipenv.patched.pip._internal.utils.unpacking import SUPPORTED_EXTENSIONS
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipenv.patched.pip._vendor.typing_extensions import TypeGuard
|
||||
|
||||
__all__ = ["FormatControl", "BestCandidateResult", "PackageFinder"]
|
||||
|
||||
|
||||
@@ -255,7 +255,7 @@ class LinkEvaluator:
|
||||
|
||||
def filter_unallowed_hashes(
|
||||
candidates: List[InstallationCandidate],
|
||||
hashes: Hashes,
|
||||
hashes: Optional[Hashes],
|
||||
project_name: str,
|
||||
) -> List[InstallationCandidate]:
|
||||
"""
|
||||
@@ -549,6 +549,7 @@ class CandidateEvaluator:
|
||||
binary_preference = 1
|
||||
if wheel.build_tag is not None:
|
||||
match = re.match(r"^(\d+)(.*)$", wheel.build_tag)
|
||||
assert match is not None, "guaranteed by filename validation"
|
||||
build_tag_groups = match.groups()
|
||||
build_tag = (int(build_tag_groups[0]), build_tag_groups[1])
|
||||
else: # sdist
|
||||
@@ -954,43 +955,46 @@ class PackageFinder:
|
||||
"No matching distribution found for {}".format(req)
|
||||
)
|
||||
|
||||
best_installed = False
|
||||
if installed_version and (
|
||||
best_candidate is None or best_candidate.version <= installed_version
|
||||
):
|
||||
best_installed = True
|
||||
def _should_install_candidate(
|
||||
candidate: Optional[InstallationCandidate],
|
||||
) -> "TypeGuard[InstallationCandidate]":
|
||||
if installed_version is None:
|
||||
return True
|
||||
if best_candidate is None:
|
||||
return False
|
||||
return best_candidate.version > installed_version
|
||||
|
||||
if not upgrade and installed_version is not None:
|
||||
if best_installed:
|
||||
logger.debug(
|
||||
"Existing installed version (%s) is most up-to-date and "
|
||||
"satisfies requirement",
|
||||
installed_version,
|
||||
)
|
||||
else:
|
||||
if _should_install_candidate(best_candidate):
|
||||
logger.debug(
|
||||
"Existing installed version (%s) satisfies requirement "
|
||||
"(most up-to-date version is %s)",
|
||||
installed_version,
|
||||
best_candidate.version,
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Existing installed version (%s) is most up-to-date and "
|
||||
"satisfies requirement",
|
||||
installed_version,
|
||||
)
|
||||
return None
|
||||
|
||||
if best_installed:
|
||||
# We have an existing version, and its the best version
|
||||
if _should_install_candidate(best_candidate):
|
||||
logger.debug(
|
||||
"Installed version (%s) is most up-to-date (past versions: %s)",
|
||||
installed_version,
|
||||
"Using version %s (newest of versions: %s)",
|
||||
best_candidate.version,
|
||||
_format_versions(best_candidate_result.iter_applicable()),
|
||||
)
|
||||
raise BestVersionAlreadyInstalled
|
||||
return best_candidate
|
||||
|
||||
# We have an existing version, and its the best version
|
||||
logger.debug(
|
||||
"Using version %s (newest of versions: %s)",
|
||||
best_candidate.version,
|
||||
"Installed version (%s) is most up-to-date (past versions: %s)",
|
||||
installed_version,
|
||||
_format_versions(best_candidate_result.iter_applicable()),
|
||||
)
|
||||
return best_candidate
|
||||
raise BestVersionAlreadyInstalled
|
||||
|
||||
|
||||
def _find_name_version_sep(fragment: str, canonical_name: str) -> int:
|
||||
|
||||
@@ -25,9 +25,10 @@ __all__ = [
|
||||
"USER_CACHE_DIR",
|
||||
"get_bin_prefix",
|
||||
"get_bin_user",
|
||||
"get_isolated_environment_bin_path",
|
||||
"get_isolated_environment_lib_paths",
|
||||
"get_major_minor_version",
|
||||
"get_platlib",
|
||||
"get_prefixed_libs",
|
||||
"get_purelib",
|
||||
"get_scheme",
|
||||
"get_src_prefix",
|
||||
@@ -482,13 +483,13 @@ def _looks_like_apple_library(path: str) -> bool:
|
||||
return path == f"/Library/Python/{get_major_minor_version()}/site-packages"
|
||||
|
||||
|
||||
def get_prefixed_libs(prefix: str) -> List[str]:
|
||||
def get_isolated_environment_lib_paths(prefix: str) -> List[str]:
|
||||
"""Return the lib locations under ``prefix``."""
|
||||
new_pure, new_plat = _sysconfig.get_prefixed_libs(prefix)
|
||||
new_pure, new_plat = _sysconfig.get_isolated_environment_lib_paths(prefix)
|
||||
if _USE_SYSCONFIG:
|
||||
return _deduplicated(new_pure, new_plat)
|
||||
|
||||
old_pure, old_plat = _distutils.get_prefixed_libs(prefix)
|
||||
old_pure, old_plat = _distutils.get_isolated_environment_lib_paths(prefix)
|
||||
old_lib_paths = _deduplicated(old_pure, old_plat)
|
||||
|
||||
# Apple's Python (shipped with Xcode and Command Line Tools) hard-code
|
||||
@@ -526,3 +527,7 @@ def get_prefixed_libs(prefix: str) -> List[str]:
|
||||
_log_context(prefix=prefix)
|
||||
|
||||
return old_lib_paths
|
||||
|
||||
|
||||
def get_isolated_environment_bin_path(prefix: str) -> str:
|
||||
return _sysconfig.get_isolated_environment_paths(prefix)["scripts"]
|
||||
|
||||
@@ -173,7 +173,7 @@ def get_platlib() -> str:
|
||||
return get_python_lib(plat_specific=True)
|
||||
|
||||
|
||||
def get_prefixed_libs(prefix: str) -> Tuple[str, str]:
|
||||
def get_isolated_environment_lib_paths(prefix: str) -> Tuple[str, str]:
|
||||
return (
|
||||
get_python_lib(plat_specific=False, prefix=prefix),
|
||||
get_python_lib(plat_specific=True, prefix=prefix),
|
||||
|
||||
@@ -213,6 +213,13 @@ def get_platlib() -> str:
|
||||
return sysconfig.get_paths()["platlib"]
|
||||
|
||||
|
||||
def get_prefixed_libs(prefix: str) -> typing.Tuple[str, str]:
|
||||
paths = sysconfig.get_paths(vars={"base": prefix, "platbase": prefix})
|
||||
def get_isolated_environment_paths(prefix: str) -> typing.Dict[str, str]:
|
||||
variables = {"base": prefix, "platbase": prefix}
|
||||
if "venv" in sysconfig.get_scheme_names():
|
||||
return sysconfig.get_paths(vars=variables, scheme="venv")
|
||||
return sysconfig.get_paths(vars=variables)
|
||||
|
||||
|
||||
def get_isolated_environment_lib_paths(prefix: str) -> typing.Tuple[str, str]:
|
||||
paths = get_isolated_environment_paths(prefix)
|
||||
return (paths["purelib"], paths["platlib"])
|
||||
|
||||
@@ -13,7 +13,7 @@ from pipenv.patched.pip._internal.utils.virtualenv import running_under_virtuale
|
||||
USER_CACHE_DIR = appdirs.user_cache_dir("pip")
|
||||
|
||||
# FIXME doesn't account for venv linked to global site-packages
|
||||
site_packages: typing.Optional[str] = sysconfig.get_path("purelib")
|
||||
site_packages: str = sysconfig.get_path("purelib")
|
||||
|
||||
|
||||
def get_major_minor_version() -> str:
|
||||
|
||||
@@ -103,17 +103,28 @@ class ArchiveInfo:
|
||||
def __init__(
|
||||
self,
|
||||
hash: Optional[str] = None,
|
||||
hashes: Optional[Dict[str, str]] = None,
|
||||
) -> None:
|
||||
if hash is not None:
|
||||
# Auto-populate the hashes key to upgrade to the new format automatically.
|
||||
# We don't back-populate the legacy hash key.
|
||||
hash_name, hash_value = hash.split("=", 1)
|
||||
if hashes is None:
|
||||
hashes = {hash_name: hash_value}
|
||||
elif hash_name not in hash:
|
||||
hashes = hashes.copy()
|
||||
hashes[hash_name] = hash_value
|
||||
self.hash = hash
|
||||
self.hashes = hashes
|
||||
|
||||
@classmethod
|
||||
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["ArchiveInfo"]:
|
||||
if d is None:
|
||||
return None
|
||||
return cls(hash=_get(d, str, "hash"))
|
||||
return cls(hash=_get(d, str, "hash"), hashes=_get(d, dict, "hashes"))
|
||||
|
||||
def _to_dict(self) -> Dict[str, Any]:
|
||||
return _filter_none(hash=self.hash)
|
||||
return _filter_none(hash=self.hash, hashes=self.hashes)
|
||||
|
||||
|
||||
class DirInfo:
|
||||
|
||||
@@ -38,7 +38,7 @@ class InstallationReport:
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"version": "0",
|
||||
"version": "1",
|
||||
"pip_version": __version__,
|
||||
"install": [
|
||||
self._install_req_to_dict(ireq) for ireq in self._install_requirements
|
||||
|
||||
@@ -18,6 +18,7 @@ from typing import (
|
||||
Union,
|
||||
)
|
||||
|
||||
from pipenv.patched.pip._internal.utils.deprecation import deprecated
|
||||
from pipenv.patched.pip._internal.utils.filetypes import WHEEL_EXTENSION
|
||||
from pipenv.patched.pip._internal.utils.hashes import Hashes
|
||||
from pipenv.patched.pip._internal.utils.misc import (
|
||||
@@ -78,6 +79,9 @@ class LinkHash:
|
||||
name, value = match.groups()
|
||||
return cls(name=name, value=value)
|
||||
|
||||
def as_dict(self) -> Dict[str, str]:
|
||||
return {self.name: self.value}
|
||||
|
||||
def as_hashes(self) -> Hashes:
|
||||
"""Return a Hashes instance which checks only for the current hash."""
|
||||
return Hashes({self.name: [self.value]})
|
||||
@@ -164,8 +168,8 @@ class Link(KeyBasedCompareMixin):
|
||||
"requires_python",
|
||||
"yanked_reason",
|
||||
"dist_info_metadata",
|
||||
"link_hash",
|
||||
"cache_link_parsing",
|
||||
"egg_fragment",
|
||||
]
|
||||
|
||||
def __init__(
|
||||
@@ -175,7 +179,6 @@ class Link(KeyBasedCompareMixin):
|
||||
requires_python: Optional[str] = None,
|
||||
yanked_reason: Optional[str] = None,
|
||||
dist_info_metadata: Optional[str] = None,
|
||||
link_hash: Optional[LinkHash] = None,
|
||||
cache_link_parsing: bool = True,
|
||||
hashes: Optional[Mapping[str, str]] = None,
|
||||
) -> None:
|
||||
@@ -198,16 +201,11 @@ class Link(KeyBasedCompareMixin):
|
||||
attribute, if present, in a simple repository HTML link. This may be parsed
|
||||
into its own `Link` by `self.metadata_link()`. See PEP 658 for more
|
||||
information and the specification.
|
||||
:param link_hash: a checksum for the content the link points to. If not
|
||||
provided, this will be extracted from the link URL, if the URL has
|
||||
any checksum.
|
||||
:param cache_link_parsing: A flag that is used elsewhere to determine
|
||||
whether resources retrieved from this link
|
||||
should be cached. PyPI index urls should
|
||||
generally have this set to False, for
|
||||
example.
|
||||
whether resources retrieved from this link should be cached. PyPI
|
||||
URLs should generally have this set to False, for example.
|
||||
:param hashes: A mapping of hash names to digests to allow us to
|
||||
determine the validity of a download.
|
||||
determine the validity of a download.
|
||||
"""
|
||||
|
||||
# url can be a UNC windows share
|
||||
@@ -218,17 +216,23 @@ class Link(KeyBasedCompareMixin):
|
||||
# Store the url as a private attribute to prevent accidentally
|
||||
# trying to set a new value.
|
||||
self._url = url
|
||||
self._hashes = hashes if hashes is not None else {}
|
||||
|
||||
link_hash = LinkHash.split_hash_name_and_value(url)
|
||||
hashes_from_link = {} if link_hash is None else link_hash.as_dict()
|
||||
if hashes is None:
|
||||
self._hashes = hashes_from_link
|
||||
else:
|
||||
self._hashes = {**hashes, **hashes_from_link}
|
||||
|
||||
self.comes_from = comes_from
|
||||
self.requires_python = requires_python if requires_python else None
|
||||
self.yanked_reason = yanked_reason
|
||||
self.dist_info_metadata = dist_info_metadata
|
||||
self.link_hash = link_hash or LinkHash.split_hash_name_and_value(self._url)
|
||||
|
||||
super().__init__(key=url, defining_class=Link)
|
||||
|
||||
self.cache_link_parsing = cache_link_parsing
|
||||
self.egg_fragment = self._egg_fragment()
|
||||
|
||||
@classmethod
|
||||
def from_json(
|
||||
@@ -358,12 +362,28 @@ class Link(KeyBasedCompareMixin):
|
||||
|
||||
_egg_fragment_re = re.compile(r"[#&]egg=([^&]*)")
|
||||
|
||||
@property
|
||||
def egg_fragment(self) -> Optional[str]:
|
||||
# Per PEP 508.
|
||||
_project_name_re = re.compile(
|
||||
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE
|
||||
)
|
||||
|
||||
def _egg_fragment(self) -> Optional[str]:
|
||||
match = self._egg_fragment_re.search(self._url)
|
||||
if not match:
|
||||
return None
|
||||
return match.group(1)
|
||||
|
||||
# An egg fragment looks like a PEP 508 project name, along with
|
||||
# an optional extras specifier. Anything else is invalid.
|
||||
project_name = match.group(1)
|
||||
if not self._project_name_re.match(project_name):
|
||||
deprecated(
|
||||
reason=f"{self} contains an egg fragment with a non-PEP 508 name",
|
||||
replacement="to use the req @ url syntax, and remove the egg fragment",
|
||||
gone_in="25.0",
|
||||
issue=11617,
|
||||
)
|
||||
|
||||
return project_name
|
||||
|
||||
_subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)")
|
||||
|
||||
@@ -382,29 +402,26 @@ class Link(KeyBasedCompareMixin):
|
||||
if self.dist_info_metadata is None:
|
||||
return None
|
||||
metadata_url = f"{self.url_without_fragment}.metadata"
|
||||
link_hash: Optional[LinkHash] = None
|
||||
# If data-dist-info-metadata="true" is set, then the metadata file exists,
|
||||
# but there is no information about its checksum or anything else.
|
||||
if self.dist_info_metadata != "true":
|
||||
link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata)
|
||||
return Link(metadata_url, link_hash=link_hash)
|
||||
else:
|
||||
link_hash = None
|
||||
if link_hash is None:
|
||||
return Link(metadata_url)
|
||||
return Link(metadata_url, hashes=link_hash.as_dict())
|
||||
|
||||
def as_hashes(self) -> Optional[Hashes]:
|
||||
if self.link_hash is not None:
|
||||
return self.link_hash.as_hashes()
|
||||
return None
|
||||
def as_hashes(self) -> Hashes:
|
||||
return Hashes({k: [v] for k, v in self._hashes.items()})
|
||||
|
||||
@property
|
||||
def hash(self) -> Optional[str]:
|
||||
if self.link_hash is not None:
|
||||
return self.link_hash.value
|
||||
return None
|
||||
return next(iter(self._hashes.values()), None)
|
||||
|
||||
@property
|
||||
def hash_name(self) -> Optional[str]:
|
||||
if self.link_hash is not None:
|
||||
return self.link_hash.name
|
||||
return None
|
||||
return next(iter(self._hashes), None)
|
||||
|
||||
@property
|
||||
def show_url(self) -> str:
|
||||
@@ -433,15 +450,15 @@ class Link(KeyBasedCompareMixin):
|
||||
|
||||
@property
|
||||
def has_hash(self) -> bool:
|
||||
return self.link_hash is not None
|
||||
return bool(self._hashes)
|
||||
|
||||
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
|
||||
"""
|
||||
Return True if the link has a hash and it is allowed by `hashes`.
|
||||
"""
|
||||
if self.link_hash is None:
|
||||
if hashes is None:
|
||||
return False
|
||||
return self.link_hash.is_hash_allowed(hashes)
|
||||
return any(hashes.is_hash_allowed(k, v) for k, v in self._hashes.items())
|
||||
|
||||
|
||||
class _CleanResult(NamedTuple):
|
||||
|
||||
@@ -4,8 +4,12 @@ Contains interface (MultiDomainBasicAuth) and associated glue code for
|
||||
providing credentials in the context of network requests.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import urllib.parse
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, NamedTuple, Optional, Tuple
|
||||
|
||||
from pipenv.patched.pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
|
||||
from pipenv.patched.pip._vendor.requests.models import Request, Response
|
||||
@@ -23,51 +27,165 @@ from pipenv.patched.pip._internal.vcs.versioncontrol import AuthInfo
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
Credentials = Tuple[str, str, str]
|
||||
|
||||
try:
|
||||
import keyring
|
||||
except ImportError:
|
||||
keyring = None # type: ignore[assignment]
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Keyring is skipped due to an exception: %s",
|
||||
str(exc),
|
||||
)
|
||||
keyring = None # type: ignore[assignment]
|
||||
KEYRING_DISABLED = False
|
||||
|
||||
|
||||
def get_keyring_auth(url: Optional[str], username: Optional[str]) -> Optional[AuthInfo]:
|
||||
"""Return the tuple auth for a given url from keyring."""
|
||||
global keyring
|
||||
if not url or not keyring:
|
||||
class Credentials(NamedTuple):
|
||||
url: str
|
||||
username: str
|
||||
password: str
|
||||
|
||||
|
||||
class KeyRingBaseProvider(ABC):
|
||||
"""Keyring base provider interface"""
|
||||
|
||||
@abstractmethod
|
||||
def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def save_auth_info(self, url: str, username: str, password: str) -> None:
|
||||
...
|
||||
|
||||
|
||||
class KeyRingNullProvider(KeyRingBaseProvider):
|
||||
"""Keyring null provider"""
|
||||
|
||||
def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]:
|
||||
return None
|
||||
|
||||
try:
|
||||
try:
|
||||
get_credential = keyring.get_credential
|
||||
except AttributeError:
|
||||
pass
|
||||
else:
|
||||
def save_auth_info(self, url: str, username: str, password: str) -> None:
|
||||
return None
|
||||
|
||||
|
||||
class KeyRingPythonProvider(KeyRingBaseProvider):
|
||||
"""Keyring interface which uses locally imported `keyring`"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
import keyring
|
||||
|
||||
self.keyring = keyring
|
||||
|
||||
def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]:
|
||||
# Support keyring's get_credential interface which supports getting
|
||||
# credentials without a username. This is only available for
|
||||
# keyring>=15.2.0.
|
||||
if hasattr(self.keyring, "get_credential"):
|
||||
logger.debug("Getting credentials from keyring for %s", url)
|
||||
cred = get_credential(url, username)
|
||||
cred = self.keyring.get_credential(url, username)
|
||||
if cred is not None:
|
||||
return cred.username, cred.password
|
||||
return None
|
||||
|
||||
if username:
|
||||
if username is not None:
|
||||
logger.debug("Getting password from keyring for %s", url)
|
||||
password = keyring.get_password(url, username)
|
||||
password = self.keyring.get_password(url, username)
|
||||
if password:
|
||||
return username, password
|
||||
return None
|
||||
|
||||
def save_auth_info(self, url: str, username: str, password: str) -> None:
|
||||
self.keyring.set_password(url, username, password)
|
||||
|
||||
|
||||
class KeyRingCliProvider(KeyRingBaseProvider):
    """Provider which drives the `keyring` command line program.

    Every lookup and store is done by running the executable handed to the
    constructor as a subprocess, rather than by importing a keyring package
    into the current interpreter.
    """

    def __init__(self, cmd: str) -> None:
        # `cmd` is the keyring executable to invoke.
        self.keyring = cmd

    def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]:
        """Return ``(username, password)`` for ``url``, or None if unavailable."""
        # This is the default implementation of keyring.get_credential
        # https://github.com/jaraco/keyring/blob/97689324abcf01bd1793d49063e7ca01e03d7d07/keyring/backend.py#L134-L139
        if username is None:
            return None
        password = self._get_password(url, username)
        if password is None:
            return None
        return username, password

    def save_auth_info(self, url: str, username: str, password: str) -> None:
        """Store the password by delegating to ``_set_password``."""
        return self._set_password(url, username, password)

    def _get_password(self, service_name: str, username: str) -> Optional[str]:
        """Mirror the implementation of keyring.get_password using cli"""
        if self.keyring is None:
            return None

        # The child runs with PYTHONIOENCODING=utf-8, matching the utf-8
        # decode applied to its stdout below.
        child_env = os.environ.copy()
        child_env["PYTHONIOENCODING"] = "utf-8"
        completed = subprocess.run(
            [self.keyring, "get", service_name, username],
            stdin=subprocess.DEVNULL,
            capture_output=True,
            env=child_env,
        )
        # A non-zero exit status is reported as "no password".
        if completed.returncode != 0:
            return None
        return completed.stdout.decode("utf-8").strip(os.linesep)

    def _set_password(self, service_name: str, username: str, password: str) -> None:
        """Mirror the implementation of keyring.set_password using cli"""
        if self.keyring is None:
            return None

        # The password is fed on stdin, terminated by the platform line
        # separator.
        payload = (password + os.linesep).encode("utf-8")
        child_env = os.environ.copy()
        child_env["PYTHONIOENCODING"] = "utf-8"
        # check=True raises CalledProcessError on a non-zero exit status.
        subprocess.run(
            [self.keyring, "set", service_name, username],
            input=payload,
            env=child_env,
            check=True,
        )
        return None
|
||||
|
||||
|
||||
def get_keyring_provider() -> KeyRingBaseProvider:
    """Select the keyring provider to use.

    Preference order: the Python provider, then a `keyring` executable found
    on PATH, then the null provider. When ``KEYRING_DISABLED`` is set the null
    provider is returned without trying the others.
    """
    # keyring has previously failed and been disabled
    if KEYRING_DISABLED:
        return KeyRingNullProvider()

    # Default to trying to use Python provider
    try:
        return KeyRingPythonProvider()
    except ImportError:
        pass
    except Exception as exc:
        # In the event of an unexpected exception
        # we should warn the user
        message = (
            "Installed copy of keyring fails with exception %s, "
            "trying to find a keyring executable as a fallback"
        )
        logger.warning(message, str(exc))

    # Fallback to Cli Provider if `keyring` isn't installed
    executable = shutil.which("keyring")
    if executable:
        return KeyRingCliProvider(executable)

    return KeyRingNullProvider()
|
||||
|
||||
|
||||
def get_keyring_auth(url: Optional[str], username: Optional[str]) -> Optional[AuthInfo]:
    """Return the tuple auth for a given url from keyring.

    Returns None when no url is given, when the provider has nothing to
    offer, or when the provider raises. In the last case a warning is logged
    and ``KEYRING_DISABLED`` is set.
    """
    global KEYRING_DISABLED

    # Do nothing if no url was provided
    if not url:
        return None

    keyring = get_keyring_provider()
    try:
        return keyring.get_auth_info(url, username)
    except Exception as exc:
        logger.warning(
            "Keyring is skipped due to an exception: %s",
            str(exc),
        )
        # Record the failure; get_keyring_provider() checks this flag and
        # returns the null provider once it is set.
        KEYRING_DISABLED = True
        return None
|
||||
|
||||
|
||||
class MultiDomainBasicAuth(AuthBase):
|
||||
@@ -241,7 +359,7 @@ class MultiDomainBasicAuth(AuthBase):
|
||||
|
||||
# Factored out to allow for easy patching in tests
|
||||
def _should_save_password_to_keyring(self) -> bool:
    """Decide whether the credentials should be saved to keyring.

    Returns False without prompting when only the null keyring provider is
    available; otherwise prompts the user and returns whether the answer
    was "y".
    """
    # get_keyring_provider() always returns a provider object (the null
    # provider when nothing usable exists), so "no keyring" has to be
    # detected by type rather than by comparing against None.
    if isinstance(get_keyring_provider(), KeyRingNullProvider):
        return False
    return ask("Save credentials to keyring [y/N]: ", ["y", "n"]) == "y"
|
||||
|
||||
@@ -276,7 +394,11 @@ class MultiDomainBasicAuth(AuthBase):
|
||||
|
||||
# Prompt to save the password to keyring
|
||||
if save and self._should_save_password_to_keyring():
|
||||
self._credentials_to_save = (parsed.netloc, username, password)
|
||||
self._credentials_to_save = Credentials(
|
||||
url=parsed.netloc,
|
||||
username=username,
|
||||
password=password,
|
||||
)
|
||||
|
||||
# Consume content and release the original connection to allow our new
|
||||
# request to reuse the same one.
|
||||
@@ -309,15 +431,16 @@ class MultiDomainBasicAuth(AuthBase):
|
||||
|
||||
def save_credentials(self, resp: Response, **kwargs: Any) -> None:
    """Response callback to save credentials on success.

    The pending credentials are cleared on every call. They are written to
    the keyring provider only when the response status code is below 400.
    A failure while saving is logged and not propagated.
    """
    keyring = get_keyring_provider()
    assert not isinstance(
        keyring, KeyRingNullProvider
    ), "should never reach here without keyring"

    # Take the pending credentials and clear them before saving, so they are
    # dropped whether or not this response qualifies.
    creds = self._credentials_to_save
    self._credentials_to_save = None
    if creds and resp.status_code < 400:
        try:
            logger.info("Saving credentials to keyring")
            keyring.save_auth_info(creds.url, creds.username, creds.password)
        except Exception:
            logger.exception("Failed to save credentials")
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
import os
|
||||
|
||||
from pipenv.patched.pip._vendor.pep517.wrappers import Pep517HookCaller
|
||||
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
|
||||
|
||||
from pipenv.patched.pip._internal.build_env import BuildEnvironment
|
||||
from pipenv.patched.pip._internal.exceptions import (
|
||||
@@ -15,7 +15,7 @@ from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory
|
||||
|
||||
|
||||
def generate_metadata(
|
||||
build_env: BuildEnvironment, backend: Pep517HookCaller, details: str
|
||||
build_env: BuildEnvironment, backend: BuildBackendHookCaller, details: str
|
||||
) -> str:
|
||||
"""Generate metadata using mechanisms described in PEP 517.
|
||||
|
||||
@@ -26,7 +26,7 @@ def generate_metadata(
|
||||
metadata_dir = metadata_tmpdir.path
|
||||
|
||||
with build_env:
|
||||
# Note that Pep517HookCaller implements a fallback for
|
||||
# Note that BuildBackendHookCaller implements a fallback for
|
||||
# prepare_metadata_for_build_wheel, so we don't have to
|
||||
# consider the possibility that this hook doesn't exist.
|
||||
runner = runner_with_spinner_message("Preparing metadata (pyproject.toml)")
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
import os
|
||||
|
||||
from pipenv.patched.pip._vendor.pep517.wrappers import Pep517HookCaller
|
||||
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
|
||||
|
||||
from pipenv.patched.pip._internal.build_env import BuildEnvironment
|
||||
from pipenv.patched.pip._internal.exceptions import (
|
||||
@@ -15,7 +15,7 @@ from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory
|
||||
|
||||
|
||||
def generate_editable_metadata(
|
||||
build_env: BuildEnvironment, backend: Pep517HookCaller, details: str
|
||||
build_env: BuildEnvironment, backend: BuildBackendHookCaller, details: str
|
||||
) -> str:
|
||||
"""Generate metadata using mechanisms described in PEP 660.
|
||||
|
||||
@@ -26,7 +26,7 @@ def generate_editable_metadata(
|
||||
metadata_dir = metadata_tmpdir.path
|
||||
|
||||
with build_env:
|
||||
# Note that Pep517HookCaller implements a fallback for
|
||||
# Note that BuildBackendHookCaller implements a fallback for
|
||||
# prepare_metadata_for_build_wheel/editable, so we don't have to
|
||||
# consider the possibility that this hook doesn't exist.
|
||||
runner = runner_with_spinner_message(
|
||||
|
||||
@@ -2,7 +2,7 @@ import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from pipenv.patched.pip._vendor.pep517.wrappers import Pep517HookCaller
|
||||
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
|
||||
|
||||
from pipenv.patched.pip._internal.utils.subprocess import runner_with_spinner_message
|
||||
|
||||
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def build_wheel_pep517(
|
||||
name: str,
|
||||
backend: Pep517HookCaller,
|
||||
backend: BuildBackendHookCaller,
|
||||
metadata_directory: str,
|
||||
tempd: str,
|
||||
) -> Optional[str]:
|
||||
|
||||
@@ -2,7 +2,7 @@ import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from pipenv.patched.pip._vendor.pep517.wrappers import HookMissing, Pep517HookCaller
|
||||
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller, HookMissing
|
||||
|
||||
from pipenv.patched.pip._internal.utils.subprocess import runner_with_spinner_message
|
||||
|
||||
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def build_wheel_editable(
|
||||
name: str,
|
||||
backend: Pep517HookCaller,
|
||||
backend: BuildBackendHookCaller,
|
||||
metadata_directory: str,
|
||||
tempd: str,
|
||||
) -> Optional[str]:
|
||||
|
||||
@@ -75,7 +75,7 @@ def check_package_set(
|
||||
if name not in package_set:
|
||||
missed = True
|
||||
if req.marker is not None:
|
||||
missed = req.marker.evaluate()
|
||||
missed = req.marker.evaluate({"extra": ""})
|
||||
if missed:
|
||||
missing_deps.add((name, req))
|
||||
continue
|
||||
|
||||
@@ -159,9 +159,8 @@ def load_pyproject_toml(
|
||||
if backend is None:
|
||||
# If the user didn't specify a backend, we assume they want to use
|
||||
# the setuptools backend. But we can't be sure they have included
|
||||
# a version of setuptools which supplies the backend, or wheel
|
||||
# (which is needed by the backend) in their requirements. So we
|
||||
# make a note to check that those requirements are present once
|
||||
# a version of setuptools which supplies the backend. So we
|
||||
# make a note to check that this requirement is present once
|
||||
# we have set up the environment.
|
||||
# This is quite a lot of work to check for a very specific case. But
|
||||
# the problem is, that case is potentially quite common - projects that
|
||||
@@ -170,6 +169,6 @@ def load_pyproject_toml(
|
||||
# tools themselves. The original PEP 518 code had a similar check (but
|
||||
# implemented in a different way).
|
||||
backend = "setuptools.build_meta:__legacy__"
|
||||
check = ["setuptools>=40.8.0", "wheel"]
|
||||
check = ["setuptools>=40.8.0"]
|
||||
|
||||
return BuildSystemDetails(requires, backend, check, backend_path)
|
||||
|
||||
@@ -18,7 +18,7 @@ from pipenv.patched.pip._vendor.packaging.specifiers import SpecifierSet
|
||||
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
|
||||
from pipenv.patched.pip._vendor.packaging.version import Version
|
||||
from pipenv.patched.pip._vendor.packaging.version import parse as parse_version
|
||||
from pipenv.patched.pip._vendor.pep517.wrappers import Pep517HookCaller
|
||||
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
|
||||
|
||||
from pipenv.patched.pip._internal.build_env import BuildEnvironment, NoOpBuildEnvironment
|
||||
from pipenv.patched.pip._internal.exceptions import InstallationError, LegacyInstallFailure
|
||||
@@ -51,7 +51,7 @@ from pipenv.patched.pip._internal.utils.direct_url_helpers import (
|
||||
)
|
||||
from pipenv.patched.pip._internal.utils.hashes import Hashes
|
||||
from pipenv.patched.pip._internal.utils.misc import (
|
||||
ConfiguredPep517HookCaller,
|
||||
ConfiguredBuildBackendHookCaller,
|
||||
ask_path_exists,
|
||||
backup_dir,
|
||||
display_path,
|
||||
@@ -173,7 +173,7 @@ class InstallRequirement:
|
||||
self.requirements_to_check: List[str] = []
|
||||
|
||||
# The PEP 517 backend we should use to build the project
|
||||
self.pep517_backend: Optional[Pep517HookCaller] = None
|
||||
self.pep517_backend: Optional[BuildBackendHookCaller] = None
|
||||
|
||||
# Are we using PEP 517 for this requirement?
|
||||
# After pyproject.toml has been loaded, the only valid values are True
|
||||
@@ -195,7 +195,11 @@ class InstallRequirement:
|
||||
else:
|
||||
s = "<InstallRequirement>"
|
||||
if self.satisfied_by is not None:
|
||||
s += " in {}".format(display_path(self.satisfied_by.location))
|
||||
if self.satisfied_by.location is not None:
|
||||
location = display_path(self.satisfied_by.location)
|
||||
else:
|
||||
location = "<memory>"
|
||||
s += f" in {location}"
|
||||
if self.comes_from:
|
||||
if isinstance(self.comes_from, str):
|
||||
comes_from: Optional[str] = self.comes_from
|
||||
@@ -482,7 +486,7 @@ class InstallRequirement:
|
||||
requires, backend, check, backend_path = pyproject_toml_data
|
||||
self.requirements_to_check = check
|
||||
self.pyproject_requires = requires
|
||||
self.pep517_backend = ConfiguredPep517HookCaller(
|
||||
self.pep517_backend = ConfiguredBuildBackendHookCaller(
|
||||
self,
|
||||
self.unpacked_source_directory,
|
||||
backend,
|
||||
|
||||
@@ -133,7 +133,7 @@ class UpgradePrompt:
|
||||
return Group(
|
||||
Text(),
|
||||
Text.from_markup(
|
||||
f"{notice} A new release of pip available: "
|
||||
f"{notice} A new release of pip is available: "
|
||||
f"[red]{self.old}[reset] -> [green]{self.new}[reset]"
|
||||
),
|
||||
Text.from_markup(
|
||||
@@ -155,7 +155,7 @@ def was_installed_by_pip(pkg: str) -> bool:
|
||||
|
||||
def _get_current_remote_pip_version(
|
||||
session: PipSession, options: optparse.Values
|
||||
) -> str:
|
||||
) -> Optional[str]:
|
||||
# Lets use PackageFinder to see what the latest pip version is
|
||||
link_collector = LinkCollector.create(
|
||||
session,
|
||||
@@ -176,7 +176,7 @@ def _get_current_remote_pip_version(
|
||||
)
|
||||
best_candidate = finder.find_best_candidate("pip").best_candidate
|
||||
if best_candidate is None:
|
||||
return
|
||||
return None
|
||||
|
||||
return str(best_candidate.version)
|
||||
|
||||
@@ -186,11 +186,14 @@ def _self_version_check_logic(
|
||||
state: SelfCheckState,
|
||||
current_time: datetime.datetime,
|
||||
local_version: DistributionVersion,
|
||||
get_remote_version: Callable[[], str],
|
||||
get_remote_version: Callable[[], Optional[str]],
|
||||
) -> Optional[UpgradePrompt]:
|
||||
remote_version_str = state.get(current_time)
|
||||
if remote_version_str is None:
|
||||
remote_version_str = get_remote_version()
|
||||
if remote_version_str is None:
|
||||
logger.debug("No remote pip version found")
|
||||
return None
|
||||
state.set(remote_version_str, current_time)
|
||||
|
||||
remote_version = parse_version(remote_version_str)
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
# The following comment should be removed at some point in the future.
|
||||
# mypy: strict-optional=False
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Optional
|
||||
from typing import List, Optional
|
||||
|
||||
from pipenv.patched.pip._internal.locations import site_packages, user_site
|
||||
from pipenv.patched.pip._internal.utils.virtualenv import (
|
||||
@@ -57,7 +54,7 @@ def egg_link_path_from_location(raw_name: str) -> Optional[str]:
|
||||
|
||||
This method will just return the first one found.
|
||||
"""
|
||||
sites = []
|
||||
sites: List[str] = []
|
||||
if running_under_virtualenv():
|
||||
sites.append(site_packages)
|
||||
if not virtualenv_no_global() and user_site:
|
||||
|
||||
@@ -12,6 +12,7 @@ import posixpath
|
||||
import shutil
|
||||
import stat
|
||||
import sys
|
||||
import sysconfig
|
||||
import urllib.parse
|
||||
from io import StringIO
|
||||
from itertools import filterfalse, tee, zip_longest
|
||||
@@ -34,11 +35,11 @@ from typing import (
|
||||
cast,
|
||||
)
|
||||
|
||||
from pipenv.patched.pip._vendor.pep517 import Pep517HookCaller
|
||||
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
|
||||
from pipenv.patched.pip._vendor.tenacity import retry, stop_after_delay, wait_fixed
|
||||
|
||||
from pipenv.patched.pip import __version__
|
||||
from pipenv.patched.pip._internal.exceptions import CommandError
|
||||
from pipenv.patched.pip._internal.exceptions import CommandError, ExternallyManagedEnvironment
|
||||
from pipenv.patched.pip._internal.locations import get_major_minor_version
|
||||
from pipenv.patched.pip._internal.utils.compat import WINDOWS
|
||||
from pipenv.patched.pip._internal.utils.virtualenv import running_under_virtualenv
|
||||
@@ -57,10 +58,10 @@ __all__ = [
|
||||
"captured_stdout",
|
||||
"ensure_dir",
|
||||
"remove_auth_from_url",
|
||||
"ConfiguredPep517HookCaller",
|
||||
"check_externally_managed",
|
||||
"ConfiguredBuildBackendHookCaller",
|
||||
]
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
T = TypeVar("T")
|
||||
@@ -581,6 +582,21 @@ def protect_pip_from_modification_on_windows(modifying_pip: bool) -> None:
|
||||
)
|
||||
|
||||
|
||||
def check_externally_managed() -> None:
    """Raise if the current environment is marked as externally managed.

    Nothing happens when running under a virtual environment. Otherwise an
    ``EXTERNALLY-MANAGED`` file is looked for in the ``stdlib`` sysconfig
    path; if it exists, an ExternallyManagedEnvironment built from that file
    is raised.
    """
    if running_under_virtualenv():
        return

    stdlib_dir = sysconfig.get_path("stdlib")
    marker = os.path.join(stdlib_dir, "EXTERNALLY-MANAGED")
    if os.path.isfile(marker):
        raise ExternallyManagedEnvironment.from_config(marker)
|
||||
|
||||
|
||||
def is_console_interactive() -> bool:
    """Tell whether standard input exists and is attached to a terminal."""
    stdin = sys.stdin
    if stdin is None:
        return False
    return stdin.isatty()
|
||||
@@ -635,7 +651,7 @@ def partition(
|
||||
return filterfalse(pred, t1), filter(pred, t2)
|
||||
|
||||
|
||||
class ConfiguredPep517HookCaller(Pep517HookCaller):
|
||||
class ConfiguredBuildBackendHookCaller(BuildBackendHookCaller):
|
||||
def __init__(
|
||||
self,
|
||||
config_holder: Any,
|
||||
|
||||
@@ -239,8 +239,8 @@ def call_subprocess(
|
||||
def runner_with_spinner_message(message: str) -> Callable[..., None]:
|
||||
"""Provide a subprocess_runner that shows a spinner message.
|
||||
|
||||
Intended for use with for pep517's Pep517HookCaller. Thus, the runner has
|
||||
an API that matches what's expected by Pep517HookCaller.subprocess_runner.
|
||||
Intended for use with for BuildBackendHookCaller. Thus, the runner has
|
||||
an API that matches what's expected by BuildBackendHookCaller.subprocess_runner.
|
||||
"""
|
||||
|
||||
def runner(
|
||||
|
||||
@@ -19,7 +19,7 @@ def _running_under_venv() -> bool:
|
||||
return sys.prefix != getattr(sys, "base_prefix", sys.prefix)
|
||||
|
||||
|
||||
def _running_under_regular_virtualenv() -> bool:
|
||||
def _running_under_legacy_virtualenv() -> bool:
|
||||
"""Checks if sys.real_prefix is set.
|
||||
|
||||
This handles virtual environments created with pypa's virtualenv.
|
||||
@@ -29,8 +29,8 @@ def _running_under_regular_virtualenv() -> bool:
|
||||
|
||||
|
||||
def running_under_virtualenv() -> bool:
    """True if we're running inside a virtual environment, False otherwise.

    Either detection helper returning True is sufficient: the venv check or
    the legacy virtualenv check.
    """
    return _running_under_venv() or _running_under_legacy_virtualenv()
|
||||
|
||||
|
||||
def _get_pyvenv_cfg_lines() -> Optional[List[str]]:
|
||||
@@ -77,7 +77,7 @@ def _no_global_under_venv() -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _no_global_under_regular_virtualenv() -> bool:
|
||||
def _no_global_under_legacy_virtualenv() -> bool:
|
||||
"""Check if "no-global-site-packages.txt" exists beside site.py
|
||||
|
||||
This mirrors logic in pypa/virtualenv for determining whether system
|
||||
@@ -98,7 +98,7 @@ def virtualenv_no_global() -> bool:
|
||||
if _running_under_venv():
|
||||
return _no_global_under_venv()
|
||||
|
||||
if _running_under_regular_virtualenv():
|
||||
return _no_global_under_regular_virtualenv()
|
||||
if _running_under_legacy_virtualenv():
|
||||
return _no_global_under_legacy_virtualenv()
|
||||
|
||||
return False
|
||||
|
||||
@@ -72,7 +72,7 @@ class Bazaar(VersionControl):
|
||||
|
||||
@classmethod
|
||||
def get_url_rev_and_auth(cls, url: str) -> Tuple[str, Optional[str], AuthInfo]:
|
||||
# hotfix the URL scheme after removing bzr+ from bzr+ssh:// readd it
|
||||
# hotfix the URL scheme after removing bzr+ from bzr+ssh:// re-add it
|
||||
url, rev, user_pass = super().get_url_rev_and_auth(url)
|
||||
if url.startswith("ssh://"):
|
||||
url = "bzr+" + url
|
||||
|
||||
@@ -87,7 +87,7 @@ class Subversion(VersionControl):
|
||||
|
||||
@classmethod
|
||||
def get_url_rev_and_auth(cls, url: str) -> Tuple[str, Optional[str], AuthInfo]:
|
||||
# hotfix the URL scheme after removing svn+ from svn+ssh:// readd it
|
||||
# hotfix the URL scheme after removing svn+ from svn+ssh:// re-add it
|
||||
url, rev, user_pass = super().get_url_rev_and_auth(url)
|
||||
if url.startswith("ssh://"):
|
||||
url = "svn+" + url
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .core import contents, where
|
||||
|
||||
__all__ = ["contents", "where"]
|
||||
__version__ = "2022.09.24"
|
||||
__version__ = "2022.12.07"
|
||||
|
||||
@@ -636,37 +636,6 @@ BA6+C4OmF4O5MBKgxTMVBbkN+8cFduPYSo38NBejxiEovjBFMR7HeL5YYTisO+IB
|
||||
ZQ==
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C.
|
||||
# Subject: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C.
|
||||
# Label: "Network Solutions Certificate Authority"
|
||||
# Serial: 116697915152937497490437556386812487904
|
||||
# MD5 Fingerprint: d3:f3:a6:16:c0:fa:6b:1d:59:b1:2d:96:4d:0e:11:2e
|
||||
# SHA1 Fingerprint: 74:f8:a3:c3:ef:e7:b3:90:06:4b:83:90:3c:21:64:60:20:e5:df:ce
|
||||
# SHA256 Fingerprint: 15:f0:ba:00:a3:ac:7a:f3:ac:88:4c:07:2b:10:11:a0:77:bd:77:c0:97:f4:01:64:b2:f8:59:8a:bd:83:86:0c
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIID5jCCAs6gAwIBAgIQV8szb8JcFuZHFhfjkDFo4DANBgkqhkiG9w0BAQUFADBi
|
||||
MQswCQYDVQQGEwJVUzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMu
|
||||
MTAwLgYDVQQDEydOZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3Jp
|
||||
dHkwHhcNMDYxMjAxMDAwMDAwWhcNMjkxMjMxMjM1OTU5WjBiMQswCQYDVQQGEwJV
|
||||
UzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMuMTAwLgYDVQQDEydO
|
||||
ZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEiMA0GCSqG
|
||||
SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDkvH6SMG3G2I4rC7xGzuAnlt7e+foS0zwz
|
||||
c7MEL7xxjOWftiJgPl9dzgn/ggwbmlFQGiaJ3dVhXRncEg8tCqJDXRfQNJIg6nPP
|
||||
OCwGJgl6cvf6UDL4wpPTaaIjzkGxzOTVHzbRijr4jGPiFFlp7Q3Tf2vouAPlT2rl
|
||||
mGNpSAW+Lv8ztumXWWn4Zxmuk2GWRBXTcrA/vGp97Eh/jcOrqnErU2lBUzS1sLnF
|
||||
BgrEsEX1QV1uiUV7PTsmjHTC5dLRfbIR1PtYMiKagMnc/Qzpf14Dl847ABSHJ3A4
|
||||
qY5usyd2mFHgBeMhqxrVhSI8KbWaFsWAqPS7azCPL0YCorEMIuDTAgMBAAGjgZcw
|
||||
gZQwHQYDVR0OBBYEFCEwyfsA106Y2oeqKtCnLrFAMadMMA4GA1UdDwEB/wQEAwIB
|
||||
BjAPBgNVHRMBAf8EBTADAQH/MFIGA1UdHwRLMEkwR6BFoEOGQWh0dHA6Ly9jcmwu
|
||||
bmV0c29sc3NsLmNvbS9OZXR3b3JrU29sdXRpb25zQ2VydGlmaWNhdGVBdXRob3Jp
|
||||
dHkuY3JsMA0GCSqGSIb3DQEBBQUAA4IBAQC7rkvnt1frf6ott3NHhWrB5KUd5Oc8
|
||||
6fRZZXe1eltajSU24HqXLjjAV2CDmAaDn7l2em5Q4LqILPxFzBiwmZVRDuwduIj/
|
||||
h1AcgsLj4DKAv6ALR8jDMe+ZZzKATxcheQxpXN5eNK4CtSbqUN9/GGUsyfJj4akH
|
||||
/nxxH2szJGoeBfcFaMBqEssuXmHLrijTfsK0ZpEmXzwuJF/LWA/rKOyvEZbz3Htv
|
||||
wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHN
|
||||
pGxlaKFJdlxDydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited
|
||||
# Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited
|
||||
# Label: "COMODO ECC Certification Authority"
|
||||
@@ -2204,46 +2173,6 @@ KoZIzj0EAwMDaAAwZQIxAOVpEslu28YxuglB4Zf4+/2a4n0Sye18ZNPLBSWLVtmg
|
||||
xwy8p2Fp8fc74SrL+SvzZpA3
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden
|
||||
# Subject: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden
|
||||
# Label: "Staat der Nederlanden EV Root CA"
|
||||
# Serial: 10000013
|
||||
# MD5 Fingerprint: fc:06:af:7b:e8:1a:f1:9a:b4:e8:d2:70:1f:c0:f5:ba
|
||||
# SHA1 Fingerprint: 76:e2:7e:c1:4f:db:82:c1:c0:a6:75:b5:05:be:3d:29:b4:ed:db:bb
|
||||
# SHA256 Fingerprint: 4d:24:91:41:4c:fe:95:67:46:ec:4c:ef:a6:cf:6f:72:e2:8a:13:29:43:2f:9d:8a:90:7a:c4:cb:5d:ad:c1:5a
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIFcDCCA1igAwIBAgIEAJiWjTANBgkqhkiG9w0BAQsFADBYMQswCQYDVQQGEwJO
|
||||
TDEeMBwGA1UECgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSkwJwYDVQQDDCBTdGFh
|
||||
dCBkZXIgTmVkZXJsYW5kZW4gRVYgUm9vdCBDQTAeFw0xMDEyMDgxMTE5MjlaFw0y
|
||||
MjEyMDgxMTEwMjhaMFgxCzAJBgNVBAYTAk5MMR4wHAYDVQQKDBVTdGFhdCBkZXIg
|
||||
TmVkZXJsYW5kZW4xKTAnBgNVBAMMIFN0YWF0IGRlciBOZWRlcmxhbmRlbiBFViBS
|
||||
b290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA48d+ifkkSzrS
|
||||
M4M1LGns3Amk41GoJSt5uAg94JG6hIXGhaTK5skuU6TJJB79VWZxXSzFYGgEt9nC
|
||||
UiY4iKTWO0Cmws0/zZiTs1QUWJZV1VD+hq2kY39ch/aO5ieSZxeSAgMs3NZmdO3d
|
||||
Z//BYY1jTw+bbRcwJu+r0h8QoPnFfxZpgQNH7R5ojXKhTbImxrpsX23Wr9GxE46p
|
||||
rfNeaXUmGD5BKyF/7otdBwadQ8QpCiv8Kj6GyzyDOvnJDdrFmeK8eEEzduG/L13l
|
||||
pJhQDBXd4Pqcfzho0LKmeqfRMb1+ilgnQ7O6M5HTp5gVXJrm0w912fxBmJc+qiXb
|
||||
j5IusHsMX/FjqTf5m3VpTCgmJdrV8hJwRVXj33NeN/UhbJCONVrJ0yPr08C+eKxC
|
||||
KFhmpUZtcALXEPlLVPxdhkqHz3/KRawRWrUgUY0viEeXOcDPusBCAUCZSCELa6fS
|
||||
/ZbV0b5GnUngC6agIk440ME8MLxwjyx1zNDFjFE7PZQIZCZhfbnDZY8UnCHQqv0X
|
||||
cgOPvZuM5l5Tnrmd74K74bzickFbIZTTRTeU0d8JOV3nI6qaHcptqAqGhYqCvkIH
|
||||
1vI4gnPah1vlPNOePqc7nvQDs/nxfRN0Av+7oeX6AHkcpmZBiFxgV6YuCcS6/ZrP
|
||||
px9Aw7vMWgpVSzs4dlG4Y4uElBbmVvMCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB
|
||||
/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFP6rAJCYniT8qcwaivsnuL8wbqg7
|
||||
MA0GCSqGSIb3DQEBCwUAA4ICAQDPdyxuVr5Os7aEAJSrR8kN0nbHhp8dB9O2tLsI
|
||||
eK9p0gtJ3jPFrK3CiAJ9Brc1AsFgyb/E6JTe1NOpEyVa/m6irn0F3H3zbPB+po3u
|
||||
2dfOWBfoqSmuc0iH55vKbimhZF8ZE/euBhD/UcabTVUlT5OZEAFTdfETzsemQUHS
|
||||
v4ilf0X8rLiltTMMgsT7B/Zq5SWEXwbKwYY5EdtYzXc7LMJMD16a4/CrPmEbUCTC
|
||||
wPTxGfARKbalGAKb12NMcIxHowNDXLldRqANb/9Zjr7dn3LDWyvfjFvO5QxGbJKy
|
||||
CqNMVEIYFRIYvdr8unRu/8G2oGTYqV9Vrp9canaW2HNnh/tNf1zuacpzEPuKqf2e
|
||||
vTY4SUmH9A4U8OmHuD+nT3pajnnUk+S7aFKErGzp85hwVXIy+TSrK0m1zSBi5Dp6
|
||||
Z2Orltxtrpfs/J92VoguZs9btsmksNcFuuEnL5O7Jiqik7Ab846+HUCjuTaPPoIa
|
||||
Gl6I6lD4WeKDRikL40Rc4ZW2aZCaFG+XroHPaO+Zmr615+F/+PoTRxZMzG0IQOeL
|
||||
eG9QgkRQP2YGiqtDhFZKDyAthg710tvSeopLzaXoTvFeJiUBWSOgftL2fiFX1ye8
|
||||
FVdMpEbB4IMeDExNH08GGeL5qPQ6gqGyeUN51q1veieQA6TqJIc/2b3Z6fJfUEkc
|
||||
7uzXLg==
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=IdenTrust Commercial Root CA 1 O=IdenTrust
|
||||
# Subject: CN=IdenTrust Commercial Root CA 1 O=IdenTrust
|
||||
# Label: "IdenTrust Commercial Root CA 1"
|
||||
@@ -2851,116 +2780,6 @@ T8p+ck0LcIymSLumoRT2+1hEmRSuqguTaaApJUqlyyvdimYHFngVV3Eb7PVHhPOe
|
||||
MTd61X8kreS8/f3MboPoDKi3QWwH3b08hpcv0g==
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
|
||||
# Subject: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
|
||||
# Label: "TrustCor RootCert CA-1"
|
||||
# Serial: 15752444095811006489
|
||||
# MD5 Fingerprint: 6e:85:f1:dc:1a:00:d3:22:d5:b2:b2:ac:6b:37:05:45
|
||||
# SHA1 Fingerprint: ff:bd:cd:e7:82:c8:43:5e:3c:6f:26:86:5c:ca:a8:3a:45:5b:c3:0a
|
||||
# SHA256 Fingerprint: d4:0e:9c:86:cd:8f:e4:68:c1:77:69:59:f4:9e:a7:74:fa:54:86:84:b6:c4:06:f3:90:92:61:f4:dc:e2:57:5c
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIEMDCCAxigAwIBAgIJANqb7HHzA7AZMA0GCSqGSIb3DQEBCwUAMIGkMQswCQYD
|
||||
VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk
|
||||
MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U
|
||||
cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRydXN0Q29y
|
||||
IFJvb3RDZXJ0IENBLTEwHhcNMTYwMjA0MTIzMjE2WhcNMjkxMjMxMTcyMzE2WjCB
|
||||
pDELMAkGA1UEBhMCUEExDzANBgNVBAgMBlBhbmFtYTEUMBIGA1UEBwwLUGFuYW1h
|
||||
IENpdHkxJDAiBgNVBAoMG1RydXN0Q29yIFN5c3RlbXMgUy4gZGUgUi5MLjEnMCUG
|
||||
A1UECwweVHJ1c3RDb3IgQ2VydGlmaWNhdGUgQXV0aG9yaXR5MR8wHQYDVQQDDBZU
|
||||
cnVzdENvciBSb290Q2VydCBDQS0xMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB
|
||||
CgKCAQEAv463leLCJhJrMxnHQFgKq1mqjQCj/IDHUHuO1CAmujIS2CNUSSUQIpid
|
||||
RtLByZ5OGy4sDjjzGiVoHKZaBeYei0i/mJZ0PmnK6bV4pQa81QBeCQryJ3pS/C3V
|
||||
seq0iWEk8xoT26nPUu0MJLq5nux+AHT6k61sKZKuUbS701e/s/OojZz0JEsq1pme
|
||||
9J7+wH5COucLlVPat2gOkEz7cD+PSiyU8ybdY2mplNgQTsVHCJCZGxdNuWxu72CV
|
||||
EY4hgLW9oHPY0LJ3xEXqWib7ZnZ2+AYfYW0PVcWDtxBWcgYHpfOxGgMFZA6dWorW
|
||||
hnAbJN7+KIor0Gqw/Hqi3LJ5DotlDwIDAQABo2MwYTAdBgNVHQ4EFgQU7mtJPHo/
|
||||
DeOxCbeKyKsZn3MzUOcwHwYDVR0jBBgwFoAU7mtJPHo/DeOxCbeKyKsZn3MzUOcw
|
||||
DwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQAD
|
||||
ggEBACUY1JGPE+6PHh0RU9otRCkZoB5rMZ5NDp6tPVxBb5UrJKF5mDo4Nvu7Zp5I
|
||||
/5CQ7z3UuJu0h3U/IJvOcs+hVcFNZKIZBqEHMwwLKeXx6quj7LUKdJDHfXLy11yf
|
||||
ke+Ri7fc7Waiz45mO7yfOgLgJ90WmMCV1Aqk5IGadZQ1nJBfiDcGrVmVCrDRZ9MZ
|
||||
yonnMlo2HD6CqFqTvsbQZJG2z9m2GM/bftJlo6bEjhcxwft+dtvTheNYsnd6djts
|
||||
L1Ac59v2Z3kf9YKVmgenFK+P3CghZwnS1k1aHBkcjndcw5QkPTJrS37UeJSDvjdN
|
||||
zl/HHk484IkzlQsPpTLWPFp5LBk=
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
|
||||
# Subject: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
|
||||
# Label: "TrustCor RootCert CA-2"
|
||||
# Serial: 2711694510199101698
|
||||
# MD5 Fingerprint: a2:e1:f8:18:0b:ba:45:d5:c7:41:2a:bb:37:52:45:64
|
||||
# SHA1 Fingerprint: b8:be:6d:cb:56:f1:55:b9:63:d4:12:ca:4e:06:34:c7:94:b2:1c:c0
|
||||
# SHA256 Fingerprint: 07:53:e9:40:37:8c:1b:d5:e3:83:6e:39:5d:ae:a5:cb:83:9e:50:46:f1:bd:0e:ae:19:51:cf:10:fe:c7:c9:65
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIGLzCCBBegAwIBAgIIJaHfyjPLWQIwDQYJKoZIhvcNAQELBQAwgaQxCzAJBgNV
|
||||
BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw
|
||||
IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy
|
||||
dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEfMB0GA1UEAwwWVHJ1c3RDb3Ig
|
||||
Um9vdENlcnQgQ0EtMjAeFw0xNjAyMDQxMjMyMjNaFw0zNDEyMzExNzI2MzlaMIGk
|
||||
MQswCQYDVQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEg
|
||||
Q2l0eTEkMCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYD
|
||||
VQQLDB5UcnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRy
|
||||
dXN0Q29yIFJvb3RDZXJ0IENBLTIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK
|
||||
AoICAQCnIG7CKqJiJJWQdsg4foDSq8GbZQWU9MEKENUCrO2fk8eHyLAnK0IMPQo+
|
||||
QVqedd2NyuCb7GgypGmSaIwLgQ5WoD4a3SwlFIIvl9NkRvRUqdw6VC0xK5mC8tkq
|
||||
1+9xALgxpL56JAfDQiDyitSSBBtlVkxs1Pu2YVpHI7TYabS3OtB0PAx1oYxOdqHp
|
||||
2yqlO/rOsP9+aij9JxzIsekp8VduZLTQwRVtDr4uDkbIXvRR/u8OYzo7cbrPb1nK
|
||||
DOObXUm4TOJXsZiKQlecdu/vvdFoqNL0Cbt3Nb4lggjEFixEIFapRBF37120Hape
|
||||
az6LMvYHL1cEksr1/p3C6eizjkxLAjHZ5DxIgif3GIJ2SDpxsROhOdUuxTTCHWKF
|
||||
3wP+TfSvPd9cW436cOGlfifHhi5qjxLGhF5DUVCcGZt45vz27Ud+ez1m7xMTiF88
|
||||
oWP7+ayHNZ/zgp6kPwqcMWmLmaSISo5uZk3vFsQPeSghYA2FFn3XVDjxklb9tTNM
|
||||
g9zXEJ9L/cb4Qr26fHMC4P99zVvh1Kxhe1fVSntb1IVYJ12/+CtgrKAmrhQhJ8Z3
|
||||
mjOAPF5GP/fDsaOGM8boXg25NSyqRsGFAnWAoOsk+xWq5Gd/bnc/9ASKL3x74xdh
|
||||
8N0JqSDIvgmk0H5Ew7IwSjiqqewYmgeCK9u4nBit2uBGF6zPXQIDAQABo2MwYTAd
|
||||
BgNVHQ4EFgQU2f4hQG6UnrybPZx9mCAZ5YwwYrIwHwYDVR0jBBgwFoAU2f4hQG6U
|
||||
nrybPZx9mCAZ5YwwYrIwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYw
|
||||
DQYJKoZIhvcNAQELBQADggIBAJ5Fngw7tu/hOsh80QA9z+LqBrWyOrsGS2h60COX
|
||||
dKcs8AjYeVrXWoSK2BKaG9l9XE1wxaX5q+WjiYndAfrs3fnpkpfbsEZC89NiqpX+
|
||||
MWcUaViQCqoL7jcjx1BRtPV+nuN79+TMQjItSQzL/0kMmx40/W5ulop5A7Zv2wnL
|
||||
/V9lFDfhOPXzYRZY5LVtDQsEGz9QLX+zx3oaFoBg+Iof6Rsqxvm6ARppv9JYx1RX
|
||||
CI/hOWB3S6xZhBqI8d3LT3jX5+EzLfzuQfogsL7L9ziUwOHQhQ+77Sxzq+3+knYa
|
||||
ZH9bDTMJBzN7Bj8RpFxwPIXAz+OQqIN3+tvmxYxoZxBnpVIt8MSZj3+/0WvitUfW
|
||||
2dCFmU2Umw9Lje4AWkcdEQOsQRivh7dvDDqPys/cA8GiCcjl/YBeyGBCARsaU1q7
|
||||
N6a3vLqE6R5sGtRk2tRD/pOLS/IseRYQ1JMLiI+h2IYURpFHmygk71dSTlxCnKr3
|
||||
Sewn6EAes6aJInKc9Q0ztFijMDvd1GpUk74aTfOTlPf8hAs/hCBcNANExdqtvArB
|
||||
As8e5ZTZ845b2EzwnexhF7sUMlQMAimTHpKG9n/v55IFDlndmQguLvqcAFLTxWYp
|
||||
5KeXRKQOKIETNcX2b2TmQcTVL8w0RSXPQQCWPUouwpaYT05KnJe32x+SMsj/D1Fu
|
||||
1uwJ
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
|
||||
# Subject: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
|
||||
# Label: "TrustCor ECA-1"
|
||||
# Serial: 9548242946988625984
|
||||
# MD5 Fingerprint: 27:92:23:1d:0a:f5:40:7c:e9:e6:6b:9d:d8:f5:e7:6c
|
||||
# SHA1 Fingerprint: 58:d1:df:95:95:67:6b:63:c0:f0:5b:1c:17:4d:8b:84:0b:c8:78:bd
|
||||
# SHA256 Fingerprint: 5a:88:5d:b1:9c:01:d9:12:c5:75:93:88:93:8c:af:bb:df:03:1a:b2:d4:8e:91:ee:15:58:9b:42:97:1d:03:9c
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIEIDCCAwigAwIBAgIJAISCLF8cYtBAMA0GCSqGSIb3DQEBCwUAMIGcMQswCQYD
|
||||
VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk
|
||||
MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U
|
||||
cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxFzAVBgNVBAMMDlRydXN0Q29y
|
||||
IEVDQS0xMB4XDTE2MDIwNDEyMzIzM1oXDTI5MTIzMTE3MjgwN1owgZwxCzAJBgNV
|
||||
BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw
|
||||
IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy
|
||||
dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEXMBUGA1UEAwwOVHJ1c3RDb3Ig
|
||||
RUNBLTEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDPj+ARtZ+odnbb
|
||||
3w9U73NjKYKtR8aja+3+XzP4Q1HpGjORMRegdMTUpwHmspI+ap3tDvl0mEDTPwOA
|
||||
BoJA6LHip1GnHYMma6ve+heRK9jGrB6xnhkB1Zem6g23xFUfJ3zSCNV2HykVh0A5
|
||||
3ThFEXXQmqc04L/NyFIduUd+Dbi7xgz2c1cWWn5DkR9VOsZtRASqnKmcp0yJF4Ou
|
||||
owReUoCLHhIlERnXDH19MURB6tuvsBzvgdAsxZohmz3tQjtQJvLsznFhBmIhVE5/
|
||||
wZ0+fyCMgMsq2JdiyIMzkX2woloPV+g7zPIlstR8L+xNxqE6FXrntl019fZISjZF
|
||||
ZtS6mFjBAgMBAAGjYzBhMB0GA1UdDgQWBBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAf
|
||||
BgNVHSMEGDAWgBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAPBgNVHRMBAf8EBTADAQH/
|
||||
MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEABT41XBVwm8nHc2Fv
|
||||
civUwo/yQ10CzsSUuZQRg2dd4mdsdXa/uwyqNsatR5Nj3B5+1t4u/ukZMjgDfxT2
|
||||
AHMsWbEhBuH7rBiVDKP/mZb3Kyeb1STMHd3BOuCYRLDE5D53sXOpZCz2HAF8P11F
|
||||
hcCF5yWPldwX8zyfGm6wyuMdKulMY/okYWLW2n62HGz1Ah3UKt1VkOsqEUc8Ll50
|
||||
soIipX1TH0XsJ5F95yIW6MBoNtjG8U+ARDL54dHRHareqKucBK+tIA5kmE2la8BI
|
||||
WJZpTdwHjFGTot+fDz2LYLSCjaoITmJF4PkL0uDgPFveXHEnJcLmA4GLEFPjx1Wi
|
||||
tJ/X5g==
|
||||
-----END CERTIFICATE-----
|
||||
|
||||
# Issuer: CN=SSL.com Root Certification Authority RSA O=SSL Corporation
|
||||
# Subject: CN=SSL.com Root Certification Authority RSA O=SSL Corporation
|
||||
# Label: "SSL.com Root Certification Authority RSA"
|
||||
|
||||
@@ -15,19 +15,29 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import List, Union
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import InputState
|
||||
from .resultdict import ResultDict
|
||||
from .universaldetector import UniversalDetector
|
||||
from .version import VERSION, __version__
|
||||
|
||||
__all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"]
|
||||
|
||||
|
||||
def detect(byte_str):
|
||||
def detect(
|
||||
byte_str: Union[bytes, bytearray], should_rename_legacy: bool = False
|
||||
) -> ResultDict:
|
||||
"""
|
||||
Detect the encoding of the given byte string.
|
||||
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:type byte_str: ``bytes`` or ``bytearray``
|
||||
:param should_rename_legacy: Should we rename legacy encodings
|
||||
to their more modern equivalents?
|
||||
:type should_rename_legacy: ``bool``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
@@ -35,12 +45,16 @@ def detect(byte_str):
|
||||
f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
|
||||
)
|
||||
byte_str = bytearray(byte_str)
|
||||
detector = UniversalDetector()
|
||||
detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
|
||||
detector.feed(byte_str)
|
||||
return detector.close()
|
||||
|
||||
|
||||
def detect_all(byte_str, ignore_threshold=False):
|
||||
def detect_all(
|
||||
byte_str: Union[bytes, bytearray],
|
||||
ignore_threshold: bool = False,
|
||||
should_rename_legacy: bool = False,
|
||||
) -> List[ResultDict]:
|
||||
"""
|
||||
Detect all the possible encodings of the given byte string.
|
||||
|
||||
@@ -50,6 +64,9 @@ def detect_all(byte_str, ignore_threshold=False):
|
||||
``UniversalDetector.MINIMUM_THRESHOLD``
|
||||
in results.
|
||||
:type ignore_threshold: ``bool``
|
||||
:param should_rename_legacy: Should we rename legacy encodings
|
||||
to their more modern equivalents?
|
||||
:type should_rename_legacy: ``bool``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
@@ -58,15 +75,15 @@ def detect_all(byte_str, ignore_threshold=False):
|
||||
)
|
||||
byte_str = bytearray(byte_str)
|
||||
|
||||
detector = UniversalDetector()
|
||||
detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
|
||||
detector.feed(byte_str)
|
||||
detector.close()
|
||||
|
||||
if detector.input_state == InputState.HIGH_BYTE:
|
||||
results = []
|
||||
probers = []
|
||||
results: List[ResultDict] = []
|
||||
probers: List[CharSetProber] = []
|
||||
for prober in detector.charset_probers:
|
||||
if hasattr(prober, "probers"):
|
||||
if isinstance(prober, CharSetGroupProber):
|
||||
probers.extend(p for p in prober.probers)
|
||||
else:
|
||||
probers.append(prober)
|
||||
@@ -80,6 +97,11 @@ def detect_all(byte_str, ignore_threshold=False):
|
||||
charset_name = detector.ISO_WIN_MAP.get(
|
||||
lower_charset_name, charset_name
|
||||
)
|
||||
# Rename legacy encodings with superset encodings if asked
|
||||
if should_rename_legacy:
|
||||
charset_name = detector.LEGACY_MAP.get(
|
||||
charset_name.lower(), charset_name
|
||||
)
|
||||
results.append(
|
||||
{
|
||||
"encoding": charset_name,
|
||||
|
||||
@@ -32,16 +32,16 @@ from .mbcssm import BIG5_SM_MODEL
|
||||
|
||||
|
||||
class Big5Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
|
||||
self.distribution_analyzer = Big5DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "Big5"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Chinese"
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import Tuple, Union
|
||||
|
||||
from .big5freq import (
|
||||
BIG5_CHAR_TO_FREQ_ORDER,
|
||||
BIG5_TABLE_SIZE,
|
||||
@@ -59,22 +61,22 @@ class CharDistributionAnalysis:
|
||||
SURE_NO = 0.01
|
||||
MINIMUM_DATA_THRESHOLD = 3
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
# Mapping table to get frequency order from char order (get from
|
||||
# GetOrder())
|
||||
self._char_to_freq_order = tuple()
|
||||
self._table_size = None # Size of above table
|
||||
self._char_to_freq_order: Tuple[int, ...] = tuple()
|
||||
self._table_size = 0 # Size of above table
|
||||
# This is a constant value which varies from language to language,
|
||||
# used in calculating confidence. See
|
||||
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
||||
# for further detail.
|
||||
self.typical_distribution_ratio = None
|
||||
self._done = None
|
||||
self._total_chars = None
|
||||
self._freq_chars = None
|
||||
self.typical_distribution_ratio = 0.0
|
||||
self._done = False
|
||||
self._total_chars = 0
|
||||
self._freq_chars = 0
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
"""reset analyser, clear any state"""
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
@@ -83,7 +85,7 @@ class CharDistributionAnalysis:
|
||||
# The number of characters whose frequency order is less than 512
|
||||
self._freq_chars = 0
|
||||
|
||||
def feed(self, char, char_len):
|
||||
def feed(self, char: Union[bytes, bytearray], char_len: int) -> None:
|
||||
"""feed a character with known length"""
|
||||
if char_len == 2:
|
||||
# we only care about 2-bytes character in our distribution analysis
|
||||
@@ -97,7 +99,7 @@ class CharDistributionAnalysis:
|
||||
if 512 > self._char_to_freq_order[order]:
|
||||
self._freq_chars += 1
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
"""return confidence based on existing data"""
|
||||
# if we didn't receive any character in our consideration range,
|
||||
# return negative answer
|
||||
@@ -114,12 +116,12 @@ class CharDistributionAnalysis:
|
||||
# normalize confidence (we don't want to be 100% sure)
|
||||
return self.SURE_YES
|
||||
|
||||
def got_enough_data(self):
|
||||
def got_enough_data(self) -> bool:
|
||||
# It is not necessary to receive all data to draw conclusion.
|
||||
# For charset detection, certain amount of data is enough
|
||||
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
|
||||
|
||||
def get_order(self, _):
|
||||
def get_order(self, _: Union[bytes, bytearray]) -> int:
|
||||
# We do not handle characters based on the original encoding string,
|
||||
# but convert this encoding string to a number, here called order.
|
||||
# This allows multiple encodings of a language to share one frequency
|
||||
@@ -128,13 +130,13 @@ class CharDistributionAnalysis:
|
||||
|
||||
|
||||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCTW_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
|
||||
# for euc-TW encoding, we are interested
|
||||
# first byte range: 0xc4 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
@@ -146,13 +148,13 @@ class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
|
||||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCKR_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
|
||||
# for euc-KR encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
@@ -164,13 +166,13 @@ class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
|
||||
class JOHABDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCKR_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
|
||||
first_char = byte_str[0]
|
||||
if 0x88 <= first_char < 0xD4:
|
||||
code = first_char * 256 + byte_str[1]
|
||||
@@ -179,13 +181,13 @@ class JOHABDistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
|
||||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = GB2312_TABLE_SIZE
|
||||
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
|
||||
# for GB2312 encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
@@ -197,13 +199,13 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
|
||||
class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = BIG5_TABLE_SIZE
|
||||
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
|
||||
# for big5 encoding, we are interested
|
||||
# first byte range: 0xa4 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||
@@ -217,13 +219,13 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
|
||||
class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
|
||||
# for sjis encoding, we are interested
|
||||
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
||||
@@ -242,13 +244,13 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||
|
||||
|
||||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
|
||||
# for euc-JP encoding, we are interested
|
||||
# first byte range: 0xa0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
|
||||
@@ -25,29 +25,30 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
from .enums import LanguageFilter, ProbingState
|
||||
|
||||
|
||||
class CharSetGroupProber(CharSetProber):
|
||||
def __init__(self, lang_filter=None):
|
||||
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
|
||||
super().__init__(lang_filter=lang_filter)
|
||||
self._active_num = 0
|
||||
self.probers = []
|
||||
self._best_guess_prober = None
|
||||
self.probers: List[CharSetProber] = []
|
||||
self._best_guess_prober: Optional[CharSetProber] = None
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
super().reset()
|
||||
self._active_num = 0
|
||||
for prober in self.probers:
|
||||
if prober:
|
||||
prober.reset()
|
||||
prober.active = True
|
||||
self._active_num += 1
|
||||
prober.reset()
|
||||
prober.active = True
|
||||
self._active_num += 1
|
||||
self._best_guess_prober = None
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> Optional[str]:
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
@@ -55,17 +56,15 @@ class CharSetGroupProber(CharSetProber):
|
||||
return self._best_guess_prober.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> Optional[str]:
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
return self._best_guess_prober.language
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
continue
|
||||
state = prober.feed(byte_str)
|
||||
@@ -83,7 +82,7 @@ class CharSetGroupProber(CharSetProber):
|
||||
return self.state
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
state = self.state
|
||||
if state == ProbingState.FOUND_IT:
|
||||
return 0.99
|
||||
@@ -92,8 +91,6 @@ class CharSetGroupProber(CharSetProber):
|
||||
best_conf = 0.0
|
||||
self._best_guess_prober = None
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
self.logger.debug("%s not active", prober.charset_name)
|
||||
continue
|
||||
|
||||
@@ -28,8 +28,9 @@
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional, Union
|
||||
|
||||
from .enums import ProbingState
|
||||
from .enums import LanguageFilter, ProbingState
|
||||
|
||||
INTERNATIONAL_WORDS_PATTERN = re.compile(
|
||||
b"[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?"
|
||||
@@ -40,35 +41,40 @@ class CharSetProber:
|
||||
|
||||
SHORTCUT_THRESHOLD = 0.95
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
self._state = None
|
||||
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
|
||||
self._state = ProbingState.DETECTING
|
||||
self.active = True
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
self._state = ProbingState.DETECTING
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> Optional[str]:
|
||||
return None
|
||||
|
||||
def feed(self, byte_str):
|
||||
@property
|
||||
def language(self) -> Optional[str]:
|
||||
raise NotImplementedError
|
||||
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
def state(self) -> ProbingState:
|
||||
return self._state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
return 0.0
|
||||
|
||||
@staticmethod
|
||||
def filter_high_byte_only(buf):
|
||||
def filter_high_byte_only(buf: Union[bytes, bytearray]) -> bytes:
|
||||
buf = re.sub(b"([\x00-\x7F])+", b" ", buf)
|
||||
return buf
|
||||
|
||||
@staticmethod
|
||||
def filter_international_words(buf):
|
||||
def filter_international_words(buf: Union[bytes, bytearray]) -> bytearray:
|
||||
"""
|
||||
We define three types of bytes:
|
||||
alphabet: english alphabets [a-zA-Z]
|
||||
@@ -102,7 +108,7 @@ class CharSetProber:
|
||||
return filtered
|
||||
|
||||
@staticmethod
|
||||
def remove_xml_tags(buf):
|
||||
def remove_xml_tags(buf: Union[bytes, bytearray]) -> bytes:
|
||||
"""
|
||||
Returns a copy of ``buf`` that retains only the sequences of English
|
||||
alphabet and high byte characters that are not between <> characters.
|
||||
@@ -117,10 +123,13 @@ class CharSetProber:
|
||||
|
||||
for curr, buf_char in enumerate(buf):
|
||||
# Check if we're coming out of or entering an XML tag
|
||||
if buf_char == b">":
|
||||
|
||||
# https://github.com/python/typeshed/issues/8182
|
||||
if buf_char == b">": # type: ignore[comparison-overlap]
|
||||
prev = curr + 1
|
||||
in_tag = False
|
||||
elif buf_char == b"<":
|
||||
# https://github.com/python/typeshed/issues/8182
|
||||
elif buf_char == b"<": # type: ignore[comparison-overlap]
|
||||
if curr > prev and not in_tag:
|
||||
# Keep everything after last non-extended-ASCII,
|
||||
# non-alphabetic character
|
||||
|
||||
@@ -15,12 +15,18 @@ If no paths are provided, it takes its input from stdin.
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from typing import Iterable, List, Optional
|
||||
|
||||
from .. import __version__
|
||||
from ..universaldetector import UniversalDetector
|
||||
|
||||
|
||||
def description_of(lines, name="stdin"):
|
||||
def description_of(
|
||||
lines: Iterable[bytes],
|
||||
name: str = "stdin",
|
||||
minimal: bool = False,
|
||||
should_rename_legacy: bool = False,
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Return a string describing the probable encoding of a file or
|
||||
list of strings.
|
||||
@@ -29,8 +35,11 @@ def description_of(lines, name="stdin"):
|
||||
:type lines: Iterable of bytes
|
||||
:param name: Name of file or collection of lines
|
||||
:type name: str
|
||||
:param should_rename_legacy: Should we rename legacy encodings to
|
||||
their more modern equivalents?
|
||||
:type should_rename_legacy: ``bool``
|
||||
"""
|
||||
u = UniversalDetector()
|
||||
u = UniversalDetector(should_rename_legacy=should_rename_legacy)
|
||||
for line in lines:
|
||||
line = bytearray(line)
|
||||
u.feed(line)
|
||||
@@ -39,12 +48,14 @@ def description_of(lines, name="stdin"):
|
||||
break
|
||||
u.close()
|
||||
result = u.result
|
||||
if minimal:
|
||||
return result["encoding"]
|
||||
if result["encoding"]:
|
||||
return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
|
||||
return f"{name}: no result"
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
def main(argv: Optional[List[str]] = None) -> None:
|
||||
"""
|
||||
Handles command line arguments and gets things started.
|
||||
|
||||
@@ -54,17 +65,28 @@ def main(argv=None):
|
||||
"""
|
||||
# Get command line arguments
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Takes one or more file paths and reports their detected \
|
||||
encodings"
|
||||
description=(
|
||||
"Takes one or more file paths and reports their detected encodings"
|
||||
)
|
||||
)
|
||||
parser.add_argument(
|
||||
"input",
|
||||
help="File whose encoding we would like to determine. \
|
||||
(default: stdin)",
|
||||
help="File whose encoding we would like to determine. (default: stdin)",
|
||||
type=argparse.FileType("rb"),
|
||||
nargs="*",
|
||||
default=[sys.stdin.buffer],
|
||||
)
|
||||
parser.add_argument(
|
||||
"--minimal",
|
||||
help="Print only the encoding to standard output",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-l",
|
||||
"--legacy",
|
||||
help="Rename legacy encodings to more modern ones.",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--version", action="version", version=f"%(prog)s {__version__}"
|
||||
)
|
||||
@@ -79,7 +101,11 @@ def main(argv=None):
|
||||
"--help\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(description_of(f, f.name))
|
||||
print(
|
||||
description_of(
|
||||
f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
|
||||
import logging
|
||||
|
||||
from .codingstatemachinedict import CodingStateMachineDict
|
||||
from .enums import MachineState
|
||||
|
||||
|
||||
@@ -53,18 +54,19 @@ class CodingStateMachine:
|
||||
encoding from consideration from here on.
|
||||
"""
|
||||
|
||||
def __init__(self, sm):
|
||||
def __init__(self, sm: CodingStateMachineDict) -> None:
|
||||
self._model = sm
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = 0
|
||||
self._curr_state = None
|
||||
self._curr_state = MachineState.START
|
||||
self.active = True
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
self._curr_state = MachineState.START
|
||||
|
||||
def next_state(self, c):
|
||||
def next_state(self, c: int) -> int:
|
||||
# for each byte we get its class
|
||||
# if it is first byte, we also get byte length
|
||||
byte_class = self._model["class_table"][c]
|
||||
@@ -77,12 +79,12 @@ class CodingStateMachine:
|
||||
self._curr_byte_pos += 1
|
||||
return self._curr_state
|
||||
|
||||
def get_current_charlen(self):
|
||||
def get_current_charlen(self) -> int:
|
||||
return self._curr_char_len
|
||||
|
||||
def get_coding_state_machine(self):
|
||||
def get_coding_state_machine(self) -> str:
|
||||
return self._model["name"]
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return self._model["language"]
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
from typing import TYPE_CHECKING, Tuple
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# TypedDict was introduced in Python 3.8.
|
||||
#
|
||||
# TODO: Remove the else block and TYPE_CHECKING check when dropping support
|
||||
# for Python 3.7.
|
||||
from typing import TypedDict
|
||||
|
||||
class CodingStateMachineDict(TypedDict, total=False):
|
||||
class_table: Tuple[int, ...]
|
||||
class_factor: int
|
||||
state_table: Tuple[int, ...]
|
||||
char_len_table: Tuple[int, ...]
|
||||
name: str
|
||||
language: str # Optional key
|
||||
|
||||
else:
|
||||
CodingStateMachineDict = dict
|
||||
@@ -32,7 +32,7 @@ from .mbcssm import CP949_SM_MODEL
|
||||
|
||||
|
||||
class CP949Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
|
||||
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
|
||||
@@ -41,9 +41,9 @@ class CP949Prober(MultiByteCharSetProber):
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "CP949"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Korean"
|
||||
|
||||
@@ -4,6 +4,8 @@ All of the Enums that are used throughout the chardet package.
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
from enum import Enum, Flag
|
||||
|
||||
|
||||
class InputState:
|
||||
"""
|
||||
@@ -15,12 +17,13 @@ class InputState:
|
||||
HIGH_BYTE = 2
|
||||
|
||||
|
||||
class LanguageFilter:
|
||||
class LanguageFilter(Flag):
|
||||
"""
|
||||
This enum represents the different language filters we can apply to a
|
||||
``UniversalDetector``.
|
||||
"""
|
||||
|
||||
NONE = 0x00
|
||||
CHINESE_SIMPLIFIED = 0x01
|
||||
CHINESE_TRADITIONAL = 0x02
|
||||
JAPANESE = 0x04
|
||||
@@ -31,7 +34,7 @@ class LanguageFilter:
|
||||
CJK = CHINESE | JAPANESE | KOREAN
|
||||
|
||||
|
||||
class ProbingState:
|
||||
class ProbingState(Enum):
|
||||
"""
|
||||
This enum represents the different states a prober can be in.
|
||||
"""
|
||||
@@ -62,7 +65,7 @@ class SequenceLikelihood:
|
||||
POSITIVE = 3
|
||||
|
||||
@classmethod
|
||||
def get_num_categories(cls):
|
||||
def get_num_categories(cls) -> int:
|
||||
""":returns: The number of likelihood categories in the enum."""
|
||||
return 4
|
||||
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import LanguageFilter, MachineState, ProbingState
|
||||
@@ -43,7 +45,7 @@ class EscCharSetProber(CharSetProber):
|
||||
identify these encodings.
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
|
||||
super().__init__(lang_filter=lang_filter)
|
||||
self.coding_sm = []
|
||||
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
|
||||
@@ -53,17 +55,15 @@ class EscCharSetProber(CharSetProber):
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.KOREAN:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
|
||||
self.active_sm_count = None
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
self._state = None
|
||||
self.active_sm_count = 0
|
||||
self._detected_charset: Optional[str] = None
|
||||
self._detected_language: Optional[str] = None
|
||||
self._state = ProbingState.DETECTING
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
super().reset()
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm:
|
||||
continue
|
||||
coding_sm.active = True
|
||||
coding_sm.reset()
|
||||
self.active_sm_count = len(self.coding_sm)
|
||||
@@ -71,20 +71,20 @@ class EscCharSetProber(CharSetProber):
|
||||
self._detected_language = None
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> Optional[str]:
|
||||
return self._detected_charset
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> Optional[str]:
|
||||
return self._detected_language
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
return 0.99 if self._detected_charset else 0.00
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
for c in byte_str:
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm or not coding_sm.active:
|
||||
if not coding_sm.active:
|
||||
continue
|
||||
coding_state = coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .codingstatemachinedict import CodingStateMachineDict
|
||||
from .enums import MachineState
|
||||
|
||||
# fmt: off
|
||||
@@ -75,7 +76,7 @@ MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR
|
||||
|
||||
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
HZ_SM_MODEL = {
|
||||
HZ_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": HZ_CLS,
|
||||
"class_factor": 6,
|
||||
"state_table": HZ_ST,
|
||||
@@ -134,7 +135,7 @@ ISO2022CN_ST = (
|
||||
|
||||
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022CN_SM_MODEL = {
|
||||
ISO2022CN_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": ISO2022CN_CLS,
|
||||
"class_factor": 9,
|
||||
"state_table": ISO2022CN_ST,
|
||||
@@ -194,7 +195,7 @@ ISO2022JP_ST = (
|
||||
|
||||
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022JP_SM_MODEL = {
|
||||
ISO2022JP_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": ISO2022JP_CLS,
|
||||
"class_factor": 10,
|
||||
"state_table": ISO2022JP_ST,
|
||||
@@ -250,7 +251,7 @@ ISO2022KR_ST = (
|
||||
|
||||
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022KR_SM_MODEL = {
|
||||
ISO2022KR_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": ISO2022KR_CLS,
|
||||
"class_factor": 6,
|
||||
"state_table": ISO2022KR_ST,
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import Union
|
||||
|
||||
from .chardistribution import EUCJPDistributionAnalysis
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import MachineState, ProbingState
|
||||
@@ -34,26 +36,29 @@ from .mbcssm import EUCJP_SM_MODEL
|
||||
|
||||
|
||||
class EUCJPProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
|
||||
self.distribution_analyzer = EUCJPDistributionAnalysis()
|
||||
self.context_analyzer = EUCJPContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
super().reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "EUC-JP"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
assert self.coding_sm is not None
|
||||
assert self.distribution_analyzer is not None
|
||||
|
||||
for i, byte in enumerate(byte_str):
|
||||
# PY3K: byte_str is a byte array, so byte is an int, not a byte
|
||||
coding_state = self.coding_sm.next_state(byte)
|
||||
@@ -89,7 +94,9 @@ class EUCJPProber(MultiByteCharSetProber):
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
assert self.distribution_analyzer is not None
|
||||
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
|
||||
@@ -32,16 +32,16 @@ from .mbcssm import EUCKR_SM_MODEL
|
||||
|
||||
|
||||
class EUCKRProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "EUC-KR"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Korean"
|
||||
|
||||
@@ -32,16 +32,16 @@ from .mbcssm import EUCTW_SM_MODEL
|
||||
|
||||
|
||||
class EUCTWProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
|
||||
self.distribution_analyzer = EUCTWDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "EUC-TW"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Taiwan"
|
||||
|
||||
@@ -32,16 +32,16 @@ from .mbcssm import GB2312_SM_MODEL
|
||||
|
||||
|
||||
class GB2312Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
|
||||
self.distribution_analyzer = GB2312DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "GB2312"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Chinese"
|
||||
|
||||
@@ -25,8 +25,11 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
from .sbcharsetprober import SingleByteCharSetProber
|
||||
|
||||
# This prober doesn't actually recognize a language or a charset.
|
||||
# It is a helper prober for the use of the Hebrew model probers
|
||||
@@ -127,6 +130,7 @@ from .enums import ProbingState
|
||||
|
||||
|
||||
class HebrewProber(CharSetProber):
|
||||
SPACE = 0x20
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = 0xEA
|
||||
NORMAL_KAF = 0xEB
|
||||
@@ -152,31 +156,35 @@ class HebrewProber(CharSetProber):
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._final_char_logical_score = None
|
||||
self._final_char_visual_score = None
|
||||
self._prev = None
|
||||
self._before_prev = None
|
||||
self._logical_prober = None
|
||||
self._visual_prober = None
|
||||
self._final_char_logical_score = 0
|
||||
self._final_char_visual_score = 0
|
||||
self._prev = self.SPACE
|
||||
self._before_prev = self.SPACE
|
||||
self._logical_prober: Optional[SingleByteCharSetProber] = None
|
||||
self._visual_prober: Optional[SingleByteCharSetProber] = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
self._final_char_logical_score = 0
|
||||
self._final_char_visual_score = 0
|
||||
# The two last characters seen in the previous buffer,
|
||||
# mPrev and mBeforePrev are initialized to space in order to simulate
|
||||
# a word delimiter at the beginning of the data
|
||||
self._prev = " "
|
||||
self._before_prev = " "
|
||||
self._prev = self.SPACE
|
||||
self._before_prev = self.SPACE
|
||||
# These probers are owned by the group prober.
|
||||
|
||||
def set_model_probers(self, logical_prober, visual_prober):
|
||||
def set_model_probers(
|
||||
self,
|
||||
logical_prober: SingleByteCharSetProber,
|
||||
visual_prober: SingleByteCharSetProber,
|
||||
) -> None:
|
||||
self._logical_prober = logical_prober
|
||||
self._visual_prober = visual_prober
|
||||
|
||||
def is_final(self, c):
|
||||
def is_final(self, c: int) -> bool:
|
||||
return c in [
|
||||
self.FINAL_KAF,
|
||||
self.FINAL_MEM,
|
||||
@@ -185,7 +193,7 @@ class HebrewProber(CharSetProber):
|
||||
self.FINAL_TSADI,
|
||||
]
|
||||
|
||||
def is_non_final(self, c):
|
||||
def is_non_final(self, c: int) -> bool:
|
||||
# The normal Tsadi is not a good Non-Final letter due to words like
|
||||
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
|
||||
# apostrophe is converted to a space in FilterWithoutEnglishLetters
|
||||
@@ -198,7 +206,7 @@ class HebrewProber(CharSetProber):
|
||||
# since these words are quite rare.
|
||||
return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN, self.NORMAL_PE]
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
# Final letter analysis for logical-visual decision.
|
||||
# Look for evidence that the received buffer is either logical Hebrew
|
||||
# or visual Hebrew.
|
||||
@@ -232,9 +240,9 @@ class HebrewProber(CharSetProber):
|
||||
byte_str = self.filter_high_byte_only(byte_str)
|
||||
|
||||
for cur in byte_str:
|
||||
if cur == " ":
|
||||
if cur == self.SPACE:
|
||||
# We stand on a space - a word just ended
|
||||
if self._before_prev != " ":
|
||||
if self._before_prev != self.SPACE:
|
||||
# next-to-last char was not a space so self._prev is not a
|
||||
# 1 letter word
|
||||
if self.is_final(self._prev):
|
||||
@@ -247,9 +255,9 @@ class HebrewProber(CharSetProber):
|
||||
else:
|
||||
# Not standing on a space
|
||||
if (
|
||||
(self._before_prev == " ")
|
||||
(self._before_prev == self.SPACE)
|
||||
and (self.is_final(self._prev))
|
||||
and (cur != " ")
|
||||
and (cur != self.SPACE)
|
||||
):
|
||||
# case (3) [-2:space][-1:final letter][cur:not space]
|
||||
self._final_char_visual_score += 1
|
||||
@@ -261,7 +269,10 @@ class HebrewProber(CharSetProber):
|
||||
return ProbingState.DETECTING
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
assert self._logical_prober is not None
|
||||
assert self._visual_prober is not None
|
||||
|
||||
# Make the decision: is it Logical or Visual?
|
||||
# If the final letter score distance is dominant enough, rely on it.
|
||||
finalsub = self._final_char_logical_score - self._final_char_visual_score
|
||||
@@ -289,11 +300,14 @@ class HebrewProber(CharSetProber):
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Hebrew"
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
def state(self) -> ProbingState:
|
||||
assert self._logical_prober is not None
|
||||
assert self._visual_prober is not None
|
||||
|
||||
# Remain active as long as any of the model probers are active.
|
||||
if (self._logical_prober.state == ProbingState.NOT_ME) and (
|
||||
self._visual_prober.state == ProbingState.NOT_ME
|
||||
|
||||
@@ -32,16 +32,16 @@ from .mbcssm import JOHAB_SM_MODEL
|
||||
|
||||
|
||||
class JOHABProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL)
|
||||
self.distribution_analyzer = JOHABDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "Johab"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Korean"
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import List, Tuple, Union
|
||||
|
||||
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
||||
# fmt: off
|
||||
@@ -123,15 +124,15 @@ class JapaneseContextAnalysis:
|
||||
MAX_REL_THRESHOLD = 1000
|
||||
MINIMUM_DATA_THRESHOLD = 4
|
||||
|
||||
def __init__(self):
|
||||
self._total_rel = None
|
||||
self._rel_sample = None
|
||||
self._need_to_skip_char_num = None
|
||||
self._last_char_order = None
|
||||
self._done = None
|
||||
def __init__(self) -> None:
|
||||
self._total_rel = 0
|
||||
self._rel_sample: List[int] = []
|
||||
self._need_to_skip_char_num = 0
|
||||
self._last_char_order = -1
|
||||
self._done = False
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
self._total_rel = 0 # total sequence received
|
||||
# category counters, each integer counts sequence in its category
|
||||
self._rel_sample = [0] * self.NUM_OF_CATEGORY
|
||||
@@ -143,7 +144,7 @@ class JapaneseContextAnalysis:
|
||||
# been made
|
||||
self._done = False
|
||||
|
||||
def feed(self, byte_str, num_bytes):
|
||||
def feed(self, byte_str: Union[bytes, bytearray], num_bytes: int) -> None:
|
||||
if self._done:
|
||||
return
|
||||
|
||||
@@ -172,29 +173,29 @@ class JapaneseContextAnalysis:
|
||||
] += 1
|
||||
self._last_char_order = order
|
||||
|
||||
def got_enough_data(self):
|
||||
def got_enough_data(self) -> bool:
|
||||
return self._total_rel > self.ENOUGH_REL_THRESHOLD
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
# This is just one way to calculate confidence. It works well for me.
|
||||
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
|
||||
return (self._total_rel - self._rel_sample[0]) / self._total_rel
|
||||
return self.DONT_KNOW
|
||||
|
||||
def get_order(self, _):
|
||||
def get_order(self, _: Union[bytes, bytearray]) -> Tuple[int, int]:
|
||||
return -1, 1
|
||||
|
||||
|
||||
class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._charset_name = "SHIFT_JIS"
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return self._charset_name
|
||||
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]:
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
@@ -216,7 +217,7 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||
|
||||
|
||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
||||
def get_order(self, byte_str):
|
||||
def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]:
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import List, Union
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
@@ -96,26 +98,26 @@ Latin1ClassModel = (
|
||||
|
||||
|
||||
class Latin1Prober(CharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._last_char_class = None
|
||||
self._freq_counter = None
|
||||
self._last_char_class = OTH
|
||||
self._freq_counter: List[int] = []
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
self._last_char_class = OTH
|
||||
self._freq_counter = [0] * FREQ_CAT_NUM
|
||||
super().reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "ISO-8859-1"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
byte_str = self.remove_xml_tags(byte_str)
|
||||
for c in byte_str:
|
||||
char_class = Latin1_CharToClass[c]
|
||||
@@ -128,7 +130,7 @@ class Latin1Prober(CharSetProber):
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# This code was modified from latin1prober.py by Rob Speer <rob@lumino.so>.
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Rob Speer - adapt to MacRoman encoding
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import List, Union
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
FREQ_CAT_NUM = 4
|
||||
|
||||
UDF = 0 # undefined
|
||||
OTH = 1 # other
|
||||
ASC = 2 # ascii capital letter
|
||||
ASS = 3 # ascii small letter
|
||||
ACV = 4 # accent capital vowel
|
||||
ACO = 5 # accent capital other
|
||||
ASV = 6 # accent small vowel
|
||||
ASO = 7 # accent small other
|
||||
ODD = 8 # character that is unlikely to appear
|
||||
CLASS_NUM = 9 # total classes
|
||||
|
||||
# The change from Latin1 is that we explicitly look for extended characters
|
||||
# that are infrequently-occurring symbols, and consider them to always be
|
||||
# improbable. This should let MacRoman get out of the way of more likely
|
||||
# encodings in most situations.
|
||||
|
||||
# fmt: off
|
||||
MacRoman_CharToClass = (
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
|
||||
OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
|
||||
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
|
||||
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
|
||||
ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
|
||||
OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
|
||||
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
|
||||
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
|
||||
ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
|
||||
ACV, ACV, ACO, ACV, ACO, ACV, ACV, ASV, # 80 - 87
|
||||
ASV, ASV, ASV, ASV, ASV, ASO, ASV, ASV, # 88 - 8F
|
||||
ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASV, # 90 - 97
|
||||
ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # 98 - 9F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO, # A0 - A7
|
||||
OTH, OTH, ODD, ODD, OTH, OTH, ACV, ACV, # A8 - AF
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, ASV, ASV, # B8 - BF
|
||||
OTH, OTH, ODD, OTH, ODD, OTH, OTH, OTH, # C0 - C7
|
||||
OTH, OTH, OTH, ACV, ACV, ACV, ACV, ASV, # C8 - CF
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, ODD, # D0 - D7
|
||||
ASV, ACV, ODD, OTH, OTH, OTH, OTH, OTH, # D8 - DF
|
||||
OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV, # E0 - E7
|
||||
ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # E8 - EF
|
||||
ODD, ACV, ACV, ACV, ACV, ASV, ODD, ODD, # F0 - F7
|
||||
ODD, ODD, ODD, ODD, ODD, ODD, ODD, ODD, # F8 - FF
|
||||
)
|
||||
|
||||
# 0 : illegal
|
||||
# 1 : very unlikely
|
||||
# 2 : normal
|
||||
# 3 : very likely
|
||||
MacRomanClassModel = (
|
||||
# UDF OTH ASC ASS ACV ACO ASV ASO ODD
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, # UDF
|
||||
0, 3, 3, 3, 3, 3, 3, 3, 1, # OTH
|
||||
0, 3, 3, 3, 3, 3, 3, 3, 1, # ASC
|
||||
0, 3, 3, 3, 1, 1, 3, 3, 1, # ASS
|
||||
0, 3, 3, 3, 1, 2, 1, 2, 1, # ACV
|
||||
0, 3, 3, 3, 3, 3, 3, 3, 1, # ACO
|
||||
0, 3, 1, 3, 1, 1, 1, 3, 1, # ASV
|
||||
0, 3, 1, 3, 1, 1, 3, 3, 1, # ASO
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 1, # ODD
|
||||
)
|
||||
# fmt: on
|
||||
|
||||
|
||||
class MacRomanProber(CharSetProber):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self._last_char_class = OTH
|
||||
self._freq_counter: List[int] = []
|
||||
self.reset()
|
||||
|
||||
def reset(self) -> None:
|
||||
self._last_char_class = OTH
|
||||
self._freq_counter = [0] * FREQ_CAT_NUM
|
||||
|
||||
# express the prior that MacRoman is a somewhat rare encoding;
|
||||
# this can be done by starting out in a slightly improbable state
|
||||
# that must be overcome
|
||||
self._freq_counter[2] = 10
|
||||
|
||||
super().reset()
|
||||
|
||||
@property
|
||||
def charset_name(self) -> str:
|
||||
return "MacRoman"
|
||||
|
||||
@property
|
||||
def language(self) -> str:
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
byte_str = self.remove_xml_tags(byte_str)
|
||||
for c in byte_str:
|
||||
char_class = MacRoman_CharToClass[c]
|
||||
freq = MacRomanClassModel[(self._last_char_class * CLASS_NUM) + char_class]
|
||||
if freq == 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
self._freq_counter[freq] += 1
|
||||
self._last_char_class = char_class
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self) -> float:
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
|
||||
total = sum(self._freq_counter)
|
||||
confidence = (
|
||||
0.0
|
||||
if total < 0.01
|
||||
else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total
|
||||
)
|
||||
confidence = max(confidence, 0.0)
|
||||
# lower the confidence of MacRoman so that other more accurate
|
||||
# detector can take priority.
|
||||
confidence *= 0.73
|
||||
return confidence
|
||||
@@ -27,8 +27,12 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
from .chardistribution import CharDistributionAnalysis
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import MachineState, ProbingState
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import LanguageFilter, MachineState, ProbingState
|
||||
|
||||
|
||||
class MultiByteCharSetProber(CharSetProber):
|
||||
@@ -36,29 +40,24 @@ class MultiByteCharSetProber(CharSetProber):
|
||||
MultiByteCharSetProber
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
|
||||
super().__init__(lang_filter=lang_filter)
|
||||
self.distribution_analyzer = None
|
||||
self.coding_sm = None
|
||||
self._last_char = [0, 0]
|
||||
self.distribution_analyzer: Optional[CharDistributionAnalysis] = None
|
||||
self.coding_sm: Optional[CodingStateMachine] = None
|
||||
self._last_char = bytearray(b"\0\0")
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
super().reset()
|
||||
if self.coding_sm:
|
||||
self.coding_sm.reset()
|
||||
if self.distribution_analyzer:
|
||||
self.distribution_analyzer.reset()
|
||||
self._last_char = [0, 0]
|
||||
self._last_char = bytearray(b"\0\0")
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
raise NotImplementedError
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
assert self.coding_sm is not None
|
||||
assert self.distribution_analyzer is not None
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i, byte in enumerate(byte_str):
|
||||
coding_state = self.coding_sm.next_state(byte)
|
||||
if coding_state == MachineState.ERROR:
|
||||
@@ -91,5 +90,6 @@ class MultiByteCharSetProber(CharSetProber):
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
assert self.distribution_analyzer is not None
|
||||
return self.distribution_analyzer.get_confidence()
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
from .big5prober import Big5Prober
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .cp949prober import CP949Prober
|
||||
from .enums import LanguageFilter
|
||||
from .eucjpprober import EUCJPProber
|
||||
from .euckrprober import EUCKRProber
|
||||
from .euctwprober import EUCTWProber
|
||||
@@ -40,7 +41,7 @@ from .utf8prober import UTF8Prober
|
||||
|
||||
|
||||
class MBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self, lang_filter=None):
|
||||
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
|
||||
super().__init__(lang_filter=lang_filter)
|
||||
self.probers = [
|
||||
UTF8Prober(),
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .codingstatemachinedict import CodingStateMachineDict
|
||||
from .enums import MachineState
|
||||
|
||||
# BIG5
|
||||
@@ -74,7 +75,7 @@ BIG5_ST = (
|
||||
|
||||
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
|
||||
|
||||
BIG5_SM_MODEL = {
|
||||
BIG5_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": BIG5_CLS,
|
||||
"class_factor": 5,
|
||||
"state_table": BIG5_ST,
|
||||
@@ -117,7 +118,7 @@ CP949_ST = (
|
||||
|
||||
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||
|
||||
CP949_SM_MODEL = {
|
||||
CP949_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": CP949_CLS,
|
||||
"class_factor": 10,
|
||||
"state_table": CP949_ST,
|
||||
@@ -173,7 +174,7 @@ EUCJP_ST = (
|
||||
|
||||
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
|
||||
|
||||
EUCJP_SM_MODEL = {
|
||||
EUCJP_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": EUCJP_CLS,
|
||||
"class_factor": 6,
|
||||
"state_table": EUCJP_ST,
|
||||
@@ -226,7 +227,7 @@ EUCKR_ST = (
|
||||
|
||||
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
|
||||
|
||||
EUCKR_SM_MODEL = {
|
||||
EUCKR_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": EUCKR_CLS,
|
||||
"class_factor": 4,
|
||||
"state_table": EUCKR_ST,
|
||||
@@ -283,7 +284,7 @@ JOHAB_ST = (
|
||||
|
||||
JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2)
|
||||
|
||||
JOHAB_SM_MODEL = {
|
||||
JOHAB_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": JOHAB_CLS,
|
||||
"class_factor": 10,
|
||||
"state_table": JOHAB_ST,
|
||||
@@ -340,7 +341,7 @@ EUCTW_ST = (
|
||||
|
||||
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
|
||||
|
||||
EUCTW_SM_MODEL = {
|
||||
EUCTW_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": EUCTW_CLS,
|
||||
"class_factor": 7,
|
||||
"state_table": EUCTW_ST,
|
||||
@@ -402,7 +403,7 @@ GB2312_ST = (
|
||||
# 2 here.
|
||||
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
|
||||
|
||||
GB2312_SM_MODEL = {
|
||||
GB2312_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": GB2312_CLS,
|
||||
"class_factor": 7,
|
||||
"state_table": GB2312_ST,
|
||||
@@ -458,7 +459,7 @@ SJIS_ST = (
|
||||
|
||||
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
|
||||
|
||||
SJIS_SM_MODEL = {
|
||||
SJIS_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": SJIS_CLS,
|
||||
"class_factor": 6,
|
||||
"state_table": SJIS_ST,
|
||||
@@ -516,7 +517,7 @@ UCS2BE_ST = (
|
||||
|
||||
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
|
||||
|
||||
UCS2BE_SM_MODEL = {
|
||||
UCS2BE_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": UCS2BE_CLS,
|
||||
"class_factor": 6,
|
||||
"state_table": UCS2BE_ST,
|
||||
@@ -574,7 +575,7 @@ UCS2LE_ST = (
|
||||
|
||||
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
|
||||
|
||||
UCS2LE_SM_MODEL = {
|
||||
UCS2LE_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": UCS2LE_CLS,
|
||||
"class_factor": 6,
|
||||
"state_table": UCS2LE_ST,
|
||||
@@ -651,7 +652,7 @@ UTF8_ST = (
|
||||
|
||||
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
||||
|
||||
UTF8_SM_MODEL = {
|
||||
UTF8_SM_MODEL: CodingStateMachineDict = {
|
||||
"class_table": UTF8_CLS,
|
||||
"class_factor": 16,
|
||||
"state_table": UTF8_ST,
|
||||
|
||||
@@ -6,6 +6,7 @@ This code is based on the language metadata from the uchardet project.
|
||||
"""
|
||||
|
||||
from string import ascii_letters
|
||||
from typing import List, Optional
|
||||
|
||||
# TODO: Add Ukrainian (KOI8-U)
|
||||
|
||||
@@ -33,13 +34,13 @@ class Language:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name=None,
|
||||
iso_code=None,
|
||||
use_ascii=True,
|
||||
charsets=None,
|
||||
alphabet=None,
|
||||
wiki_start_pages=None,
|
||||
):
|
||||
name: Optional[str] = None,
|
||||
iso_code: Optional[str] = None,
|
||||
use_ascii: bool = True,
|
||||
charsets: Optional[List[str]] = None,
|
||||
alphabet: Optional[str] = None,
|
||||
wiki_start_pages: Optional[List[str]] = None,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self.name = name
|
||||
self.iso_code = iso_code
|
||||
@@ -55,7 +56,7 @@ class Language:
|
||||
self.alphabet = "".join(sorted(set(alphabet))) if alphabet else None
|
||||
self.wiki_start_pages = wiki_start_pages
|
||||
|
||||
def __repr__(self):
|
||||
def __repr__(self) -> str:
|
||||
param_str = ", ".join(
|
||||
f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
|
||||
)
|
||||
@@ -103,7 +104,7 @@ LANGUAGES = {
|
||||
name="Danish",
|
||||
iso_code="da",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
|
||||
alphabet="æøåÆØÅ",
|
||||
wiki_start_pages=["Forside"],
|
||||
),
|
||||
@@ -111,8 +112,8 @@ LANGUAGES = {
|
||||
name="German",
|
||||
iso_code="de",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "WINDOWS-1252"],
|
||||
alphabet="äöüßÄÖÜ",
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
|
||||
alphabet="äöüßẞÄÖÜ",
|
||||
wiki_start_pages=["Wikipedia:Hauptseite"],
|
||||
),
|
||||
"Greek": Language(
|
||||
@@ -127,7 +128,7 @@ LANGUAGES = {
|
||||
name="English",
|
||||
iso_code="en",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "WINDOWS-1252"],
|
||||
charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"],
|
||||
wiki_start_pages=["Main_Page"],
|
||||
),
|
||||
"Esperanto": Language(
|
||||
@@ -143,7 +144,7 @@ LANGUAGES = {
|
||||
name="Spanish",
|
||||
iso_code="es",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
|
||||
alphabet="ñáéíóúüÑÁÉÍÓÚÜ",
|
||||
wiki_start_pages=["Wikipedia:Portada"],
|
||||
),
|
||||
@@ -161,7 +162,7 @@ LANGUAGES = {
|
||||
name="Finnish",
|
||||
iso_code="fi",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
|
||||
alphabet="ÅÄÖŠŽåäöšž",
|
||||
wiki_start_pages=["Wikipedia:Etusivu"],
|
||||
),
|
||||
@@ -169,7 +170,7 @@ LANGUAGES = {
|
||||
name="French",
|
||||
iso_code="fr",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
|
||||
alphabet="œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ",
|
||||
wiki_start_pages=["Wikipédia:Accueil_principal", "Bœuf (animal)"],
|
||||
),
|
||||
@@ -203,7 +204,7 @@ LANGUAGES = {
|
||||
name="Italian",
|
||||
iso_code="it",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
|
||||
alphabet="ÀÈÉÌÒÓÙàèéìòóù",
|
||||
wiki_start_pages=["Pagina_principale"],
|
||||
),
|
||||
@@ -237,7 +238,7 @@ LANGUAGES = {
|
||||
name="Dutch",
|
||||
iso_code="nl",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "WINDOWS-1252"],
|
||||
charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"],
|
||||
wiki_start_pages=["Hoofdpagina"],
|
||||
),
|
||||
"Polish": Language(
|
||||
@@ -253,7 +254,7 @@ LANGUAGES = {
|
||||
name="Portuguese",
|
||||
iso_code="pt",
|
||||
use_ascii=True,
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
|
||||
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
|
||||
alphabet="ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú",
|
||||
wiki_start_pages=["Wikipédia:Página_principal"],
|
||||
),
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# TypedDict was introduced in Python 3.8.
|
||||
#
|
||||
# TODO: Remove the else block and TYPE_CHECKING check when dropping support
|
||||
# for Python 3.7.
|
||||
from typing import TypedDict
|
||||
|
||||
class ResultDict(TypedDict):
|
||||
encoding: Optional[str]
|
||||
confidence: float
|
||||
language: Optional[str]
|
||||
|
||||
else:
|
||||
ResultDict = dict
|
||||
@@ -26,23 +26,20 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from collections import namedtuple
|
||||
from typing import Dict, List, NamedTuple, Optional, Union
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
|
||||
|
||||
SingleByteCharSetModel = namedtuple(
|
||||
"SingleByteCharSetModel",
|
||||
[
|
||||
"charset_name",
|
||||
"language",
|
||||
"char_to_order_map",
|
||||
"language_model",
|
||||
"typical_positive_ratio",
|
||||
"keep_ascii_letters",
|
||||
"alphabet",
|
||||
],
|
||||
)
|
||||
|
||||
class SingleByteCharSetModel(NamedTuple):
|
||||
charset_name: str
|
||||
language: str
|
||||
char_to_order_map: Dict[int, int]
|
||||
language_model: Dict[int, Dict[int, int]]
|
||||
typical_positive_ratio: float
|
||||
keep_ascii_letters: bool
|
||||
alphabet: str
|
||||
|
||||
|
||||
class SingleByteCharSetProber(CharSetProber):
|
||||
@@ -51,22 +48,27 @@ class SingleByteCharSetProber(CharSetProber):
|
||||
POSITIVE_SHORTCUT_THRESHOLD = 0.95
|
||||
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
|
||||
|
||||
def __init__(self, model, is_reversed=False, name_prober=None):
|
||||
def __init__(
|
||||
self,
|
||||
model: SingleByteCharSetModel,
|
||||
is_reversed: bool = False,
|
||||
name_prober: Optional[CharSetProber] = None,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
self._model = model
|
||||
# TRUE if we need to reverse every pair in the model lookup
|
||||
self._reversed = is_reversed
|
||||
# Optional auxiliary prober for name decision
|
||||
self._name_prober = name_prober
|
||||
self._last_order = None
|
||||
self._seq_counters = None
|
||||
self._total_seqs = None
|
||||
self._total_char = None
|
||||
self._control_char = None
|
||||
self._freq_char = None
|
||||
self._last_order = 255
|
||||
self._seq_counters: List[int] = []
|
||||
self._total_seqs = 0
|
||||
self._total_char = 0
|
||||
self._control_char = 0
|
||||
self._freq_char = 0
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
super().reset()
|
||||
# char order of last character
|
||||
self._last_order = 255
|
||||
@@ -78,18 +80,18 @@ class SingleByteCharSetProber(CharSetProber):
|
||||
self._freq_char = 0
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> Optional[str]:
|
||||
if self._name_prober:
|
||||
return self._name_prober.charset_name
|
||||
return self._model.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> Optional[str]:
|
||||
if self._name_prober:
|
||||
return self._name_prober.language
|
||||
return self._model.language
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
# TODO: Make filter_international_words keep things in self.alphabet
|
||||
if not self._model.keep_ascii_letters:
|
||||
byte_str = self.filter_international_words(byte_str)
|
||||
@@ -139,7 +141,7 @@ class SingleByteCharSetProber(CharSetProber):
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
r = 0.01
|
||||
if self._total_seqs > 0:
|
||||
r = (
|
||||
|
||||
@@ -48,7 +48,7 @@ from .sbcharsetprober import SingleByteCharSetProber
|
||||
|
||||
|
||||
class SBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
hebrew_prober = HebrewProber()
|
||||
logical_hebrew_prober = SingleByteCharSetProber(
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import Union
|
||||
|
||||
from .chardistribution import SJISDistributionAnalysis
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import MachineState, ProbingState
|
||||
@@ -34,26 +36,29 @@ from .mbcssm import SJIS_SM_MODEL
|
||||
|
||||
|
||||
class SJISProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
|
||||
self.distribution_analyzer = SJISDistributionAnalysis()
|
||||
self.context_analyzer = SJISContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
super().reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return self.context_analyzer.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
assert self.coding_sm is not None
|
||||
assert self.distribution_analyzer is not None
|
||||
|
||||
for i, byte in enumerate(byte_str):
|
||||
coding_state = self.coding_sm.next_state(byte)
|
||||
if coding_state == MachineState.ERROR:
|
||||
@@ -92,7 +97,9 @@ class SJISProber(MultiByteCharSetProber):
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
assert self.distribution_analyzer is not None
|
||||
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
|
||||
@@ -39,12 +39,16 @@ class a user of ``chardet`` should use.
|
||||
import codecs
|
||||
import logging
|
||||
import re
|
||||
from typing import List, Optional, Union
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import InputState, LanguageFilter, ProbingState
|
||||
from .escprober import EscCharSetProber
|
||||
from .latin1prober import Latin1Prober
|
||||
from .macromanprober import MacRomanProber
|
||||
from .mbcsgroupprober import MBCSGroupProber
|
||||
from .resultdict import ResultDict
|
||||
from .sbcsgroupprober import SBCSGroupProber
|
||||
from .utf1632prober import UTF1632Prober
|
||||
|
||||
@@ -80,34 +84,55 @@ class UniversalDetector:
|
||||
"iso-8859-9": "Windows-1254",
|
||||
"iso-8859-13": "Windows-1257",
|
||||
}
|
||||
# Based on https://encoding.spec.whatwg.org/#names-and-labels
|
||||
# but altered to match Python names for encodings and remove mappings
|
||||
# that break tests.
|
||||
LEGACY_MAP = {
|
||||
"ascii": "Windows-1252",
|
||||
"iso-8859-1": "Windows-1252",
|
||||
"tis-620": "ISO-8859-11",
|
||||
"iso-8859-9": "Windows-1254",
|
||||
"gb2312": "GB18030",
|
||||
"euc-kr": "CP949",
|
||||
"utf-16le": "UTF-16",
|
||||
}
|
||||
|
||||
def __init__(self, lang_filter=LanguageFilter.ALL):
|
||||
self._esc_charset_prober = None
|
||||
self._utf1632_prober = None
|
||||
self._charset_probers = []
|
||||
self.result = None
|
||||
self.done = None
|
||||
self._got_data = None
|
||||
self._input_state = None
|
||||
self._last_char = None
|
||||
def __init__(
|
||||
self,
|
||||
lang_filter: LanguageFilter = LanguageFilter.ALL,
|
||||
should_rename_legacy: bool = False,
|
||||
) -> None:
|
||||
self._esc_charset_prober: Optional[EscCharSetProber] = None
|
||||
self._utf1632_prober: Optional[UTF1632Prober] = None
|
||||
self._charset_probers: List[CharSetProber] = []
|
||||
self.result: ResultDict = {
|
||||
"encoding": None,
|
||||
"confidence": 0.0,
|
||||
"language": None,
|
||||
}
|
||||
self.done = False
|
||||
self._got_data = False
|
||||
self._input_state = InputState.PURE_ASCII
|
||||
self._last_char = b""
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._has_win_bytes = None
|
||||
self._has_win_bytes = False
|
||||
self.should_rename_legacy = should_rename_legacy
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def input_state(self):
|
||||
def input_state(self) -> int:
|
||||
return self._input_state
|
||||
|
||||
@property
|
||||
def has_win_bytes(self):
|
||||
def has_win_bytes(self) -> bool:
|
||||
return self._has_win_bytes
|
||||
|
||||
@property
|
||||
def charset_probers(self):
|
||||
def charset_probers(self) -> List[CharSetProber]:
|
||||
return self._charset_probers
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
"""
|
||||
Reset the UniversalDetector and all of its probers back to their
|
||||
initial states. This is called by ``__init__``, so you only need to
|
||||
@@ -126,7 +151,7 @@ class UniversalDetector:
|
||||
for prober in self._charset_probers:
|
||||
prober.reset()
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> None:
|
||||
"""
|
||||
Takes a chunk of a document and feeds it through all of the relevant
|
||||
charset probers.
|
||||
@@ -166,6 +191,7 @@ class UniversalDetector:
|
||||
elif byte_str.startswith(b"\xFE\xFF\x00\x00"):
|
||||
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
||||
self.result = {
|
||||
# TODO: This encoding is not supported by Python. Should remove?
|
||||
"encoding": "X-ISO-10646-UCS-4-3412",
|
||||
"confidence": 1.0,
|
||||
"language": "",
|
||||
@@ -173,6 +199,7 @@ class UniversalDetector:
|
||||
elif byte_str.startswith(b"\x00\x00\xFF\xFE"):
|
||||
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||
self.result = {
|
||||
# TODO: This encoding is not supported by Python. Should remove?
|
||||
"encoding": "X-ISO-10646-UCS-4-2143",
|
||||
"confidence": 1.0,
|
||||
"language": "",
|
||||
@@ -242,6 +269,7 @@ class UniversalDetector:
|
||||
if self.lang_filter & LanguageFilter.NON_CJK:
|
||||
self._charset_probers.append(SBCSGroupProber())
|
||||
self._charset_probers.append(Latin1Prober())
|
||||
self._charset_probers.append(MacRomanProber())
|
||||
for prober in self._charset_probers:
|
||||
if prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {
|
||||
@@ -254,7 +282,7 @@ class UniversalDetector:
|
||||
if self.WIN_BYTE_DETECTOR.search(byte_str):
|
||||
self._has_win_bytes = True
|
||||
|
||||
def close(self):
|
||||
def close(self) -> ResultDict:
|
||||
"""
|
||||
Stop analyzing the current document and come up with a final
|
||||
prediction.
|
||||
@@ -288,7 +316,8 @@ class UniversalDetector:
|
||||
max_prober = prober
|
||||
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
|
||||
charset_name = max_prober.charset_name
|
||||
lower_charset_name = max_prober.charset_name.lower()
|
||||
assert charset_name is not None
|
||||
lower_charset_name = charset_name.lower()
|
||||
confidence = max_prober.get_confidence()
|
||||
# Use Windows encoding name instead of ISO-8859 if we saw any
|
||||
# extra Windows-specific bytes
|
||||
@@ -297,6 +326,11 @@ class UniversalDetector:
|
||||
charset_name = self.ISO_WIN_MAP.get(
|
||||
lower_charset_name, charset_name
|
||||
)
|
||||
# Rename legacy encodings with superset encodings if asked
|
||||
if self.should_rename_legacy:
|
||||
charset_name = self.LEGACY_MAP.get(
|
||||
(charset_name or "").lower(), charset_name
|
||||
)
|
||||
self.result = {
|
||||
"encoding": charset_name,
|
||||
"confidence": confidence,
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
from typing import List, Union
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
@@ -36,7 +38,7 @@ class UTF1632Prober(CharSetProber):
|
||||
# a fixed constant ratio of expected zeros or non-zeros in modulo-position.
|
||||
EXPECTED_RATIO = 0.94
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.position = 0
|
||||
self.zeros_at_mod = [0] * 4
|
||||
@@ -51,7 +53,7 @@ class UTF1632Prober(CharSetProber):
|
||||
self.first_half_surrogate_pair_detected_16le = False
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
super().reset()
|
||||
self.position = 0
|
||||
self.zeros_at_mod = [0] * 4
|
||||
@@ -66,7 +68,7 @@ class UTF1632Prober(CharSetProber):
|
||||
self.quad = [0, 0, 0, 0]
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
if self.is_likely_utf32be():
|
||||
return "utf-32be"
|
||||
if self.is_likely_utf32le():
|
||||
@@ -79,16 +81,16 @@ class UTF1632Prober(CharSetProber):
|
||||
return "utf-16"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return ""
|
||||
|
||||
def approx_32bit_chars(self):
|
||||
def approx_32bit_chars(self) -> float:
|
||||
return max(1.0, self.position / 4.0)
|
||||
|
||||
def approx_16bit_chars(self):
|
||||
def approx_16bit_chars(self) -> float:
|
||||
return max(1.0, self.position / 2.0)
|
||||
|
||||
def is_likely_utf32be(self):
|
||||
def is_likely_utf32be(self) -> bool:
|
||||
approx_chars = self.approx_32bit_chars()
|
||||
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
|
||||
self.zeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
|
||||
@@ -98,7 +100,7 @@ class UTF1632Prober(CharSetProber):
|
||||
and not self.invalid_utf32be
|
||||
)
|
||||
|
||||
def is_likely_utf32le(self):
|
||||
def is_likely_utf32le(self) -> bool:
|
||||
approx_chars = self.approx_32bit_chars()
|
||||
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
|
||||
self.nonzeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
|
||||
@@ -108,7 +110,7 @@ class UTF1632Prober(CharSetProber):
|
||||
and not self.invalid_utf32le
|
||||
)
|
||||
|
||||
def is_likely_utf16be(self):
|
||||
def is_likely_utf16be(self) -> bool:
|
||||
approx_chars = self.approx_16bit_chars()
|
||||
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
|
||||
(self.nonzeros_at_mod[1] + self.nonzeros_at_mod[3]) / approx_chars
|
||||
@@ -118,7 +120,7 @@ class UTF1632Prober(CharSetProber):
|
||||
and not self.invalid_utf16be
|
||||
)
|
||||
|
||||
def is_likely_utf16le(self):
|
||||
def is_likely_utf16le(self) -> bool:
|
||||
approx_chars = self.approx_16bit_chars()
|
||||
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
|
||||
(self.nonzeros_at_mod[0] + self.nonzeros_at_mod[2]) / approx_chars
|
||||
@@ -128,7 +130,7 @@ class UTF1632Prober(CharSetProber):
|
||||
and not self.invalid_utf16le
|
||||
)
|
||||
|
||||
def validate_utf32_characters(self, quad):
|
||||
def validate_utf32_characters(self, quad: List[int]) -> None:
|
||||
"""
|
||||
Validate if the quad of bytes is valid UTF-32.
|
||||
|
||||
@@ -150,7 +152,7 @@ class UTF1632Prober(CharSetProber):
|
||||
):
|
||||
self.invalid_utf32le = True
|
||||
|
||||
def validate_utf16_characters(self, pair):
|
||||
def validate_utf16_characters(self, pair: List[int]) -> None:
|
||||
"""
|
||||
Validate if the pair of bytes is valid UTF-16.
|
||||
|
||||
@@ -182,7 +184,7 @@ class UTF1632Prober(CharSetProber):
|
||||
else:
|
||||
self.invalid_utf16le = True
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
for c in byte_str:
|
||||
mod4 = self.position % 4
|
||||
self.quad[mod4] = c
|
||||
@@ -198,7 +200,7 @@ class UTF1632Prober(CharSetProber):
|
||||
return self.state
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
def state(self) -> ProbingState:
|
||||
if self._state in {ProbingState.NOT_ME, ProbingState.FOUND_IT}:
|
||||
# terminal, decided states
|
||||
return self._state
|
||||
@@ -210,7 +212,7 @@ class UTF1632Prober(CharSetProber):
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self._state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
return (
|
||||
0.85
|
||||
if (
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from typing import Union
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import MachineState, ProbingState
|
||||
@@ -34,26 +36,26 @@ from .mbcssm import UTF8_SM_MODEL
|
||||
class UTF8Prober(CharSetProber):
|
||||
ONE_CHAR_PROB = 0.5
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
|
||||
self._num_mb_chars = None
|
||||
self._num_mb_chars = 0
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
def reset(self) -> None:
|
||||
super().reset()
|
||||
self.coding_sm.reset()
|
||||
self._num_mb_chars = 0
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
def charset_name(self) -> str:
|
||||
return "utf-8"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
def language(self) -> str:
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
|
||||
for c in byte_str:
|
||||
coding_state = self.coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
@@ -72,7 +74,7 @@ class UTF8Prober(CharSetProber):
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
def get_confidence(self) -> float:
|
||||
unlike = 0.99
|
||||
if self._num_mb_chars < 6:
|
||||
unlike *= self.ONE_CHAR_PROB**self._num_mb_chars
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
"""
|
||||
This module exists only to simplify retrieving the version number of chardet
|
||||
from within setup.py and from chardet subpackages.
|
||||
from within setuptools and from chardet subpackages.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
__version__ = "5.0.0"
|
||||
__version__ = "5.1.0"
|
||||
VERSION = __version__.split(".")
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
from .initialise import init, deinit, reinit, colorama_text
|
||||
from .initialise import init, deinit, reinit, colorama_text, just_fix_windows_console
|
||||
from .ansi import Fore, Back, Style, Cursor
|
||||
from .ansitowin32 import AnsiToWin32
|
||||
|
||||
__version__ = '0.4.5'
|
||||
__version__ = '0.4.6'
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import sys
|
||||
import os
|
||||
|
||||
from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style, BEL
|
||||
from .winterm import WinTerm, WinColor, WinStyle
|
||||
from .winterm import enable_vt_processing, WinTerm, WinColor, WinStyle
|
||||
from .win32 import windll, winapi_test
|
||||
|
||||
|
||||
@@ -94,15 +94,22 @@ class AnsiToWin32(object):
|
||||
# (e.g. Cygwin Terminal). In this case it's up to the terminal
|
||||
# to support the ANSI codes.
|
||||
conversion_supported = on_windows and winapi_test()
|
||||
try:
|
||||
fd = wrapped.fileno()
|
||||
except Exception:
|
||||
fd = -1
|
||||
system_has_native_ansi = not on_windows or enable_vt_processing(fd)
|
||||
have_tty = not self.stream.closed and self.stream.isatty()
|
||||
need_conversion = conversion_supported and not system_has_native_ansi
|
||||
|
||||
# should we strip ANSI sequences from our output?
|
||||
if strip is None:
|
||||
strip = conversion_supported or (not self.stream.closed and not self.stream.isatty())
|
||||
strip = need_conversion or not have_tty
|
||||
self.strip = strip
|
||||
|
||||
# should we convert ANSI sequences into win32 calls?
|
||||
if convert is None:
|
||||
convert = conversion_supported and not self.stream.closed and self.stream.isatty()
|
||||
convert = need_conversion and have_tty
|
||||
self.convert = convert
|
||||
|
||||
# dict of ansi codes to win32 functions and parameters
|
||||
@@ -264,3 +271,7 @@ class AnsiToWin32(object):
|
||||
if params[0] in '02':
|
||||
winterm.set_title(params[1])
|
||||
return text
|
||||
|
||||
|
||||
def flush(self):
|
||||
self.wrapped.flush()
|
||||
|
||||
@@ -6,13 +6,27 @@ import sys
|
||||
from .ansitowin32 import AnsiToWin32
|
||||
|
||||
|
||||
orig_stdout = None
|
||||
orig_stderr = None
|
||||
def _wipe_internal_state_for_tests():
|
||||
global orig_stdout, orig_stderr
|
||||
orig_stdout = None
|
||||
orig_stderr = None
|
||||
|
||||
wrapped_stdout = None
|
||||
wrapped_stderr = None
|
||||
global wrapped_stdout, wrapped_stderr
|
||||
wrapped_stdout = None
|
||||
wrapped_stderr = None
|
||||
|
||||
atexit_done = False
|
||||
global atexit_done
|
||||
atexit_done = False
|
||||
|
||||
global fixed_windows_console
|
||||
fixed_windows_console = False
|
||||
|
||||
try:
|
||||
# no-op if it wasn't registered
|
||||
atexit.unregister(reset_all)
|
||||
except AttributeError:
|
||||
# python 2: no atexit.unregister. Oh well, we did our best.
|
||||
pass
|
||||
|
||||
|
||||
def reset_all():
|
||||
@@ -55,6 +69,29 @@ def deinit():
|
||||
sys.stderr = orig_stderr
|
||||
|
||||
|
||||
def just_fix_windows_console():
|
||||
global fixed_windows_console
|
||||
|
||||
if sys.platform != "win32":
|
||||
return
|
||||
if fixed_windows_console:
|
||||
return
|
||||
if wrapped_stdout is not None or wrapped_stderr is not None:
|
||||
# Someone already ran init() and it did stuff, so we won't second-guess them
|
||||
return
|
||||
|
||||
# On newer versions of Windows, AnsiToWin32.__init__ will implicitly enable the
|
||||
# native ANSI support in the console as a side-effect. We only need to actually
|
||||
# replace sys.stdout/stderr if we're in the old-style conversion mode.
|
||||
new_stdout = AnsiToWin32(sys.stdout, convert=None, strip=None, autoreset=False)
|
||||
if new_stdout.convert:
|
||||
sys.stdout = new_stdout
|
||||
new_stderr = AnsiToWin32(sys.stderr, convert=None, strip=None, autoreset=False)
|
||||
if new_stderr.convert:
|
||||
sys.stderr = new_stderr
|
||||
|
||||
fixed_windows_console = True
|
||||
|
||||
@contextlib.contextmanager
|
||||
def colorama_text(*args, **kwargs):
|
||||
init(*args, **kwargs)
|
||||
@@ -78,3 +115,7 @@ def wrap_stream(stream, convert, strip, autoreset, wrap):
|
||||
if wrapper.should_wrap():
|
||||
stream = wrapper.stream
|
||||
return stream
|
||||
|
||||
|
||||
# Use this for initial setup as well, to reduce code duplication
|
||||
_wipe_internal_state_for_tests()
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
@@ -0,0 +1,76 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
import sys
|
||||
from unittest import TestCase, main
|
||||
|
||||
from ..ansi import Back, Fore, Style
|
||||
from ..ansitowin32 import AnsiToWin32
|
||||
|
||||
# Snapshot of the standard streams taken at import time; tearDown puts
# these back after every test.
stdout_orig = sys.stdout
stderr_orig = sys.stderr


class AnsiTest(TestCase):
    """Check the escape sequences exposed by the Fore, Back and Style constants."""

    def setUp(self):
        # sanity check: stdout should be a file or StringIO object.
        # It will only be AnsiToWin32 if init() has previously wrapped it
        for current in (sys.stdout, sys.stderr):
            self.assertNotEqual(type(current), AnsiToWin32)

    def tearDown(self):
        # Restore the streams captured when this module was imported.
        sys.stdout, sys.stderr = stdout_orig, stderr_orig

    def _assert_pairs(self, pairs):
        """Assert, in order, that each ``(actual, expected)`` pair is equal."""
        for actual, expected in pairs:
            self.assertEqual(actual, expected)

    def testForeAttributes(self):
        """Foreground colour constants map to the expected escape codes."""
        self._assert_pairs((
            (Fore.BLACK, '\033[30m'),
            (Fore.RED, '\033[31m'),
            (Fore.GREEN, '\033[32m'),
            (Fore.YELLOW, '\033[33m'),
            (Fore.BLUE, '\033[34m'),
            (Fore.MAGENTA, '\033[35m'),
            (Fore.CYAN, '\033[36m'),
            (Fore.WHITE, '\033[37m'),
            (Fore.RESET, '\033[39m'),
        ))

        # Check the light, extended versions.
        self._assert_pairs((
            (Fore.LIGHTBLACK_EX, '\033[90m'),
            (Fore.LIGHTRED_EX, '\033[91m'),
            (Fore.LIGHTGREEN_EX, '\033[92m'),
            (Fore.LIGHTYELLOW_EX, '\033[93m'),
            (Fore.LIGHTBLUE_EX, '\033[94m'),
            (Fore.LIGHTMAGENTA_EX, '\033[95m'),
            (Fore.LIGHTCYAN_EX, '\033[96m'),
            (Fore.LIGHTWHITE_EX, '\033[97m'),
        ))

    def testBackAttributes(self):
        """Background colour constants map to the expected escape codes."""
        self._assert_pairs((
            (Back.BLACK, '\033[40m'),
            (Back.RED, '\033[41m'),
            (Back.GREEN, '\033[42m'),
            (Back.YELLOW, '\033[43m'),
            (Back.BLUE, '\033[44m'),
            (Back.MAGENTA, '\033[45m'),
            (Back.CYAN, '\033[46m'),
            (Back.WHITE, '\033[47m'),
            (Back.RESET, '\033[49m'),
        ))

        # Check the light, extended versions.
        self._assert_pairs((
            (Back.LIGHTBLACK_EX, '\033[100m'),
            (Back.LIGHTRED_EX, '\033[101m'),
            (Back.LIGHTGREEN_EX, '\033[102m'),
            (Back.LIGHTYELLOW_EX, '\033[103m'),
            (Back.LIGHTBLUE_EX, '\033[104m'),
            (Back.LIGHTMAGENTA_EX, '\033[105m'),
            (Back.LIGHTCYAN_EX, '\033[106m'),
            (Back.LIGHTWHITE_EX, '\033[107m'),
        ))

    def testStyleAttributes(self):
        """Style constants map to the expected escape codes."""
        self._assert_pairs((
            (Style.DIM, '\033[2m'),
            (Style.NORMAL, '\033[22m'),
            (Style.BRIGHT, '\033[1m'),
        ))


if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,294 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
from io import StringIO, TextIOWrapper
|
||||
from unittest import TestCase, main
|
||||
try:
|
||||
from contextlib import ExitStack
|
||||
except ImportError:
|
||||
# python 2
|
||||
from contextlib2 import ExitStack
|
||||
|
||||
try:
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
except ImportError:
|
||||
from mock import MagicMock, Mock, patch
|
||||
|
||||
from ..ansitowin32 import AnsiToWin32, StreamWrapper
|
||||
from ..win32 import ENABLE_VIRTUAL_TERMINAL_PROCESSING
|
||||
from .utils import osname
|
||||
|
||||
|
||||
class StreamWrapperTest(TestCase):
    """Tests for StreamWrapper's proxying, write delegation and ``closed`` handling."""

    def testIsAProxy(self):
        """Attribute lookups on the wrapper yield the wrapped stream's attribute."""
        mockStream = Mock()
        wrapper = StreamWrapper(mockStream, None)
        self.assertIs(wrapper.random_attr, mockStream.random_attr)

    def testDelegatesWrite(self):
        """write() on the wrapper is passed to the converter's write()."""
        mockStream = Mock()
        mockConverter = Mock()
        wrapper = StreamWrapper(mockStream, mockConverter)
        wrapper.write('hello')
        # assertEqual, not assertTrue: assertTrue(x, y) treats y as the
        # failure message and never compares the two values.
        self.assertEqual(mockConverter.write.call_args, (('hello',), {}))

    def testDelegatesContext(self):
        """Leaving the wrapper's ``with`` block closes the underlying stream."""
        mockConverter = Mock()
        s = StringIO()
        with StreamWrapper(s, mockConverter) as fp:
            fp.write(u'hello')
        self.assertTrue(s.closed)

    def testProxyNoContextManager(self):
        """An AttributeError raised by the stream's __enter__ reaches the caller."""
        mockStream = MagicMock()
        mockStream.__enter__.side_effect = AttributeError()
        mockConverter = Mock()
        with self.assertRaises(AttributeError):
            with StreamWrapper(mockStream, mockConverter) as wrapper:
                wrapper.write('hello')

    def test_closed_shouldnt_raise_on_closed_stream(self):
        """``closed`` is True, without raising, for an already closed stream."""
        stream = StringIO()
        stream.close()
        wrapper = StreamWrapper(stream, None)
        self.assertEqual(wrapper.closed, True)

    def test_closed_shouldnt_raise_on_detached_stream(self):
        """``closed`` is True, without raising, for a detached TextIOWrapper."""
        stream = TextIOWrapper(StringIO())
        stream.detach()
        wrapper = StreamWrapper(stream, None)
        self.assertEqual(wrapper.closed, True)
|
||||
|
||||
class AnsiToWin32Test(TestCase):
    """Tests for AnsiToWin32: construction, write paths, ANSI parsing and win32 dispatch."""

    def _converter_with_stubbed_writers(self):
        """Build a converter whose ``wrapped`` and ``write_and_convert`` are fresh mocks."""
        converter = AnsiToWin32(Mock())
        converter.wrapped = Mock()
        converter.write_and_convert = Mock()
        return converter

    def testInit(self):
        """The wrapped stream and autoreset value are stored as passed in."""
        wrapped = Mock()
        autoreset_flag = Mock()
        converter = AnsiToWin32(wrapped, autoreset=autoreset_flag)
        self.assertEqual(converter.wrapped, wrapped)
        self.assertEqual(converter.autoreset, autoreset_flag)

    @patch('colorama.ansitowin32.winterm', None)
    @patch('colorama.ansitowin32.winapi_test', lambda *_: True)
    def testStripIsTrueOnWindows(self):
        """With os name 'nt' and winapi_test patched to True, strip defaults to truthy."""
        with osname('nt'):
            converter = AnsiToWin32(Mock())
            self.assertTrue(converter.strip)

    def testStripIsFalseOffWindows(self):
        """With os name 'posix' and an open mock stream, strip defaults to falsy."""
        with osname('posix'):
            converter = AnsiToWin32(Mock(closed=False))
            self.assertFalse(converter.strip)

    def testWriteStripsAnsi(self):
        """With strip set, write() goes through write_and_convert only."""
        converter = self._converter_with_stubbed_writers()
        converter.strip = True

        converter.write('abc')

        self.assertFalse(converter.wrapped.write.called)
        self.assertEqual(converter.write_and_convert.call_args, (('abc',), {}))

    def testWriteDoesNotStripAnsi(self):
        """With strip and convert both off, write() goes straight to the wrapped stream."""
        converter = self._converter_with_stubbed_writers()
        converter.strip = False
        converter.convert = False

        converter.write('abc')

        self.assertFalse(converter.write_and_convert.called)
        self.assertEqual(converter.wrapped.write.call_args, (('abc',), {}))

    def assert_autoresets(self, convert, autoreset=True):
        """Write once and assert reset_all was called exactly when ``autoreset`` is set."""
        converter = AnsiToWin32(Mock())
        converter.convert = convert
        converter.reset_all = Mock()
        converter.autoreset = autoreset
        converter.winterm = Mock()

        converter.write('abc')

        self.assertEqual(converter.reset_all.called, autoreset)

    def testWriteAutoresets(self):
        """Check autoreset behaviour for every convert/autoreset combination."""
        for autoreset in (True, False):
            for convert in (True, False):
                self.assert_autoresets(convert=convert, autoreset=autoreset)

    def testWriteAndConvertWritesPlainText(self):
        """Text without escape sequences is written to the wrapped stream unchanged."""
        converter = AnsiToWin32(Mock())
        converter.write_and_convert('abc')
        self.assertEqual(converter.wrapped.write.call_args, (('abc',), {}))

    def testWriteAndConvertStripsAllValidAnsi(self):
        """Each valid sequence is dropped, leaving only the text on either side."""
        converter = AnsiToWin32(Mock())
        converter.call_win32 = Mock()
        sequences = [
            'abc\033[mdef',
            'abc\033[0mdef',
            'abc\033[2mdef',
            'abc\033[02mdef',
            'abc\033[002mdef',
            'abc\033[40mdef',
            'abc\033[040mdef',
            'abc\033[0;1mdef',
            'abc\033[40;50mdef',
            'abc\033[50;30;40mdef',
            'abc\033[Adef',
            'abc\033[0Gdef',
            'abc\033[1;20;128Hdef',
        ]
        expected_writes = [('abc',), ('def',)]
        for sequence in sequences:
            converter.wrapped.write.reset_mock()
            converter.write_and_convert(sequence)
            written = [
                call[0] for call in converter.wrapped.write.call_args_list
            ]
            self.assertEqual(written, expected_writes)

    def testWriteAndConvertSkipsEmptySnippets(self):
        """Back-to-back sequences with no text between them write nothing."""
        converter = AnsiToWin32(Mock())
        converter.call_win32 = Mock()
        converter.write_and_convert('\033[40m\033[41m')
        self.assertFalse(converter.wrapped.write.called)

    def testWriteAndConvertCallsWin32WithParamsAndCommand(self):
        """call_win32 receives the command letter and the extracted params."""
        converter = AnsiToWin32(Mock())
        converter.convert = True
        converter.call_win32 = Mock()
        converter.extract_params = Mock(return_value='params')
        cases = {
            'abc\033[adef': ('a', 'params'),
            'abc\033[;;bdef': ('b', 'params'),
            'abc\033[0cdef': ('c', 'params'),
            'abc\033[;;0;;Gdef': ('G', 'params'),
            'abc\033[1;20;128Hdef': ('H', 'params'),
        }
        for text, expected_args in cases.items():
            converter.call_win32.reset_mock()
            converter.write_and_convert(text)
            self.assertEqual(converter.call_win32.call_args[0], expected_args)

    def test_reset_all_shouldnt_raise_on_closed_orig_stdout(self):
        """reset_all() completes without raising after the stream is closed."""
        buffer = StringIO()
        converter = AnsiToWin32(buffer)
        buffer.close()

        converter.reset_all()

    def test_wrap_shouldnt_raise_on_closed_orig_stdout(self):
        """Wrapping an already closed stream yields strip on and convert off."""
        buffer = StringIO()
        buffer.close()
        with patch("colorama.ansitowin32.os.name", "nt"):
            with patch("colorama.ansitowin32.winapi_test", lambda: True):
                converter = AnsiToWin32(buffer)
        self.assertTrue(converter.strip)
        self.assertFalse(converter.convert)

    def test_wrap_shouldnt_raise_on_missing_closed_attr(self):
        """Wrapping a plain ``object()`` yields strip on and convert off."""
        with patch("colorama.ansitowin32.os.name", "nt"):
            with patch("colorama.ansitowin32.winapi_test", lambda: True):
                converter = AnsiToWin32(object())
        self.assertTrue(converter.strip)
        self.assertFalse(converter.convert)

    def testExtractParams(self):
        """extract_params('m', ...) turns parameter strings into int tuples."""
        converter = AnsiToWin32(Mock())
        cases = {
            '': (0,),
            ';;': (0,),
            '2': (2,),
            ';;002;;': (2,),
            '0;1': (0, 1),
            ';;003;;456;;': (3, 456),
            '11;22;33;44;55': (11, 22, 33, 44, 55),
        }
        for paramstring, expected in cases.items():
            self.assertEqual(converter.extract_params('m', paramstring), expected)

    def testCallWin32UsesLookup(self):
        """call_win32 runs the win32_calls entry of each known param, in param order."""
        listener = Mock()
        converter = AnsiToWin32(listener)
        converter.win32_calls = {
            1: (lambda *_, **__: listener(11),),
            2: (lambda *_, **__: listener(22),),
            3: (lambda *_, **__: listener(33),),
        }
        converter.call_win32('m', (3, 1, 99, 2))
        seen = [call[0][0] for call in listener.call_args_list]
        self.assertEqual(seen, [33, 11, 22])

    def test_osc_codes(self):
        """Only the two well-formed title sequences reach winterm.set_title."""
        converter = AnsiToWin32(Mock(), convert=True)
        with patch('colorama.ansitowin32.winterm') as winterm:
            codes = [
                '\033]0\x07',                      # missing arguments
                '\033]0;foo\x08',                  # wrong OSC command
                '\033]0;colorama_test_title\x07',  # should work
                '\033]1;colorama_test_title\x07',  # wrong set command
                '\033]2;colorama_test_title\x07',  # should work
                '\033]' + ';' * 64 + '\x08',       # see issue #247
            ]
            for code in codes:
                converter.write(code)
            self.assertEqual(winterm.set_title.call_count, 2)

    def test_native_windows_ansi(self):
        """Compare a console reporting native VT support with one that does not."""
        with ExitStack() as stack:
            def apply_patch(target, replacement):
                # Keep each patch active until the ExitStack unwinds.
                stack.enter_context(patch(target, replacement, create=True))

            # Pretend to be on Windows
            apply_patch("colorama.ansitowin32.os.name", "nt")
            apply_patch("colorama.ansitowin32.winapi_test", lambda: True)
            apply_patch("colorama.win32.winapi_test", lambda: True)
            apply_patch("colorama.winterm.win32.windll", "non-None")
            apply_patch("colorama.winterm.get_osfhandle", lambda _: 1234)

            # Pretend that our mock stream has native ANSI support
            apply_patch(
                "colorama.winterm.win32.GetConsoleMode",
                lambda _: ENABLE_VIRTUAL_TERMINAL_PROCESSING,
            )
            SetConsoleMode = Mock()
            apply_patch("colorama.winterm.win32.SetConsoleMode", SetConsoleMode)

            stdout = Mock()
            stdout.closed = False
            stdout.isatty.return_value = True
            stdout.fileno.return_value = 1

            # Our fake console says it has native vt support, so AnsiToWin32 should
            # enable that support and do nothing else.
            converter = AnsiToWin32(stdout)
            SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING)
            self.assertFalse(converter.strip)
            self.assertFalse(converter.convert)
            self.assertFalse(converter.should_wrap())

            # Now let's pretend we're on an old Windows console, that doesn't have
            # native ANSI support.
            apply_patch("colorama.winterm.win32.GetConsoleMode", lambda _: 0)
            SetConsoleMode = Mock()
            apply_patch("colorama.winterm.win32.SetConsoleMode", SetConsoleMode)

            converter = AnsiToWin32(stdout)
            SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING)
            self.assertTrue(converter.strip)
            self.assertTrue(converter.convert)
            self.assertTrue(converter.should_wrap())


if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,189 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
import sys
|
||||
from unittest import TestCase, main, skipUnless
|
||||
|
||||
try:
|
||||
from unittest.mock import patch, Mock
|
||||
except ImportError:
|
||||
from mock import patch, Mock
|
||||
|
||||
from ..ansitowin32 import StreamWrapper
|
||||
from ..initialise import init, just_fix_windows_console, _wipe_internal_state_for_tests
|
||||
from .utils import osname, replace_by
|
||||
|
||||
orig_stdout = sys.stdout
|
||||
orig_stderr = sys.stderr
|
||||
|
||||
|
||||
class InitTest(TestCase):
    """Checks under which conditions ``init()`` replaces the std streams."""

    @skipUnless(sys.stdout.isatty(), "sys.stdout is not a tty")
    def setUp(self):
        # Each test has to begin with the streams captured at import time.
        self.assertNotWrapped()

    def tearDown(self):
        # Call the module's test-only reset hook, then restore the streams
        # that were captured when this test module was imported.
        _wipe_internal_state_for_tests()
        sys.stdout, sys.stderr = orig_stdout, orig_stderr

    def assertWrapped(self):
        """Fail unless both std streams are StreamWrapper replacements."""
        out, err = sys.stdout, sys.stderr
        self.assertIsNot(out, orig_stdout, 'stdout should be wrapped')
        self.assertIsNot(err, orig_stderr, 'stderr should be wrapped')
        self.assertTrue(isinstance(out, StreamWrapper), 'bad stdout wrapper')
        self.assertTrue(isinstance(err, StreamWrapper), 'bad stderr wrapper')

    def assertNotWrapped(self):
        """Fail unless both std streams are still the import-time objects."""
        out, err = sys.stdout, sys.stderr
        self.assertIs(out, orig_stdout, 'stdout should not be wrapped')
        self.assertIs(err, orig_stderr, 'stderr should not be wrapped')

    @patch('colorama.initialise.reset_all')
    @patch('colorama.ansitowin32.winapi_test', lambda *_: True)
    @patch('colorama.ansitowin32.enable_vt_processing', lambda *_: False)
    def testInitWrapsOnWindows(self, _):
        # winapi_test patched to succeed, enable_vt_processing to fail.
        with osname("nt"):
            init()
            self.assertWrapped()

    @patch('colorama.initialise.reset_all')
    @patch('colorama.ansitowin32.winapi_test', lambda *_: False)
    def testInitDoesntWrapOnEmulatedWindows(self, _):
        # os.name says "nt" but winapi_test is patched to report failure.
        with osname("nt"):
            init()
            self.assertNotWrapped()

    def testInitDoesntWrapOnNonWindows(self):
        with osname("posix"):
            init()
            self.assertNotWrapped()

    def testInitDoesntWrapIfNone(self):
        with replace_by(None):
            init()
            # assertNotWrapped is unusable here: replace_by(None) has already
            # swapped stdout/stderr, so compare against None directly.
            self.assertIsNone(sys.stdout)
            self.assertIsNone(sys.stderr)

    def testInitAutoresetOnWrapsOnAllPlatforms(self):
        with osname("posix"):
            init(autoreset=True)
            self.assertWrapped()

    def testInitWrapOffDoesntWrapOnWindows(self):
        with osname("nt"):
            init(wrap=False)
            self.assertNotWrapped()

    def testInitWrapOffIncompatibleWithAutoresetOn(self):
        self.assertRaises(ValueError, init, autoreset=True, wrap=False)

    @patch('colorama.win32.SetConsoleTextAttribute')
    @patch('colorama.initialise.AnsiToWin32')
    def testAutoResetPassedOn(self, mockATW32, _):
        with osname("nt"):
            init(autoreset=True)
            calls = mockATW32.call_args_list
            # Two constructions are expected; each entry is (args, kwargs).
            self.assertEqual(len(calls), 2)
            self.assertEqual(calls[1][1]['autoreset'], True)
            self.assertEqual(calls[0][1]['autoreset'], True)

    @patch('colorama.initialise.AnsiToWin32')
    def testAutoResetChangeable(self, mockATW32):
        with osname("nt"):
            init()

            init(autoreset=True)
            calls = mockATW32.call_args_list
            self.assertEqual(len(calls), 4)
            self.assertEqual(calls[2][1]['autoreset'], True)
            self.assertEqual(calls[3][1]['autoreset'], True)

            init()
            calls = mockATW32.call_args_list
            self.assertEqual(len(calls), 6)
            self.assertEqual(calls[4][1]['autoreset'], False)
            self.assertEqual(calls[5][1]['autoreset'], False)

    @patch('colorama.initialise.atexit.register')
    def testAtexitRegisteredOnlyOnce(self, mockRegister):
        init()
        self.assertTrue(mockRegister.called)
        # Forget the first registration so a second one would be visible.
        mockRegister.reset_mock()
        init()
        self.assertFalse(mockRegister.called)
|
||||
|
||||
|
||||
class JustFixWindowsConsoleTest(TestCase):
    """Exercises ``just_fix_windows_console()`` on and off Windows."""

    def _reset(self):
        # Test-only reset hook first, then restore the import-time streams.
        _wipe_internal_state_for_tests()
        sys.stdout, sys.stderr = orig_stdout, orig_stderr

    def tearDown(self):
        self._reset()

    @patch("colorama.ansitowin32.winapi_test", lambda: True)
    def testJustFixWindowsConsole(self):
        if sys.platform != "win32":
            # just_fix_windows_console should be a no-op
            just_fix_windows_console()
            self.assertIs(sys.stdout, orig_stdout)
            self.assertIs(sys.stderr, orig_stderr)
            return

        def fake_std():
            # Emulate stdout=not a tty, stderr=tty
            # to check that we handle both cases correctly
            for attr, is_tty, fd in (("stdout", False, 1), ("stderr", True, 2)):
                stream = Mock()
                stream.closed = False
                stream.isatty.return_value = is_tty
                stream.fileno.return_value = fd
                setattr(sys, attr, stream)

        for native_ansi in [False, True]:
            with patch(
                'colorama.ansitowin32.enable_vt_processing',
                lambda *_: native_ansi
            ):
                self._reset()
                fake_std()

                # Regular single-call test
                before_out, before_err = sys.stdout, sys.stderr
                just_fix_windows_console()
                self.assertIs(sys.stdout, before_out)
                if native_ansi:
                    self.assertIs(sys.stderr, before_err)
                else:
                    self.assertIsNot(sys.stderr, before_err)

                # second call without resetting is always a no-op
                before_out, before_err = sys.stdout, sys.stderr
                just_fix_windows_console()
                self.assertIs(sys.stdout, before_out)
                self.assertIs(sys.stderr, before_err)

                self._reset()
                fake_std()

                # If init() runs first, just_fix_windows_console should be a no-op
                init()
                before_out, before_err = sys.stdout, sys.stderr
                just_fix_windows_console()
                self.assertIs(before_out, sys.stdout)
                self.assertIs(before_err, sys.stderr)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,57 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
import sys
|
||||
from unittest import TestCase, main
|
||||
|
||||
from ..ansitowin32 import StreamWrapper, AnsiToWin32
|
||||
from .utils import pycharm, replace_by, replace_original_by, StreamTTY, StreamNonTTY
|
||||
|
||||
|
||||
def is_a_tty(stream):
    """Return what ``StreamWrapper.isatty()`` reports for *stream*.

    The wrapper is built with ``None`` as its second constructor argument.
    """
    wrapper = StreamWrapper(stream, None)
    return wrapper.isatty()
|
||||
|
||||
class IsattyTest(TestCase):
    """tty detection checks, with and without the ``pycharm()`` context."""

    def test_TTY(self):
        stream = StreamTTY()
        self.assertTrue(is_a_tty(stream))
        with pycharm():
            self.assertTrue(is_a_tty(stream))

    def test_nonTTY(self):
        stream = StreamNonTTY()
        self.assertFalse(is_a_tty(stream))
        with pycharm():
            self.assertFalse(is_a_tty(stream))

    def test_withPycharm(self):
        with pycharm():
            self.assertTrue(is_a_tty(sys.stderr))
            self.assertTrue(is_a_tty(sys.stdout))

    def test_withPycharmTTYOverride(self):
        stream = StreamTTY()
        with pycharm():
            with replace_by(stream):
                self.assertTrue(is_a_tty(stream))

    def test_withPycharmNonTTYOverride(self):
        stream = StreamNonTTY()
        with pycharm():
            with replace_by(stream):
                self.assertFalse(is_a_tty(stream))

    def test_withPycharmNoneOverride(self):
        with pycharm(), replace_by(None), replace_original_by(None):
            self.assertFalse(is_a_tty(None))
            self.assertFalse(is_a_tty(StreamNonTTY()))
            self.assertTrue(is_a_tty(StreamTTY()))

    def test_withPycharmStreamWrapped(self):
        with pycharm():
            # Here the check goes through AnsiToWin32's ``.stream`` attribute.
            self.assertTrue(AnsiToWin32(StreamTTY()).stream.isatty())
            self.assertFalse(AnsiToWin32(StreamNonTTY()).stream.isatty())
            self.assertTrue(AnsiToWin32(sys.stdout).stream.isatty())
            self.assertTrue(AnsiToWin32(sys.stderr).stream.isatty())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,49 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
from contextlib import contextmanager
|
||||
from io import StringIO
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
class StreamTTY(StringIO):
    """In-memory text stream that always claims to be a terminal."""

    def isatty(self):
        """Report this stream as a tty, unconditionally."""
        return True
|
||||
|
||||
class StreamNonTTY(StringIO):
    """In-memory text stream that never claims to be a terminal."""

    def isatty(self):
        """Report this stream as not a tty, unconditionally."""
        return False
|
||||
|
||||
@contextmanager
def osname(name):
    """Temporarily set ``os.name`` to *name* for the duration of a ``with``.

    The previous value is restored when the block exits, including when the
    block raises (for example a failing test assertion), so one failing test
    cannot leak a fake platform name into later tests.
    """
    orig = os.name
    os.name = name
    try:
        yield
    finally:
        os.name = orig
|
||||
|
||||
@contextmanager
def replace_by(stream):
    """Temporarily point both ``sys.stdout`` and ``sys.stderr`` at *stream*.

    The streams that were active on entry are put back when the block exits,
    including when the block raises, so a failing test does not leave the
    interpreter writing to the substitute stream.
    """
    orig_stdout = sys.stdout
    orig_stderr = sys.stderr
    sys.stdout = stream
    sys.stderr = stream
    try:
        yield
    finally:
        sys.stdout = orig_stdout
        sys.stderr = orig_stderr
|
||||
|
||||
@contextmanager
def replace_original_by(stream):
    """Temporarily point ``sys.__stdout__`` and ``sys.__stderr__`` at *stream*.

    The values present on entry are restored when the block exits, including
    when the block raises.
    """
    orig_stdout = sys.__stdout__
    orig_stderr = sys.__stderr__
    sys.__stdout__ = stream
    sys.__stderr__ = stream
    try:
        yield
    finally:
        sys.__stdout__ = orig_stdout
        sys.__stderr__ = orig_stderr
|
||||
|
||||
@contextmanager
def pycharm():
    """Run the ``with`` body with ``PYCHARM_HOSTED=1`` and non-tty streams.

    While active, ``PYCHARM_HOSTED`` is set to ``"1"`` in ``os.environ`` and
    ``sys.stdout`` / ``sys.stderr`` / ``sys.__stdout__`` / ``sys.__stderr__``
    are all replaced by one ``StreamNonTTY`` instance.

    On exit, even if the body raises, the environment variable is returned
    to the state it had on entry: removed if it was absent, or reset to its
    earlier value if it was already set (so running the suite from an
    environment that defines it does not lose that setting).
    """
    previous = os.environ.get("PYCHARM_HOSTED")
    os.environ["PYCHARM_HOSTED"] = "1"
    non_tty = StreamNonTTY()
    try:
        with replace_by(non_tty), replace_original_by(non_tty):
            yield
    finally:
        if previous is None:
            # pop() rather than del: tolerate a body that already removed it.
            os.environ.pop("PYCHARM_HOSTED", None)
        else:
            os.environ["PYCHARM_HOSTED"] = previous
|
||||
@@ -0,0 +1,131 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
import sys
|
||||
from unittest import TestCase, main, skipUnless
|
||||
|
||||
try:
|
||||
from unittest.mock import Mock, patch
|
||||
except ImportError:
|
||||
from mock import Mock, patch
|
||||
|
||||
from ..winterm import WinColor, WinStyle, WinTerm
|
||||
|
||||
|
||||
class WinTermTest(TestCase):
    """Tests for WinTerm attribute bookkeeping and console calls."""

    @patch('colorama.winterm.win32')
    def testInit(self, mockWin32):
        # The asserted split of this word is fore=7, back=6, style=8.
        screen_info = Mock()
        screen_info.wAttributes = 7 + 6 * 16 + 8
        mockWin32.GetConsoleScreenBufferInfo.return_value = screen_info
        term = WinTerm()
        self.assertEqual(term._fore, 7)
        self.assertEqual(term._back, 6)
        self.assertEqual(term._style, 8)

    @skipUnless(sys.platform.startswith("win"), "requires Windows")
    def testGetAttrs(self):
        term = WinTerm()

        term._fore = term._back = term._style = 0
        self.assertEqual(term.get_attrs(), 0)

        term._fore = WinColor.YELLOW
        self.assertEqual(term.get_attrs(), WinColor.YELLOW)

        term._back = WinColor.MAGENTA
        expected = WinColor.YELLOW + WinColor.MAGENTA * 16
        self.assertEqual(term.get_attrs(), expected)

        term._style = WinStyle.BRIGHT
        expected = WinColor.YELLOW + WinColor.MAGENTA * 16 + WinStyle.BRIGHT
        self.assertEqual(term.get_attrs(), expected)

    @patch('colorama.winterm.win32')
    def testResetAll(self, mockWin32):
        screen_info = Mock()
        screen_info.wAttributes = 1 + 2 * 16 + 8
        mockWin32.GetConsoleScreenBufferInfo.return_value = screen_info
        term = WinTerm()

        term.set_console = Mock()
        # Scribble over the state so the reset is observable.
        term._fore = term._back = term._style = -1

        term.reset_all()

        self.assertEqual(term._fore, 1)
        self.assertEqual(term._back, 2)
        self.assertEqual(term._style, 8)
        self.assertEqual(term.set_console.called, True)

    @skipUnless(sys.platform.startswith("win"), "requires Windows")
    def testFore(self):
        term = WinTerm()
        term.set_console = Mock()
        term._fore = 0

        term.fore(5)

        self.assertEqual(term._fore, 5)
        self.assertEqual(term.set_console.called, True)

    @skipUnless(sys.platform.startswith("win"), "requires Windows")
    def testBack(self):
        term = WinTerm()
        term.set_console = Mock()
        term._back = 0

        term.back(5)

        self.assertEqual(term._back, 5)
        self.assertEqual(term.set_console.called, True)

    @skipUnless(sys.platform.startswith("win"), "requires Windows")
    def testStyle(self):
        term = WinTerm()
        term.set_console = Mock()
        term._style = 0

        term.style(22)

        self.assertEqual(term._style, 22)
        self.assertEqual(term.set_console.called, True)

    @patch('colorama.winterm.win32')
    def testSetConsole(self, mockWin32):
        screen_info = Mock()
        screen_info.wAttributes = 0
        mockWin32.GetConsoleScreenBufferInfo.return_value = screen_info
        term = WinTerm()
        term.windll = Mock()

        term.set_console()

        # call_args compares equal to an (args, kwargs) pair.
        recorded = mockWin32.SetConsoleTextAttribute.call_args
        self.assertEqual(recorded, ((mockWin32.STDOUT, term.get_attrs()), {}))

    @patch('colorama.winterm.win32')
    def testSetConsoleOnStderr(self, mockWin32):
        screen_info = Mock()
        screen_info.wAttributes = 0
        mockWin32.GetConsoleScreenBufferInfo.return_value = screen_info
        term = WinTerm()
        term.windll = Mock()

        term.set_console(on_stderr=True)

        recorded = mockWin32.SetConsoleTextAttribute.call_args
        self.assertEqual(recorded, ((mockWin32.STDERR, term.get_attrs()), {}))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -4,6 +4,8 @@
|
||||
STDOUT = -11
|
||||
STDERR = -12
|
||||
|
||||
ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
|
||||
|
||||
try:
|
||||
import ctypes
|
||||
from ctypes import LibraryLoader
|
||||
@@ -89,6 +91,20 @@ else:
|
||||
]
|
||||
_SetConsoleTitleW.restype = wintypes.BOOL
|
||||
|
||||
_GetConsoleMode = windll.kernel32.GetConsoleMode
|
||||
_GetConsoleMode.argtypes = [
|
||||
wintypes.HANDLE,
|
||||
POINTER(wintypes.DWORD)
|
||||
]
|
||||
_GetConsoleMode.restype = wintypes.BOOL
|
||||
|
||||
_SetConsoleMode = windll.kernel32.SetConsoleMode
|
||||
_SetConsoleMode.argtypes = [
|
||||
wintypes.HANDLE,
|
||||
wintypes.DWORD
|
||||
]
|
||||
_SetConsoleMode.restype = wintypes.BOOL
|
||||
|
||||
def _winapi_test(handle):
|
||||
csbi = CONSOLE_SCREEN_BUFFER_INFO()
|
||||
success = _GetConsoleScreenBufferInfo(
|
||||
@@ -150,3 +166,15 @@ else:
|
||||
|
||||
def SetConsoleTitle(title):
|
||||
return _SetConsoleTitleW(title)
|
||||
|
||||
def GetConsoleMode(handle):
    """Return the mode value ``_GetConsoleMode`` writes for *handle*.

    Raises ``ctypes.WinError()`` when the underlying call reports failure.
    """
    mode = wintypes.DWORD()
    # The binding fills ``mode`` through the pointer and returns a BOOL.
    if not _GetConsoleMode(handle, byref(mode)):
        raise ctypes.WinError()
    return mode.value
|
||||
|
||||
def SetConsoleMode(handle, mode):
    """Pass *handle* and *mode* to ``_SetConsoleMode``.

    Raises ``ctypes.WinError()`` when the underlying call reports failure;
    otherwise returns ``None``.
    """
    if not _SetConsoleMode(handle, mode):
        raise ctypes.WinError()
|
||||
|
||||
@@ -1,7 +1,13 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
from . import win32
|
||||
try:
|
||||
from msvcrt import get_osfhandle
|
||||
except ImportError:
|
||||
def get_osfhandle(_):
|
||||
raise OSError("This isn't windows!")
|
||||
|
||||
|
||||
from . import win32
|
||||
|
||||
# from wincon.h
|
||||
class WinColor(object):
|
||||
BLACK = 0
|
||||
@@ -167,3 +173,23 @@ class WinTerm(object):
|
||||
|
||||
def set_title(self, title):
|
||||
win32.SetConsoleTitle(title)
|
||||
|
||||
|
||||
def enable_vt_processing(fd):
    """Try to switch on virtual-terminal processing for a console.

    :param fd: file descriptor handed to ``get_osfhandle`` to obtain the
        console handle.
    :returns: ``True`` when, after the attempt, the console mode read back
        has the ``ENABLE_VIRTUAL_TERMINAL_PROCESSING`` bit set. ``False`` in
        every other case: ``win32.windll`` is ``None``, ``winapi_test()``
        fails, one of the calls raises ``OSError`` / ``TypeError``, or the
        bit is not set afterwards. The result is always a ``bool``.
    """
    if win32.windll is None or not win32.winapi_test():
        return False

    try:
        handle = get_osfhandle(fd)
        mode = win32.GetConsoleMode(handle)
        win32.SetConsoleMode(
            handle,
            mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING,
        )

        # Read the mode back instead of trusting the set call: only report
        # success if the flag is really present now.
        mode = win32.GetConsoleMode(handle)
        return bool(mode & win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING)
    # Can get TypeError in testsuite where 'fd' is a Mock()
    except (OSError, TypeError):
        return False
|
||||
|
||||
@@ -55,7 +55,7 @@ except ImportError:
|
||||
# Python 3.7
|
||||
TypedDict = dict
|
||||
|
||||
__version__ = "1.7.0"
|
||||
__version__ = "1.8.0"
|
||||
|
||||
|
||||
class VersionDict(TypedDict):
|
||||
@@ -122,6 +122,26 @@ _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN = re.compile(
|
||||
# Pattern for base file name of distro release file
|
||||
_DISTRO_RELEASE_BASENAME_PATTERN = re.compile(r"(\w+)[-_](release|version)$")
|
||||
|
||||
# Base file names to be looked up for if _UNIXCONFDIR is not readable.
|
||||
_DISTRO_RELEASE_BASENAMES = [
|
||||
"SuSE-release",
|
||||
"arch-release",
|
||||
"base-release",
|
||||
"centos-release",
|
||||
"fedora-release",
|
||||
"gentoo-release",
|
||||
"mageia-release",
|
||||
"mandrake-release",
|
||||
"mandriva-release",
|
||||
"mandrivalinux-release",
|
||||
"manjaro-release",
|
||||
"oracle-release",
|
||||
"redhat-release",
|
||||
"rocky-release",
|
||||
"sl-release",
|
||||
"slackware-version",
|
||||
]
|
||||
|
||||
# Base file names to be ignored when searching for distro release file
|
||||
_DISTRO_RELEASE_IGNORE_BASENAMES = (
|
||||
"debian_version",
|
||||
@@ -200,6 +220,7 @@ def id() -> str:
|
||||
"opensuse" openSUSE
|
||||
"amzn" Amazon Linux
|
||||
"arch" Arch Linux
|
||||
"buildroot" Buildroot
|
||||
"cloudlinux" CloudLinux OS
|
||||
"exherbo" Exherbo Linux
|
||||
"gentoo" GenToo Linux
|
||||
@@ -221,6 +242,7 @@ def id() -> str:
|
||||
"midnightbsd" MidnightBSD
|
||||
"rocky" Rocky Linux
|
||||
"aix" AIX
|
||||
"guix" Guix System
|
||||
============== =========================================
|
||||
|
||||
If you have a need to get distros for reliable IDs added into this set,
|
||||
@@ -876,6 +898,9 @@ class LinuxDistribution:
|
||||
if self.uname_attr("id").startswith("aix"):
|
||||
# On AIX platforms, prefer oslevel command output.
|
||||
versions.insert(0, self.oslevel_info())
|
||||
elif self.id() == "debian" or "debian" in self.like().split():
|
||||
# On Debian-like, add debian_version file content to candidates list.
|
||||
versions.append(self._debian_version)
|
||||
version = ""
|
||||
if best:
|
||||
# This algorithm uses the last version in priority order that has
|
||||
@@ -1186,6 +1211,16 @@ class LinuxDistribution:
|
||||
return ""
|
||||
return self._to_str(stdout).strip()
|
||||
|
||||
@cached_property
def _debian_version(self) -> str:
    """First line of ``<etc_dir>/debian_version``, right-stripped.

    Returns an empty string when that file does not exist. The file is
    opened with the ``ascii`` encoding.
    """
    version_path = os.path.join(self.etc_dir, "debian_version")
    try:
        with open(version_path, encoding="ascii") as fp:
            first_line = fp.readline()
    except FileNotFoundError:
        return ""
    return first_line.rstrip()
|
||||
|
||||
@staticmethod
|
||||
def _parse_uname_content(lines: Sequence[str]) -> Dict[str, str]:
|
||||
if not lines:
|
||||
@@ -1228,14 +1263,14 @@ class LinuxDistribution:
|
||||
# file), because we want to use what was specified as best as
|
||||
# possible.
|
||||
match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename)
|
||||
if "name" in distro_info and "cloudlinux" in distro_info["name"].lower():
|
||||
distro_info["id"] = "cloudlinux"
|
||||
elif match:
|
||||
distro_info["id"] = match.group(1)
|
||||
return distro_info
|
||||
else:
|
||||
try:
|
||||
basenames = os.listdir(self.etc_dir)
|
||||
basenames = [
|
||||
basename
|
||||
for basename in os.listdir(self.etc_dir)
|
||||
if basename not in _DISTRO_RELEASE_IGNORE_BASENAMES
|
||||
and os.path.isfile(os.path.join(self.etc_dir, basename))
|
||||
]
|
||||
# We sort for repeatability in cases where there are multiple
|
||||
# distro specific files; e.g. CentOS, Oracle, Enterprise all
|
||||
# containing `redhat-release` on top of their own.
|
||||
@@ -1245,39 +1280,29 @@ class LinuxDistribution:
|
||||
# sure about the *-release files. Check common entries of
|
||||
# /etc for information. If they turn out to not be there the
|
||||
# error is handled in `_parse_distro_release_file()`.
|
||||
basenames = [
|
||||
"SuSE-release",
|
||||
"arch-release",
|
||||
"base-release",
|
||||
"centos-release",
|
||||
"fedora-release",
|
||||
"gentoo-release",
|
||||
"mageia-release",
|
||||
"mandrake-release",
|
||||
"mandriva-release",
|
||||
"mandrivalinux-release",
|
||||
"manjaro-release",
|
||||
"oracle-release",
|
||||
"redhat-release",
|
||||
"rocky-release",
|
||||
"sl-release",
|
||||
"slackware-version",
|
||||
]
|
||||
basenames = _DISTRO_RELEASE_BASENAMES
|
||||
for basename in basenames:
|
||||
if basename in _DISTRO_RELEASE_IGNORE_BASENAMES:
|
||||
continue
|
||||
match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename)
|
||||
if match:
|
||||
filepath = os.path.join(self.etc_dir, basename)
|
||||
distro_info = self._parse_distro_release_file(filepath)
|
||||
if "name" in distro_info:
|
||||
# The name is always present if the pattern matches
|
||||
self.distro_release_file = filepath
|
||||
distro_info["id"] = match.group(1)
|
||||
if "cloudlinux" in distro_info["name"].lower():
|
||||
distro_info["id"] = "cloudlinux"
|
||||
return distro_info
|
||||
return {}
|
||||
if match is None:
|
||||
continue
|
||||
filepath = os.path.join(self.etc_dir, basename)
|
||||
distro_info = self._parse_distro_release_file(filepath)
|
||||
# The name is always present if the pattern matches.
|
||||
if "name" not in distro_info:
|
||||
continue
|
||||
self.distro_release_file = filepath
|
||||
break
|
||||
else: # the loop didn't "break": no candidate.
|
||||
return {}
|
||||
|
||||
if match is not None:
|
||||
distro_info["id"] = match.group(1)
|
||||
|
||||
# CloudLinux < 7: manually enrich info with proper id.
|
||||
if "cloudlinux" in distro_info.get("name", "").lower():
|
||||
distro_info["id"] = "cloudlinux"
|
||||
|
||||
return distro_info
|
||||
|
||||
def _parse_distro_release_file(self, filepath: str) -> Dict[str, str]:
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2010-202x The platformdirs developers
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -1,22 +0,0 @@
|
||||
# This is the MIT license
|
||||
|
||||
Copyright (c) 2010 ActiveState Software Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
@@ -7,13 +7,15 @@ from __future__ import annotations
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipenv.patched.pip._vendor.typing_extensions import Literal # pragma: no cover
|
||||
if sys.version_info >= (3, 8): # pragma: no cover (py38+)
|
||||
from typing import Literal
|
||||
else: # pragma: no cover (py38+)
|
||||
from pipenv.patched.pip._vendor.typing_extensions import Literal
|
||||
|
||||
from .api import PlatformDirsABC
|
||||
from .version import __version__, __version_info__
|
||||
from .version import __version__
|
||||
from .version import __version_tuple__ as __version_info__
|
||||
|
||||
|
||||
def _set_platform_dir_class() -> type[PlatformDirsABC]:
|
||||
@@ -26,7 +28,7 @@ def _set_platform_dir_class() -> type[PlatformDirsABC]:
|
||||
|
||||
if os.getenv("ANDROID_DATA") == "/data" and os.getenv("ANDROID_ROOT") == "/system":
|
||||
|
||||
if os.getenv("SHELL") is not None:
|
||||
if os.getenv("SHELL") or os.getenv("PREFIX"):
|
||||
return Result
|
||||
|
||||
from pipenv.patched.pip._vendor.platformdirs.android import _android_folder
|
||||
|
||||
@@ -107,9 +107,9 @@ class Unix(PlatformDirsABC):
|
||||
@property
|
||||
def user_log_dir(self) -> str:
|
||||
"""
|
||||
:return: log directory tied to the user, same as `user_data_dir` if not opinionated else ``log`` in it
|
||||
:return: log directory tied to the user, same as `user_state_dir` if not opinionated else ``log`` in it
|
||||
"""
|
||||
path = self.user_cache_dir
|
||||
path = self.user_state_dir
|
||||
if self.opinion:
|
||||
path = os.path.join(path, "log")
|
||||
return path
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Version information"""
|
||||
|
||||
__version__ = "2.5.2"
|
||||
__version_info__ = (2, 5, 2)
|
||||
# file generated by setuptools_scm
|
||||
# don't change, don't track in version control
|
||||
__version__ = version = '2.6.2'
|
||||
__version_tuple__ = version_tuple = (2, 6, 2)
|
||||
|
||||
@@ -2,6 +2,7 @@ from __future__ import annotations
|
||||
|
||||
import ctypes
|
||||
import os
|
||||
import sys
|
||||
from functools import lru_cache
|
||||
from typing import Callable
|
||||
|
||||
@@ -132,7 +133,8 @@ def get_win_folder_from_registry(csidl_name: str) -> str:
|
||||
}.get(csidl_name)
|
||||
if shell_folder_name is None:
|
||||
raise ValueError(f"Unknown CSIDL name: {csidl_name}")
|
||||
|
||||
if sys.platform != "win32": # only needed for mypy type checker to know that this code runs only on Windows
|
||||
raise NotImplementedError
|
||||
import winreg
|
||||
|
||||
key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders")
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
"""Wrappers to call pyproject.toml-based build backend hooks.
|
||||
"""
|
||||
|
||||
from ._impl import (
|
||||
BackendInvalid,
|
||||
BackendUnavailable,
|
||||
BuildBackendHookCaller,
|
||||
HookMissing,
|
||||
UnsupportedOperation,
|
||||
default_subprocess_runner,
|
||||
quiet_subprocess_runner,
|
||||
)
|
||||
|
||||
__version__ = '1.0.0'
|
||||
__all__ = [
|
||||
'BackendUnavailable',
|
||||
'BackendInvalid',
|
||||
'HookMissing',
|
||||
'UnsupportedOperation',
|
||||
'default_subprocess_runner',
|
||||
'quiet_subprocess_runner',
|
||||
'BuildBackendHookCaller',
|
||||
]
|
||||
@@ -0,0 +1,330 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from contextlib import contextmanager
|
||||
from os.path import abspath
|
||||
from os.path import join as pjoin
|
||||
from subprocess import STDOUT, check_call, check_output
|
||||
|
||||
from ._in_process import _in_proc_script_path
|
||||
|
||||
|
||||
def write_json(obj, path, **kwargs):
    """Serialise *obj* as JSON into the UTF-8 text file at *path*.

    Extra keyword arguments are forwarded unchanged to ``json.dump``.
    """
    with open(path, 'w', encoding='utf-8') as stream:
        json.dump(obj, stream, **kwargs)
|
||||
|
||||
|
||||
def read_json(path):
    """Parse and return the JSON document stored in the UTF-8 file *path*."""
    with open(path, encoding='utf-8') as stream:
        document = json.load(stream)
    return document
|
||||
|
||||
|
||||
class BackendUnavailable(Exception):
    """Will be raised if the backend cannot be imported in the hook process."""

    def __init__(self, traceback):
        # ``traceback`` is kept both as the exception argument and as an
        # attribute callers can read.
        super().__init__(traceback)
        self.traceback = traceback
|
||||
|
||||
|
||||
class BackendInvalid(Exception):
    """Will be raised if the backend is invalid."""

    def __init__(self, backend_name, backend_path, message):
        # Only ``message`` becomes the exception text; the backend name and
        # path are exposed as attributes.
        self.backend_name = backend_name
        self.backend_path = backend_path
        super().__init__(message)
|
||||
|
||||
|
||||
class HookMissing(Exception):
    """Will be raised on missing hooks (if a fallback can't be used)."""

    def __init__(self, hook_name):
        # The hook name is both the exception text and a named attribute.
        self.hook_name = hook_name
        super().__init__(hook_name)
|
||||
|
||||
|
||||
class UnsupportedOperation(Exception):
    """May be raised by build_sdist if the backend indicates that it can't."""

    def __init__(self, traceback):
        # ``traceback`` is kept both as the exception argument and as an
        # attribute callers can read.
        super().__init__(traceback)
        self.traceback = traceback
|
||||
|
||||
|
||||
def default_subprocess_runner(cmd, cwd=None, extra_environ=None):
    """The default method of calling the wrapper subprocess.

    This uses :func:`subprocess.check_call` under the hood.

    The child receives a copy of the current environment, updated with
    *extra_environ* when that is given and non-empty.
    """
    child_env = dict(os.environ)
    if extra_environ:
        child_env.update(extra_environ)

    check_call(cmd, cwd=cwd, env=child_env)
|
||||
|
||||
|
||||
def quiet_subprocess_runner(cmd, cwd=None, extra_environ=None):
    """Call the subprocess while suppressing output.

    This uses :func:`subprocess.check_output` under the hood.

    The child receives a copy of the current environment, updated with
    *extra_environ* when that is given and non-empty; its stderr is merged
    into the captured stdout and the captured output is discarded.
    """
    child_env = dict(os.environ)
    if extra_environ:
        child_env.update(extra_environ)

    check_output(cmd, cwd=cwd, env=child_env, stderr=STDOUT)
|
||||
|
||||
|
||||
def norm_and_check(source_tree, requested):
    """Normalise and check a backend path.

    Ensure that the requested backend path is specified as a relative path,
    and resolves to a location under the given source tree.

    Return an absolute version of the requested path.

    :param source_tree: directory the backend path must stay inside.
    :param requested: backend path, relative to *source_tree*.
    :raises ValueError: if *requested* is absolute, or if it resolves to a
        location outside *source_tree* (including a sibling directory whose
        name merely starts with the source tree's name).
    """
    if os.path.isabs(requested):
        raise ValueError("paths must be relative")

    abs_source = os.path.abspath(source_tree)
    abs_requested = os.path.normpath(os.path.join(abs_source, requested))
    # Compare whole path components, not characters: a character-based
    # prefix test would accept "/src/proj_evil" as being inside "/src/proj".
    # Case is normalised first so case-insensitive platforms compare equal.
    norm_source = os.path.normcase(abs_source)
    norm_requested = os.path.normcase(abs_requested)
    try:
        common = os.path.commonpath([norm_source, norm_requested])
    except ValueError:
        # commonpath() refuses paths without a shared root (e.g. different
        # drives); such a path is certainly outside the tree.
        common = None
    if common != norm_source:
        raise ValueError("paths must be inside source tree")

    return abs_requested
|
||||
|
||||
|
||||
class BuildBackendHookCaller:
    """Run the PEP 517 hooks of a build backend for one source directory.

    Each hook is executed by a child Python process started through the
    configured subprocess runner; the hook arguments and its result are
    exchanged through JSON files in a temporary control directory.
    """

    def __init__(
            self,
            source_dir,
            build_backend,
            backend_path=None,
            runner=None,
            python_executable=None,
    ):
        """
        :param source_dir: The source directory to invoke the build backend for
        :param build_backend: The build backend spec
        :param backend_path: Additional path entries for the build backend spec
        :param runner: The :ref:`subprocess runner <Subprocess Runners>` to use
        :param python_executable:
            The Python executable used to invoke the build backend
        """
        self.source_dir = abspath(source_dir)
        self.build_backend = build_backend
        if backend_path:
            # Every entry must be relative and stay inside the source tree.
            backend_path = [
                norm_and_check(self.source_dir, entry)
                for entry in backend_path
            ]
        self.backend_path = backend_path
        self._subprocess_runner = (
            default_subprocess_runner if runner is None else runner
        )
        self.python_executable = python_executable or sys.executable

    @contextmanager
    def subprocess_runner(self, runner):
        """Temporarily replace the :ref:`subprocess runner <Subprocess Runners>`.

        The previous runner is restored when the ``with`` block exits, even
        if the block raises.

        .. code-block:: python

            hook_caller = BuildBackendHookCaller(...)
            with hook_caller.subprocess_runner(quiet_subprocess_runner):
                ...
        """
        previous, self._subprocess_runner = self._subprocess_runner, runner
        try:
            yield
        finally:
            self._subprocess_runner = previous

    def _supported_features(self):
        """Return the list of optional features supported by the backend."""
        no_arguments = {}
        return self._call_hook('_supported_features', no_arguments)

    def get_requires_for_build_wheel(self, config_settings=None):
        """Get additional dependencies required for building a wheel.

        :returns: A list of :pep:`dependency specifiers <508>`.
        :rtype: list[str]

        .. admonition:: Fallback

            If the build backend does not define a hook with this name, an
            empty list will be returned.
        """
        hook_kwargs = {'config_settings': config_settings}
        return self._call_hook('get_requires_for_build_wheel', hook_kwargs)

    def prepare_metadata_for_build_wheel(
            self, metadata_directory, config_settings=None,
            _allow_fallback=True):
        """Prepare a ``*.dist-info`` folder with metadata for this project.

        :returns: Name of the newly created subfolder within
                  ``metadata_directory``, containing the metadata.
        :rtype: str

        .. admonition:: Fallback

            If the build backend does not define a hook with this name and
            ``_allow_fallback`` is truthy, the backend will be asked to build a
            wheel via the ``build_wheel`` hook and the dist-info extracted from
            that will be returned.
        """
        hook_kwargs = {
            'metadata_directory': abspath(metadata_directory),
            'config_settings': config_settings,
            '_allow_fallback': _allow_fallback,
        }
        return self._call_hook('prepare_metadata_for_build_wheel', hook_kwargs)

    def build_wheel(
            self, wheel_directory, config_settings=None,
            metadata_directory=None):
        """Build a wheel from this project.

        :returns:
            The name of the newly created wheel within ``wheel_directory``.

        .. admonition:: Interaction with fallback

            If the ``build_wheel`` hook was called in the fallback for
            :meth:`prepare_metadata_for_build_wheel`, the build backend would
            not be invoked. Instead, the previously built wheel will be copied
            to ``wheel_directory`` and the name of that file will be returned.
        """
        if metadata_directory is not None:
            metadata_directory = abspath(metadata_directory)
        hook_kwargs = {
            'wheel_directory': abspath(wheel_directory),
            'config_settings': config_settings,
            'metadata_directory': metadata_directory,
        }
        return self._call_hook('build_wheel', hook_kwargs)

    def get_requires_for_build_editable(self, config_settings=None):
        """Get additional dependencies required for building an editable wheel.

        :returns: A list of :pep:`dependency specifiers <508>`.
        :rtype: list[str]

        .. admonition:: Fallback

            If the build backend does not define a hook with this name, an
            empty list will be returned.
        """
        hook_kwargs = {'config_settings': config_settings}
        return self._call_hook('get_requires_for_build_editable', hook_kwargs)

    def prepare_metadata_for_build_editable(
            self, metadata_directory, config_settings=None,
            _allow_fallback=True):
        """Prepare a ``*.dist-info`` folder with metadata for this project.

        :returns: Name of the newly created subfolder within
                  ``metadata_directory``, containing the metadata.
        :rtype: str

        .. admonition:: Fallback

            If the build backend does not define a hook with this name and
            ``_allow_fallback`` is truthy, the backend will be asked to build a
            wheel via the ``build_editable`` hook and the dist-info
            extracted from that will be returned.
        """
        hook_kwargs = {
            'metadata_directory': abspath(metadata_directory),
            'config_settings': config_settings,
            '_allow_fallback': _allow_fallback,
        }
        return self._call_hook(
            'prepare_metadata_for_build_editable', hook_kwargs)

    def build_editable(
            self, wheel_directory, config_settings=None,
            metadata_directory=None):
        """Build an editable wheel from this project.

        :returns:
            The name of the newly created wheel within ``wheel_directory``.

        .. admonition:: Interaction with fallback

            If the ``build_editable`` hook was called in the fallback for
            :meth:`prepare_metadata_for_build_editable`, the build backend
            would not be invoked. Instead, the previously built wheel will be
            copied to ``wheel_directory`` and the name of that file will be
            returned.
        """
        if metadata_directory is not None:
            metadata_directory = abspath(metadata_directory)
        hook_kwargs = {
            'wheel_directory': abspath(wheel_directory),
            'config_settings': config_settings,
            'metadata_directory': metadata_directory,
        }
        return self._call_hook('build_editable', hook_kwargs)

    def get_requires_for_build_sdist(self, config_settings=None):
        """Get additional dependencies required for building an sdist.

        :returns: A list of :pep:`dependency specifiers <508>`.
        :rtype: list[str]
        """
        hook_kwargs = {'config_settings': config_settings}
        return self._call_hook('get_requires_for_build_sdist', hook_kwargs)

    def build_sdist(self, sdist_directory, config_settings=None):
        """Build an sdist from this project.

        :returns:
            The name of the newly created sdist within ``sdist_directory``.
        """
        hook_kwargs = {
            'sdist_directory': abspath(sdist_directory),
            'config_settings': config_settings,
        }
        return self._call_hook('build_sdist', hook_kwargs)

    def _call_hook(self, hook_name, kwargs):
        """Run ``hook_name`` in a subprocess and return its result.

        ``kwargs`` is written to ``input.json`` in a temporary directory, the
        in-process script is run with the hook name and that directory as
        arguments, and ``output.json`` is read back afterwards.

        :raises UnsupportedOperation: output is flagged ``unsupported``
        :raises BackendUnavailable: output is flagged ``no_backend``
        :raises BackendInvalid: output is flagged ``backend_invalid``
        :raises HookMissing: output is flagged ``hook_missing``
        """
        child_env = {'PEP517_BUILD_BACKEND': self.build_backend}
        if self.backend_path:
            child_env['PEP517_BACKEND_PATH'] = os.pathsep.join(
                self.backend_path)

        with tempfile.TemporaryDirectory() as control_dir:
            write_json(
                {'kwargs': kwargs}, pjoin(control_dir, 'input.json'), indent=2)

            # Run the hook in a subprocess
            with _in_proc_script_path() as script:
                command = [
                    self.python_executable,
                    abspath(str(script)),
                    hook_name,
                    control_dir,
                ]
                self._subprocess_runner(
                    command,
                    cwd=self.source_dir,
                    extra_environ=child_env
                )

            outcome = read_json(pjoin(control_dir, 'output.json'))
            if outcome.get('unsupported'):
                raise UnsupportedOperation(outcome.get('traceback', ''))
            if outcome.get('no_backend'):
                raise BackendUnavailable(outcome.get('traceback', ''))
            if outcome.get('backend_invalid'):
                raise BackendInvalid(
                    backend_name=self.build_backend,
                    backend_path=self.backend_path,
                    message=outcome.get('backend_error', '')
                )
            if outcome.get('hook_missing'):
                raise HookMissing(
                    outcome.get('missing_hook_name') or hook_name)
            return outcome['return_val']
|
||||
@@ -0,0 +1,18 @@
|
||||
"""This is a subpackage because the directory is on sys.path for _in_process.py
|
||||
|
||||
The subpackage should stay as empty as possible to avoid shadowing modules that
|
||||
the backend might import.
|
||||
"""
|
||||
|
||||
import importlib.resources as resources
|
||||
|
||||
if hasattr(resources, 'files'):
    def _in_proc_script_path():
        """Return a context manager yielding the path of ``_in_process.py``."""
        script = resources.files(__package__).joinpath('_in_process.py')
        return resources.as_file(script)
else:
    # Python 3.8 compatibility
    def _in_proc_script_path():
        """Return a context manager yielding the path of ``_in_process.py``."""
        return resources.path(__package__, '_in_process.py')
|
||||
@@ -0,0 +1,353 @@
|
||||
"""This is invoked in a subprocess to call the build backend hooks.
|
||||
|
||||
It expects:
|
||||
- Command line args: hook_name, control_dir
|
||||
- Environment variables:
|
||||
PEP517_BUILD_BACKEND=entry.point:spec
|
||||
PEP517_BACKEND_PATH=paths (separated with os.pathsep)
|
||||
- control_dir/input.json:
|
||||
- {"kwargs": {...}}
|
||||
|
||||
Results:
|
||||
- control_dir/output.json
|
||||
- {"return_val": ...}
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import os.path
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import traceback
|
||||
from glob import glob
|
||||
from importlib import import_module
|
||||
from os.path import join as pjoin
|
||||
|
||||
# This file is run as a script, and `import wrappers` is not zip-safe, so we
|
||||
# include write_json() and read_json() from wrappers.py.
|
||||
|
||||
|
||||
def write_json(obj, path, **kwargs):
    """Dump ``obj`` as JSON into the UTF-8 text file at ``path``.

    Extra keyword arguments are forwarded to ``json.dump``.
    """
    with open(path, 'w', encoding='utf-8') as stream:
        json.dump(obj, stream, **kwargs)
|
||||
|
||||
|
||||
def read_json(path):
    """Parse and return the JSON document stored in the UTF-8 file ``path``."""
    with open(path, encoding='utf-8') as stream:
        text = stream.read()
    return json.loads(text)
|
||||
|
||||
|
||||
class BackendUnavailable(Exception):
    """Signals that the build backend could not be imported."""

    def __init__(self, traceback):
        # Traceback text supplied by the raiser; main() copies it to the output.
        self.traceback = traceback
|
||||
|
||||
|
||||
class BackendInvalid(Exception):
    """Signals that the backend was found but is not acceptable."""

    def __init__(self, message):
        # Human-readable reason; main() copies it to the output.
        self.message = message
|
||||
|
||||
|
||||
class HookMissing(Exception):
    """Signals a missing hook when no fallback is being executed.

    ``hook_name`` may be None, in which case main() reports the name of the
    hook that was originally requested.
    """

    def __init__(self, hook_name=None):
        Exception.__init__(self, hook_name)
        self.hook_name = hook_name
|
||||
|
||||
|
||||
def contained_in(filename, directory):
    """Test if a file is located within the given directory.

    Both paths are made absolute and case-normalised before comparison.
    The comparison is done on whole path components, so a sibling directory
    that merely shares a name prefix (``/src/backend_evil`` versus
    ``/src/backend``) is not treated as contained.

    :param filename: Path of the file to check.
    :param directory: Path of the directory that should contain it.
    :returns: True if ``filename`` is ``directory`` itself or lies below it.
    """
    filename = os.path.normcase(os.path.abspath(filename))
    directory = os.path.normcase(os.path.abspath(directory))
    try:
        return os.path.commonpath([filename, directory]) == directory
    except ValueError:
        # commonpath() refuses paths with nothing in common at all, e.g.
        # files on different Windows drives; those are never contained.
        return False
|
||||
|
||||
|
||||
def _build_backend():
    """Import the build backend named by the environment and return it.

    ``PEP517_BUILD_BACKEND`` holds ``module[:object.path]``. When
    ``PEP517_BACKEND_PATH`` is set, its entries are put at the front of
    ``sys.path`` and the imported module must come from one of them.

    Raises BackendUnavailable if the import fails and BackendInvalid if the
    module was loaded from outside the backend path.
    """
    in_tree_dirs = []
    raw_backend_path = os.environ.get('PEP517_BACKEND_PATH')
    if raw_backend_path:
        # Add in-tree backend directories to the front of sys.path.
        in_tree_dirs = raw_backend_path.split(os.pathsep)
        sys.path[:0] = in_tree_dirs

    entry_point = os.environ['PEP517_BUILD_BACKEND']
    module_name, _, attr_chain = entry_point.partition(':')
    try:
        backend = import_module(module_name)
    except ImportError:
        raise BackendUnavailable(traceback.format_exc())

    if raw_backend_path:
        loaded_in_tree = False
        for directory in in_tree_dirs:
            if contained_in(backend.__file__, directory):
                loaded_in_tree = True
                break
        if not loaded_in_tree:
            raise BackendInvalid("Backend was not loaded from backend-path")

    if attr_chain:
        # Walk the dotted object path below the module.
        for attr_name in attr_chain.split('.'):
            backend = getattr(backend, attr_name)
    return backend
|
||||
|
||||
|
||||
def _supported_features():
    """Return the list of optional features supported by the backend.

    Returns a list of strings.
    The only possible value is 'build_editable'.
    """
    backend = _build_backend()
    return ["build_editable"] if hasattr(backend, "build_editable") else []
|
||||
|
||||
|
||||
def get_requires_for_build_wheel(config_settings):
    """Call the backend's optional ``get_requires_for_build_wheel`` hook.

    Returns [] when the backend has no such hook.
    """
    backend = _build_backend()
    try:
        requires_hook = backend.get_requires_for_build_wheel
    except AttributeError:
        return []
    return requires_hook(config_settings)
|
||||
|
||||
|
||||
def get_requires_for_build_editable(config_settings):
    """Call the backend's optional ``get_requires_for_build_editable`` hook.

    Returns [] when the backend has no such hook.
    """
    backend = _build_backend()
    try:
        requires_hook = backend.get_requires_for_build_editable
    except AttributeError:
        return []
    return requires_hook(config_settings)
|
||||
|
||||
|
||||
def prepare_metadata_for_build_wheel(
        metadata_directory, config_settings, _allow_fallback):
    """Call the backend's optional ``prepare_metadata_for_build_wheel`` hook.

    If the hook is not defined, a wheel is built with ``build_wheel`` and
    its metadata is extracted instead, unless ``_allow_fallback`` is False,
    in which case HookMissing is raised.
    """
    backend = _build_backend()
    try:
        metadata_hook = backend.prepare_metadata_for_build_wheel
    except AttributeError:
        if not _allow_fallback:
            raise HookMissing()
    else:
        return metadata_hook(metadata_directory, config_settings)

    # The fallback runs outside the try block on purpose: that avoids
    # exception chaining, which would only confuse users here.
    wheel_name = backend.build_wheel(metadata_directory, config_settings)
    return _get_wheel_metadata_from_wheel(
        wheel_name, metadata_directory, config_settings)
|
||||
|
||||
|
||||
def prepare_metadata_for_build_editable(
        metadata_directory, config_settings, _allow_fallback):
    """Call the backend's optional ``prepare_metadata_for_build_editable`` hook.

    If the hook is not defined, an editable wheel is built with
    ``build_editable`` and its metadata is extracted instead, unless
    ``_allow_fallback`` is False, in which case HookMissing is raised.
    HookMissing is also raised (naming ``build_editable``) when the fallback
    hook itself is absent.
    """
    backend = _build_backend()
    try:
        metadata_hook = backend.prepare_metadata_for_build_editable
    except AttributeError:
        if not _allow_fallback:
            raise HookMissing()
        try:
            editable_hook = backend.build_editable
        except AttributeError:
            raise HookMissing(hook_name='build_editable')
        wheel_name = editable_hook(metadata_directory, config_settings)
        return _get_wheel_metadata_from_wheel(
            wheel_name, metadata_directory, config_settings)
    return metadata_hook(metadata_directory, config_settings)
|
||||
|
||||
|
||||
WHEEL_BUILT_MARKER = 'PEP517_ALREADY_BUILT_WHEEL'
|
||||
|
||||
|
||||
def _dist_info_files(whl_zip):
    """List the archive members that live in the wheel's .dist-info folder.

    ``whl_zip`` must offer ``namelist()``. Only names that start with a
    top-level ``<name>-<version>.dist-info/`` component are kept. Raises
    Exception when nothing matches.
    """
    matches = [
        member for member in whl_zip.namelist()
        if re.match(r'[^/\\]+-[^/\\]+\.dist-info/', member)
    ]
    if not matches:
        raise Exception("No .dist-info folder found in wheel")
    return matches
|
||||
|
||||
|
||||
def _get_wheel_metadata_from_wheel(
        whl_basename, metadata_directory, config_settings):
    """Extract the .dist-info folder of a built wheel.

    Used as the fallback when the backend offers no metadata-preparation
    hook. A marker file is created in ``metadata_directory`` first, then
    the dist-info members of the wheel are unpacked there. Returns the name
    of the dist-info folder. ``config_settings`` is accepted but not used.
    """
    from zipfile import ZipFile

    marker_path = os.path.join(metadata_directory, WHEEL_BUILT_MARKER)
    with open(marker_path, 'wb'):
        pass  # Touch marker file

    wheel_path = os.path.join(metadata_directory, whl_basename)
    with ZipFile(wheel_path) as archive:
        members = _dist_info_files(archive)
        archive.extractall(path=metadata_directory, members=members)
    first_member = members[0]
    return first_member.split('/')[0]
|
||||
|
||||
|
||||
def _find_already_built_wheel(metadata_directory):
    """Look for a wheel left behind by the metadata fallback.

    Returns the path of the single ``.whl`` file next to
    ``metadata_directory`` when the built-wheel marker file exists there;
    returns None in every other case.
    """
    if not metadata_directory:
        return None
    parent_dir = os.path.dirname(metadata_directory)
    if not os.path.isfile(pjoin(parent_dir, WHEEL_BUILT_MARKER)):
        return None

    candidates = glob(os.path.join(parent_dir, '*.whl'))
    if len(candidates) == 1:
        # Exactly one .whl file
        return candidates[0]
    if not candidates:
        print('Found wheel built marker, but no .whl files')
    else:
        print('Found multiple .whl files; unspecified behaviour. '
              'Will call build_wheel.')
    return None
|
||||
|
||||
|
||||
def build_wheel(wheel_directory, config_settings, metadata_directory=None):
    """Call the backend's mandatory ``build_wheel`` hook.

    When the prepare_metadata_for_build_wheel fallback already produced a
    wheel, that file is copied into ``wheel_directory`` and its name is
    returned without calling the backend.
    """
    existing_wheel = _find_already_built_wheel(metadata_directory)
    if not existing_wheel:
        backend = _build_backend()
        return backend.build_wheel(
            wheel_directory, config_settings, metadata_directory)

    shutil.copy2(existing_wheel, wheel_directory)
    return os.path.basename(existing_wheel)
|
||||
|
||||
|
||||
def build_editable(wheel_directory, config_settings, metadata_directory=None):
    """Call the backend's optional ``build_editable`` hook.

    Raises HookMissing when the backend has no such hook. When the
    prepare_metadata_for_build_editable fallback already produced a wheel,
    that file is copied into ``wheel_directory`` and its name is returned
    without calling the hook.
    """
    backend = _build_backend()
    try:
        editable_hook = backend.build_editable
    except AttributeError:
        raise HookMissing()

    existing_wheel = _find_already_built_wheel(metadata_directory)
    if not existing_wheel:
        return editable_hook(
            wheel_directory, config_settings, metadata_directory)

    shutil.copy2(existing_wheel, wheel_directory)
    return os.path.basename(existing_wheel)
|
||||
|
||||
|
||||
def get_requires_for_build_sdist(config_settings):
    """Call the backend's optional ``get_requires_for_build_sdist`` hook.

    Returns [] when the backend has no such hook.
    """
    backend = _build_backend()
    try:
        requires_hook = backend.get_requires_for_build_sdist
    except AttributeError:
        return []
    return requires_hook(config_settings)
|
||||
|
||||
|
||||
class _DummyException(Exception):
    """Placeholder exception type that nothing is expected to raise.

    build_sdist() uses it as the ``except`` target when the backend has no
    ``UnsupportedOperation`` attribute.
    """
|
||||
|
||||
|
||||
class GotUnsupportedOperation(Exception):
    """Internal signal that the backend raised UnsupportedOperation."""

    def __init__(self, traceback):
        # Traceback text supplied by the raiser; main() copies it to the output.
        self.traceback = traceback
|
||||
|
||||
|
||||
def build_sdist(sdist_directory, config_settings):
    """Call the backend's mandatory ``build_sdist`` hook.

    If the backend raises its own ``UnsupportedOperation``, it is re-raised
    as GotUnsupportedOperation carrying the formatted traceback.
    """
    backend = _build_backend()
    try:
        sdist_name = backend.build_sdist(sdist_directory, config_settings)
    # Looked up lazily: backends without the attribute match nothing here.
    except getattr(backend, 'UnsupportedOperation', _DummyException):
        raise GotUnsupportedOperation(traceback.format_exc())
    return sdist_name
|
||||
|
||||
|
||||
HOOK_NAMES = {
|
||||
'get_requires_for_build_wheel',
|
||||
'prepare_metadata_for_build_wheel',
|
||||
'build_wheel',
|
||||
'get_requires_for_build_editable',
|
||||
'prepare_metadata_for_build_editable',
|
||||
'build_editable',
|
||||
'get_requires_for_build_sdist',
|
||||
'build_sdist',
|
||||
'_supported_features',
|
||||
}
|
||||
|
||||
|
||||
def main():
    """Run one hook as named on the command line and record the outcome.

    Expects ``hook_name`` and ``control_dir`` as arguments. The hook's
    keyword arguments are read from ``control_dir/input.json``; the return
    value, or flags describing a recognised failure, are written to
    ``control_dir/output.json``.
    """
    if len(sys.argv) < 3:
        sys.exit("Needs args: hook_name, control_dir")
    hook_name, control_dir = sys.argv[1], sys.argv[2]
    if hook_name not in HOOK_NAMES:
        sys.exit("Unknown hook: %s" % hook_name)
    hook = globals()[hook_name]

    request = read_json(pjoin(control_dir, 'input.json'))

    json_out = {'unsupported': False, 'return_val': None}
    try:
        json_out['return_val'] = hook(**request['kwargs'])
    except BackendUnavailable as err:
        json_out.update(no_backend=True, traceback=err.traceback)
    except BackendInvalid as err:
        json_out.update(backend_invalid=True, backend_error=err.message)
    except GotUnsupportedOperation as err:
        json_out.update(unsupported=True, traceback=err.traceback)
    except HookMissing as err:
        json_out.update(
            hook_missing=True,
            missing_hook_name=err.hook_name or hook_name,
        )

    write_json(json_out, pjoin(control_dir, 'output.json'), indent=2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -77,8 +77,8 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
|
||||
elif charset_normalizer_version:
|
||||
major, minor, patch = charset_normalizer_version.split(".")[:3]
|
||||
major, minor, patch = int(major), int(minor), int(patch)
|
||||
# charset_normalizer >= 2.0.0 < 3.0.0
|
||||
assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0)
|
||||
# charset_normalizer >= 2.0.0 < 4.0.0
|
||||
assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0)
|
||||
else:
|
||||
raise Exception("You need either charset_normalizer or chardet installed")
|
||||
|
||||
|
||||
@@ -5,10 +5,10 @@
|
||||
__title__ = "requests"
|
||||
__description__ = "Python HTTP for Humans."
|
||||
__url__ = "https://requests.readthedocs.io"
|
||||
__version__ = "2.28.1"
|
||||
__build__ = 0x022801
|
||||
__version__ = "2.28.2"
|
||||
__build__ = 0x022802
|
||||
__author__ = "Kenneth Reitz"
|
||||
__author_email__ = "me@kennethreitz.org"
|
||||
__license__ = "Apache 2.0"
|
||||
__copyright__ = "Copyright 2022 Kenneth Reitz"
|
||||
__copyright__ = "Copyright Kenneth Reitz"
|
||||
__cake__ = "\u2728 \U0001f370 \u2728"
|
||||
|
||||
@@ -438,7 +438,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
|
||||
if not scheme:
|
||||
raise MissingSchema(
|
||||
f"Invalid URL {url!r}: No scheme supplied. "
|
||||
f"Perhaps you meant http://{url}?"
|
||||
f"Perhaps you meant https://{url}?"
|
||||
)
|
||||
|
||||
if not host:
|
||||
|
||||
@@ -5,7 +5,7 @@ from typing import IO, TYPE_CHECKING, Any, Callable, Optional, Union
|
||||
|
||||
from ._extension import load_ipython_extension # noqa: F401
|
||||
|
||||
__all__ = ["get_console", "reconfigure", "print", "inspect"]
|
||||
__all__ = ["get_console", "reconfigure", "print", "inspect", "print_json"]
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .console import Console
|
||||
@@ -40,7 +40,8 @@ def reconfigure(*args: Any, **kwargs: Any) -> None:
|
||||
"""Reconfigures the global console by replacing it with another.
|
||||
|
||||
Args:
|
||||
console (Console): Replacement console instance.
|
||||
*args (Any): Positional arguments for the replacement :class:`~rich.console.Console`.
|
||||
**kwargs (Any): Keyword arguments for the replacement :class:`~rich.console.Console`.
|
||||
"""
|
||||
from pipenv.patched.pip._vendor.rich.console import Console
|
||||
|
||||
@@ -80,7 +81,7 @@ def print_json(
|
||||
indent: Union[None, int, str] = 2,
|
||||
highlight: bool = True,
|
||||
skip_keys: bool = False,
|
||||
ensure_ascii: bool = True,
|
||||
ensure_ascii: bool = False,
|
||||
check_circular: bool = True,
|
||||
allow_nan: bool = True,
|
||||
default: Optional[Callable[[Any], Any]] = None,
|
||||
|
||||
@@ -227,10 +227,6 @@ if __name__ == "__main__": # pragma: no cover
|
||||
|
||||
c = Console(record=True)
|
||||
c.print(test_card)
|
||||
# c.save_svg(
|
||||
# path="/Users/darrenburns/Library/Application Support/JetBrains/PyCharm2021.3/scratches/svg_export.svg",
|
||||
# title="Rich can export to SVG",
|
||||
# )
|
||||
|
||||
print(f"rendered in {pre_cache_taken}ms (cold cache)")
|
||||
print(f"rendered in {taken}ms (warm cache)")
|
||||
@@ -247,10 +243,6 @@ if __name__ == "__main__": # pragma: no cover
|
||||
"Textualize",
|
||||
"[u blue link=https://github.com/textualize]https://github.com/textualize",
|
||||
)
|
||||
sponsor_message.add_row(
|
||||
"Buy devs a :coffee:",
|
||||
"[u blue link=https://ko-fi.com/textualize]https://ko-fi.com/textualize",
|
||||
)
|
||||
sponsor_message.add_row(
|
||||
"Twitter",
|
||||
"[u blue link=https://twitter.com/willmcgugan]https://twitter.com/willmcgugan",
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
from types import TracebackType
|
||||
from typing import IO, Iterable, Iterator, List, Optional, Type
|
||||
|
||||
|
||||
class NullFile(IO[str]):
    """A text stream that does nothing.

    Reads return empty values, writes are discarded and report 0 characters
    written, and the capability queries (``readable``, ``writable``,
    ``seekable``, ``isatty``) all answer False.
    """

    # TODO: "mode", "name" and "closed" are only required for Python 3.6.

    @property
    def mode(self) -> str:
        return ""

    @property
    def name(self) -> str:
        return "NullFile"

    @property
    def closed(self) -> bool:
        # Must be a property like on real file objects: as a plain method,
        # ``f.closed`` is a bound method and therefore always truthy.
        return False

    def close(self) -> None:
        pass

    def isatty(self) -> bool:
        return False

    def read(self, __n: int = 1) -> str:
        return ""

    def readable(self) -> bool:
        return False

    def readline(self, __limit: int = 1) -> str:
        return ""

    def readlines(self, __hint: int = 1) -> List[str]:
        return []

    def seek(self, __offset: int, __whence: int = 1) -> int:
        return 0

    def seekable(self) -> bool:
        return False

    def tell(self) -> int:
        return 0

    def truncate(self, __size: Optional[int] = 1) -> int:
        return 0

    def writable(self) -> bool:
        return False

    def writelines(self, __lines: Iterable[str]) -> None:
        pass

    def __next__(self) -> str:
        return ""

    def __iter__(self) -> Iterator[str]:
        return iter([""])

    def __enter__(self) -> IO[str]:
        # Return the stream itself so ``with NullFile() as f`` binds a
        # usable object rather than None.
        return self

    def __exit__(
        self,
        __t: Optional[Type[BaseException]],
        __value: Optional[BaseException],
        __traceback: Optional[TracebackType],
    ) -> None:
        pass

    def write(self, text: str) -> int:
        return 0

    def flush(self) -> None:
        pass

    def fileno(self) -> int:
        return -1
|
||||
|
||||
|
||||
NULL_FILE = NullFile()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user