* update pip to 23.0 in patched.txt

* Vendor in pip517 since pip dropped it from its _vendor.

* adjust vendoring script.

* vendor in pip==23.0

* correct vendoring script.

* fix import with vendoring script.
This commit is contained in:
Matt Davis
2023-02-04 07:10:02 -05:00
committed by GitHub
parent 6d420e4172
commit b7ed02240b
143 changed files with 3230 additions and 849 deletions
+1 -1
View File
@@ -1,2 +1,2 @@
pip==22.3.1
pip==23.0
safety==2.3.2
+1 -1
View File
@@ -1,6 +1,6 @@
from typing import List, Optional
__version__ = "22.3.1"
__version__ = "23.0"
def main(args: Optional[List[str]] = None) -> int:
+8 -7
View File
@@ -8,7 +8,6 @@ import site
import sys
import textwrap
from collections import OrderedDict
from sysconfig import get_paths
from types import TracebackType
from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type
@@ -18,7 +17,12 @@ from pipenv.patched.pip._vendor.packaging.version import Version
from pipenv.patched.pip import __file__ as pip_location
from pipenv.patched.pip._internal.cli.spinners import open_spinner
from pipenv.patched.pip._internal.locations import get_platlib, get_prefixed_libs, get_purelib
from pipenv.patched.pip._internal.locations import (
get_isolated_environment_bin_path,
get_isolated_environment_lib_paths,
get_platlib,
get_purelib,
)
from pipenv.patched.pip._internal.metadata import get_default_environment, get_environment
from pipenv.patched.pip._internal.utils.subprocess import call_subprocess
from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
@@ -33,11 +37,8 @@ class _Prefix:
def __init__(self, path: str) -> None:
self.path = path
self.setup = False
self.bin_dir = get_paths(
"nt" if os.name == "nt" else "posix_prefix",
vars={"base": path, "platbase": path},
)["scripts"]
self.lib_dirs = get_prefixed_libs(path)
self.bin_dir = get_isolated_environment_bin_path(path)
self.lib_dirs = get_isolated_environment_lib_paths(path)
def get_runnable_pip() -> str:
@@ -825,11 +825,9 @@ install_options: Callable[..., Option] = partial(
dest="install_options",
action="append",
metavar="options",
help="Extra arguments to be supplied to the setup.py install "
'command (use like --install-option="--install-scripts=/usr/local/'
'bin"). Use multiple --install-option options to pass multiple '
"options to setup.py install. If you are using an option with a "
"directory path, be sure to use absolute path.",
help="This option is deprecated. Using this option with location-changing "
"options may cause unexpected behavior. "
"Use pip-level options like --user, --prefix, --root, and --target.",
)
build_options: Callable[..., Option] = partial(
@@ -48,7 +48,7 @@ def create_vendor_txt_map() -> Dict[str, str]:
def get_module_from_module_name(module_name: str) -> ModuleType:
# Module name can be uppercase in vendor.txt for some reason...
module_name = module_name.lower()
module_name = module_name.lower().replace("-", "_")
# PATCH: setuptools is actually only pkg_resources.
if module_name == "setuptools":
module_name = "pkg_resources"
@@ -46,11 +46,6 @@ class InspectCommand(Command):
self.parser.insert_option_group(0, self.cmd_opts)
def run(self, options: Values, args: List[str]) -> int:
logger.warning(
"pip inspect is currently an experimental command. "
"The output format may change in a future release without prior warning."
)
cmdoptions.check_list_path_option(options)
dists = get_environment(options.path).iter_installed_distributions(
local_only=options.local,
@@ -58,7 +53,7 @@ class InspectCommand(Command):
skip=set(stdlib_pkgs),
)
output = {
"version": "0",
"version": "1",
"pip_version": __version__,
"installed": [self._dist_to_dict(dist) for dist in dists],
"environment": default_environment(),
@@ -41,6 +41,7 @@ from pipenv.patched.pip._internal.utils.distutils_args import parse_distutils_ar
from pipenv.patched.pip._internal.utils.filesystem import test_writable_dir
from pipenv.patched.pip._internal.utils.logging import getLogger
from pipenv.patched.pip._internal.utils.misc import (
check_externally_managed,
ensure_dir,
get_pip_version,
protect_pip_from_modification_on_windows,
@@ -284,6 +285,20 @@ class InstallCommand(RequirementCommand):
if options.use_user_site and options.target_dir is not None:
raise CommandError("Can not combine '--user' and '--target'")
# Check whether the environment we're installing into is externally
# managed, as specified in PEP 668. Specifying --root, --target, or
# --prefix disables the check, since there's no reliable way to locate
# the EXTERNALLY-MANAGED file for those cases. An exception is also
# made specifically for "--dry-run --report" for convenience.
installing_into_current_environment = (
not (options.dry_run and options.json_report_file)
and options.root_path is None
and options.target_dir is None
and options.prefix_path is None
)
if installing_into_current_environment:
check_externally_managed()
upgrade_strategy = "to-satisfy-only"
if options.upgrade:
upgrade_strategy = options.upgrade_strategy
@@ -402,12 +417,6 @@ class InstallCommand(RequirementCommand):
)
if options.json_report_file:
logger.warning(
"--report is currently an experimental option. "
"The output format may change in a future release "
"without prior warning."
)
report = InstallationReport(requirement_set.requirements_to_install)
if options.json_report_file == "-":
print_json(data=report.to_dict())
@@ -53,6 +53,7 @@ class _PackageInfo(NamedTuple):
name: str
version: str
location: str
editable_project_location: Optional[str]
requires: List[str]
required_by: List[str]
installer: str
@@ -120,6 +121,7 @@ def search_packages_info(query: List[str]) -> Generator[_PackageInfo, None, None
name=dist.raw_name,
version=str(dist.version),
location=dist.location or "",
editable_project_location=dist.editable_project_location,
requires=requires,
required_by=required_by,
installer=dist.installer,
@@ -158,6 +160,10 @@ def print_results(
write_output("Author-email: %s", dist.author_email)
write_output("License: %s", dist.license)
write_output("Location: %s", dist.location)
if dist.editable_project_location is not None:
write_output(
"Editable project location: %s", dist.editable_project_location
)
write_output("Requires: %s", ", ".join(dist.requires))
write_output("Required-by: %s", ", ".join(dist.required_by))
@@ -14,7 +14,10 @@ from pipenv.patched.pip._internal.req.constructors import (
install_req_from_line,
install_req_from_parsed_requirement,
)
from pipenv.patched.pip._internal.utils.misc import protect_pip_from_modification_on_windows
from pipenv.patched.pip._internal.utils.misc import (
check_externally_managed,
protect_pip_from_modification_on_windows,
)
logger = logging.getLogger(__name__)
@@ -90,6 +93,8 @@ class UninstallCommand(Command, SessionCommandMixin):
f'"pip help {self.name}")'
)
check_externally_managed()
protect_pip_from_modification_on_windows(
modifying_pip="pip" in reqs_to_uninstall
)
+86 -1
View File
@@ -6,9 +6,14 @@ subpackage and, thus, should not depend on them.
"""
import configparser
import contextlib
import locale
import logging
import pathlib
import re
import sys
from itertools import chain, groupby, repeat
from typing import TYPE_CHECKING, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union
from pipenv.patched.pip._vendor.requests.models import Request, Response
from pipenv.patched.pip._vendor.rich.console import Console, ConsoleOptions, RenderResult
@@ -22,6 +27,8 @@ if TYPE_CHECKING:
from pipenv.patched.pip._internal.metadata import BaseDistribution
from pipenv.patched.pip._internal.req.req_install import InstallRequirement
logger = logging.getLogger(__name__)
#
# Scaffolding
@@ -658,3 +665,81 @@ class ConfigurationFileCouldNotBeLoaded(ConfigurationError):
assert self.error is not None
message_part = f".\n{self.error}\n"
return f"Configuration file {self.reason}{message_part}"
_DEFAULT_EXTERNALLY_MANAGED_ERROR = f"""\
The Python environment under {sys.prefix} is managed externally, and may not be
manipulated by the user. Please use specific tooling from the distributor of
the Python installation to interact with this environment instead.
"""
class ExternallyManagedEnvironment(DiagnosticPipError):
"""The current environment is externally managed.
This is raised when the current environment is externally managed, as
defined by `PEP 668`_. The ``EXTERNALLY-MANAGED`` configuration is checked
and displayed when the error is bubbled up to the user.
:param error: The error message read from ``EXTERNALLY-MANAGED``.
"""
reference = "externally-managed-environment"
def __init__(self, error: Optional[str]) -> None:
if error is None:
context = Text(_DEFAULT_EXTERNALLY_MANAGED_ERROR)
else:
context = Text(error)
super().__init__(
message="This environment is externally managed",
context=context,
note_stmt=(
"If you believe this is a mistake, please contact your "
"Python installation or OS distribution provider."
),
hint_stmt=Text("See PEP 668 for the detailed specification."),
)
@staticmethod
def _iter_externally_managed_error_keys() -> Iterator[str]:
# LC_MESSAGES is in POSIX, but not the C standard. The most common
# platform that does not implement this category is Windows, where
# using other categories for console message localization is equally
# unreliable, so we fall back to the locale-less vendor message. This
# can always be re-evaluated when a vendor proposes a new alternative.
try:
category = locale.LC_MESSAGES
except AttributeError:
lang: Optional[str] = None
else:
lang, _ = locale.getlocale(category)
if lang is not None:
yield f"Error-{lang}"
for sep in ("-", "_"):
before, found, _ = lang.partition(sep)
if not found:
continue
yield f"Error-{before}"
yield "Error"
@classmethod
def from_config(
cls,
config: Union[pathlib.Path, str],
) -> "ExternallyManagedEnvironment":
parser = configparser.ConfigParser(interpolation=None)
try:
parser.read(config, encoding="utf-8")
section = parser["externally-managed"]
for key in cls._iter_externally_managed_error_keys():
with contextlib.suppress(KeyError):
return cls(section[key])
except KeyError:
pass
except (OSError, UnicodeDecodeError, configparser.ParsingError):
from pipenv.patched.pip._internal.utils._log import VERBOSE
exc_info = logger.isEnabledFor(VERBOSE)
logger.warning("Failed to read %s", config, exc_info=exc_info)
return cls(None)
@@ -354,7 +354,7 @@ def _get_index_content(link: Link, *, session: PipSession) -> Optional["IndexCon
if not url.endswith("/"):
url += "/"
# TODO: In the future, it would be nice if pip supported PEP 691
# style respones in the file:// URLs, however there's no
# style responses in the file:// URLs, however there's no
# standard file extension for application/vnd.pypi.simple.v1+json
# so we'll need to come up with something on our own.
url = urllib.parse.urljoin(url, "index.html")
@@ -1,14 +1,11 @@
"""Routines related to PyPI, indexes"""
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False
import enum
import functools
import itertools
import logging
import re
from typing import FrozenSet, Iterable, List, Optional, Set, Tuple, Union
from typing import TYPE_CHECKING, FrozenSet, Iterable, List, Optional, Set, Tuple, Union
from pipenv.patched.pip._vendor.packaging import specifiers
from pipenv.patched.pip._vendor.packaging.tags import Tag
@@ -39,6 +36,9 @@ from pipenv.patched.pip._internal.utils.misc import build_netloc
from pipenv.patched.pip._internal.utils.packaging import check_requires_python
from pipenv.patched.pip._internal.utils.unpacking import SUPPORTED_EXTENSIONS
if TYPE_CHECKING:
from pipenv.patched.pip._vendor.typing_extensions import TypeGuard
__all__ = ["FormatControl", "BestCandidateResult", "PackageFinder"]
@@ -255,7 +255,7 @@ class LinkEvaluator:
def filter_unallowed_hashes(
candidates: List[InstallationCandidate],
hashes: Hashes,
hashes: Optional[Hashes],
project_name: str,
) -> List[InstallationCandidate]:
"""
@@ -549,6 +549,7 @@ class CandidateEvaluator:
binary_preference = 1
if wheel.build_tag is not None:
match = re.match(r"^(\d+)(.*)$", wheel.build_tag)
assert match is not None, "guaranteed by filename validation"
build_tag_groups = match.groups()
build_tag = (int(build_tag_groups[0]), build_tag_groups[1])
else: # sdist
@@ -954,43 +955,46 @@ class PackageFinder:
"No matching distribution found for {}".format(req)
)
best_installed = False
if installed_version and (
best_candidate is None or best_candidate.version <= installed_version
):
best_installed = True
def _should_install_candidate(
candidate: Optional[InstallationCandidate],
) -> "TypeGuard[InstallationCandidate]":
if installed_version is None:
return True
if best_candidate is None:
return False
return best_candidate.version > installed_version
if not upgrade and installed_version is not None:
if best_installed:
logger.debug(
"Existing installed version (%s) is most up-to-date and "
"satisfies requirement",
installed_version,
)
else:
if _should_install_candidate(best_candidate):
logger.debug(
"Existing installed version (%s) satisfies requirement "
"(most up-to-date version is %s)",
installed_version,
best_candidate.version,
)
else:
logger.debug(
"Existing installed version (%s) is most up-to-date and "
"satisfies requirement",
installed_version,
)
return None
if best_installed:
# We have an existing version, and its the best version
if _should_install_candidate(best_candidate):
logger.debug(
"Installed version (%s) is most up-to-date (past versions: %s)",
installed_version,
"Using version %s (newest of versions: %s)",
best_candidate.version,
_format_versions(best_candidate_result.iter_applicable()),
)
raise BestVersionAlreadyInstalled
return best_candidate
# We have an existing version, and its the best version
logger.debug(
"Using version %s (newest of versions: %s)",
best_candidate.version,
"Installed version (%s) is most up-to-date (past versions: %s)",
installed_version,
_format_versions(best_candidate_result.iter_applicable()),
)
return best_candidate
raise BestVersionAlreadyInstalled
def _find_name_version_sep(fragment: str, canonical_name: str) -> int:
@@ -25,9 +25,10 @@ __all__ = [
"USER_CACHE_DIR",
"get_bin_prefix",
"get_bin_user",
"get_isolated_environment_bin_path",
"get_isolated_environment_lib_paths",
"get_major_minor_version",
"get_platlib",
"get_prefixed_libs",
"get_purelib",
"get_scheme",
"get_src_prefix",
@@ -482,13 +483,13 @@ def _looks_like_apple_library(path: str) -> bool:
return path == f"/Library/Python/{get_major_minor_version()}/site-packages"
def get_prefixed_libs(prefix: str) -> List[str]:
def get_isolated_environment_lib_paths(prefix: str) -> List[str]:
"""Return the lib locations under ``prefix``."""
new_pure, new_plat = _sysconfig.get_prefixed_libs(prefix)
new_pure, new_plat = _sysconfig.get_isolated_environment_lib_paths(prefix)
if _USE_SYSCONFIG:
return _deduplicated(new_pure, new_plat)
old_pure, old_plat = _distutils.get_prefixed_libs(prefix)
old_pure, old_plat = _distutils.get_isolated_environment_lib_paths(prefix)
old_lib_paths = _deduplicated(old_pure, old_plat)
# Apple's Python (shipped with Xcode and Command Line Tools) hard-code
@@ -526,3 +527,7 @@ def get_prefixed_libs(prefix: str) -> List[str]:
_log_context(prefix=prefix)
return old_lib_paths
def get_isolated_environment_bin_path(prefix: str) -> str:
return _sysconfig.get_isolated_environment_paths(prefix)["scripts"]
@@ -173,7 +173,7 @@ def get_platlib() -> str:
return get_python_lib(plat_specific=True)
def get_prefixed_libs(prefix: str) -> Tuple[str, str]:
def get_isolated_environment_lib_paths(prefix: str) -> Tuple[str, str]:
return (
get_python_lib(plat_specific=False, prefix=prefix),
get_python_lib(plat_specific=True, prefix=prefix),
@@ -213,6 +213,13 @@ def get_platlib() -> str:
return sysconfig.get_paths()["platlib"]
def get_prefixed_libs(prefix: str) -> typing.Tuple[str, str]:
paths = sysconfig.get_paths(vars={"base": prefix, "platbase": prefix})
def get_isolated_environment_paths(prefix: str) -> typing.Dict[str, str]:
variables = {"base": prefix, "platbase": prefix}
if "venv" in sysconfig.get_scheme_names():
return sysconfig.get_paths(vars=variables, scheme="venv")
return sysconfig.get_paths(vars=variables)
def get_isolated_environment_lib_paths(prefix: str) -> typing.Tuple[str, str]:
paths = get_isolated_environment_paths(prefix)
return (paths["purelib"], paths["platlib"])
@@ -13,7 +13,7 @@ from pipenv.patched.pip._internal.utils.virtualenv import running_under_virtuale
USER_CACHE_DIR = appdirs.user_cache_dir("pip")
# FIXME doesn't account for venv linked to global site-packages
site_packages: typing.Optional[str] = sysconfig.get_path("purelib")
site_packages: str = sysconfig.get_path("purelib")
def get_major_minor_version() -> str:
@@ -103,17 +103,28 @@ class ArchiveInfo:
def __init__(
self,
hash: Optional[str] = None,
hashes: Optional[Dict[str, str]] = None,
) -> None:
if hash is not None:
# Auto-populate the hashes key to upgrade to the new format automatically.
# We don't back-populate the legacy hash key.
hash_name, hash_value = hash.split("=", 1)
if hashes is None:
hashes = {hash_name: hash_value}
elif hash_name not in hash:
hashes = hashes.copy()
hashes[hash_name] = hash_value
self.hash = hash
self.hashes = hashes
@classmethod
def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["ArchiveInfo"]:
if d is None:
return None
return cls(hash=_get(d, str, "hash"))
return cls(hash=_get(d, str, "hash"), hashes=_get(d, dict, "hashes"))
def _to_dict(self) -> Dict[str, Any]:
return _filter_none(hash=self.hash)
return _filter_none(hash=self.hash, hashes=self.hashes)
class DirInfo:
@@ -38,7 +38,7 @@ class InstallationReport:
def to_dict(self) -> Dict[str, Any]:
return {
"version": "0",
"version": "1",
"pip_version": __version__,
"install": [
self._install_req_to_dict(ireq) for ireq in self._install_requirements
+47 -30
View File
@@ -18,6 +18,7 @@ from typing import (
Union,
)
from pipenv.patched.pip._internal.utils.deprecation import deprecated
from pipenv.patched.pip._internal.utils.filetypes import WHEEL_EXTENSION
from pipenv.patched.pip._internal.utils.hashes import Hashes
from pipenv.patched.pip._internal.utils.misc import (
@@ -78,6 +79,9 @@ class LinkHash:
name, value = match.groups()
return cls(name=name, value=value)
def as_dict(self) -> Dict[str, str]:
return {self.name: self.value}
def as_hashes(self) -> Hashes:
"""Return a Hashes instance which checks only for the current hash."""
return Hashes({self.name: [self.value]})
@@ -164,8 +168,8 @@ class Link(KeyBasedCompareMixin):
"requires_python",
"yanked_reason",
"dist_info_metadata",
"link_hash",
"cache_link_parsing",
"egg_fragment",
]
def __init__(
@@ -175,7 +179,6 @@ class Link(KeyBasedCompareMixin):
requires_python: Optional[str] = None,
yanked_reason: Optional[str] = None,
dist_info_metadata: Optional[str] = None,
link_hash: Optional[LinkHash] = None,
cache_link_parsing: bool = True,
hashes: Optional[Mapping[str, str]] = None,
) -> None:
@@ -198,16 +201,11 @@ class Link(KeyBasedCompareMixin):
attribute, if present, in a simple repository HTML link. This may be parsed
into its own `Link` by `self.metadata_link()`. See PEP 658 for more
information and the specification.
:param link_hash: a checksum for the content the link points to. If not
provided, this will be extracted from the link URL, if the URL has
any checksum.
:param cache_link_parsing: A flag that is used elsewhere to determine
whether resources retrieved from this link
should be cached. PyPI index urls should
generally have this set to False, for
example.
whether resources retrieved from this link should be cached. PyPI
URLs should generally have this set to False, for example.
:param hashes: A mapping of hash names to digests to allow us to
determine the validity of a download.
determine the validity of a download.
"""
# url can be a UNC windows share
@@ -218,17 +216,23 @@ class Link(KeyBasedCompareMixin):
# Store the url as a private attribute to prevent accidentally
# trying to set a new value.
self._url = url
self._hashes = hashes if hashes is not None else {}
link_hash = LinkHash.split_hash_name_and_value(url)
hashes_from_link = {} if link_hash is None else link_hash.as_dict()
if hashes is None:
self._hashes = hashes_from_link
else:
self._hashes = {**hashes, **hashes_from_link}
self.comes_from = comes_from
self.requires_python = requires_python if requires_python else None
self.yanked_reason = yanked_reason
self.dist_info_metadata = dist_info_metadata
self.link_hash = link_hash or LinkHash.split_hash_name_and_value(self._url)
super().__init__(key=url, defining_class=Link)
self.cache_link_parsing = cache_link_parsing
self.egg_fragment = self._egg_fragment()
@classmethod
def from_json(
@@ -358,12 +362,28 @@ class Link(KeyBasedCompareMixin):
_egg_fragment_re = re.compile(r"[#&]egg=([^&]*)")
@property
def egg_fragment(self) -> Optional[str]:
# Per PEP 508.
_project_name_re = re.compile(
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE
)
def _egg_fragment(self) -> Optional[str]:
match = self._egg_fragment_re.search(self._url)
if not match:
return None
return match.group(1)
# An egg fragment looks like a PEP 508 project name, along with
# an optional extras specifier. Anything else is invalid.
project_name = match.group(1)
if not self._project_name_re.match(project_name):
deprecated(
reason=f"{self} contains an egg fragment with a non-PEP 508 name",
replacement="to use the req @ url syntax, and remove the egg fragment",
gone_in="25.0",
issue=11617,
)
return project_name
_subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)")
@@ -382,29 +402,26 @@ class Link(KeyBasedCompareMixin):
if self.dist_info_metadata is None:
return None
metadata_url = f"{self.url_without_fragment}.metadata"
link_hash: Optional[LinkHash] = None
# If data-dist-info-metadata="true" is set, then the metadata file exists,
# but there is no information about its checksum or anything else.
if self.dist_info_metadata != "true":
link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata)
return Link(metadata_url, link_hash=link_hash)
else:
link_hash = None
if link_hash is None:
return Link(metadata_url)
return Link(metadata_url, hashes=link_hash.as_dict())
def as_hashes(self) -> Optional[Hashes]:
if self.link_hash is not None:
return self.link_hash.as_hashes()
return None
def as_hashes(self) -> Hashes:
return Hashes({k: [v] for k, v in self._hashes.items()})
@property
def hash(self) -> Optional[str]:
if self.link_hash is not None:
return self.link_hash.value
return None
return next(iter(self._hashes.values()), None)
@property
def hash_name(self) -> Optional[str]:
if self.link_hash is not None:
return self.link_hash.name
return None
return next(iter(self._hashes), None)
@property
def show_url(self) -> str:
@@ -433,15 +450,15 @@ class Link(KeyBasedCompareMixin):
@property
def has_hash(self) -> bool:
return self.link_hash is not None
return bool(self._hashes)
def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
"""
Return True if the link has a hash and it is allowed by `hashes`.
"""
if self.link_hash is None:
if hashes is None:
return False
return self.link_hash.is_hash_allowed(hashes)
return any(hashes.is_hash_allowed(k, v) for k, v in self._hashes.items())
class _CleanResult(NamedTuple):
+157 -34
View File
@@ -4,8 +4,12 @@ Contains interface (MultiDomainBasicAuth) and associated glue code for
providing credentials in the context of network requests.
"""
import os
import shutil
import subprocess
import urllib.parse
from typing import Any, Dict, List, Optional, Tuple
from abc import ABC, abstractmethod
from typing import Any, Dict, List, NamedTuple, Optional, Tuple
from pipenv.patched.pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
from pipenv.patched.pip._vendor.requests.models import Request, Response
@@ -23,51 +27,165 @@ from pipenv.patched.pip._internal.vcs.versioncontrol import AuthInfo
logger = getLogger(__name__)
Credentials = Tuple[str, str, str]
try:
import keyring
except ImportError:
keyring = None # type: ignore[assignment]
except Exception as exc:
logger.warning(
"Keyring is skipped due to an exception: %s",
str(exc),
)
keyring = None # type: ignore[assignment]
KEYRING_DISABLED = False
def get_keyring_auth(url: Optional[str], username: Optional[str]) -> Optional[AuthInfo]:
"""Return the tuple auth for a given url from keyring."""
global keyring
if not url or not keyring:
class Credentials(NamedTuple):
url: str
username: str
password: str
class KeyRingBaseProvider(ABC):
"""Keyring base provider interface"""
@abstractmethod
def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]:
...
@abstractmethod
def save_auth_info(self, url: str, username: str, password: str) -> None:
...
class KeyRingNullProvider(KeyRingBaseProvider):
"""Keyring null provider"""
def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]:
return None
try:
try:
get_credential = keyring.get_credential
except AttributeError:
pass
else:
def save_auth_info(self, url: str, username: str, password: str) -> None:
return None
class KeyRingPythonProvider(KeyRingBaseProvider):
"""Keyring interface which uses locally imported `keyring`"""
def __init__(self) -> None:
import keyring
self.keyring = keyring
def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]:
# Support keyring's get_credential interface which supports getting
# credentials without a username. This is only available for
# keyring>=15.2.0.
if hasattr(self.keyring, "get_credential"):
logger.debug("Getting credentials from keyring for %s", url)
cred = get_credential(url, username)
cred = self.keyring.get_credential(url, username)
if cred is not None:
return cred.username, cred.password
return None
if username:
if username is not None:
logger.debug("Getting password from keyring for %s", url)
password = keyring.get_password(url, username)
password = self.keyring.get_password(url, username)
if password:
return username, password
return None
def save_auth_info(self, url: str, username: str, password: str) -> None:
self.keyring.set_password(url, username, password)
class KeyRingCliProvider(KeyRingBaseProvider):
"""Provider which uses `keyring` cli
Instead of calling the keyring package installed alongside pip
we call keyring on the command line which will enable pip to
use which ever installation of keyring is available first in
PATH.
"""
def __init__(self, cmd: str) -> None:
self.keyring = cmd
def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]:
# This is the default implementation of keyring.get_credential
# https://github.com/jaraco/keyring/blob/97689324abcf01bd1793d49063e7ca01e03d7d07/keyring/backend.py#L134-L139
if username is not None:
password = self._get_password(url, username)
if password is not None:
return username, password
return None
def save_auth_info(self, url: str, username: str, password: str) -> None:
return self._set_password(url, username, password)
def _get_password(self, service_name: str, username: str) -> Optional[str]:
"""Mirror the implementation of keyring.get_password using cli"""
if self.keyring is None:
return None
cmd = [self.keyring, "get", service_name, username]
env = os.environ.copy()
env["PYTHONIOENCODING"] = "utf-8"
res = subprocess.run(
cmd,
stdin=subprocess.DEVNULL,
capture_output=True,
env=env,
)
if res.returncode:
return None
return res.stdout.decode("utf-8").strip(os.linesep)
def _set_password(self, service_name: str, username: str, password: str) -> None:
"""Mirror the implementation of keyring.set_password using cli"""
if self.keyring is None:
return None
cmd = [self.keyring, "set", service_name, username]
input_ = (password + os.linesep).encode("utf-8")
env = os.environ.copy()
env["PYTHONIOENCODING"] = "utf-8"
res = subprocess.run(cmd, input=input_, env=env)
res.check_returncode()
return None
def get_keyring_provider() -> KeyRingBaseProvider:
# keyring has previously failed and been disabled
if not KEYRING_DISABLED:
# Default to trying to use Python provider
try:
return KeyRingPythonProvider()
except ImportError:
pass
except Exception as exc:
# In the event of an unexpected exception
# we should warn the user
logger.warning(
"Installed copy of keyring fails with exception %s, "
"trying to find a keyring executable as a fallback",
str(exc),
)
# Fallback to Cli Provider if `keyring` isn't installed
cli = shutil.which("keyring")
if cli:
return KeyRingCliProvider(cli)
return KeyRingNullProvider()
def get_keyring_auth(url: Optional[str], username: Optional[str]) -> Optional[AuthInfo]:
"""Return the tuple auth for a given url from keyring."""
# Do nothing if no url was provided
if not url:
return None
keyring = get_keyring_provider()
try:
return keyring.get_auth_info(url, username)
except Exception as exc:
logger.warning(
"Keyring is skipped due to an exception: %s",
str(exc),
)
keyring = None # type: ignore[assignment]
return None
global KEYRING_DISABLED
KEYRING_DISABLED = True
return None
class MultiDomainBasicAuth(AuthBase):
@@ -241,7 +359,7 @@ class MultiDomainBasicAuth(AuthBase):
# Factored out to allow for easy patching in tests
def _should_save_password_to_keyring(self) -> bool:
if not keyring:
if get_keyring_provider() is None:
return False
return ask("Save credentials to keyring [y/N]: ", ["y", "n"]) == "y"
@@ -276,7 +394,11 @@ class MultiDomainBasicAuth(AuthBase):
# Prompt to save the password to keyring
if save and self._should_save_password_to_keyring():
self._credentials_to_save = (parsed.netloc, username, password)
self._credentials_to_save = Credentials(
url=parsed.netloc,
username=username,
password=password,
)
# Consume content and release the original connection to allow our new
# request to reuse the same one.
@@ -309,15 +431,16 @@ class MultiDomainBasicAuth(AuthBase):
def save_credentials(self, resp: Response, **kwargs: Any) -> None:
"""Response callback to save credentials on success."""
assert keyring is not None, "should never reach here without keyring"
if not keyring:
return
keyring = get_keyring_provider()
assert not isinstance(
keyring, KeyRingNullProvider
), "should never reach here without keyring"
creds = self._credentials_to_save
self._credentials_to_save = None
if creds and resp.status_code < 400:
try:
logger.info("Saving credentials to keyring")
keyring.set_password(*creds)
keyring.save_auth_info(creds.url, creds.username, creds.password)
except Exception:
logger.exception("Failed to save credentials")
@@ -3,7 +3,7 @@
import os
from pipenv.patched.pip._vendor.pep517.wrappers import Pep517HookCaller
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
from pipenv.patched.pip._internal.build_env import BuildEnvironment
from pipenv.patched.pip._internal.exceptions import (
@@ -15,7 +15,7 @@ from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory
def generate_metadata(
build_env: BuildEnvironment, backend: Pep517HookCaller, details: str
build_env: BuildEnvironment, backend: BuildBackendHookCaller, details: str
) -> str:
"""Generate metadata using mechanisms described in PEP 517.
@@ -26,7 +26,7 @@ def generate_metadata(
metadata_dir = metadata_tmpdir.path
with build_env:
# Note that Pep517HookCaller implements a fallback for
# Note that BuildBackendHookCaller implements a fallback for
# prepare_metadata_for_build_wheel, so we don't have to
# consider the possibility that this hook doesn't exist.
runner = runner_with_spinner_message("Preparing metadata (pyproject.toml)")
@@ -3,7 +3,7 @@
import os
from pipenv.patched.pip._vendor.pep517.wrappers import Pep517HookCaller
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
from pipenv.patched.pip._internal.build_env import BuildEnvironment
from pipenv.patched.pip._internal.exceptions import (
@@ -15,7 +15,7 @@ from pipenv.patched.pip._internal.utils.temp_dir import TempDirectory
def generate_editable_metadata(
build_env: BuildEnvironment, backend: Pep517HookCaller, details: str
build_env: BuildEnvironment, backend: BuildBackendHookCaller, details: str
) -> str:
"""Generate metadata using mechanisms described in PEP 660.
@@ -26,7 +26,7 @@ def generate_editable_metadata(
metadata_dir = metadata_tmpdir.path
with build_env:
# Note that Pep517HookCaller implements a fallback for
# Note that BuildBackendHookCaller implements a fallback for
# prepare_metadata_for_build_wheel/editable, so we don't have to
# consider the possibility that this hook doesn't exist.
runner = runner_with_spinner_message(
@@ -2,7 +2,7 @@ import logging
import os
from typing import Optional
from pipenv.patched.pip._vendor.pep517.wrappers import Pep517HookCaller
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
from pipenv.patched.pip._internal.utils.subprocess import runner_with_spinner_message
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
def build_wheel_pep517(
name: str,
backend: Pep517HookCaller,
backend: BuildBackendHookCaller,
metadata_directory: str,
tempd: str,
) -> Optional[str]:
@@ -2,7 +2,7 @@ import logging
import os
from typing import Optional
from pipenv.patched.pip._vendor.pep517.wrappers import HookMissing, Pep517HookCaller
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller, HookMissing
from pipenv.patched.pip._internal.utils.subprocess import runner_with_spinner_message
@@ -11,7 +11,7 @@ logger = logging.getLogger(__name__)
def build_wheel_editable(
name: str,
backend: Pep517HookCaller,
backend: BuildBackendHookCaller,
metadata_directory: str,
tempd: str,
) -> Optional[str]:
@@ -75,7 +75,7 @@ def check_package_set(
if name not in package_set:
missed = True
if req.marker is not None:
missed = req.marker.evaluate()
missed = req.marker.evaluate({"extra": ""})
if missed:
missing_deps.add((name, req))
continue
+3 -4
View File
@@ -159,9 +159,8 @@ def load_pyproject_toml(
if backend is None:
# If the user didn't specify a backend, we assume they want to use
# the setuptools backend. But we can't be sure they have included
# a version of setuptools which supplies the backend, or wheel
# (which is needed by the backend) in their requirements. So we
# make a note to check that those requirements are present once
# a version of setuptools which supplies the backend. So we
# make a note to check that this requirement is present once
# we have set up the environment.
# This is quite a lot of work to check for a very specific case. But
# the problem is, that case is potentially quite common - projects that
@@ -170,6 +169,6 @@ def load_pyproject_toml(
# tools themselves. The original PEP 518 code had a similar check (but
# implemented in a different way).
backend = "setuptools.build_meta:__legacy__"
check = ["setuptools>=40.8.0", "wheel"]
check = ["setuptools>=40.8.0"]
return BuildSystemDetails(requires, backend, check, backend_path)
@@ -18,7 +18,7 @@ from pipenv.patched.pip._vendor.packaging.specifiers import SpecifierSet
from pipenv.patched.pip._vendor.packaging.utils import canonicalize_name
from pipenv.patched.pip._vendor.packaging.version import Version
from pipenv.patched.pip._vendor.packaging.version import parse as parse_version
from pipenv.patched.pip._vendor.pep517.wrappers import Pep517HookCaller
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
from pipenv.patched.pip._internal.build_env import BuildEnvironment, NoOpBuildEnvironment
from pipenv.patched.pip._internal.exceptions import InstallationError, LegacyInstallFailure
@@ -51,7 +51,7 @@ from pipenv.patched.pip._internal.utils.direct_url_helpers import (
)
from pipenv.patched.pip._internal.utils.hashes import Hashes
from pipenv.patched.pip._internal.utils.misc import (
ConfiguredPep517HookCaller,
ConfiguredBuildBackendHookCaller,
ask_path_exists,
backup_dir,
display_path,
@@ -173,7 +173,7 @@ class InstallRequirement:
self.requirements_to_check: List[str] = []
# The PEP 517 backend we should use to build the project
self.pep517_backend: Optional[Pep517HookCaller] = None
self.pep517_backend: Optional[BuildBackendHookCaller] = None
# Are we using PEP 517 for this requirement?
# After pyproject.toml has been loaded, the only valid values are True
@@ -195,7 +195,11 @@ class InstallRequirement:
else:
s = "<InstallRequirement>"
if self.satisfied_by is not None:
s += " in {}".format(display_path(self.satisfied_by.location))
if self.satisfied_by.location is not None:
location = display_path(self.satisfied_by.location)
else:
location = "<memory>"
s += f" in {location}"
if self.comes_from:
if isinstance(self.comes_from, str):
comes_from: Optional[str] = self.comes_from
@@ -482,7 +486,7 @@ class InstallRequirement:
requires, backend, check, backend_path = pyproject_toml_data
self.requirements_to_check = check
self.pyproject_requires = requires
self.pep517_backend = ConfiguredPep517HookCaller(
self.pep517_backend = ConfiguredBuildBackendHookCaller(
self,
self.unpacked_source_directory,
backend,
@@ -133,7 +133,7 @@ class UpgradePrompt:
return Group(
Text(),
Text.from_markup(
f"{notice} A new release of pip available: "
f"{notice} A new release of pip is available: "
f"[red]{self.old}[reset] -> [green]{self.new}[reset]"
),
Text.from_markup(
@@ -155,7 +155,7 @@ def was_installed_by_pip(pkg: str) -> bool:
def _get_current_remote_pip_version(
session: PipSession, options: optparse.Values
) -> str:
) -> Optional[str]:
# Lets use PackageFinder to see what the latest pip version is
link_collector = LinkCollector.create(
session,
@@ -176,7 +176,7 @@ def _get_current_remote_pip_version(
)
best_candidate = finder.find_best_candidate("pip").best_candidate
if best_candidate is None:
return
return None
return str(best_candidate.version)
@@ -186,11 +186,14 @@ def _self_version_check_logic(
state: SelfCheckState,
current_time: datetime.datetime,
local_version: DistributionVersion,
get_remote_version: Callable[[], str],
get_remote_version: Callable[[], Optional[str]],
) -> Optional[UpgradePrompt]:
remote_version_str = state.get(current_time)
if remote_version_str is None:
remote_version_str = get_remote_version()
if remote_version_str is None:
logger.debug("No remote pip version found")
return None
state.set(remote_version_str, current_time)
remote_version = parse_version(remote_version_str)
@@ -1,10 +1,7 @@
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False
import os
import re
import sys
from typing import Optional
from typing import List, Optional
from pipenv.patched.pip._internal.locations import site_packages, user_site
from pipenv.patched.pip._internal.utils.virtualenv import (
@@ -57,7 +54,7 @@ def egg_link_path_from_location(raw_name: str) -> Optional[str]:
This method will just return the first one found.
"""
sites = []
sites: List[str] = []
if running_under_virtualenv():
sites.append(site_packages)
if not virtualenv_no_global() and user_site:
+21 -5
View File
@@ -12,6 +12,7 @@ import posixpath
import shutil
import stat
import sys
import sysconfig
import urllib.parse
from io import StringIO
from itertools import filterfalse, tee, zip_longest
@@ -34,11 +35,11 @@ from typing import (
cast,
)
from pipenv.patched.pip._vendor.pep517 import Pep517HookCaller
from pipenv.patched.pip._vendor.pyproject_hooks import BuildBackendHookCaller
from pipenv.patched.pip._vendor.tenacity import retry, stop_after_delay, wait_fixed
from pipenv.patched.pip import __version__
from pipenv.patched.pip._internal.exceptions import CommandError
from pipenv.patched.pip._internal.exceptions import CommandError, ExternallyManagedEnvironment
from pipenv.patched.pip._internal.locations import get_major_minor_version
from pipenv.patched.pip._internal.utils.compat import WINDOWS
from pipenv.patched.pip._internal.utils.virtualenv import running_under_virtualenv
@@ -57,10 +58,10 @@ __all__ = [
"captured_stdout",
"ensure_dir",
"remove_auth_from_url",
"ConfiguredPep517HookCaller",
"check_externally_managed",
"ConfiguredBuildBackendHookCaller",
]
logger = logging.getLogger(__name__)
T = TypeVar("T")
@@ -581,6 +582,21 @@ def protect_pip_from_modification_on_windows(modifying_pip: bool) -> None:
)
def check_externally_managed() -> None:
"""Check whether the current environment is externally managed.
If the ``EXTERNALLY-MANAGED`` config file is found, the current environment
is considered externally managed, and an ExternallyManagedEnvironment is
raised.
"""
if running_under_virtualenv():
return
marker = os.path.join(sysconfig.get_path("stdlib"), "EXTERNALLY-MANAGED")
if not os.path.isfile(marker):
return
raise ExternallyManagedEnvironment.from_config(marker)
def is_console_interactive() -> bool:
"""Is this console interactive?"""
return sys.stdin is not None and sys.stdin.isatty()
@@ -635,7 +651,7 @@ def partition(
return filterfalse(pred, t1), filter(pred, t2)
class ConfiguredPep517HookCaller(Pep517HookCaller):
class ConfiguredBuildBackendHookCaller(BuildBackendHookCaller):
def __init__(
self,
config_holder: Any,
@@ -239,8 +239,8 @@ def call_subprocess(
def runner_with_spinner_message(message: str) -> Callable[..., None]:
"""Provide a subprocess_runner that shows a spinner message.
Intended for use with for pep517's Pep517HookCaller. Thus, the runner has
an API that matches what's expected by Pep517HookCaller.subprocess_runner.
Intended for use with for BuildBackendHookCaller. Thus, the runner has
an API that matches what's expected by BuildBackendHookCaller.subprocess_runner.
"""
def runner(
@@ -19,7 +19,7 @@ def _running_under_venv() -> bool:
return sys.prefix != getattr(sys, "base_prefix", sys.prefix)
def _running_under_regular_virtualenv() -> bool:
def _running_under_legacy_virtualenv() -> bool:
"""Checks if sys.real_prefix is set.
This handles virtual environments created with pypa's virtualenv.
@@ -29,8 +29,8 @@ def _running_under_regular_virtualenv() -> bool:
def running_under_virtualenv() -> bool:
"""Return True if we're running inside a virtualenv, False otherwise."""
return _running_under_venv() or _running_under_regular_virtualenv()
"""True if we're running inside a virtual environment, False otherwise."""
return _running_under_venv() or _running_under_legacy_virtualenv()
def _get_pyvenv_cfg_lines() -> Optional[List[str]]:
@@ -77,7 +77,7 @@ def _no_global_under_venv() -> bool:
return False
def _no_global_under_regular_virtualenv() -> bool:
def _no_global_under_legacy_virtualenv() -> bool:
"""Check if "no-global-site-packages.txt" exists beside site.py
This mirrors logic in pypa/virtualenv for determining whether system
@@ -98,7 +98,7 @@ def virtualenv_no_global() -> bool:
if _running_under_venv():
return _no_global_under_venv()
if _running_under_regular_virtualenv():
return _no_global_under_regular_virtualenv()
if _running_under_legacy_virtualenv():
return _no_global_under_legacy_virtualenv()
return False
+1 -1
View File
@@ -72,7 +72,7 @@ class Bazaar(VersionControl):
@classmethod
def get_url_rev_and_auth(cls, url: str) -> Tuple[str, Optional[str], AuthInfo]:
# hotfix the URL scheme after removing bzr+ from bzr+ssh:// readd it
# hotfix the URL scheme after removing bzr+ from bzr+ssh:// re-add it
url, rev, user_pass = super().get_url_rev_and_auth(url)
if url.startswith("ssh://"):
url = "bzr+" + url
@@ -87,7 +87,7 @@ class Subversion(VersionControl):
@classmethod
def get_url_rev_and_auth(cls, url: str) -> Tuple[str, Optional[str], AuthInfo]:
# hotfix the URL scheme after removing svn+ from svn+ssh:// readd it
# hotfix the URL scheme after removing svn+ from svn+ssh:// re-add it
url, rev, user_pass = super().get_url_rev_and_auth(url)
if url.startswith("ssh://"):
url = "svn+" + url
@@ -1,4 +1,4 @@
from .core import contents, where
__all__ = ["contents", "where"]
__version__ = "2022.09.24"
__version__ = "2022.12.07"
@@ -636,37 +636,6 @@ BA6+C4OmF4O5MBKgxTMVBbkN+8cFduPYSo38NBejxiEovjBFMR7HeL5YYTisO+IB
ZQ==
-----END CERTIFICATE-----
# Issuer: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C.
# Subject: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C.
# Label: "Network Solutions Certificate Authority"
# Serial: 116697915152937497490437556386812487904
# MD5 Fingerprint: d3:f3:a6:16:c0:fa:6b:1d:59:b1:2d:96:4d:0e:11:2e
# SHA1 Fingerprint: 74:f8:a3:c3:ef:e7:b3:90:06:4b:83:90:3c:21:64:60:20:e5:df:ce
# SHA256 Fingerprint: 15:f0:ba:00:a3:ac:7a:f3:ac:88:4c:07:2b:10:11:a0:77:bd:77:c0:97:f4:01:64:b2:f8:59:8a:bd:83:86:0c
-----BEGIN CERTIFICATE-----
MIID5jCCAs6gAwIBAgIQV8szb8JcFuZHFhfjkDFo4DANBgkqhkiG9w0BAQUFADBi
MQswCQYDVQQGEwJVUzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMu
MTAwLgYDVQQDEydOZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3Jp
dHkwHhcNMDYxMjAxMDAwMDAwWhcNMjkxMjMxMjM1OTU5WjBiMQswCQYDVQQGEwJV
UzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMuMTAwLgYDVQQDEydO
ZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEiMA0GCSqG
SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDkvH6SMG3G2I4rC7xGzuAnlt7e+foS0zwz
c7MEL7xxjOWftiJgPl9dzgn/ggwbmlFQGiaJ3dVhXRncEg8tCqJDXRfQNJIg6nPP
OCwGJgl6cvf6UDL4wpPTaaIjzkGxzOTVHzbRijr4jGPiFFlp7Q3Tf2vouAPlT2rl
mGNpSAW+Lv8ztumXWWn4Zxmuk2GWRBXTcrA/vGp97Eh/jcOrqnErU2lBUzS1sLnF
BgrEsEX1QV1uiUV7PTsmjHTC5dLRfbIR1PtYMiKagMnc/Qzpf14Dl847ABSHJ3A4
qY5usyd2mFHgBeMhqxrVhSI8KbWaFsWAqPS7azCPL0YCorEMIuDTAgMBAAGjgZcw
gZQwHQYDVR0OBBYEFCEwyfsA106Y2oeqKtCnLrFAMadMMA4GA1UdDwEB/wQEAwIB
BjAPBgNVHRMBAf8EBTADAQH/MFIGA1UdHwRLMEkwR6BFoEOGQWh0dHA6Ly9jcmwu
bmV0c29sc3NsLmNvbS9OZXR3b3JrU29sdXRpb25zQ2VydGlmaWNhdGVBdXRob3Jp
dHkuY3JsMA0GCSqGSIb3DQEBBQUAA4IBAQC7rkvnt1frf6ott3NHhWrB5KUd5Oc8
6fRZZXe1eltajSU24HqXLjjAV2CDmAaDn7l2em5Q4LqILPxFzBiwmZVRDuwduIj/
h1AcgsLj4DKAv6ALR8jDMe+ZZzKATxcheQxpXN5eNK4CtSbqUN9/GGUsyfJj4akH
/nxxH2szJGoeBfcFaMBqEssuXmHLrijTfsK0ZpEmXzwuJF/LWA/rKOyvEZbz3Htv
wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHN
pGxlaKFJdlxDydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey
-----END CERTIFICATE-----
# Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited
# Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited
# Label: "COMODO ECC Certification Authority"
@@ -2204,46 +2173,6 @@ KoZIzj0EAwMDaAAwZQIxAOVpEslu28YxuglB4Zf4+/2a4n0Sye18ZNPLBSWLVtmg
xwy8p2Fp8fc74SrL+SvzZpA3
-----END CERTIFICATE-----
# Issuer: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden
# Subject: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden
# Label: "Staat der Nederlanden EV Root CA"
# Serial: 10000013
# MD5 Fingerprint: fc:06:af:7b:e8:1a:f1:9a:b4:e8:d2:70:1f:c0:f5:ba
# SHA1 Fingerprint: 76:e2:7e:c1:4f:db:82:c1:c0:a6:75:b5:05:be:3d:29:b4:ed:db:bb
# SHA256 Fingerprint: 4d:24:91:41:4c:fe:95:67:46:ec:4c:ef:a6:cf:6f:72:e2:8a:13:29:43:2f:9d:8a:90:7a:c4:cb:5d:ad:c1:5a
-----BEGIN CERTIFICATE-----
MIIFcDCCA1igAwIBAgIEAJiWjTANBgkqhkiG9w0BAQsFADBYMQswCQYDVQQGEwJO
TDEeMBwGA1UECgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSkwJwYDVQQDDCBTdGFh
dCBkZXIgTmVkZXJsYW5kZW4gRVYgUm9vdCBDQTAeFw0xMDEyMDgxMTE5MjlaFw0y
MjEyMDgxMTEwMjhaMFgxCzAJBgNVBAYTAk5MMR4wHAYDVQQKDBVTdGFhdCBkZXIg
TmVkZXJsYW5kZW4xKTAnBgNVBAMMIFN0YWF0IGRlciBOZWRlcmxhbmRlbiBFViBS
b290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA48d+ifkkSzrS
M4M1LGns3Amk41GoJSt5uAg94JG6hIXGhaTK5skuU6TJJB79VWZxXSzFYGgEt9nC
UiY4iKTWO0Cmws0/zZiTs1QUWJZV1VD+hq2kY39ch/aO5ieSZxeSAgMs3NZmdO3d
Z//BYY1jTw+bbRcwJu+r0h8QoPnFfxZpgQNH7R5ojXKhTbImxrpsX23Wr9GxE46p
rfNeaXUmGD5BKyF/7otdBwadQ8QpCiv8Kj6GyzyDOvnJDdrFmeK8eEEzduG/L13l
pJhQDBXd4Pqcfzho0LKmeqfRMb1+ilgnQ7O6M5HTp5gVXJrm0w912fxBmJc+qiXb
j5IusHsMX/FjqTf5m3VpTCgmJdrV8hJwRVXj33NeN/UhbJCONVrJ0yPr08C+eKxC
KFhmpUZtcALXEPlLVPxdhkqHz3/KRawRWrUgUY0viEeXOcDPusBCAUCZSCELa6fS
/ZbV0b5GnUngC6agIk440ME8MLxwjyx1zNDFjFE7PZQIZCZhfbnDZY8UnCHQqv0X
cgOPvZuM5l5Tnrmd74K74bzickFbIZTTRTeU0d8JOV3nI6qaHcptqAqGhYqCvkIH
1vI4gnPah1vlPNOePqc7nvQDs/nxfRN0Av+7oeX6AHkcpmZBiFxgV6YuCcS6/ZrP
px9Aw7vMWgpVSzs4dlG4Y4uElBbmVvMCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB
/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFP6rAJCYniT8qcwaivsnuL8wbqg7
MA0GCSqGSIb3DQEBCwUAA4ICAQDPdyxuVr5Os7aEAJSrR8kN0nbHhp8dB9O2tLsI
eK9p0gtJ3jPFrK3CiAJ9Brc1AsFgyb/E6JTe1NOpEyVa/m6irn0F3H3zbPB+po3u
2dfOWBfoqSmuc0iH55vKbimhZF8ZE/euBhD/UcabTVUlT5OZEAFTdfETzsemQUHS
v4ilf0X8rLiltTMMgsT7B/Zq5SWEXwbKwYY5EdtYzXc7LMJMD16a4/CrPmEbUCTC
wPTxGfARKbalGAKb12NMcIxHowNDXLldRqANb/9Zjr7dn3LDWyvfjFvO5QxGbJKy
CqNMVEIYFRIYvdr8unRu/8G2oGTYqV9Vrp9canaW2HNnh/tNf1zuacpzEPuKqf2e
vTY4SUmH9A4U8OmHuD+nT3pajnnUk+S7aFKErGzp85hwVXIy+TSrK0m1zSBi5Dp6
Z2Orltxtrpfs/J92VoguZs9btsmksNcFuuEnL5O7Jiqik7Ab846+HUCjuTaPPoIa
Gl6I6lD4WeKDRikL40Rc4ZW2aZCaFG+XroHPaO+Zmr615+F/+PoTRxZMzG0IQOeL
eG9QgkRQP2YGiqtDhFZKDyAthg710tvSeopLzaXoTvFeJiUBWSOgftL2fiFX1ye8
FVdMpEbB4IMeDExNH08GGeL5qPQ6gqGyeUN51q1veieQA6TqJIc/2b3Z6fJfUEkc
7uzXLg==
-----END CERTIFICATE-----
# Issuer: CN=IdenTrust Commercial Root CA 1 O=IdenTrust
# Subject: CN=IdenTrust Commercial Root CA 1 O=IdenTrust
# Label: "IdenTrust Commercial Root CA 1"
@@ -2851,116 +2780,6 @@ T8p+ck0LcIymSLumoRT2+1hEmRSuqguTaaApJUqlyyvdimYHFngVV3Eb7PVHhPOe
MTd61X8kreS8/f3MboPoDKi3QWwH3b08hpcv0g==
-----END CERTIFICATE-----
# Issuer: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
# Subject: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
# Label: "TrustCor RootCert CA-1"
# Serial: 15752444095811006489
# MD5 Fingerprint: 6e:85:f1:dc:1a:00:d3:22:d5:b2:b2:ac:6b:37:05:45
# SHA1 Fingerprint: ff:bd:cd:e7:82:c8:43:5e:3c:6f:26:86:5c:ca:a8:3a:45:5b:c3:0a
# SHA256 Fingerprint: d4:0e:9c:86:cd:8f:e4:68:c1:77:69:59:f4:9e:a7:74:fa:54:86:84:b6:c4:06:f3:90:92:61:f4:dc:e2:57:5c
-----BEGIN CERTIFICATE-----
MIIEMDCCAxigAwIBAgIJANqb7HHzA7AZMA0GCSqGSIb3DQEBCwUAMIGkMQswCQYD
VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk
MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U
cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRydXN0Q29y
IFJvb3RDZXJ0IENBLTEwHhcNMTYwMjA0MTIzMjE2WhcNMjkxMjMxMTcyMzE2WjCB
pDELMAkGA1UEBhMCUEExDzANBgNVBAgMBlBhbmFtYTEUMBIGA1UEBwwLUGFuYW1h
IENpdHkxJDAiBgNVBAoMG1RydXN0Q29yIFN5c3RlbXMgUy4gZGUgUi5MLjEnMCUG
A1UECwweVHJ1c3RDb3IgQ2VydGlmaWNhdGUgQXV0aG9yaXR5MR8wHQYDVQQDDBZU
cnVzdENvciBSb290Q2VydCBDQS0xMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB
CgKCAQEAv463leLCJhJrMxnHQFgKq1mqjQCj/IDHUHuO1CAmujIS2CNUSSUQIpid
RtLByZ5OGy4sDjjzGiVoHKZaBeYei0i/mJZ0PmnK6bV4pQa81QBeCQryJ3pS/C3V
seq0iWEk8xoT26nPUu0MJLq5nux+AHT6k61sKZKuUbS701e/s/OojZz0JEsq1pme
9J7+wH5COucLlVPat2gOkEz7cD+PSiyU8ybdY2mplNgQTsVHCJCZGxdNuWxu72CV
EY4hgLW9oHPY0LJ3xEXqWib7ZnZ2+AYfYW0PVcWDtxBWcgYHpfOxGgMFZA6dWorW
hnAbJN7+KIor0Gqw/Hqi3LJ5DotlDwIDAQABo2MwYTAdBgNVHQ4EFgQU7mtJPHo/
DeOxCbeKyKsZn3MzUOcwHwYDVR0jBBgwFoAU7mtJPHo/DeOxCbeKyKsZn3MzUOcw
DwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQAD
ggEBACUY1JGPE+6PHh0RU9otRCkZoB5rMZ5NDp6tPVxBb5UrJKF5mDo4Nvu7Zp5I
/5CQ7z3UuJu0h3U/IJvOcs+hVcFNZKIZBqEHMwwLKeXx6quj7LUKdJDHfXLy11yf
ke+Ri7fc7Waiz45mO7yfOgLgJ90WmMCV1Aqk5IGadZQ1nJBfiDcGrVmVCrDRZ9MZ
yonnMlo2HD6CqFqTvsbQZJG2z9m2GM/bftJlo6bEjhcxwft+dtvTheNYsnd6djts
L1Ac59v2Z3kf9YKVmgenFK+P3CghZwnS1k1aHBkcjndcw5QkPTJrS37UeJSDvjdN
zl/HHk484IkzlQsPpTLWPFp5LBk=
-----END CERTIFICATE-----
# Issuer: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
# Subject: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
# Label: "TrustCor RootCert CA-2"
# Serial: 2711694510199101698
# MD5 Fingerprint: a2:e1:f8:18:0b:ba:45:d5:c7:41:2a:bb:37:52:45:64
# SHA1 Fingerprint: b8:be:6d:cb:56:f1:55:b9:63:d4:12:ca:4e:06:34:c7:94:b2:1c:c0
# SHA256 Fingerprint: 07:53:e9:40:37:8c:1b:d5:e3:83:6e:39:5d:ae:a5:cb:83:9e:50:46:f1:bd:0e:ae:19:51:cf:10:fe:c7:c9:65
-----BEGIN CERTIFICATE-----
MIIGLzCCBBegAwIBAgIIJaHfyjPLWQIwDQYJKoZIhvcNAQELBQAwgaQxCzAJBgNV
BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw
IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy
dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEfMB0GA1UEAwwWVHJ1c3RDb3Ig
Um9vdENlcnQgQ0EtMjAeFw0xNjAyMDQxMjMyMjNaFw0zNDEyMzExNzI2MzlaMIGk
MQswCQYDVQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEg
Q2l0eTEkMCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYD
VQQLDB5UcnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRy
dXN0Q29yIFJvb3RDZXJ0IENBLTIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK
AoICAQCnIG7CKqJiJJWQdsg4foDSq8GbZQWU9MEKENUCrO2fk8eHyLAnK0IMPQo+
QVqedd2NyuCb7GgypGmSaIwLgQ5WoD4a3SwlFIIvl9NkRvRUqdw6VC0xK5mC8tkq
1+9xALgxpL56JAfDQiDyitSSBBtlVkxs1Pu2YVpHI7TYabS3OtB0PAx1oYxOdqHp
2yqlO/rOsP9+aij9JxzIsekp8VduZLTQwRVtDr4uDkbIXvRR/u8OYzo7cbrPb1nK
DOObXUm4TOJXsZiKQlecdu/vvdFoqNL0Cbt3Nb4lggjEFixEIFapRBF37120Hape
az6LMvYHL1cEksr1/p3C6eizjkxLAjHZ5DxIgif3GIJ2SDpxsROhOdUuxTTCHWKF
3wP+TfSvPd9cW436cOGlfifHhi5qjxLGhF5DUVCcGZt45vz27Ud+ez1m7xMTiF88
oWP7+ayHNZ/zgp6kPwqcMWmLmaSISo5uZk3vFsQPeSghYA2FFn3XVDjxklb9tTNM
g9zXEJ9L/cb4Qr26fHMC4P99zVvh1Kxhe1fVSntb1IVYJ12/+CtgrKAmrhQhJ8Z3
mjOAPF5GP/fDsaOGM8boXg25NSyqRsGFAnWAoOsk+xWq5Gd/bnc/9ASKL3x74xdh
8N0JqSDIvgmk0H5Ew7IwSjiqqewYmgeCK9u4nBit2uBGF6zPXQIDAQABo2MwYTAd
BgNVHQ4EFgQU2f4hQG6UnrybPZx9mCAZ5YwwYrIwHwYDVR0jBBgwFoAU2f4hQG6U
nrybPZx9mCAZ5YwwYrIwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYw
DQYJKoZIhvcNAQELBQADggIBAJ5Fngw7tu/hOsh80QA9z+LqBrWyOrsGS2h60COX
dKcs8AjYeVrXWoSK2BKaG9l9XE1wxaX5q+WjiYndAfrs3fnpkpfbsEZC89NiqpX+
MWcUaViQCqoL7jcjx1BRtPV+nuN79+TMQjItSQzL/0kMmx40/W5ulop5A7Zv2wnL
/V9lFDfhOPXzYRZY5LVtDQsEGz9QLX+zx3oaFoBg+Iof6Rsqxvm6ARppv9JYx1RX
CI/hOWB3S6xZhBqI8d3LT3jX5+EzLfzuQfogsL7L9ziUwOHQhQ+77Sxzq+3+knYa
ZH9bDTMJBzN7Bj8RpFxwPIXAz+OQqIN3+tvmxYxoZxBnpVIt8MSZj3+/0WvitUfW
2dCFmU2Umw9Lje4AWkcdEQOsQRivh7dvDDqPys/cA8GiCcjl/YBeyGBCARsaU1q7
N6a3vLqE6R5sGtRk2tRD/pOLS/IseRYQ1JMLiI+h2IYURpFHmygk71dSTlxCnKr3
Sewn6EAes6aJInKc9Q0ztFijMDvd1GpUk74aTfOTlPf8hAs/hCBcNANExdqtvArB
As8e5ZTZ845b2EzwnexhF7sUMlQMAimTHpKG9n/v55IFDlndmQguLvqcAFLTxWYp
5KeXRKQOKIETNcX2b2TmQcTVL8w0RSXPQQCWPUouwpaYT05KnJe32x+SMsj/D1Fu
1uwJ
-----END CERTIFICATE-----
# Issuer: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
# Subject: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority
# Label: "TrustCor ECA-1"
# Serial: 9548242946988625984
# MD5 Fingerprint: 27:92:23:1d:0a:f5:40:7c:e9:e6:6b:9d:d8:f5:e7:6c
# SHA1 Fingerprint: 58:d1:df:95:95:67:6b:63:c0:f0:5b:1c:17:4d:8b:84:0b:c8:78:bd
# SHA256 Fingerprint: 5a:88:5d:b1:9c:01:d9:12:c5:75:93:88:93:8c:af:bb:df:03:1a:b2:d4:8e:91:ee:15:58:9b:42:97:1d:03:9c
-----BEGIN CERTIFICATE-----
MIIEIDCCAwigAwIBAgIJAISCLF8cYtBAMA0GCSqGSIb3DQEBCwUAMIGcMQswCQYD
VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk
MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U
cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxFzAVBgNVBAMMDlRydXN0Q29y
IEVDQS0xMB4XDTE2MDIwNDEyMzIzM1oXDTI5MTIzMTE3MjgwN1owgZwxCzAJBgNV
BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw
IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy
dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEXMBUGA1UEAwwOVHJ1c3RDb3Ig
RUNBLTEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDPj+ARtZ+odnbb
3w9U73NjKYKtR8aja+3+XzP4Q1HpGjORMRegdMTUpwHmspI+ap3tDvl0mEDTPwOA
BoJA6LHip1GnHYMma6ve+heRK9jGrB6xnhkB1Zem6g23xFUfJ3zSCNV2HykVh0A5
3ThFEXXQmqc04L/NyFIduUd+Dbi7xgz2c1cWWn5DkR9VOsZtRASqnKmcp0yJF4Ou
owReUoCLHhIlERnXDH19MURB6tuvsBzvgdAsxZohmz3tQjtQJvLsznFhBmIhVE5/
wZ0+fyCMgMsq2JdiyIMzkX2woloPV+g7zPIlstR8L+xNxqE6FXrntl019fZISjZF
ZtS6mFjBAgMBAAGjYzBhMB0GA1UdDgQWBBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAf
BgNVHSMEGDAWgBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAPBgNVHRMBAf8EBTADAQH/
MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEABT41XBVwm8nHc2Fv
civUwo/yQ10CzsSUuZQRg2dd4mdsdXa/uwyqNsatR5Nj3B5+1t4u/ukZMjgDfxT2
AHMsWbEhBuH7rBiVDKP/mZb3Kyeb1STMHd3BOuCYRLDE5D53sXOpZCz2HAF8P11F
hcCF5yWPldwX8zyfGm6wyuMdKulMY/okYWLW2n62HGz1Ah3UKt1VkOsqEUc8Ll50
soIipX1TH0XsJ5F95yIW6MBoNtjG8U+ARDL54dHRHareqKucBK+tIA5kmE2la8BI
WJZpTdwHjFGTot+fDz2LYLSCjaoITmJF4PkL0uDgPFveXHEnJcLmA4GLEFPjx1Wi
tJ/X5g==
-----END CERTIFICATE-----
# Issuer: CN=SSL.com Root Certification Authority RSA O=SSL Corporation
# Subject: CN=SSL.com Root Certification Authority RSA O=SSL Corporation
# Label: "SSL.com Root Certification Authority RSA"
+29 -7
View File
@@ -15,19 +15,29 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetgroupprober import CharSetGroupProber
from .charsetprober import CharSetProber
from .enums import InputState
from .resultdict import ResultDict
from .universaldetector import UniversalDetector
from .version import VERSION, __version__
__all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"]
def detect(byte_str):
def detect(
byte_str: Union[bytes, bytearray], should_rename_legacy: bool = False
) -> ResultDict:
"""
Detect the encoding of the given byte string.
:param byte_str: The byte sequence to examine.
:type byte_str: ``bytes`` or ``bytearray``
:param should_rename_legacy: Should we rename legacy encodings
to their more modern equivalents?
:type should_rename_legacy: ``bool``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
@@ -35,12 +45,16 @@ def detect(byte_str):
f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
)
byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
detector.feed(byte_str)
return detector.close()
def detect_all(byte_str, ignore_threshold=False):
def detect_all(
byte_str: Union[bytes, bytearray],
ignore_threshold: bool = False,
should_rename_legacy: bool = False,
) -> List[ResultDict]:
"""
Detect all the possible encodings of the given byte string.
@@ -50,6 +64,9 @@ def detect_all(byte_str, ignore_threshold=False):
``UniversalDetector.MINIMUM_THRESHOLD``
in results.
:type ignore_threshold: ``bool``
:param should_rename_legacy: Should we rename legacy encodings
to their more modern equivalents?
:type should_rename_legacy: ``bool``
"""
if not isinstance(byte_str, bytearray):
if not isinstance(byte_str, bytes):
@@ -58,15 +75,15 @@ def detect_all(byte_str, ignore_threshold=False):
)
byte_str = bytearray(byte_str)
detector = UniversalDetector()
detector = UniversalDetector(should_rename_legacy=should_rename_legacy)
detector.feed(byte_str)
detector.close()
if detector.input_state == InputState.HIGH_BYTE:
results = []
probers = []
results: List[ResultDict] = []
probers: List[CharSetProber] = []
for prober in detector.charset_probers:
if hasattr(prober, "probers"):
if isinstance(prober, CharSetGroupProber):
probers.extend(p for p in prober.probers)
else:
probers.append(prober)
@@ -80,6 +97,11 @@ def detect_all(byte_str, ignore_threshold=False):
charset_name = detector.ISO_WIN_MAP.get(
lower_charset_name, charset_name
)
# Rename legacy encodings with superset encodings if asked
if should_rename_legacy:
charset_name = detector.LEGACY_MAP.get(
charset_name.lower(), charset_name
)
results.append(
{
"encoding": charset_name,
@@ -32,16 +32,16 @@ from .mbcssm import BIG5_SM_MODEL
class Big5Prober(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
self.distribution_analyzer = Big5DistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "Big5"
@property
def language(self):
def language(self) -> str:
return "Chinese"
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Tuple, Union
from .big5freq import (
BIG5_CHAR_TO_FREQ_ORDER,
BIG5_TABLE_SIZE,
@@ -59,22 +61,22 @@ class CharDistributionAnalysis:
SURE_NO = 0.01
MINIMUM_DATA_THRESHOLD = 3
def __init__(self):
def __init__(self) -> None:
# Mapping table to get frequency order from char order (get from
# GetOrder())
self._char_to_freq_order = tuple()
self._table_size = None # Size of above table
self._char_to_freq_order: Tuple[int, ...] = tuple()
self._table_size = 0 # Size of above table
# This is a constant value which varies from language to language,
# used in calculating confidence. See
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
# for further detail.
self.typical_distribution_ratio = None
self._done = None
self._total_chars = None
self._freq_chars = None
self.typical_distribution_ratio = 0.0
self._done = False
self._total_chars = 0
self._freq_chars = 0
self.reset()
def reset(self):
def reset(self) -> None:
"""reset analyser, clear any state"""
# If this flag is set to True, detection is done and conclusion has
# been made
@@ -83,7 +85,7 @@ class CharDistributionAnalysis:
# The number of characters whose frequency order is less than 512
self._freq_chars = 0
def feed(self, char, char_len):
def feed(self, char: Union[bytes, bytearray], char_len: int) -> None:
"""feed a character with known length"""
if char_len == 2:
# we only care about 2-bytes character in our distribution analysis
@@ -97,7 +99,7 @@ class CharDistributionAnalysis:
if 512 > self._char_to_freq_order[order]:
self._freq_chars += 1
def get_confidence(self):
def get_confidence(self) -> float:
"""return confidence based on existing data"""
# if we didn't receive any character in our consideration range,
# return negative answer
@@ -114,12 +116,12 @@ class CharDistributionAnalysis:
# normalize confidence (we don't want to be 100% sure)
return self.SURE_YES
def got_enough_data(self):
def got_enough_data(self) -> bool:
# It is not necessary to receive all data to draw conclusion.
# For charset detection, certain amount of data is enough
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
def get_order(self, _):
def get_order(self, _: Union[bytes, bytearray]) -> int:
# We do not handle characters based on the original encoding string,
# but convert this encoding string to a number, here called order.
# This allows multiple encodings of a language to share one frequency
@@ -128,13 +130,13 @@ class CharDistributionAnalysis:
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
self._table_size = EUCTW_TABLE_SIZE
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-TW encoding, we are interested
# first byte range: 0xc4 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@@ -146,13 +148,13 @@ class EUCTWDistributionAnalysis(CharDistributionAnalysis):
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-KR encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@@ -164,13 +166,13 @@ class EUCKRDistributionAnalysis(CharDistributionAnalysis):
class JOHABDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
self._table_size = EUCKR_TABLE_SIZE
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
first_char = byte_str[0]
if 0x88 <= first_char < 0xD4:
code = first_char * 256 + byte_str[1]
@@ -179,13 +181,13 @@ class JOHABDistributionAnalysis(CharDistributionAnalysis):
class GB2312DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
self._table_size = GB2312_TABLE_SIZE
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for GB2312 encoding, we are interested
# first byte range: 0xb0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@@ -197,13 +199,13 @@ class GB2312DistributionAnalysis(CharDistributionAnalysis):
class Big5DistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
self._table_size = BIG5_TABLE_SIZE
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for big5 encoding, we are interested
# first byte range: 0xa4 -- 0xfe
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
@@ -217,13 +219,13 @@ class Big5DistributionAnalysis(CharDistributionAnalysis):
class SJISDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for sjis encoding, we are interested
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
@@ -242,13 +244,13 @@ class SJISDistributionAnalysis(CharDistributionAnalysis):
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
self._table_size = JIS_TABLE_SIZE
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> int:
# for euc-JP encoding, we are interested
# first byte range: 0xa0 -- 0xfe
# second byte range: 0xa1 -- 0xfe
@@ -25,29 +25,30 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Optional, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
from .enums import LanguageFilter, ProbingState
class CharSetGroupProber(CharSetProber):
def __init__(self, lang_filter=None):
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self._active_num = 0
self.probers = []
self._best_guess_prober = None
self.probers: List[CharSetProber] = []
self._best_guess_prober: Optional[CharSetProber] = None
def reset(self):
def reset(self) -> None:
super().reset()
self._active_num = 0
for prober in self.probers:
if prober:
prober.reset()
prober.active = True
self._active_num += 1
prober.reset()
prober.active = True
self._active_num += 1
self._best_guess_prober = None
@property
def charset_name(self):
def charset_name(self) -> Optional[str]:
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
@@ -55,17 +56,15 @@ class CharSetGroupProber(CharSetProber):
return self._best_guess_prober.charset_name
@property
def language(self):
def language(self) -> Optional[str]:
if not self._best_guess_prober:
self.get_confidence()
if not self._best_guess_prober:
return None
return self._best_guess_prober.language
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for prober in self.probers:
if not prober:
continue
if not prober.active:
continue
state = prober.feed(byte_str)
@@ -83,7 +82,7 @@ class CharSetGroupProber(CharSetProber):
return self.state
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
state = self.state
if state == ProbingState.FOUND_IT:
return 0.99
@@ -92,8 +91,6 @@ class CharSetGroupProber(CharSetProber):
best_conf = 0.0
self._best_guess_prober = None
for prober in self.probers:
if not prober:
continue
if not prober.active:
self.logger.debug("%s not active", prober.charset_name)
continue
@@ -28,8 +28,9 @@
import logging
import re
from typing import Optional, Union
from .enums import ProbingState
from .enums import LanguageFilter, ProbingState
INTERNATIONAL_WORDS_PATTERN = re.compile(
b"[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?"
@@ -40,35 +41,40 @@ class CharSetProber:
SHORTCUT_THRESHOLD = 0.95
def __init__(self, lang_filter=None):
self._state = None
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
self._state = ProbingState.DETECTING
self.active = True
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
def reset(self):
def reset(self) -> None:
self._state = ProbingState.DETECTING
@property
def charset_name(self):
def charset_name(self) -> Optional[str]:
return None
def feed(self, byte_str):
@property
def language(self) -> Optional[str]:
raise NotImplementedError
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
raise NotImplementedError
@property
def state(self):
def state(self) -> ProbingState:
return self._state
def get_confidence(self):
def get_confidence(self) -> float:
return 0.0
@staticmethod
def filter_high_byte_only(buf):
def filter_high_byte_only(buf: Union[bytes, bytearray]) -> bytes:
buf = re.sub(b"([\x00-\x7F])+", b" ", buf)
return buf
@staticmethod
def filter_international_words(buf):
def filter_international_words(buf: Union[bytes, bytearray]) -> bytearray:
"""
We define three types of bytes:
alphabet: english alphabets [a-zA-Z]
@@ -102,7 +108,7 @@ class CharSetProber:
return filtered
@staticmethod
def remove_xml_tags(buf):
def remove_xml_tags(buf: Union[bytes, bytearray]) -> bytes:
"""
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
@@ -117,10 +123,13 @@ class CharSetProber:
for curr, buf_char in enumerate(buf):
# Check if we're coming out of or entering an XML tag
if buf_char == b">":
# https://github.com/python/typeshed/issues/8182
if buf_char == b">": # type: ignore[comparison-overlap]
prev = curr + 1
in_tag = False
elif buf_char == b"<":
# https://github.com/python/typeshed/issues/8182
elif buf_char == b"<": # type: ignore[comparison-overlap]
if curr > prev and not in_tag:
# Keep everything after last non-extended-ASCII,
# non-alphabetic character
@@ -15,12 +15,18 @@ If no paths are provided, it takes its input from stdin.
import argparse
import sys
from typing import Iterable, List, Optional
from .. import __version__
from ..universaldetector import UniversalDetector
def description_of(lines, name="stdin"):
def description_of(
lines: Iterable[bytes],
name: str = "stdin",
minimal: bool = False,
should_rename_legacy: bool = False,
) -> Optional[str]:
"""
Return a string describing the probable encoding of a file or
list of strings.
@@ -29,8 +35,11 @@ def description_of(lines, name="stdin"):
:type lines: Iterable of bytes
:param name: Name of file or collection of lines
:type name: str
:param should_rename_legacy: Should we rename legacy encodings to
their more modern equivalents?
:type should_rename_legacy: ``bool``
"""
u = UniversalDetector()
u = UniversalDetector(should_rename_legacy=should_rename_legacy)
for line in lines:
line = bytearray(line)
u.feed(line)
@@ -39,12 +48,14 @@ def description_of(lines, name="stdin"):
break
u.close()
result = u.result
if minimal:
return result["encoding"]
if result["encoding"]:
return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
return f"{name}: no result"
def main(argv=None):
def main(argv: Optional[List[str]] = None) -> None:
"""
Handles command line arguments and gets things started.
@@ -54,17 +65,28 @@ def main(argv=None):
"""
# Get command line arguments
parser = argparse.ArgumentParser(
description="Takes one or more file paths and reports their detected \
encodings"
description=(
"Takes one or more file paths and reports their detected encodings"
)
)
parser.add_argument(
"input",
help="File whose encoding we would like to determine. \
(default: stdin)",
help="File whose encoding we would like to determine. (default: stdin)",
type=argparse.FileType("rb"),
nargs="*",
default=[sys.stdin.buffer],
)
parser.add_argument(
"--minimal",
help="Print only the encoding to standard output",
action="store_true",
)
parser.add_argument(
"-l",
"--legacy",
help="Rename legacy encodings to more modern ones.",
action="store_true",
)
parser.add_argument(
"--version", action="version", version=f"%(prog)s {__version__}"
)
@@ -79,7 +101,11 @@ def main(argv=None):
"--help\n",
file=sys.stderr,
)
print(description_of(f, f.name))
print(
description_of(
f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy
)
)
if __name__ == "__main__":
@@ -27,6 +27,7 @@
import logging
from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
@@ -53,18 +54,19 @@ class CodingStateMachine:
encoding from consideration from here on.
"""
def __init__(self, sm):
def __init__(self, sm: CodingStateMachineDict) -> None:
self._model = sm
self._curr_byte_pos = 0
self._curr_char_len = 0
self._curr_state = None
self._curr_state = MachineState.START
self.active = True
self.logger = logging.getLogger(__name__)
self.reset()
def reset(self):
def reset(self) -> None:
self._curr_state = MachineState.START
def next_state(self, c):
def next_state(self, c: int) -> int:
# for each byte we get its class
# if it is first byte, we also get byte length
byte_class = self._model["class_table"][c]
@@ -77,12 +79,12 @@ class CodingStateMachine:
self._curr_byte_pos += 1
return self._curr_state
def get_current_charlen(self):
def get_current_charlen(self) -> int:
return self._curr_char_len
def get_coding_state_machine(self):
def get_coding_state_machine(self) -> str:
return self._model["name"]
@property
def language(self):
def language(self) -> str:
return self._model["language"]
@@ -0,0 +1,19 @@
from typing import TYPE_CHECKING, Tuple
if TYPE_CHECKING:
# TypedDict was introduced in Python 3.8.
#
# TODO: Remove the else block and TYPE_CHECKING check when dropping support
# for Python 3.7.
from typing import TypedDict
class CodingStateMachineDict(TypedDict, total=False):
class_table: Tuple[int, ...]
class_factor: int
state_table: Tuple[int, ...]
char_len_table: Tuple[int, ...]
name: str
language: str # Optional key
else:
CodingStateMachineDict = dict
@@ -32,7 +32,7 @@ from .mbcssm import CP949_SM_MODEL
class CP949Prober(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
@@ -41,9 +41,9 @@ class CP949Prober(MultiByteCharSetProber):
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "CP949"
@property
def language(self):
def language(self) -> str:
return "Korean"
+6 -3
View File
@@ -4,6 +4,8 @@ All of the Enums that are used throughout the chardet package.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
from enum import Enum, Flag
class InputState:
"""
@@ -15,12 +17,13 @@ class InputState:
HIGH_BYTE = 2
class LanguageFilter:
class LanguageFilter(Flag):
"""
This enum represents the different language filters we can apply to a
``UniversalDetector``.
"""
NONE = 0x00
CHINESE_SIMPLIFIED = 0x01
CHINESE_TRADITIONAL = 0x02
JAPANESE = 0x04
@@ -31,7 +34,7 @@ class LanguageFilter:
CJK = CHINESE | JAPANESE | KOREAN
class ProbingState:
class ProbingState(Enum):
"""
This enum represents the different states a prober can be in.
"""
@@ -62,7 +65,7 @@ class SequenceLikelihood:
POSITIVE = 3
@classmethod
def get_num_categories(cls):
def get_num_categories(cls) -> int:
""":returns: The number of likelihood categories in the enum."""
return 4
+13 -13
View File
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Optional, Union
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import LanguageFilter, MachineState, ProbingState
@@ -43,7 +45,7 @@ class EscCharSetProber(CharSetProber):
identify these encodings.
"""
def __init__(self, lang_filter=None):
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self.coding_sm = []
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
@@ -53,17 +55,15 @@ class EscCharSetProber(CharSetProber):
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
if self.lang_filter & LanguageFilter.KOREAN:
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
self.active_sm_count = None
self._detected_charset = None
self._detected_language = None
self._state = None
self.active_sm_count = 0
self._detected_charset: Optional[str] = None
self._detected_language: Optional[str] = None
self._state = ProbingState.DETECTING
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
for coding_sm in self.coding_sm:
if not coding_sm:
continue
coding_sm.active = True
coding_sm.reset()
self.active_sm_count = len(self.coding_sm)
@@ -71,20 +71,20 @@ class EscCharSetProber(CharSetProber):
self._detected_language = None
@property
def charset_name(self):
def charset_name(self) -> Optional[str]:
return self._detected_charset
@property
def language(self):
def language(self) -> Optional[str]:
return self._detected_language
def get_confidence(self):
def get_confidence(self) -> float:
return 0.99 if self._detected_charset else 0.00
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
for coding_sm in self.coding_sm:
if not coding_sm or not coding_sm.active:
if not coding_sm.active:
continue
coding_state = coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
+5 -4
View File
@@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
# fmt: off
@@ -75,7 +76,7 @@ MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
HZ_SM_MODEL = {
HZ_SM_MODEL: CodingStateMachineDict = {
"class_table": HZ_CLS,
"class_factor": 6,
"state_table": HZ_ST,
@@ -134,7 +135,7 @@ ISO2022CN_ST = (
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022CN_SM_MODEL = {
ISO2022CN_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022CN_CLS,
"class_factor": 9,
"state_table": ISO2022CN_ST,
@@ -194,7 +195,7 @@ ISO2022JP_ST = (
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
ISO2022JP_SM_MODEL = {
ISO2022JP_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022JP_CLS,
"class_factor": 10,
"state_table": ISO2022JP_ST,
@@ -250,7 +251,7 @@ ISO2022KR_ST = (
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
ISO2022KR_SM_MODEL = {
ISO2022KR_SM_MODEL: CodingStateMachineDict = {
"class_table": ISO2022KR_CLS,
"class_factor": 6,
"state_table": ISO2022KR_ST,
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Union
from .chardistribution import EUCJPDistributionAnalysis
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@@ -34,26 +36,29 @@ from .mbcssm import EUCJP_SM_MODEL
class EUCJPProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
self.distribution_analyzer = EUCJPDistributionAnalysis()
self.context_analyzer = EUCJPContextAnalysis()
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
self.context_analyzer.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "EUC-JP"
@property
def language(self):
def language(self) -> str:
return "Japanese"
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
assert self.coding_sm is not None
assert self.distribution_analyzer is not None
for i, byte in enumerate(byte_str):
# PY3K: byte_str is a byte array, so byte is an int, not a byte
coding_state = self.coding_sm.next_state(byte)
@@ -89,7 +94,9 @@ class EUCJPProber(MultiByteCharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
assert self.distribution_analyzer is not None
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)
@@ -32,16 +32,16 @@ from .mbcssm import EUCKR_SM_MODEL
class EUCKRProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
self.distribution_analyzer = EUCKRDistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "EUC-KR"
@property
def language(self):
def language(self) -> str:
return "Korean"
@@ -32,16 +32,16 @@ from .mbcssm import EUCTW_SM_MODEL
class EUCTWProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
self.distribution_analyzer = EUCTWDistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "EUC-TW"
@property
def language(self):
def language(self) -> str:
return "Taiwan"
@@ -32,16 +32,16 @@ from .mbcssm import GB2312_SM_MODEL
class GB2312Prober(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
self.distribution_analyzer = GB2312DistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "GB2312"
@property
def language(self):
def language(self) -> str:
return "Chinese"
@@ -25,8 +25,11 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Optional, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
from .sbcharsetprober import SingleByteCharSetProber
# This prober doesn't actually recognize a language or a charset.
# It is a helper prober for the use of the Hebrew model probers
@@ -127,6 +130,7 @@ from .enums import ProbingState
class HebrewProber(CharSetProber):
SPACE = 0x20
# windows-1255 / ISO-8859-8 code points of interest
FINAL_KAF = 0xEA
NORMAL_KAF = 0xEB
@@ -152,31 +156,35 @@ class HebrewProber(CharSetProber):
VISUAL_HEBREW_NAME = "ISO-8859-8"
LOGICAL_HEBREW_NAME = "windows-1255"
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._final_char_logical_score = None
self._final_char_visual_score = None
self._prev = None
self._before_prev = None
self._logical_prober = None
self._visual_prober = None
self._final_char_logical_score = 0
self._final_char_visual_score = 0
self._prev = self.SPACE
self._before_prev = self.SPACE
self._logical_prober: Optional[SingleByteCharSetProber] = None
self._visual_prober: Optional[SingleByteCharSetProber] = None
self.reset()
def reset(self):
def reset(self) -> None:
self._final_char_logical_score = 0
self._final_char_visual_score = 0
# The two last characters seen in the previous buffer,
# mPrev and mBeforePrev are initialized to space in order to simulate
# a word delimiter at the beginning of the data
self._prev = " "
self._before_prev = " "
self._prev = self.SPACE
self._before_prev = self.SPACE
# These probers are owned by the group prober.
def set_model_probers(self, logical_prober, visual_prober):
def set_model_probers(
self,
logical_prober: SingleByteCharSetProber,
visual_prober: SingleByteCharSetProber,
) -> None:
self._logical_prober = logical_prober
self._visual_prober = visual_prober
def is_final(self, c):
def is_final(self, c: int) -> bool:
return c in [
self.FINAL_KAF,
self.FINAL_MEM,
@@ -185,7 +193,7 @@ class HebrewProber(CharSetProber):
self.FINAL_TSADI,
]
def is_non_final(self, c):
def is_non_final(self, c: int) -> bool:
# The normal Tsadi is not a good Non-Final letter due to words like
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
# apostrophe is converted to a space in FilterWithoutEnglishLetters
@@ -198,7 +206,7 @@ class HebrewProber(CharSetProber):
# since these words are quite rare.
return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN, self.NORMAL_PE]
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
# Final letter analysis for logical-visual decision.
# Look for evidence that the received buffer is either logical Hebrew
# or visual Hebrew.
@@ -232,9 +240,9 @@ class HebrewProber(CharSetProber):
byte_str = self.filter_high_byte_only(byte_str)
for cur in byte_str:
if cur == " ":
if cur == self.SPACE:
# We stand on a space - a word just ended
if self._before_prev != " ":
if self._before_prev != self.SPACE:
# next-to-last char was not a space so self._prev is not a
# 1 letter word
if self.is_final(self._prev):
@@ -247,9 +255,9 @@ class HebrewProber(CharSetProber):
else:
# Not standing on a space
if (
(self._before_prev == " ")
(self._before_prev == self.SPACE)
and (self.is_final(self._prev))
and (cur != " ")
and (cur != self.SPACE)
):
# case (3) [-2:space][-1:final letter][cur:not space]
self._final_char_visual_score += 1
@@ -261,7 +269,10 @@ class HebrewProber(CharSetProber):
return ProbingState.DETECTING
@property
def charset_name(self):
def charset_name(self) -> str:
assert self._logical_prober is not None
assert self._visual_prober is not None
# Make the decision: is it Logical or Visual?
# If the final letter score distance is dominant enough, rely on it.
finalsub = self._final_char_logical_score - self._final_char_visual_score
@@ -289,11 +300,14 @@ class HebrewProber(CharSetProber):
return self.LOGICAL_HEBREW_NAME
@property
def language(self):
def language(self) -> str:
return "Hebrew"
@property
def state(self):
def state(self) -> ProbingState:
assert self._logical_prober is not None
assert self._visual_prober is not None
# Remain active as long as any of the model probers are active.
if (self._logical_prober.state == ProbingState.NOT_ME) and (
self._visual_prober.state == ProbingState.NOT_ME
@@ -32,16 +32,16 @@ from .mbcssm import JOHAB_SM_MODEL
class JOHABProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL)
self.distribution_analyzer = JOHABDistributionAnalysis()
self.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "Johab"
@property
def language(self):
def language(self) -> str:
return "Korean"
+16 -15
View File
@@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Tuple, Union
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
# fmt: off
@@ -123,15 +124,15 @@ class JapaneseContextAnalysis:
MAX_REL_THRESHOLD = 1000
MINIMUM_DATA_THRESHOLD = 4
def __init__(self):
self._total_rel = None
self._rel_sample = None
self._need_to_skip_char_num = None
self._last_char_order = None
self._done = None
def __init__(self) -> None:
self._total_rel = 0
self._rel_sample: List[int] = []
self._need_to_skip_char_num = 0
self._last_char_order = -1
self._done = False
self.reset()
def reset(self):
def reset(self) -> None:
self._total_rel = 0 # total sequence received
# category counters, each integer counts sequence in its category
self._rel_sample = [0] * self.NUM_OF_CATEGORY
@@ -143,7 +144,7 @@ class JapaneseContextAnalysis:
# been made
self._done = False
def feed(self, byte_str, num_bytes):
def feed(self, byte_str: Union[bytes, bytearray], num_bytes: int) -> None:
if self._done:
return
@@ -172,29 +173,29 @@ class JapaneseContextAnalysis:
] += 1
self._last_char_order = order
def got_enough_data(self):
def got_enough_data(self) -> bool:
return self._total_rel > self.ENOUGH_REL_THRESHOLD
def get_confidence(self):
def get_confidence(self) -> float:
# This is just one way to calculate confidence. It works well for me.
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
return (self._total_rel - self._rel_sample[0]) / self._total_rel
return self.DONT_KNOW
def get_order(self, _):
def get_order(self, _: Union[bytes, bytearray]) -> Tuple[int, int]:
return -1, 1
class SJISContextAnalysis(JapaneseContextAnalysis):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._charset_name = "SHIFT_JIS"
@property
def charset_name(self):
def charset_name(self) -> str:
return self._charset_name
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]:
if not byte_str:
return -1, 1
# find out current char's byte length
@@ -216,7 +217,7 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
class EUCJPContextAnalysis(JapaneseContextAnalysis):
def get_order(self, byte_str):
def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]:
if not byte_str:
return -1, 1
# find out current char's byte length
@@ -26,6 +26,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
@@ -96,26 +98,26 @@ Latin1ClassModel = (
class Latin1Prober(CharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self._last_char_class = None
self._freq_counter = None
self._last_char_class = OTH
self._freq_counter: List[int] = []
self.reset()
def reset(self):
def reset(self) -> None:
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
super().reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "ISO-8859-1"
@property
def language(self):
def language(self) -> str:
return ""
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
byte_str = self.remove_xml_tags(byte_str)
for c in byte_str:
char_class = Latin1_CharToClass[c]
@@ -128,7 +130,7 @@ class Latin1Prober(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
if self.state == ProbingState.NOT_ME:
return 0.01
@@ -0,0 +1,162 @@
######################## BEGIN LICENSE BLOCK ########################
# This code was modified from latin1prober.py by Rob Speer <rob@lumino.so>.
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Rob Speer - adapt to MacRoman encoding
# Mark Pilgrim - port to Python
# Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
FREQ_CAT_NUM = 4
UDF = 0 # undefined
OTH = 1 # other
ASC = 2 # ascii capital letter
ASS = 3 # ascii small letter
ACV = 4 # accent capital vowel
ACO = 5 # accent capital other
ASV = 6 # accent small vowel
ASO = 7 # accent small other
ODD = 8 # character that is unlikely to appear
CLASS_NUM = 9 # total classes
# The change from Latin1 is that we explicitly look for extended characters
# that are infrequently-occurring symbols, and consider them to always be
# improbable. This should let MacRoman get out of the way of more likely
# encodings in most situations.
# fmt: off
MacRoman_CharToClass = (
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
ACV, ACV, ACO, ACV, ACO, ACV, ACV, ASV, # 80 - 87
ASV, ASV, ASV, ASV, ASV, ASO, ASV, ASV, # 88 - 8F
ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASV, # 90 - 97
ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # 98 - 9F
OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO, # A0 - A7
OTH, OTH, ODD, ODD, OTH, OTH, ACV, ACV, # A8 - AF
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
OTH, OTH, OTH, OTH, OTH, OTH, ASV, ASV, # B8 - BF
OTH, OTH, ODD, OTH, ODD, OTH, OTH, OTH, # C0 - C7
OTH, OTH, OTH, ACV, ACV, ACV, ACV, ASV, # C8 - CF
OTH, OTH, OTH, OTH, OTH, OTH, OTH, ODD, # D0 - D7
ASV, ACV, ODD, OTH, OTH, OTH, OTH, OTH, # D8 - DF
OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV, # E0 - E7
ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # E8 - EF
ODD, ACV, ACV, ACV, ACV, ASV, ODD, ODD, # F0 - F7
ODD, ODD, ODD, ODD, ODD, ODD, ODD, ODD, # F8 - FF
)
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
MacRomanClassModel = (
# UDF OTH ASC ASS ACV ACO ASV ASO ODD
0, 0, 0, 0, 0, 0, 0, 0, 0, # UDF
0, 3, 3, 3, 3, 3, 3, 3, 1, # OTH
0, 3, 3, 3, 3, 3, 3, 3, 1, # ASC
0, 3, 3, 3, 1, 1, 3, 3, 1, # ASS
0, 3, 3, 3, 1, 2, 1, 2, 1, # ACV
0, 3, 3, 3, 3, 3, 3, 3, 1, # ACO
0, 3, 1, 3, 1, 1, 1, 3, 1, # ASV
0, 3, 1, 3, 1, 1, 3, 3, 1, # ASO
0, 1, 1, 1, 1, 1, 1, 1, 1, # ODD
)
# fmt: on
class MacRomanProber(CharSetProber):
def __init__(self) -> None:
super().__init__()
self._last_char_class = OTH
self._freq_counter: List[int] = []
self.reset()
def reset(self) -> None:
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
# express the prior that MacRoman is a somewhat rare encoding;
# this can be done by starting out in a slightly improbable state
# that must be overcome
self._freq_counter[2] = 10
super().reset()
@property
def charset_name(self) -> str:
return "MacRoman"
@property
def language(self) -> str:
return ""
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
byte_str = self.remove_xml_tags(byte_str)
for c in byte_str:
char_class = MacRoman_CharToClass[c]
freq = MacRomanClassModel[(self._last_char_class * CLASS_NUM) + char_class]
if freq == 0:
self._state = ProbingState.NOT_ME
break
self._freq_counter[freq] += 1
self._last_char_class = char_class
return self.state
def get_confidence(self) -> float:
if self.state == ProbingState.NOT_ME:
return 0.01
total = sum(self._freq_counter)
confidence = (
0.0
if total < 0.01
else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total
)
confidence = max(confidence, 0.0)
# lower the confidence of MacRoman so that other more accurate
# detector can take priority.
confidence *= 0.73
return confidence
@@ -27,8 +27,12 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Optional, Union
from .chardistribution import CharDistributionAnalysis
from .charsetprober import CharSetProber
from .enums import MachineState, ProbingState
from .codingstatemachine import CodingStateMachine
from .enums import LanguageFilter, MachineState, ProbingState
class MultiByteCharSetProber(CharSetProber):
@@ -36,29 +40,24 @@ class MultiByteCharSetProber(CharSetProber):
MultiByteCharSetProber
"""
def __init__(self, lang_filter=None):
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self.distribution_analyzer = None
self.coding_sm = None
self._last_char = [0, 0]
self.distribution_analyzer: Optional[CharDistributionAnalysis] = None
self.coding_sm: Optional[CodingStateMachine] = None
self._last_char = bytearray(b"\0\0")
def reset(self):
def reset(self) -> None:
super().reset()
if self.coding_sm:
self.coding_sm.reset()
if self.distribution_analyzer:
self.distribution_analyzer.reset()
self._last_char = [0, 0]
self._last_char = bytearray(b"\0\0")
@property
def charset_name(self):
raise NotImplementedError
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
assert self.coding_sm is not None
assert self.distribution_analyzer is not None
@property
def language(self):
raise NotImplementedError
def feed(self, byte_str):
for i, byte in enumerate(byte_str):
coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
@@ -91,5 +90,6 @@ class MultiByteCharSetProber(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
assert self.distribution_analyzer is not None
return self.distribution_analyzer.get_confidence()
@@ -30,6 +30,7 @@
from .big5prober import Big5Prober
from .charsetgroupprober import CharSetGroupProber
from .cp949prober import CP949Prober
from .enums import LanguageFilter
from .eucjpprober import EUCJPProber
from .euckrprober import EUCKRProber
from .euctwprober import EUCTWProber
@@ -40,7 +41,7 @@ from .utf8prober import UTF8Prober
class MBCSGroupProber(CharSetGroupProber):
def __init__(self, lang_filter=None):
def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None:
super().__init__(lang_filter=lang_filter)
self.probers = [
UTF8Prober(),
+12 -11
View File
@@ -25,6 +25,7 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .codingstatemachinedict import CodingStateMachineDict
from .enums import MachineState
# BIG5
@@ -74,7 +75,7 @@ BIG5_ST = (
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
BIG5_SM_MODEL = {
BIG5_SM_MODEL: CodingStateMachineDict = {
"class_table": BIG5_CLS,
"class_factor": 5,
"state_table": BIG5_ST,
@@ -117,7 +118,7 @@ CP949_ST = (
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
CP949_SM_MODEL = {
CP949_SM_MODEL: CodingStateMachineDict = {
"class_table": CP949_CLS,
"class_factor": 10,
"state_table": CP949_ST,
@@ -173,7 +174,7 @@ EUCJP_ST = (
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
EUCJP_SM_MODEL = {
EUCJP_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCJP_CLS,
"class_factor": 6,
"state_table": EUCJP_ST,
@@ -226,7 +227,7 @@ EUCKR_ST = (
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
EUCKR_SM_MODEL = {
EUCKR_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCKR_CLS,
"class_factor": 4,
"state_table": EUCKR_ST,
@@ -283,7 +284,7 @@ JOHAB_ST = (
JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2)
JOHAB_SM_MODEL = {
JOHAB_SM_MODEL: CodingStateMachineDict = {
"class_table": JOHAB_CLS,
"class_factor": 10,
"state_table": JOHAB_ST,
@@ -340,7 +341,7 @@ EUCTW_ST = (
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
EUCTW_SM_MODEL = {
EUCTW_SM_MODEL: CodingStateMachineDict = {
"class_table": EUCTW_CLS,
"class_factor": 7,
"state_table": EUCTW_ST,
@@ -402,7 +403,7 @@ GB2312_ST = (
# 2 here.
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
GB2312_SM_MODEL = {
GB2312_SM_MODEL: CodingStateMachineDict = {
"class_table": GB2312_CLS,
"class_factor": 7,
"state_table": GB2312_ST,
@@ -458,7 +459,7 @@ SJIS_ST = (
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
SJIS_SM_MODEL = {
SJIS_SM_MODEL: CodingStateMachineDict = {
"class_table": SJIS_CLS,
"class_factor": 6,
"state_table": SJIS_ST,
@@ -516,7 +517,7 @@ UCS2BE_ST = (
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
UCS2BE_SM_MODEL = {
UCS2BE_SM_MODEL: CodingStateMachineDict = {
"class_table": UCS2BE_CLS,
"class_factor": 6,
"state_table": UCS2BE_ST,
@@ -574,7 +575,7 @@ UCS2LE_ST = (
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
UCS2LE_SM_MODEL = {
UCS2LE_SM_MODEL: CodingStateMachineDict = {
"class_table": UCS2LE_CLS,
"class_factor": 6,
"state_table": UCS2LE_ST,
@@ -651,7 +652,7 @@ UTF8_ST = (
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
UTF8_SM_MODEL = {
UTF8_SM_MODEL: CodingStateMachineDict = {
"class_table": UTF8_CLS,
"class_factor": 16,
"state_table": UTF8_ST,
@@ -6,6 +6,7 @@ This code is based on the language metadata from the uchardet project.
"""
from string import ascii_letters
from typing import List, Optional
# TODO: Add Ukrainian (KOI8-U)
@@ -33,13 +34,13 @@ class Language:
def __init__(
self,
name=None,
iso_code=None,
use_ascii=True,
charsets=None,
alphabet=None,
wiki_start_pages=None,
):
name: Optional[str] = None,
iso_code: Optional[str] = None,
use_ascii: bool = True,
charsets: Optional[List[str]] = None,
alphabet: Optional[str] = None,
wiki_start_pages: Optional[List[str]] = None,
) -> None:
super().__init__()
self.name = name
self.iso_code = iso_code
@@ -55,7 +56,7 @@ class Language:
self.alphabet = "".join(sorted(set(alphabet))) if alphabet else None
self.wiki_start_pages = wiki_start_pages
def __repr__(self):
def __repr__(self) -> str:
param_str = ", ".join(
f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
)
@@ -103,7 +104,7 @@ LANGUAGES = {
name="Danish",
iso_code="da",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="æøåÆØÅ",
wiki_start_pages=["Forside"],
),
@@ -111,8 +112,8 @@ LANGUAGES = {
name="German",
iso_code="de",
use_ascii=True,
charsets=["ISO-8859-1", "WINDOWS-1252"],
alphabet="äöüßÄÖÜ",
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="äöüßÄÖÜ",
wiki_start_pages=["Wikipedia:Hauptseite"],
),
"Greek": Language(
@@ -127,7 +128,7 @@ LANGUAGES = {
name="English",
iso_code="en",
use_ascii=True,
charsets=["ISO-8859-1", "WINDOWS-1252"],
charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"],
wiki_start_pages=["Main_Page"],
),
"Esperanto": Language(
@@ -143,7 +144,7 @@ LANGUAGES = {
name="Spanish",
iso_code="es",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ñáéíóúüÑÁÉÍÓÚÜ",
wiki_start_pages=["Wikipedia:Portada"],
),
@@ -161,7 +162,7 @@ LANGUAGES = {
name="Finnish",
iso_code="fi",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÅÄÖŠŽåäöšž",
wiki_start_pages=["Wikipedia:Etusivu"],
),
@@ -169,7 +170,7 @@ LANGUAGES = {
name="French",
iso_code="fr",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ",
wiki_start_pages=["Wikipédia:Accueil_principal", "Bœuf (animal)"],
),
@@ -203,7 +204,7 @@ LANGUAGES = {
name="Italian",
iso_code="it",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÀÈÉÌÒÓÙàèéìòóù",
wiki_start_pages=["Pagina_principale"],
),
@@ -237,7 +238,7 @@ LANGUAGES = {
name="Dutch",
iso_code="nl",
use_ascii=True,
charsets=["ISO-8859-1", "WINDOWS-1252"],
charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"],
wiki_start_pages=["Hoofdpagina"],
),
"Polish": Language(
@@ -253,7 +254,7 @@ LANGUAGES = {
name="Portuguese",
iso_code="pt",
use_ascii=True,
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252"],
charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"],
alphabet="ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú",
wiki_start_pages=["Wikipédia:Página_principal"],
),
@@ -0,0 +1,16 @@
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
# TypedDict was introduced in Python 3.8.
#
# TODO: Remove the else block and TYPE_CHECKING check when dropping support
# for Python 3.7.
from typing import TypedDict
class ResultDict(TypedDict):
encoding: Optional[str]
confidence: float
language: Optional[str]
else:
ResultDict = dict
@@ -26,23 +26,20 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from collections import namedtuple
from typing import Dict, List, NamedTuple, Optional, Union
from .charsetprober import CharSetProber
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
SingleByteCharSetModel = namedtuple(
"SingleByteCharSetModel",
[
"charset_name",
"language",
"char_to_order_map",
"language_model",
"typical_positive_ratio",
"keep_ascii_letters",
"alphabet",
],
)
class SingleByteCharSetModel(NamedTuple):
charset_name: str
language: str
char_to_order_map: Dict[int, int]
language_model: Dict[int, Dict[int, int]]
typical_positive_ratio: float
keep_ascii_letters: bool
alphabet: str
class SingleByteCharSetProber(CharSetProber):
@@ -51,22 +48,27 @@ class SingleByteCharSetProber(CharSetProber):
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
def __init__(self, model, is_reversed=False, name_prober=None):
def __init__(
self,
model: SingleByteCharSetModel,
is_reversed: bool = False,
name_prober: Optional[CharSetProber] = None,
) -> None:
super().__init__()
self._model = model
# TRUE if we need to reverse every pair in the model lookup
self._reversed = is_reversed
# Optional auxiliary prober for name decision
self._name_prober = name_prober
self._last_order = None
self._seq_counters = None
self._total_seqs = None
self._total_char = None
self._control_char = None
self._freq_char = None
self._last_order = 255
self._seq_counters: List[int] = []
self._total_seqs = 0
self._total_char = 0
self._control_char = 0
self._freq_char = 0
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
# char order of last character
self._last_order = 255
@@ -78,18 +80,18 @@ class SingleByteCharSetProber(CharSetProber):
self._freq_char = 0
@property
def charset_name(self):
def charset_name(self) -> Optional[str]:
if self._name_prober:
return self._name_prober.charset_name
return self._model.charset_name
@property
def language(self):
def language(self) -> Optional[str]:
if self._name_prober:
return self._name_prober.language
return self._model.language
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
# TODO: Make filter_international_words keep things in self.alphabet
if not self._model.keep_ascii_letters:
byte_str = self.filter_international_words(byte_str)
@@ -139,7 +141,7 @@ class SingleByteCharSetProber(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
r = 0.01
if self._total_seqs > 0:
r = (
@@ -48,7 +48,7 @@ from .sbcharsetprober import SingleByteCharSetProber
class SBCSGroupProber(CharSetGroupProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
hebrew_prober = HebrewProber()
logical_hebrew_prober = SingleByteCharSetProber(
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Union
from .chardistribution import SJISDistributionAnalysis
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@@ -34,26 +36,29 @@ from .mbcssm import SJIS_SM_MODEL
class SJISProber(MultiByteCharSetProber):
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
self.distribution_analyzer = SJISDistributionAnalysis()
self.context_analyzer = SJISContextAnalysis()
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
self.context_analyzer.reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return self.context_analyzer.charset_name
@property
def language(self):
def language(self) -> str:
return "Japanese"
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
assert self.coding_sm is not None
assert self.distribution_analyzer is not None
for i, byte in enumerate(byte_str):
coding_state = self.coding_sm.next_state(byte)
if coding_state == MachineState.ERROR:
@@ -92,7 +97,9 @@ class SJISProber(MultiByteCharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
assert self.distribution_analyzer is not None
context_conf = self.context_analyzer.get_confidence()
distrib_conf = self.distribution_analyzer.get_confidence()
return max(context_conf, distrib_conf)
@@ -39,12 +39,16 @@ class a user of ``chardet`` should use.
import codecs
import logging
import re
from typing import List, Optional, Union
from .charsetgroupprober import CharSetGroupProber
from .charsetprober import CharSetProber
from .enums import InputState, LanguageFilter, ProbingState
from .escprober import EscCharSetProber
from .latin1prober import Latin1Prober
from .macromanprober import MacRomanProber
from .mbcsgroupprober import MBCSGroupProber
from .resultdict import ResultDict
from .sbcsgroupprober import SBCSGroupProber
from .utf1632prober import UTF1632Prober
@@ -80,34 +84,55 @@ class UniversalDetector:
"iso-8859-9": "Windows-1254",
"iso-8859-13": "Windows-1257",
}
# Based on https://encoding.spec.whatwg.org/#names-and-labels
# but altered to match Python names for encodings and remove mappings
# that break tests.
LEGACY_MAP = {
"ascii": "Windows-1252",
"iso-8859-1": "Windows-1252",
"tis-620": "ISO-8859-11",
"iso-8859-9": "Windows-1254",
"gb2312": "GB18030",
"euc-kr": "CP949",
"utf-16le": "UTF-16",
}
def __init__(self, lang_filter=LanguageFilter.ALL):
self._esc_charset_prober = None
self._utf1632_prober = None
self._charset_probers = []
self.result = None
self.done = None
self._got_data = None
self._input_state = None
self._last_char = None
def __init__(
self,
lang_filter: LanguageFilter = LanguageFilter.ALL,
should_rename_legacy: bool = False,
) -> None:
self._esc_charset_prober: Optional[EscCharSetProber] = None
self._utf1632_prober: Optional[UTF1632Prober] = None
self._charset_probers: List[CharSetProber] = []
self.result: ResultDict = {
"encoding": None,
"confidence": 0.0,
"language": None,
}
self.done = False
self._got_data = False
self._input_state = InputState.PURE_ASCII
self._last_char = b""
self.lang_filter = lang_filter
self.logger = logging.getLogger(__name__)
self._has_win_bytes = None
self._has_win_bytes = False
self.should_rename_legacy = should_rename_legacy
self.reset()
@property
def input_state(self):
def input_state(self) -> int:
return self._input_state
@property
def has_win_bytes(self):
def has_win_bytes(self) -> bool:
return self._has_win_bytes
@property
def charset_probers(self):
def charset_probers(self) -> List[CharSetProber]:
return self._charset_probers
def reset(self):
def reset(self) -> None:
"""
Reset the UniversalDetector and all of its probers back to their
initial states. This is called by ``__init__``, so you only need to
@@ -126,7 +151,7 @@ class UniversalDetector:
for prober in self._charset_probers:
prober.reset()
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> None:
"""
Takes a chunk of a document and feeds it through all of the relevant
charset probers.
@@ -166,6 +191,7 @@ class UniversalDetector:
elif byte_str.startswith(b"\xFE\xFF\x00\x00"):
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
self.result = {
# TODO: This encoding is not supported by Python. Should remove?
"encoding": "X-ISO-10646-UCS-4-3412",
"confidence": 1.0,
"language": "",
@@ -173,6 +199,7 @@ class UniversalDetector:
elif byte_str.startswith(b"\x00\x00\xFF\xFE"):
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
self.result = {
# TODO: This encoding is not supported by Python. Should remove?
"encoding": "X-ISO-10646-UCS-4-2143",
"confidence": 1.0,
"language": "",
@@ -242,6 +269,7 @@ class UniversalDetector:
if self.lang_filter & LanguageFilter.NON_CJK:
self._charset_probers.append(SBCSGroupProber())
self._charset_probers.append(Latin1Prober())
self._charset_probers.append(MacRomanProber())
for prober in self._charset_probers:
if prober.feed(byte_str) == ProbingState.FOUND_IT:
self.result = {
@@ -254,7 +282,7 @@ class UniversalDetector:
if self.WIN_BYTE_DETECTOR.search(byte_str):
self._has_win_bytes = True
def close(self):
def close(self) -> ResultDict:
"""
Stop analyzing the current document and come up with a final
prediction.
@@ -288,7 +316,8 @@ class UniversalDetector:
max_prober = prober
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
charset_name = max_prober.charset_name
lower_charset_name = max_prober.charset_name.lower()
assert charset_name is not None
lower_charset_name = charset_name.lower()
confidence = max_prober.get_confidence()
# Use Windows encoding name instead of ISO-8859 if we saw any
# extra Windows-specific bytes
@@ -297,6 +326,11 @@ class UniversalDetector:
charset_name = self.ISO_WIN_MAP.get(
lower_charset_name, charset_name
)
# Rename legacy encodings with superset encodings if asked
if self.should_rename_legacy:
charset_name = self.LEGACY_MAP.get(
(charset_name or "").lower(), charset_name
)
self.result = {
"encoding": charset_name,
"confidence": confidence,
@@ -18,6 +18,8 @@
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
@@ -36,7 +38,7 @@ class UTF1632Prober(CharSetProber):
# a fixed constant ratio of expected zeros or non-zeros in modulo-position.
EXPECTED_RATIO = 0.94
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.position = 0
self.zeros_at_mod = [0] * 4
@@ -51,7 +53,7 @@ class UTF1632Prober(CharSetProber):
self.first_half_surrogate_pair_detected_16le = False
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
self.position = 0
self.zeros_at_mod = [0] * 4
@@ -66,7 +68,7 @@ class UTF1632Prober(CharSetProber):
self.quad = [0, 0, 0, 0]
@property
def charset_name(self):
def charset_name(self) -> str:
if self.is_likely_utf32be():
return "utf-32be"
if self.is_likely_utf32le():
@@ -79,16 +81,16 @@ class UTF1632Prober(CharSetProber):
return "utf-16"
@property
def language(self):
def language(self) -> str:
return ""
def approx_32bit_chars(self):
def approx_32bit_chars(self) -> float:
return max(1.0, self.position / 4.0)
def approx_16bit_chars(self):
def approx_16bit_chars(self) -> float:
return max(1.0, self.position / 2.0)
def is_likely_utf32be(self):
def is_likely_utf32be(self) -> bool:
approx_chars = self.approx_32bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
self.zeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
@@ -98,7 +100,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf32be
)
def is_likely_utf32le(self):
def is_likely_utf32le(self) -> bool:
approx_chars = self.approx_32bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
self.nonzeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO
@@ -108,7 +110,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf32le
)
def is_likely_utf16be(self):
def is_likely_utf16be(self) -> bool:
approx_chars = self.approx_16bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
(self.nonzeros_at_mod[1] + self.nonzeros_at_mod[3]) / approx_chars
@@ -118,7 +120,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf16be
)
def is_likely_utf16le(self):
def is_likely_utf16le(self) -> bool:
approx_chars = self.approx_16bit_chars()
return approx_chars >= self.MIN_CHARS_FOR_DETECTION and (
(self.nonzeros_at_mod[0] + self.nonzeros_at_mod[2]) / approx_chars
@@ -128,7 +130,7 @@ class UTF1632Prober(CharSetProber):
and not self.invalid_utf16le
)
def validate_utf32_characters(self, quad):
def validate_utf32_characters(self, quad: List[int]) -> None:
"""
Validate if the quad of bytes is valid UTF-32.
@@ -150,7 +152,7 @@ class UTF1632Prober(CharSetProber):
):
self.invalid_utf32le = True
def validate_utf16_characters(self, pair):
def validate_utf16_characters(self, pair: List[int]) -> None:
"""
Validate if the pair of bytes is valid UTF-16.
@@ -182,7 +184,7 @@ class UTF1632Prober(CharSetProber):
else:
self.invalid_utf16le = True
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
mod4 = self.position % 4
self.quad[mod4] = c
@@ -198,7 +200,7 @@ class UTF1632Prober(CharSetProber):
return self.state
@property
def state(self):
def state(self) -> ProbingState:
if self._state in {ProbingState.NOT_ME, ProbingState.FOUND_IT}:
# terminal, decided states
return self._state
@@ -210,7 +212,7 @@ class UTF1632Prober(CharSetProber):
self._state = ProbingState.NOT_ME
return self._state
def get_confidence(self):
def get_confidence(self) -> float:
return (
0.85
if (
@@ -25,6 +25,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import Union
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
@@ -34,26 +36,26 @@ from .mbcssm import UTF8_SM_MODEL
class UTF8Prober(CharSetProber):
ONE_CHAR_PROB = 0.5
def __init__(self):
def __init__(self) -> None:
super().__init__()
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
self._num_mb_chars = None
self._num_mb_chars = 0
self.reset()
def reset(self):
def reset(self) -> None:
super().reset()
self.coding_sm.reset()
self._num_mb_chars = 0
@property
def charset_name(self):
def charset_name(self) -> str:
return "utf-8"
@property
def language(self):
def language(self) -> str:
return ""
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
coding_state = self.coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
@@ -72,7 +74,7 @@ class UTF8Prober(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
unlike = 0.99
if self._num_mb_chars < 6:
unlike *= self.ONE_CHAR_PROB**self._num_mb_chars
@@ -1,9 +1,9 @@
"""
This module exists only to simplify retrieving the version number of chardet
from within setup.py and from chardet subpackages.
from within setuptools and from chardet subpackages.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
__version__ = "5.0.0"
__version__ = "5.1.0"
VERSION = __version__.split(".")
@@ -1,6 +1,7 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from .initialise import init, deinit, reinit, colorama_text
from .initialise import init, deinit, reinit, colorama_text, just_fix_windows_console
from .ansi import Fore, Back, Style, Cursor
from .ansitowin32 import AnsiToWin32
__version__ = '0.4.5'
__version__ = '0.4.6'
@@ -4,7 +4,7 @@ import sys
import os
from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style, BEL
from .winterm import WinTerm, WinColor, WinStyle
from .winterm import enable_vt_processing, WinTerm, WinColor, WinStyle
from .win32 import windll, winapi_test
@@ -94,15 +94,22 @@ class AnsiToWin32(object):
# (e.g. Cygwin Terminal). In this case it's up to the terminal
# to support the ANSI codes.
conversion_supported = on_windows and winapi_test()
try:
fd = wrapped.fileno()
except Exception:
fd = -1
system_has_native_ansi = not on_windows or enable_vt_processing(fd)
have_tty = not self.stream.closed and self.stream.isatty()
need_conversion = conversion_supported and not system_has_native_ansi
# should we strip ANSI sequences from our output?
if strip is None:
strip = conversion_supported or (not self.stream.closed and not self.stream.isatty())
strip = need_conversion or not have_tty
self.strip = strip
# should we should convert ANSI sequences into win32 calls?
if convert is None:
convert = conversion_supported and not self.stream.closed and self.stream.isatty()
convert = need_conversion and have_tty
self.convert = convert
# dict of ansi codes to win32 functions and parameters
@@ -264,3 +271,7 @@ class AnsiToWin32(object):
if params[0] in '02':
winterm.set_title(params[1])
return text
def flush(self):
self.wrapped.flush()
@@ -6,13 +6,27 @@ import sys
from .ansitowin32 import AnsiToWin32
orig_stdout = None
orig_stderr = None
def _wipe_internal_state_for_tests():
global orig_stdout, orig_stderr
orig_stdout = None
orig_stderr = None
wrapped_stdout = None
wrapped_stderr = None
global wrapped_stdout, wrapped_stderr
wrapped_stdout = None
wrapped_stderr = None
atexit_done = False
global atexit_done
atexit_done = False
global fixed_windows_console
fixed_windows_console = False
try:
# no-op if it wasn't registered
atexit.unregister(reset_all)
except AttributeError:
# python 2: no atexit.unregister. Oh well, we did our best.
pass
def reset_all():
@@ -55,6 +69,29 @@ def deinit():
sys.stderr = orig_stderr
def just_fix_windows_console():
global fixed_windows_console
if sys.platform != "win32":
return
if fixed_windows_console:
return
if wrapped_stdout is not None or wrapped_stderr is not None:
# Someone already ran init() and it did stuff, so we won't second-guess them
return
# On newer versions of Windows, AnsiToWin32.__init__ will implicitly enable the
# native ANSI support in the console as a side-effect. We only need to actually
# replace sys.stdout/stderr if we're in the old-style conversion mode.
new_stdout = AnsiToWin32(sys.stdout, convert=None, strip=None, autoreset=False)
if new_stdout.convert:
sys.stdout = new_stdout
new_stderr = AnsiToWin32(sys.stderr, convert=None, strip=None, autoreset=False)
if new_stderr.convert:
sys.stderr = new_stderr
fixed_windows_console = True
@contextlib.contextmanager
def colorama_text(*args, **kwargs):
init(*args, **kwargs)
@@ -78,3 +115,7 @@ def wrap_stream(stream, convert, strip, autoreset, wrap):
if wrapper.should_wrap():
stream = wrapper.stream
return stream
# Use this for initial setup as well, to reduce code duplication
_wipe_internal_state_for_tests()
@@ -0,0 +1 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
@@ -0,0 +1,76 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import sys
from unittest import TestCase, main
from ..ansi import Back, Fore, Style
from ..ansitowin32 import AnsiToWin32
stdout_orig = sys.stdout
stderr_orig = sys.stderr
class AnsiTest(TestCase):
def setUp(self):
# sanity check: stdout should be a file or StringIO object.
# It will only be AnsiToWin32 if init() has previously wrapped it
self.assertNotEqual(type(sys.stdout), AnsiToWin32)
self.assertNotEqual(type(sys.stderr), AnsiToWin32)
def tearDown(self):
sys.stdout = stdout_orig
sys.stderr = stderr_orig
def testForeAttributes(self):
self.assertEqual(Fore.BLACK, '\033[30m')
self.assertEqual(Fore.RED, '\033[31m')
self.assertEqual(Fore.GREEN, '\033[32m')
self.assertEqual(Fore.YELLOW, '\033[33m')
self.assertEqual(Fore.BLUE, '\033[34m')
self.assertEqual(Fore.MAGENTA, '\033[35m')
self.assertEqual(Fore.CYAN, '\033[36m')
self.assertEqual(Fore.WHITE, '\033[37m')
self.assertEqual(Fore.RESET, '\033[39m')
# Check the light, extended versions.
self.assertEqual(Fore.LIGHTBLACK_EX, '\033[90m')
self.assertEqual(Fore.LIGHTRED_EX, '\033[91m')
self.assertEqual(Fore.LIGHTGREEN_EX, '\033[92m')
self.assertEqual(Fore.LIGHTYELLOW_EX, '\033[93m')
self.assertEqual(Fore.LIGHTBLUE_EX, '\033[94m')
self.assertEqual(Fore.LIGHTMAGENTA_EX, '\033[95m')
self.assertEqual(Fore.LIGHTCYAN_EX, '\033[96m')
self.assertEqual(Fore.LIGHTWHITE_EX, '\033[97m')
def testBackAttributes(self):
self.assertEqual(Back.BLACK, '\033[40m')
self.assertEqual(Back.RED, '\033[41m')
self.assertEqual(Back.GREEN, '\033[42m')
self.assertEqual(Back.YELLOW, '\033[43m')
self.assertEqual(Back.BLUE, '\033[44m')
self.assertEqual(Back.MAGENTA, '\033[45m')
self.assertEqual(Back.CYAN, '\033[46m')
self.assertEqual(Back.WHITE, '\033[47m')
self.assertEqual(Back.RESET, '\033[49m')
# Check the light, extended versions.
self.assertEqual(Back.LIGHTBLACK_EX, '\033[100m')
self.assertEqual(Back.LIGHTRED_EX, '\033[101m')
self.assertEqual(Back.LIGHTGREEN_EX, '\033[102m')
self.assertEqual(Back.LIGHTYELLOW_EX, '\033[103m')
self.assertEqual(Back.LIGHTBLUE_EX, '\033[104m')
self.assertEqual(Back.LIGHTMAGENTA_EX, '\033[105m')
self.assertEqual(Back.LIGHTCYAN_EX, '\033[106m')
self.assertEqual(Back.LIGHTWHITE_EX, '\033[107m')
def testStyleAttributes(self):
self.assertEqual(Style.DIM, '\033[2m')
self.assertEqual(Style.NORMAL, '\033[22m')
self.assertEqual(Style.BRIGHT, '\033[1m')
if __name__ == '__main__':
main()
@@ -0,0 +1,294 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from io import StringIO, TextIOWrapper
from unittest import TestCase, main
try:
from contextlib import ExitStack
except ImportError:
# python 2
from contextlib2 import ExitStack
try:
from unittest.mock import MagicMock, Mock, patch
except ImportError:
from mock import MagicMock, Mock, patch
from ..ansitowin32 import AnsiToWin32, StreamWrapper
from ..win32 import ENABLE_VIRTUAL_TERMINAL_PROCESSING
from .utils import osname
class StreamWrapperTest(TestCase):
def testIsAProxy(self):
mockStream = Mock()
wrapper = StreamWrapper(mockStream, None)
self.assertTrue( wrapper.random_attr is mockStream.random_attr )
def testDelegatesWrite(self):
mockStream = Mock()
mockConverter = Mock()
wrapper = StreamWrapper(mockStream, mockConverter)
wrapper.write('hello')
self.assertTrue(mockConverter.write.call_args, (('hello',), {}))
def testDelegatesContext(self):
mockConverter = Mock()
s = StringIO()
with StreamWrapper(s, mockConverter) as fp:
fp.write(u'hello')
self.assertTrue(s.closed)
def testProxyNoContextManager(self):
mockStream = MagicMock()
mockStream.__enter__.side_effect = AttributeError()
mockConverter = Mock()
with self.assertRaises(AttributeError) as excinfo:
with StreamWrapper(mockStream, mockConverter) as wrapper:
wrapper.write('hello')
def test_closed_shouldnt_raise_on_closed_stream(self):
stream = StringIO()
stream.close()
wrapper = StreamWrapper(stream, None)
self.assertEqual(wrapper.closed, True)
def test_closed_shouldnt_raise_on_detached_stream(self):
stream = TextIOWrapper(StringIO())
stream.detach()
wrapper = StreamWrapper(stream, None)
self.assertEqual(wrapper.closed, True)
class AnsiToWin32Test(TestCase):
def testInit(self):
mockStdout = Mock()
auto = Mock()
stream = AnsiToWin32(mockStdout, autoreset=auto)
self.assertEqual(stream.wrapped, mockStdout)
self.assertEqual(stream.autoreset, auto)
@patch('colorama.ansitowin32.winterm', None)
@patch('colorama.ansitowin32.winapi_test', lambda *_: True)
def testStripIsTrueOnWindows(self):
with osname('nt'):
mockStdout = Mock()
stream = AnsiToWin32(mockStdout)
self.assertTrue(stream.strip)
def testStripIsFalseOffWindows(self):
with osname('posix'):
mockStdout = Mock(closed=False)
stream = AnsiToWin32(mockStdout)
self.assertFalse(stream.strip)
def testWriteStripsAnsi(self):
mockStdout = Mock()
stream = AnsiToWin32(mockStdout)
stream.wrapped = Mock()
stream.write_and_convert = Mock()
stream.strip = True
stream.write('abc')
self.assertFalse(stream.wrapped.write.called)
self.assertEqual(stream.write_and_convert.call_args, (('abc',), {}))
def testWriteDoesNotStripAnsi(self):
mockStdout = Mock()
stream = AnsiToWin32(mockStdout)
stream.wrapped = Mock()
stream.write_and_convert = Mock()
stream.strip = False
stream.convert = False
stream.write('abc')
self.assertFalse(stream.write_and_convert.called)
self.assertEqual(stream.wrapped.write.call_args, (('abc',), {}))
def assert_autoresets(self, convert, autoreset=True):
stream = AnsiToWin32(Mock())
stream.convert = convert
stream.reset_all = Mock()
stream.autoreset = autoreset
stream.winterm = Mock()
stream.write('abc')
self.assertEqual(stream.reset_all.called, autoreset)
def testWriteAutoresets(self):
self.assert_autoresets(convert=True)
self.assert_autoresets(convert=False)
self.assert_autoresets(convert=True, autoreset=False)
self.assert_autoresets(convert=False, autoreset=False)
def testWriteAndConvertWritesPlainText(self):
stream = AnsiToWin32(Mock())
stream.write_and_convert( 'abc' )
self.assertEqual( stream.wrapped.write.call_args, (('abc',), {}) )
def testWriteAndConvertStripsAllValidAnsi(self):
stream = AnsiToWin32(Mock())
stream.call_win32 = Mock()
data = [
'abc\033[mdef',
'abc\033[0mdef',
'abc\033[2mdef',
'abc\033[02mdef',
'abc\033[002mdef',
'abc\033[40mdef',
'abc\033[040mdef',
'abc\033[0;1mdef',
'abc\033[40;50mdef',
'abc\033[50;30;40mdef',
'abc\033[Adef',
'abc\033[0Gdef',
'abc\033[1;20;128Hdef',
]
for datum in data:
stream.wrapped.write.reset_mock()
stream.write_and_convert( datum )
self.assertEqual(
[args[0] for args in stream.wrapped.write.call_args_list],
[ ('abc',), ('def',) ]
)
def testWriteAndConvertSkipsEmptySnippets(self):
stream = AnsiToWin32(Mock())
stream.call_win32 = Mock()
stream.write_and_convert( '\033[40m\033[41m' )
self.assertFalse( stream.wrapped.write.called )
def testWriteAndConvertCallsWin32WithParamsAndCommand(self):
stream = AnsiToWin32(Mock())
stream.convert = True
stream.call_win32 = Mock()
stream.extract_params = Mock(return_value='params')
data = {
'abc\033[adef': ('a', 'params'),
'abc\033[;;bdef': ('b', 'params'),
'abc\033[0cdef': ('c', 'params'),
'abc\033[;;0;;Gdef': ('G', 'params'),
'abc\033[1;20;128Hdef': ('H', 'params'),
}
for datum, expected in data.items():
stream.call_win32.reset_mock()
stream.write_and_convert( datum )
self.assertEqual( stream.call_win32.call_args[0], expected )
def test_reset_all_shouldnt_raise_on_closed_orig_stdout(self):
stream = StringIO()
converter = AnsiToWin32(stream)
stream.close()
converter.reset_all()
def test_wrap_shouldnt_raise_on_closed_orig_stdout(self):
stream = StringIO()
stream.close()
with \
patch("colorama.ansitowin32.os.name", "nt"), \
patch("colorama.ansitowin32.winapi_test", lambda: True):
converter = AnsiToWin32(stream)
self.assertTrue(converter.strip)
self.assertFalse(converter.convert)
def test_wrap_shouldnt_raise_on_missing_closed_attr(self):
with \
patch("colorama.ansitowin32.os.name", "nt"), \
patch("colorama.ansitowin32.winapi_test", lambda: True):
converter = AnsiToWin32(object())
self.assertTrue(converter.strip)
self.assertFalse(converter.convert)
def testExtractParams(self):
stream = AnsiToWin32(Mock())
data = {
'': (0,),
';;': (0,),
'2': (2,),
';;002;;': (2,),
'0;1': (0, 1),
';;003;;456;;': (3, 456),
'11;22;33;44;55': (11, 22, 33, 44, 55),
}
for datum, expected in data.items():
self.assertEqual(stream.extract_params('m', datum), expected)
def testCallWin32UsesLookup(self):
listener = Mock()
stream = AnsiToWin32(listener)
stream.win32_calls = {
1: (lambda *_, **__: listener(11),),
2: (lambda *_, **__: listener(22),),
3: (lambda *_, **__: listener(33),),
}
stream.call_win32('m', (3, 1, 99, 2))
self.assertEqual(
[a[0][0] for a in listener.call_args_list],
[33, 11, 22] )
def test_osc_codes(self):
mockStdout = Mock()
stream = AnsiToWin32(mockStdout, convert=True)
with patch('colorama.ansitowin32.winterm') as winterm:
data = [
'\033]0\x07', # missing arguments
'\033]0;foo\x08', # wrong OSC command
'\033]0;colorama_test_title\x07', # should work
'\033]1;colorama_test_title\x07', # wrong set command
'\033]2;colorama_test_title\x07', # should work
'\033]' + ';' * 64 + '\x08', # see issue #247
]
for code in data:
stream.write(code)
self.assertEqual(winterm.set_title.call_count, 2)
def test_native_windows_ansi(self):
with ExitStack() as stack:
def p(a, b):
stack.enter_context(patch(a, b, create=True))
# Pretend to be on Windows
p("colorama.ansitowin32.os.name", "nt")
p("colorama.ansitowin32.winapi_test", lambda: True)
p("colorama.win32.winapi_test", lambda: True)
p("colorama.winterm.win32.windll", "non-None")
p("colorama.winterm.get_osfhandle", lambda _: 1234)
# Pretend that our mock stream has native ANSI support
p(
"colorama.winterm.win32.GetConsoleMode",
lambda _: ENABLE_VIRTUAL_TERMINAL_PROCESSING,
)
SetConsoleMode = Mock()
p("colorama.winterm.win32.SetConsoleMode", SetConsoleMode)
stdout = Mock()
stdout.closed = False
stdout.isatty.return_value = True
stdout.fileno.return_value = 1
# Our fake console says it has native vt support, so AnsiToWin32 should
# enable that support and do nothing else.
stream = AnsiToWin32(stdout)
SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING)
self.assertFalse(stream.strip)
self.assertFalse(stream.convert)
self.assertFalse(stream.should_wrap())
# Now let's pretend we're on an old Windows console, that doesn't have
# native ANSI support.
p("colorama.winterm.win32.GetConsoleMode", lambda _: 0)
SetConsoleMode = Mock()
p("colorama.winterm.win32.SetConsoleMode", SetConsoleMode)
stream = AnsiToWin32(stdout)
SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING)
self.assertTrue(stream.strip)
self.assertTrue(stream.convert)
self.assertTrue(stream.should_wrap())
if __name__ == '__main__':
main()
@@ -0,0 +1,189 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import sys
from unittest import TestCase, main, skipUnless
try:
from unittest.mock import patch, Mock
except ImportError:
from mock import patch, Mock
from ..ansitowin32 import StreamWrapper
from ..initialise import init, just_fix_windows_console, _wipe_internal_state_for_tests
from .utils import osname, replace_by
orig_stdout = sys.stdout
orig_stderr = sys.stderr
class InitTest(TestCase):
@skipUnless(sys.stdout.isatty(), "sys.stdout is not a tty")
def setUp(self):
# sanity check
self.assertNotWrapped()
def tearDown(self):
_wipe_internal_state_for_tests()
sys.stdout = orig_stdout
sys.stderr = orig_stderr
def assertWrapped(self):
self.assertIsNot(sys.stdout, orig_stdout, 'stdout should be wrapped')
self.assertIsNot(sys.stderr, orig_stderr, 'stderr should be wrapped')
self.assertTrue(isinstance(sys.stdout, StreamWrapper),
'bad stdout wrapper')
self.assertTrue(isinstance(sys.stderr, StreamWrapper),
'bad stderr wrapper')
def assertNotWrapped(self):
self.assertIs(sys.stdout, orig_stdout, 'stdout should not be wrapped')
self.assertIs(sys.stderr, orig_stderr, 'stderr should not be wrapped')
@patch('colorama.initialise.reset_all')
@patch('colorama.ansitowin32.winapi_test', lambda *_: True)
@patch('colorama.ansitowin32.enable_vt_processing', lambda *_: False)
def testInitWrapsOnWindows(self, _):
with osname("nt"):
init()
self.assertWrapped()
@patch('colorama.initialise.reset_all')
@patch('colorama.ansitowin32.winapi_test', lambda *_: False)
def testInitDoesntWrapOnEmulatedWindows(self, _):
with osname("nt"):
init()
self.assertNotWrapped()
def testInitDoesntWrapOnNonWindows(self):
with osname("posix"):
init()
self.assertNotWrapped()
def testInitDoesntWrapIfNone(self):
with replace_by(None):
init()
# We can't use assertNotWrapped here because replace_by(None)
# changes stdout/stderr already.
self.assertIsNone(sys.stdout)
self.assertIsNone(sys.stderr)
def testInitAutoresetOnWrapsOnAllPlatforms(self):
with osname("posix"):
init(autoreset=True)
self.assertWrapped()
def testInitWrapOffDoesntWrapOnWindows(self):
with osname("nt"):
init(wrap=False)
self.assertNotWrapped()
def testInitWrapOffIncompatibleWithAutoresetOn(self):
self.assertRaises(ValueError, lambda: init(autoreset=True, wrap=False))
@patch('colorama.win32.SetConsoleTextAttribute')
@patch('colorama.initialise.AnsiToWin32')
def testAutoResetPassedOn(self, mockATW32, _):
with osname("nt"):
init(autoreset=True)
self.assertEqual(len(mockATW32.call_args_list), 2)
self.assertEqual(mockATW32.call_args_list[1][1]['autoreset'], True)
self.assertEqual(mockATW32.call_args_list[0][1]['autoreset'], True)
@patch('colorama.initialise.AnsiToWin32')
def testAutoResetChangeable(self, mockATW32):
with osname("nt"):
init()
init(autoreset=True)
self.assertEqual(len(mockATW32.call_args_list), 4)
self.assertEqual(mockATW32.call_args_list[2][1]['autoreset'], True)
self.assertEqual(mockATW32.call_args_list[3][1]['autoreset'], True)
init()
self.assertEqual(len(mockATW32.call_args_list), 6)
self.assertEqual(
mockATW32.call_args_list[4][1]['autoreset'], False)
self.assertEqual(
mockATW32.call_args_list[5][1]['autoreset'], False)
@patch('colorama.initialise.atexit.register')
def testAtexitRegisteredOnlyOnce(self, mockRegister):
init()
self.assertTrue(mockRegister.called)
mockRegister.reset_mock()
init()
self.assertFalse(mockRegister.called)
class JustFixWindowsConsoleTest(TestCase):
def _reset(self):
_wipe_internal_state_for_tests()
sys.stdout = orig_stdout
sys.stderr = orig_stderr
def tearDown(self):
self._reset()
@patch("colorama.ansitowin32.winapi_test", lambda: True)
def testJustFixWindowsConsole(self):
if sys.platform != "win32":
# just_fix_windows_console should be a no-op
just_fix_windows_console()
self.assertIs(sys.stdout, orig_stdout)
self.assertIs(sys.stderr, orig_stderr)
else:
def fake_std():
# Emulate stdout=not a tty, stderr=tty
# to check that we handle both cases correctly
stdout = Mock()
stdout.closed = False
stdout.isatty.return_value = False
stdout.fileno.return_value = 1
sys.stdout = stdout
stderr = Mock()
stderr.closed = False
stderr.isatty.return_value = True
stderr.fileno.return_value = 2
sys.stderr = stderr
for native_ansi in [False, True]:
with patch(
'colorama.ansitowin32.enable_vt_processing',
lambda *_: native_ansi
):
self._reset()
fake_std()
# Regular single-call test
prev_stdout = sys.stdout
prev_stderr = sys.stderr
just_fix_windows_console()
self.assertIs(sys.stdout, prev_stdout)
if native_ansi:
self.assertIs(sys.stderr, prev_stderr)
else:
self.assertIsNot(sys.stderr, prev_stderr)
# second call without resetting is always a no-op
prev_stdout = sys.stdout
prev_stderr = sys.stderr
just_fix_windows_console()
self.assertIs(sys.stdout, prev_stdout)
self.assertIs(sys.stderr, prev_stderr)
self._reset()
fake_std()
# If init() runs first, just_fix_windows_console should be a no-op
init()
prev_stdout = sys.stdout
prev_stderr = sys.stderr
just_fix_windows_console()
self.assertIs(prev_stdout, sys.stdout)
self.assertIs(prev_stderr, sys.stderr)
if __name__ == '__main__':
main()
@@ -0,0 +1,57 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import sys
from unittest import TestCase, main
from ..ansitowin32 import StreamWrapper, AnsiToWin32
from .utils import pycharm, replace_by, replace_original_by, StreamTTY, StreamNonTTY
def is_a_tty(stream):
return StreamWrapper(stream, None).isatty()
class IsattyTest(TestCase):
def test_TTY(self):
tty = StreamTTY()
self.assertTrue(is_a_tty(tty))
with pycharm():
self.assertTrue(is_a_tty(tty))
def test_nonTTY(self):
non_tty = StreamNonTTY()
self.assertFalse(is_a_tty(non_tty))
with pycharm():
self.assertFalse(is_a_tty(non_tty))
def test_withPycharm(self):
with pycharm():
self.assertTrue(is_a_tty(sys.stderr))
self.assertTrue(is_a_tty(sys.stdout))
def test_withPycharmTTYOverride(self):
tty = StreamTTY()
with pycharm(), replace_by(tty):
self.assertTrue(is_a_tty(tty))
def test_withPycharmNonTTYOverride(self):
non_tty = StreamNonTTY()
with pycharm(), replace_by(non_tty):
self.assertFalse(is_a_tty(non_tty))
def test_withPycharmNoneOverride(self):
with pycharm():
with replace_by(None), replace_original_by(None):
self.assertFalse(is_a_tty(None))
self.assertFalse(is_a_tty(StreamNonTTY()))
self.assertTrue(is_a_tty(StreamTTY()))
def test_withPycharmStreamWrapped(self):
with pycharm():
self.assertTrue(AnsiToWin32(StreamTTY()).stream.isatty())
self.assertFalse(AnsiToWin32(StreamNonTTY()).stream.isatty())
self.assertTrue(AnsiToWin32(sys.stdout).stream.isatty())
self.assertTrue(AnsiToWin32(sys.stderr).stream.isatty())
if __name__ == '__main__':
main()
@@ -0,0 +1,49 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from contextlib import contextmanager
from io import StringIO
import sys
import os
class StreamTTY(StringIO):
def isatty(self):
return True
class StreamNonTTY(StringIO):
def isatty(self):
return False
@contextmanager
def osname(name):
orig = os.name
os.name = name
yield
os.name = orig
@contextmanager
def replace_by(stream):
orig_stdout = sys.stdout
orig_stderr = sys.stderr
sys.stdout = stream
sys.stderr = stream
yield
sys.stdout = orig_stdout
sys.stderr = orig_stderr
@contextmanager
def replace_original_by(stream):
orig_stdout = sys.__stdout__
orig_stderr = sys.__stderr__
sys.__stdout__ = stream
sys.__stderr__ = stream
yield
sys.__stdout__ = orig_stdout
sys.__stderr__ = orig_stderr
@contextmanager
def pycharm():
os.environ["PYCHARM_HOSTED"] = "1"
non_tty = StreamNonTTY()
with replace_by(non_tty), replace_original_by(non_tty):
yield
del os.environ["PYCHARM_HOSTED"]
@@ -0,0 +1,131 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
import sys
from unittest import TestCase, main, skipUnless
try:
from unittest.mock import Mock, patch
except ImportError:
from mock import Mock, patch
from ..winterm import WinColor, WinStyle, WinTerm
class WinTermTest(TestCase):
@patch('colorama.winterm.win32')
def testInit(self, mockWin32):
mockAttr = Mock()
mockAttr.wAttributes = 7 + 6 * 16 + 8
mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr
term = WinTerm()
self.assertEqual(term._fore, 7)
self.assertEqual(term._back, 6)
self.assertEqual(term._style, 8)
@skipUnless(sys.platform.startswith("win"), "requires Windows")
def testGetAttrs(self):
term = WinTerm()
term._fore = 0
term._back = 0
term._style = 0
self.assertEqual(term.get_attrs(), 0)
term._fore = WinColor.YELLOW
self.assertEqual(term.get_attrs(), WinColor.YELLOW)
term._back = WinColor.MAGENTA
self.assertEqual(
term.get_attrs(),
WinColor.YELLOW + WinColor.MAGENTA * 16)
term._style = WinStyle.BRIGHT
self.assertEqual(
term.get_attrs(),
WinColor.YELLOW + WinColor.MAGENTA * 16 + WinStyle.BRIGHT)
@patch('colorama.winterm.win32')
def testResetAll(self, mockWin32):
mockAttr = Mock()
mockAttr.wAttributes = 1 + 2 * 16 + 8
mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr
term = WinTerm()
term.set_console = Mock()
term._fore = -1
term._back = -1
term._style = -1
term.reset_all()
self.assertEqual(term._fore, 1)
self.assertEqual(term._back, 2)
self.assertEqual(term._style, 8)
self.assertEqual(term.set_console.called, True)
@skipUnless(sys.platform.startswith("win"), "requires Windows")
def testFore(self):
term = WinTerm()
term.set_console = Mock()
term._fore = 0
term.fore(5)
self.assertEqual(term._fore, 5)
self.assertEqual(term.set_console.called, True)
@skipUnless(sys.platform.startswith("win"), "requires Windows")
def testBack(self):
term = WinTerm()
term.set_console = Mock()
term._back = 0
term.back(5)
self.assertEqual(term._back, 5)
self.assertEqual(term.set_console.called, True)
@skipUnless(sys.platform.startswith("win"), "requires Windows")
def testStyle(self):
term = WinTerm()
term.set_console = Mock()
term._style = 0
term.style(22)
self.assertEqual(term._style, 22)
self.assertEqual(term.set_console.called, True)
@patch('colorama.winterm.win32')
def testSetConsole(self, mockWin32):
mockAttr = Mock()
mockAttr.wAttributes = 0
mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr
term = WinTerm()
term.windll = Mock()
term.set_console()
self.assertEqual(
mockWin32.SetConsoleTextAttribute.call_args,
((mockWin32.STDOUT, term.get_attrs()), {})
)
@patch('colorama.winterm.win32')
def testSetConsoleOnStderr(self, mockWin32):
mockAttr = Mock()
mockAttr.wAttributes = 0
mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr
term = WinTerm()
term.windll = Mock()
term.set_console(on_stderr=True)
self.assertEqual(
mockWin32.SetConsoleTextAttribute.call_args,
((mockWin32.STDERR, term.get_attrs()), {})
)
if __name__ == '__main__':
main()
@@ -4,6 +4,8 @@
STDOUT = -11
STDERR = -12
ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
try:
import ctypes
from ctypes import LibraryLoader
@@ -89,6 +91,20 @@ else:
]
_SetConsoleTitleW.restype = wintypes.BOOL
_GetConsoleMode = windll.kernel32.GetConsoleMode
_GetConsoleMode.argtypes = [
wintypes.HANDLE,
POINTER(wintypes.DWORD)
]
_GetConsoleMode.restype = wintypes.BOOL
_SetConsoleMode = windll.kernel32.SetConsoleMode
_SetConsoleMode.argtypes = [
wintypes.HANDLE,
wintypes.DWORD
]
_SetConsoleMode.restype = wintypes.BOOL
def _winapi_test(handle):
csbi = CONSOLE_SCREEN_BUFFER_INFO()
success = _GetConsoleScreenBufferInfo(
@@ -150,3 +166,15 @@ else:
def SetConsoleTitle(title):
return _SetConsoleTitleW(title)
def GetConsoleMode(handle):
mode = wintypes.DWORD()
success = _GetConsoleMode(handle, byref(mode))
if not success:
raise ctypes.WinError()
return mode.value
def SetConsoleMode(handle, mode):
success = _SetConsoleMode(handle, mode)
if not success:
raise ctypes.WinError()
+27 -1
View File
@@ -1,7 +1,13 @@
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
from . import win32
try:
from msvcrt import get_osfhandle
except ImportError:
def get_osfhandle(_):
raise OSError("This isn't windows!")
from . import win32
# from wincon.h
class WinColor(object):
BLACK = 0
@@ -167,3 +173,23 @@ class WinTerm(object):
def set_title(self, title):
win32.SetConsoleTitle(title)
def enable_vt_processing(fd):
if win32.windll is None or not win32.winapi_test():
return False
try:
handle = get_osfhandle(fd)
mode = win32.GetConsoleMode(handle)
win32.SetConsoleMode(
handle,
mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING,
)
mode = win32.GetConsoleMode(handle)
if mode & win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING:
return True
# Can get TypeError in testsuite where 'fd' is a Mock()
except (OSError, TypeError):
return False
+63 -38
View File
@@ -55,7 +55,7 @@ except ImportError:
# Python 3.7
TypedDict = dict
__version__ = "1.7.0"
__version__ = "1.8.0"
class VersionDict(TypedDict):
@@ -122,6 +122,26 @@ _DISTRO_RELEASE_CONTENT_REVERSED_PATTERN = re.compile(
# Pattern for base file name of distro release file
_DISTRO_RELEASE_BASENAME_PATTERN = re.compile(r"(\w+)[-_](release|version)$")
# Base file names to be looked up for if _UNIXCONFDIR is not readable.
_DISTRO_RELEASE_BASENAMES = [
"SuSE-release",
"arch-release",
"base-release",
"centos-release",
"fedora-release",
"gentoo-release",
"mageia-release",
"mandrake-release",
"mandriva-release",
"mandrivalinux-release",
"manjaro-release",
"oracle-release",
"redhat-release",
"rocky-release",
"sl-release",
"slackware-version",
]
# Base file names to be ignored when searching for distro release file
_DISTRO_RELEASE_IGNORE_BASENAMES = (
"debian_version",
@@ -200,6 +220,7 @@ def id() -> str:
"opensuse" openSUSE
"amzn" Amazon Linux
"arch" Arch Linux
"buildroot" Buildroot
"cloudlinux" CloudLinux OS
"exherbo" Exherbo Linux
"gentoo" GenToo Linux
@@ -221,6 +242,7 @@ def id() -> str:
"midnightbsd" MidnightBSD
"rocky" Rocky Linux
"aix" AIX
"guix" Guix System
============== =========================================
If you have a need to get distros for reliable IDs added into this set,
@@ -876,6 +898,9 @@ class LinuxDistribution:
if self.uname_attr("id").startswith("aix"):
# On AIX platforms, prefer oslevel command output.
versions.insert(0, self.oslevel_info())
elif self.id() == "debian" or "debian" in self.like().split():
# On Debian-like, add debian_version file content to candidates list.
versions.append(self._debian_version)
version = ""
if best:
# This algorithm uses the last version in priority order that has
@@ -1186,6 +1211,16 @@ class LinuxDistribution:
return ""
return self._to_str(stdout).strip()
@cached_property
def _debian_version(self) -> str:
try:
with open(
os.path.join(self.etc_dir, "debian_version"), encoding="ascii"
) as fp:
return fp.readline().rstrip()
except FileNotFoundError:
return ""
@staticmethod
def _parse_uname_content(lines: Sequence[str]) -> Dict[str, str]:
if not lines:
@@ -1228,14 +1263,14 @@ class LinuxDistribution:
# file), because we want to use what was specified as best as
# possible.
match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename)
if "name" in distro_info and "cloudlinux" in distro_info["name"].lower():
distro_info["id"] = "cloudlinux"
elif match:
distro_info["id"] = match.group(1)
return distro_info
else:
try:
basenames = os.listdir(self.etc_dir)
basenames = [
basename
for basename in os.listdir(self.etc_dir)
if basename not in _DISTRO_RELEASE_IGNORE_BASENAMES
and os.path.isfile(os.path.join(self.etc_dir, basename))
]
# We sort for repeatability in cases where there are multiple
# distro specific files; e.g. CentOS, Oracle, Enterprise all
# containing `redhat-release` on top of their own.
@@ -1245,39 +1280,29 @@ class LinuxDistribution:
# sure about the *-release files. Check common entries of
# /etc for information. If they turn out to not be there the
# error is handled in `_parse_distro_release_file()`.
basenames = [
"SuSE-release",
"arch-release",
"base-release",
"centos-release",
"fedora-release",
"gentoo-release",
"mageia-release",
"mandrake-release",
"mandriva-release",
"mandrivalinux-release",
"manjaro-release",
"oracle-release",
"redhat-release",
"rocky-release",
"sl-release",
"slackware-version",
]
basenames = _DISTRO_RELEASE_BASENAMES
for basename in basenames:
if basename in _DISTRO_RELEASE_IGNORE_BASENAMES:
continue
match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename)
if match:
filepath = os.path.join(self.etc_dir, basename)
distro_info = self._parse_distro_release_file(filepath)
if "name" in distro_info:
# The name is always present if the pattern matches
self.distro_release_file = filepath
distro_info["id"] = match.group(1)
if "cloudlinux" in distro_info["name"].lower():
distro_info["id"] = "cloudlinux"
return distro_info
return {}
if match is None:
continue
filepath = os.path.join(self.etc_dir, basename)
distro_info = self._parse_distro_release_file(filepath)
# The name is always present if the pattern matches.
if "name" not in distro_info:
continue
self.distro_release_file = filepath
break
else: # the loop didn't "break": no candidate.
return {}
if match is not None:
distro_info["id"] = match.group(1)
# CloudLinux < 7: manually enrich info with proper id.
if "cloudlinux" in distro_info.get("name", "").lower():
distro_info["id"] = "cloudlinux"
return distro_info
def _parse_distro_release_file(self, filepath: str) -> Dict[str, str]:
"""
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2010-202x The platformdirs developers
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -1,22 +0,0 @@
# This is the MIT license
Copyright (c) 2010 ActiveState Software Inc.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -7,13 +7,15 @@ from __future__ import annotations
import os
import sys
from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from pipenv.patched.pip._vendor.typing_extensions import Literal # pragma: no cover
if sys.version_info >= (3, 8): # pragma: no cover (py38+)
from typing import Literal
else: # pragma: no cover (py38+)
from pipenv.patched.pip._vendor.typing_extensions import Literal
from .api import PlatformDirsABC
from .version import __version__, __version_info__
from .version import __version__
from .version import __version_tuple__ as __version_info__
def _set_platform_dir_class() -> type[PlatformDirsABC]:
@@ -26,7 +28,7 @@ def _set_platform_dir_class() -> type[PlatformDirsABC]:
if os.getenv("ANDROID_DATA") == "/data" and os.getenv("ANDROID_ROOT") == "/system":
if os.getenv("SHELL") is not None:
if os.getenv("SHELL") or os.getenv("PREFIX"):
return Result
from pipenv.patched.pip._vendor.platformdirs.android import _android_folder
@@ -107,9 +107,9 @@ class Unix(PlatformDirsABC):
@property
def user_log_dir(self) -> str:
"""
:return: log directory tied to the user, same as `user_data_dir` if not opinionated else ``log`` in it
:return: log directory tied to the user, same as `user_state_dir` if not opinionated else ``log`` in it
"""
path = self.user_cache_dir
path = self.user_state_dir
if self.opinion:
path = os.path.join(path, "log")
return path
@@ -1,4 +1,4 @@
"""Version information"""
__version__ = "2.5.2"
__version_info__ = (2, 5, 2)
# file generated by setuptools_scm
# don't change, don't track in version control
__version__ = version = '2.6.2'
__version_tuple__ = version_tuple = (2, 6, 2)
@@ -2,6 +2,7 @@ from __future__ import annotations
import ctypes
import os
import sys
from functools import lru_cache
from typing import Callable
@@ -132,7 +133,8 @@ def get_win_folder_from_registry(csidl_name: str) -> str:
}.get(csidl_name)
if shell_folder_name is None:
raise ValueError(f"Unknown CSIDL name: {csidl_name}")
if sys.platform != "win32": # only needed for mypy type checker to know that this code runs only on Windows
raise NotImplementedError
import winreg
key = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders")
@@ -0,0 +1,23 @@
"""Wrappers to call pyproject.toml-based build backend hooks.
"""
from ._impl import (
BackendInvalid,
BackendUnavailable,
BuildBackendHookCaller,
HookMissing,
UnsupportedOperation,
default_subprocess_runner,
quiet_subprocess_runner,
)
__version__ = '1.0.0'
__all__ = [
'BackendUnavailable',
'BackendInvalid',
'HookMissing',
'UnsupportedOperation',
'default_subprocess_runner',
'quiet_subprocess_runner',
'BuildBackendHookCaller',
]
@@ -0,0 +1,330 @@
import json
import os
import sys
import tempfile
from contextlib import contextmanager
from os.path import abspath
from os.path import join as pjoin
from subprocess import STDOUT, check_call, check_output
from ._in_process import _in_proc_script_path
def write_json(obj, path, **kwargs):
with open(path, 'w', encoding='utf-8') as f:
json.dump(obj, f, **kwargs)
def read_json(path):
with open(path, encoding='utf-8') as f:
return json.load(f)
class BackendUnavailable(Exception):
"""Will be raised if the backend cannot be imported in the hook process."""
def __init__(self, traceback):
self.traceback = traceback
class BackendInvalid(Exception):
"""Will be raised if the backend is invalid."""
def __init__(self, backend_name, backend_path, message):
super().__init__(message)
self.backend_name = backend_name
self.backend_path = backend_path
class HookMissing(Exception):
"""Will be raised on missing hooks (if a fallback can't be used)."""
def __init__(self, hook_name):
super().__init__(hook_name)
self.hook_name = hook_name
class UnsupportedOperation(Exception):
"""May be raised by build_sdist if the backend indicates that it can't."""
def __init__(self, traceback):
self.traceback = traceback
def default_subprocess_runner(cmd, cwd=None, extra_environ=None):
"""The default method of calling the wrapper subprocess.
This uses :func:`subprocess.check_call` under the hood.
"""
env = os.environ.copy()
if extra_environ:
env.update(extra_environ)
check_call(cmd, cwd=cwd, env=env)
def quiet_subprocess_runner(cmd, cwd=None, extra_environ=None):
"""Call the subprocess while suppressing output.
This uses :func:`subprocess.check_output` under the hood.
"""
env = os.environ.copy()
if extra_environ:
env.update(extra_environ)
check_output(cmd, cwd=cwd, env=env, stderr=STDOUT)
def norm_and_check(source_tree, requested):
"""Normalise and check a backend path.
Ensure that the requested backend path is specified as a relative path,
and resolves to a location under the given source tree.
Return an absolute version of the requested path.
"""
if os.path.isabs(requested):
raise ValueError("paths must be relative")
abs_source = os.path.abspath(source_tree)
abs_requested = os.path.normpath(os.path.join(abs_source, requested))
# We have to use commonprefix for Python 2.7 compatibility. So we
# normalise case to avoid problems because commonprefix is a character
# based comparison :-(
norm_source = os.path.normcase(abs_source)
norm_requested = os.path.normcase(abs_requested)
if os.path.commonprefix([norm_source, norm_requested]) != norm_source:
raise ValueError("paths must be inside source tree")
return abs_requested
class BuildBackendHookCaller:
"""A wrapper to call the build backend hooks for a source directory.
"""
def __init__(
self,
source_dir,
build_backend,
backend_path=None,
runner=None,
python_executable=None,
):
"""
:param source_dir: The source directory to invoke the build backend for
:param build_backend: The build backend spec
:param backend_path: Additional path entries for the build backend spec
:param runner: The :ref:`subprocess runner <Subprocess Runners>` to use
:param python_executable:
The Python executable used to invoke the build backend
"""
if runner is None:
runner = default_subprocess_runner
self.source_dir = abspath(source_dir)
self.build_backend = build_backend
if backend_path:
backend_path = [
norm_and_check(self.source_dir, p) for p in backend_path
]
self.backend_path = backend_path
self._subprocess_runner = runner
if not python_executable:
python_executable = sys.executable
self.python_executable = python_executable
@contextmanager
def subprocess_runner(self, runner):
"""A context manager for temporarily overriding the default
:ref:`subprocess runner <Subprocess Runners>`.
.. code-block:: python
hook_caller = BuildBackendHookCaller(...)
with hook_caller.subprocess_runner(quiet_subprocess_runner):
...
"""
prev = self._subprocess_runner
self._subprocess_runner = runner
try:
yield
finally:
self._subprocess_runner = prev
def _supported_features(self):
"""Return the list of optional features supported by the backend."""
return self._call_hook('_supported_features', {})
def get_requires_for_build_wheel(self, config_settings=None):
"""Get additional dependencies required for building a wheel.
:returns: A list of :pep:`dependency specifiers <508>`.
:rtype: list[str]
.. admonition:: Fallback
If the build backend does not defined a hook with this name, an
empty list will be returned.
"""
return self._call_hook('get_requires_for_build_wheel', {
'config_settings': config_settings
})
def prepare_metadata_for_build_wheel(
self, metadata_directory, config_settings=None,
_allow_fallback=True):
"""Prepare a ``*.dist-info`` folder with metadata for this project.
:returns: Name of the newly created subfolder within
``metadata_directory``, containing the metadata.
:rtype: str
.. admonition:: Fallback
If the build backend does not define a hook with this name and
``_allow_fallback`` is truthy, the backend will be asked to build a
wheel via the ``build_wheel`` hook and the dist-info extracted from
that will be returned.
"""
return self._call_hook('prepare_metadata_for_build_wheel', {
'metadata_directory': abspath(metadata_directory),
'config_settings': config_settings,
'_allow_fallback': _allow_fallback,
})
def build_wheel(
self, wheel_directory, config_settings=None,
metadata_directory=None):
"""Build a wheel from this project.
:returns:
The name of the newly created wheel within ``wheel_directory``.
.. admonition:: Interaction with fallback
If the ``build_wheel`` hook was called in the fallback for
:meth:`prepare_metadata_for_build_wheel`, the build backend would
not be invoked. Instead, the previously built wheel will be copied
to ``wheel_directory`` and the name of that file will be returned.
"""
if metadata_directory is not None:
metadata_directory = abspath(metadata_directory)
return self._call_hook('build_wheel', {
'wheel_directory': abspath(wheel_directory),
'config_settings': config_settings,
'metadata_directory': metadata_directory,
})
def get_requires_for_build_editable(self, config_settings=None):
"""Get additional dependencies required for building an editable wheel.
:returns: A list of :pep:`dependency specifiers <508>`.
:rtype: list[str]
.. admonition:: Fallback
If the build backend does not defined a hook with this name, an
empty list will be returned.
"""
return self._call_hook('get_requires_for_build_editable', {
'config_settings': config_settings
})
def prepare_metadata_for_build_editable(
self, metadata_directory, config_settings=None,
_allow_fallback=True):
"""Prepare a ``*.dist-info`` folder with metadata for this project.
:returns: Name of the newly created subfolder within
``metadata_directory``, containing the metadata.
:rtype: str
.. admonition:: Fallback
If the build backend does not define a hook with this name and
``_allow_fallback`` is truthy, the backend will be asked to build a
wheel via the ``build_editable`` hook and the dist-info
extracted from that will be returned.
"""
return self._call_hook('prepare_metadata_for_build_editable', {
'metadata_directory': abspath(metadata_directory),
'config_settings': config_settings,
'_allow_fallback': _allow_fallback,
})
def build_editable(
self, wheel_directory, config_settings=None,
metadata_directory=None):
"""Build an editable wheel from this project.
:returns:
The name of the newly created wheel within ``wheel_directory``.
.. admonition:: Interaction with fallback
If the ``build_editable`` hook was called in the fallback for
:meth:`prepare_metadata_for_build_editable`, the build backend
would not be invoked. Instead, the previously built wheel will be
copied to ``wheel_directory`` and the name of that file will be
returned.
"""
if metadata_directory is not None:
metadata_directory = abspath(metadata_directory)
return self._call_hook('build_editable', {
'wheel_directory': abspath(wheel_directory),
'config_settings': config_settings,
'metadata_directory': metadata_directory,
})
def get_requires_for_build_sdist(self, config_settings=None):
"""Get additional dependencies required for building an sdist.
:returns: A list of :pep:`dependency specifiers <508>`.
:rtype: list[str]
"""
return self._call_hook('get_requires_for_build_sdist', {
'config_settings': config_settings
})
def build_sdist(self, sdist_directory, config_settings=None):
"""Build an sdist from this project.
:returns:
The name of the newly created sdist within ``wheel_directory``.
"""
return self._call_hook('build_sdist', {
'sdist_directory': abspath(sdist_directory),
'config_settings': config_settings,
})
def _call_hook(self, hook_name, kwargs):
extra_environ = {'PEP517_BUILD_BACKEND': self.build_backend}
if self.backend_path:
backend_path = os.pathsep.join(self.backend_path)
extra_environ['PEP517_BACKEND_PATH'] = backend_path
with tempfile.TemporaryDirectory() as td:
hook_input = {'kwargs': kwargs}
write_json(hook_input, pjoin(td, 'input.json'), indent=2)
# Run the hook in a subprocess
with _in_proc_script_path() as script:
python = self.python_executable
self._subprocess_runner(
[python, abspath(str(script)), hook_name, td],
cwd=self.source_dir,
extra_environ=extra_environ
)
data = read_json(pjoin(td, 'output.json'))
if data.get('unsupported'):
raise UnsupportedOperation(data.get('traceback', ''))
if data.get('no_backend'):
raise BackendUnavailable(data.get('traceback', ''))
if data.get('backend_invalid'):
raise BackendInvalid(
backend_name=self.build_backend,
backend_path=self.backend_path,
message=data.get('backend_error', '')
)
if data.get('hook_missing'):
raise HookMissing(data.get('missing_hook_name') or hook_name)
return data['return_val']
@@ -0,0 +1,18 @@
"""This is a subpackage because the directory is on sys.path for _in_process.py
The subpackage should stay as empty as possible to avoid shadowing modules that
the backend might import.
"""
import importlib.resources as resources
try:
resources.files
except AttributeError:
# Python 3.8 compatibility
def _in_proc_script_path():
return resources.path(__package__, '_in_process.py')
else:
def _in_proc_script_path():
return resources.as_file(
resources.files(__package__).joinpath('_in_process.py'))
@@ -0,0 +1,353 @@
"""This is invoked in a subprocess to call the build backend hooks.
It expects:
- Command line args: hook_name, control_dir
- Environment variables:
PEP517_BUILD_BACKEND=entry.point:spec
PEP517_BACKEND_PATH=paths (separated with os.pathsep)
- control_dir/input.json:
- {"kwargs": {...}}
Results:
- control_dir/output.json
- {"return_val": ...}
"""
import json
import os
import os.path
import re
import shutil
import sys
import traceback
from glob import glob
from importlib import import_module
from os.path import join as pjoin
# This file is run as a script, and `import wrappers` is not zip-safe, so we
# include write_json() and read_json() from wrappers.py.
def write_json(obj, path, **kwargs):
with open(path, 'w', encoding='utf-8') as f:
json.dump(obj, f, **kwargs)
def read_json(path):
with open(path, encoding='utf-8') as f:
return json.load(f)
class BackendUnavailable(Exception):
"""Raised if we cannot import the backend"""
def __init__(self, traceback):
self.traceback = traceback
class BackendInvalid(Exception):
"""Raised if the backend is invalid"""
def __init__(self, message):
self.message = message
class HookMissing(Exception):
"""Raised if a hook is missing and we are not executing the fallback"""
def __init__(self, hook_name=None):
super().__init__(hook_name)
self.hook_name = hook_name
def contained_in(filename, directory):
"""Test if a file is located within the given directory."""
filename = os.path.normcase(os.path.abspath(filename))
directory = os.path.normcase(os.path.abspath(directory))
return os.path.commonprefix([filename, directory]) == directory
def _build_backend():
"""Find and load the build backend"""
# Add in-tree backend directories to the front of sys.path.
backend_path = os.environ.get('PEP517_BACKEND_PATH')
if backend_path:
extra_pathitems = backend_path.split(os.pathsep)
sys.path[:0] = extra_pathitems
ep = os.environ['PEP517_BUILD_BACKEND']
mod_path, _, obj_path = ep.partition(':')
try:
obj = import_module(mod_path)
except ImportError:
raise BackendUnavailable(traceback.format_exc())
if backend_path:
if not any(
contained_in(obj.__file__, path)
for path in extra_pathitems
):
raise BackendInvalid("Backend was not loaded from backend-path")
if obj_path:
for path_part in obj_path.split('.'):
obj = getattr(obj, path_part)
return obj
def _supported_features():
"""Return the list of options features supported by the backend.
Returns a list of strings.
The only possible value is 'build_editable'.
"""
backend = _build_backend()
features = []
if hasattr(backend, "build_editable"):
features.append("build_editable")
return features
def get_requires_for_build_wheel(config_settings):
"""Invoke the optional get_requires_for_build_wheel hook
Returns [] if the hook is not defined.
"""
backend = _build_backend()
try:
hook = backend.get_requires_for_build_wheel
except AttributeError:
return []
else:
return hook(config_settings)
def get_requires_for_build_editable(config_settings):
"""Invoke the optional get_requires_for_build_editable hook
Returns [] if the hook is not defined.
"""
backend = _build_backend()
try:
hook = backend.get_requires_for_build_editable
except AttributeError:
return []
else:
return hook(config_settings)
def prepare_metadata_for_build_wheel(
metadata_directory, config_settings, _allow_fallback):
"""Invoke optional prepare_metadata_for_build_wheel
Implements a fallback by building a wheel if the hook isn't defined,
unless _allow_fallback is False in which case HookMissing is raised.
"""
backend = _build_backend()
try:
hook = backend.prepare_metadata_for_build_wheel
except AttributeError:
if not _allow_fallback:
raise HookMissing()
else:
return hook(metadata_directory, config_settings)
# fallback to build_wheel outside the try block to avoid exception chaining
# which can be confusing to users and is not relevant
whl_basename = backend.build_wheel(metadata_directory, config_settings)
return _get_wheel_metadata_from_wheel(whl_basename, metadata_directory,
config_settings)
def prepare_metadata_for_build_editable(
metadata_directory, config_settings, _allow_fallback):
"""Invoke optional prepare_metadata_for_build_editable
Implements a fallback by building an editable wheel if the hook isn't
defined, unless _allow_fallback is False in which case HookMissing is
raised.
"""
backend = _build_backend()
try:
hook = backend.prepare_metadata_for_build_editable
except AttributeError:
if not _allow_fallback:
raise HookMissing()
try:
build_hook = backend.build_editable
except AttributeError:
raise HookMissing(hook_name='build_editable')
else:
whl_basename = build_hook(metadata_directory, config_settings)
return _get_wheel_metadata_from_wheel(whl_basename,
metadata_directory,
config_settings)
else:
return hook(metadata_directory, config_settings)
WHEEL_BUILT_MARKER = 'PEP517_ALREADY_BUILT_WHEEL'
def _dist_info_files(whl_zip):
"""Identify the .dist-info folder inside a wheel ZipFile."""
res = []
for path in whl_zip.namelist():
m = re.match(r'[^/\\]+-[^/\\]+\.dist-info/', path)
if m:
res.append(path)
if res:
return res
raise Exception("No .dist-info folder found in wheel")
def _get_wheel_metadata_from_wheel(
whl_basename, metadata_directory, config_settings):
"""Extract the metadata from a wheel.
Fallback for when the build backend does not
define the 'get_wheel_metadata' hook.
"""
from zipfile import ZipFile
with open(os.path.join(metadata_directory, WHEEL_BUILT_MARKER), 'wb'):
pass # Touch marker file
whl_file = os.path.join(metadata_directory, whl_basename)
with ZipFile(whl_file) as zipf:
dist_info = _dist_info_files(zipf)
zipf.extractall(path=metadata_directory, members=dist_info)
return dist_info[0].split('/')[0]
def _find_already_built_wheel(metadata_directory):
"""Check for a wheel already built during the get_wheel_metadata hook.
"""
if not metadata_directory:
return None
metadata_parent = os.path.dirname(metadata_directory)
if not os.path.isfile(pjoin(metadata_parent, WHEEL_BUILT_MARKER)):
return None
whl_files = glob(os.path.join(metadata_parent, '*.whl'))
if not whl_files:
print('Found wheel built marker, but no .whl files')
return None
if len(whl_files) > 1:
print('Found multiple .whl files; unspecified behaviour. '
'Will call build_wheel.')
return None
# Exactly one .whl file
return whl_files[0]
def build_wheel(wheel_directory, config_settings, metadata_directory=None):
"""Invoke the mandatory build_wheel hook.
If a wheel was already built in the
prepare_metadata_for_build_wheel fallback, this
will copy it rather than rebuilding the wheel.
"""
prebuilt_whl = _find_already_built_wheel(metadata_directory)
if prebuilt_whl:
shutil.copy2(prebuilt_whl, wheel_directory)
return os.path.basename(prebuilt_whl)
return _build_backend().build_wheel(wheel_directory, config_settings,
metadata_directory)
def build_editable(wheel_directory, config_settings, metadata_directory=None):
"""Invoke the optional build_editable hook.
If a wheel was already built in the
prepare_metadata_for_build_editable fallback, this
will copy it rather than rebuilding the wheel.
"""
backend = _build_backend()
try:
hook = backend.build_editable
except AttributeError:
raise HookMissing()
else:
prebuilt_whl = _find_already_built_wheel(metadata_directory)
if prebuilt_whl:
shutil.copy2(prebuilt_whl, wheel_directory)
return os.path.basename(prebuilt_whl)
return hook(wheel_directory, config_settings, metadata_directory)
def get_requires_for_build_sdist(config_settings):
"""Invoke the optional get_requires_for_build_wheel hook
Returns [] if the hook is not defined.
"""
backend = _build_backend()
try:
hook = backend.get_requires_for_build_sdist
except AttributeError:
return []
else:
return hook(config_settings)
class _DummyException(Exception):
"""Nothing should ever raise this exception"""
class GotUnsupportedOperation(Exception):
"""For internal use when backend raises UnsupportedOperation"""
def __init__(self, traceback):
self.traceback = traceback
def build_sdist(sdist_directory, config_settings):
"""Invoke the mandatory build_sdist hook."""
backend = _build_backend()
try:
return backend.build_sdist(sdist_directory, config_settings)
except getattr(backend, 'UnsupportedOperation', _DummyException):
raise GotUnsupportedOperation(traceback.format_exc())
HOOK_NAMES = {
'get_requires_for_build_wheel',
'prepare_metadata_for_build_wheel',
'build_wheel',
'get_requires_for_build_editable',
'prepare_metadata_for_build_editable',
'build_editable',
'get_requires_for_build_sdist',
'build_sdist',
'_supported_features',
}
def main():
if len(sys.argv) < 3:
sys.exit("Needs args: hook_name, control_dir")
hook_name = sys.argv[1]
control_dir = sys.argv[2]
if hook_name not in HOOK_NAMES:
sys.exit("Unknown hook: %s" % hook_name)
hook = globals()[hook_name]
hook_input = read_json(pjoin(control_dir, 'input.json'))
json_out = {'unsupported': False, 'return_val': None}
try:
json_out['return_val'] = hook(**hook_input['kwargs'])
except BackendUnavailable as e:
json_out['no_backend'] = True
json_out['traceback'] = e.traceback
except BackendInvalid as e:
json_out['backend_invalid'] = True
json_out['backend_error'] = e.message
except GotUnsupportedOperation as e:
json_out['unsupported'] = True
json_out['traceback'] = e.traceback
except HookMissing as e:
json_out['hook_missing'] = True
json_out['missing_hook_name'] = e.hook_name or hook_name
write_json(json_out, pjoin(control_dir, 'output.json'), indent=2)
if __name__ == '__main__':
main()
@@ -77,8 +77,8 @@ def check_compatibility(urllib3_version, chardet_version, charset_normalizer_ver
elif charset_normalizer_version:
major, minor, patch = charset_normalizer_version.split(".")[:3]
major, minor, patch = int(major), int(minor), int(patch)
# charset_normalizer >= 2.0.0 < 3.0.0
assert (2, 0, 0) <= (major, minor, patch) < (3, 0, 0)
# charset_normalizer >= 2.0.0 < 4.0.0
assert (2, 0, 0) <= (major, minor, patch) < (4, 0, 0)
else:
raise Exception("You need either charset_normalizer or chardet installed")
@@ -5,10 +5,10 @@
__title__ = "requests"
__description__ = "Python HTTP for Humans."
__url__ = "https://requests.readthedocs.io"
__version__ = "2.28.1"
__build__ = 0x022801
__version__ = "2.28.2"
__build__ = 0x022802
__author__ = "Kenneth Reitz"
__author_email__ = "me@kennethreitz.org"
__license__ = "Apache 2.0"
__copyright__ = "Copyright 2022 Kenneth Reitz"
__copyright__ = "Copyright Kenneth Reitz"
__cake__ = "\u2728 \U0001f370 \u2728"
@@ -438,7 +438,7 @@ class PreparedRequest(RequestEncodingMixin, RequestHooksMixin):
if not scheme:
raise MissingSchema(
f"Invalid URL {url!r}: No scheme supplied. "
f"Perhaps you meant http://{url}?"
f"Perhaps you meant https://{url}?"
)
if not host:
+4 -3
View File
@@ -5,7 +5,7 @@ from typing import IO, TYPE_CHECKING, Any, Callable, Optional, Union
from ._extension import load_ipython_extension # noqa: F401
__all__ = ["get_console", "reconfigure", "print", "inspect"]
__all__ = ["get_console", "reconfigure", "print", "inspect", "print_json"]
if TYPE_CHECKING:
from .console import Console
@@ -40,7 +40,8 @@ def reconfigure(*args: Any, **kwargs: Any) -> None:
"""Reconfigures the global console by replacing it with another.
Args:
console (Console): Replacement console instance.
*args (Any): Positional arguments for the replacement :class:`~rich.console.Console`.
**kwargs (Any): Keyword arguments for the replacement :class:`~rich.console.Console`.
"""
from pipenv.patched.pip._vendor.rich.console import Console
@@ -80,7 +81,7 @@ def print_json(
indent: Union[None, int, str] = 2,
highlight: bool = True,
skip_keys: bool = False,
ensure_ascii: bool = True,
ensure_ascii: bool = False,
check_circular: bool = True,
allow_nan: bool = True,
default: Optional[Callable[[Any], Any]] = None,
@@ -227,10 +227,6 @@ if __name__ == "__main__": # pragma: no cover
c = Console(record=True)
c.print(test_card)
# c.save_svg(
# path="/Users/darrenburns/Library/Application Support/JetBrains/PyCharm2021.3/scratches/svg_export.svg",
# title="Rich can export to SVG",
# )
print(f"rendered in {pre_cache_taken}ms (cold cache)")
print(f"rendered in {taken}ms (warm cache)")
@@ -247,10 +243,6 @@ if __name__ == "__main__": # pragma: no cover
"Textualize",
"[u blue link=https://github.com/textualize]https://github.com/textualize",
)
sponsor_message.add_row(
"Buy devs a :coffee:",
"[u blue link=https://ko-fi.com/textualize]https://ko-fi.com/textualize",
)
sponsor_message.add_row(
"Twitter",
"[u blue link=https://twitter.com/willmcgugan]https://twitter.com/willmcgugan",
@@ -0,0 +1,83 @@
from types import TracebackType
from typing import IO, Iterable, Iterator, List, Optional, Type
class NullFile(IO[str]):
# TODO: "mode", "name" and "closed" are only required for Python 3.6.
@property
def mode(self) -> str:
return ""
@property
def name(self) -> str:
return "NullFile"
def closed(self) -> bool:
return False
def close(self) -> None:
pass
def isatty(self) -> bool:
return False
def read(self, __n: int = 1) -> str:
return ""
def readable(self) -> bool:
return False
def readline(self, __limit: int = 1) -> str:
return ""
def readlines(self, __hint: int = 1) -> List[str]:
return []
def seek(self, __offset: int, __whence: int = 1) -> int:
return 0
def seekable(self) -> bool:
return False
def tell(self) -> int:
return 0
def truncate(self, __size: Optional[int] = 1) -> int:
return 0
def writable(self) -> bool:
return False
def writelines(self, __lines: Iterable[str]) -> None:
pass
def __next__(self) -> str:
return ""
def __iter__(self) -> Iterator[str]:
return iter([""])
def __enter__(self) -> IO[str]:
pass
def __exit__(
self,
__t: Optional[Type[BaseException]],
__value: Optional[BaseException],
__traceback: Optional[TracebackType],
) -> None:
pass
def write(self, text: str) -> int:
return 0
def flush(self) -> None:
pass
def fileno(self) -> int:
return -1
NULL_FILE = NullFile()

Some files were not shown because too many files have changed in this diff Show More