Spaces:

rudra0410hf
/

medimage-translator

Running

App Files Files Community

rudra0410hf committed on 3 days ago

Commit

ff4c600

verified ·

1 Parent(s): a7031fb

Delete env

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

env/Lib/site-packages/PyYAML-6.0.2.dist-info/INSTALLER +0 -1
env/Lib/site-packages/PyYAML-6.0.2.dist-info/LICENSE +0 -20
env/Lib/site-packages/PyYAML-6.0.2.dist-info/METADATA +0 -46
env/Lib/site-packages/PyYAML-6.0.2.dist-info/RECORD +0 -43
env/Lib/site-packages/PyYAML-6.0.2.dist-info/WHEEL +0 -5
env/Lib/site-packages/PyYAML-6.0.2.dist-info/top_level.txt +0 -2
env/Lib/site-packages/_yaml/__init__.py +0 -33
env/Lib/site-packages/certifi-2025.1.31.dist-info/INSTALLER +0 -1
env/Lib/site-packages/certifi-2025.1.31.dist-info/LICENSE +0 -20
env/Lib/site-packages/certifi-2025.1.31.dist-info/METADATA +0 -77
env/Lib/site-packages/certifi-2025.1.31.dist-info/RECORD +0 -14
env/Lib/site-packages/certifi-2025.1.31.dist-info/WHEEL +0 -5
env/Lib/site-packages/certifi-2025.1.31.dist-info/top_level.txt +0 -1
env/Lib/site-packages/certifi/__init__.py +0 -4
env/Lib/site-packages/certifi/__main__.py +0 -12
env/Lib/site-packages/certifi/cacert.pem +0 -0
env/Lib/site-packages/certifi/core.py +0 -114
env/Lib/site-packages/certifi/py.typed +0 -0
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER +0 -1
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE +0 -21
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/METADATA +0 -721
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/RECORD +0 -35
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL +0 -5
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt +0 -2
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt +0 -1
env/Lib/site-packages/charset_normalizer/__init__.py +0 -48
env/Lib/site-packages/charset_normalizer/__main__.py +0 -6
env/Lib/site-packages/charset_normalizer/api.py +0 -668
env/Lib/site-packages/charset_normalizer/cd.py +0 -395
env/Lib/site-packages/charset_normalizer/cli/__init__.py +0 -8
env/Lib/site-packages/charset_normalizer/cli/__main__.py +0 -321
env/Lib/site-packages/charset_normalizer/constant.py +0 -1998
env/Lib/site-packages/charset_normalizer/legacy.py +0 -66
env/Lib/site-packages/charset_normalizer/md.py +0 -630
env/Lib/site-packages/charset_normalizer/models.py +0 -360
env/Lib/site-packages/charset_normalizer/py.typed +0 -0
env/Lib/site-packages/charset_normalizer/utils.py +0 -408
env/Lib/site-packages/charset_normalizer/version.py +0 -8
env/Lib/site-packages/colorama-0.4.6.dist-info/INSTALLER +0 -1
env/Lib/site-packages/colorama-0.4.6.dist-info/METADATA +0 -441
env/Lib/site-packages/colorama-0.4.6.dist-info/RECORD +0 -31
env/Lib/site-packages/colorama-0.4.6.dist-info/WHEEL +0 -5
env/Lib/site-packages/colorama-0.4.6.dist-info/licenses/LICENSE.txt +0 -27
env/Lib/site-packages/colorama/__init__.py +0 -7
env/Lib/site-packages/colorama/ansi.py +0 -102
env/Lib/site-packages/colorama/ansitowin32.py +0 -277
env/Lib/site-packages/colorama/initialise.py +0 -121
env/Lib/site-packages/colorama/tests/__init__.py +0 -1
env/Lib/site-packages/colorama/tests/ansi_test.py +0 -76
env/Lib/site-packages/colorama/tests/ansitowin32_test.py +0 -294

env/Lib/site-packages/PyYAML-6.0.2.dist-info/INSTALLER DELETED Viewed

	@@ -1 +0,0 @@
1	- pip

env/Lib/site-packages/PyYAML-6.0.2.dist-info/LICENSE DELETED Viewed

@@ -1,20 +0,0 @@
-Copyright (c) 2017-2021 Ingy döt Net
-Copyright (c) 2006-2016 Kirill Simonov
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

env/Lib/site-packages/PyYAML-6.0.2.dist-info/METADATA DELETED Viewed

@@ -1,46 +0,0 @@
-Metadata-Version: 2.1
-Name: PyYAML
-Version: 6.0.2
-Summary: YAML parser and emitter for Python
-Home-page: https://pyyaml.org/
-Download-URL: https://pypi.org/project/PyYAML/
-Author: Kirill Simonov
-Author-email: [email protected]
-License: MIT
-Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues
-Project-URL: CI, https://github.com/yaml/pyyaml/actions
-Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation
-Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core
-Project-URL: Source Code, https://github.com/yaml/pyyaml
-Platform: Any
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Cython
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Classifier: Programming Language :: Python :: Implementation :: CPython
-Classifier: Programming Language :: Python :: Implementation :: PyPy
-Classifier: Topic :: Software Development :: Libraries :: Python Modules
-Classifier: Topic :: Text Processing :: Markup
-Requires-Python: >=3.8
-License-File: LICENSE
-YAML is a data serialization format designed for human readability
-and interaction with scripting languages.  PyYAML is a YAML parser
-and emitter for Python.
-PyYAML features a complete YAML 1.1 parser, Unicode support, pickle
-support, capable extension API, and sensible error messages.  PyYAML
-supports standard YAML tags and provides Python-specific tags that
-allow to represent an arbitrary Python object.
-PyYAML is applicable for a broad range of tasks from complex
-configuration files to object serialization and persistence.

env/Lib/site-packages/PyYAML-6.0.2.dist-info/RECORD DELETED Viewed

@@ -1,43 +0,0 @@
-PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101
-PyYAML-6.0.2.dist-info/METADATA,sha256=9lwXqTOrXPts-jI2Lo5UwuaAYo0hiRA0BZqjch0WjAk,2106
-PyYAML-6.0.2.dist-info/RECORD,,
-PyYAML-6.0.2.dist-info/WHEEL,sha256=c7SWG1_hRvc9HXHEkmWlTu1Jr4WpzRucfzqTP-_8q0s,102
-PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11
-_yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402
-_yaml/__pycache__/__init__.cpython-312.pyc,,
-yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311
-yaml/__pycache__/__init__.cpython-312.pyc,,
-yaml/__pycache__/composer.cpython-312.pyc,,
-yaml/__pycache__/constructor.cpython-312.pyc,,
-yaml/__pycache__/cyaml.cpython-312.pyc,,
-yaml/__pycache__/dumper.cpython-312.pyc,,
-yaml/__pycache__/emitter.cpython-312.pyc,,
-yaml/__pycache__/error.cpython-312.pyc,,
-yaml/__pycache__/events.cpython-312.pyc,,
-yaml/__pycache__/loader.cpython-312.pyc,,
-yaml/__pycache__/nodes.cpython-312.pyc,,
-yaml/__pycache__/parser.cpython-312.pyc,,
-yaml/__pycache__/reader.cpython-312.pyc,,
-yaml/__pycache__/representer.cpython-312.pyc,,
-yaml/__pycache__/resolver.cpython-312.pyc,,
-yaml/__pycache__/scanner.cpython-312.pyc,,
-yaml/__pycache__/serializer.cpython-312.pyc,,
-yaml/__pycache__/tokens.cpython-312.pyc,,
-yaml/_yaml.cp312-win_amd64.pyd,sha256=Bx7e_LEQx7cnd1_A9_nClp3X77g-_Lw1aoAAtYZbwWk,263680
-yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883
-yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639
-yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851
-yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837
-yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006
-yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533
-yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445
-yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061
-yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440
-yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495
-yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794
-yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190
-yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004
-yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279
-yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165
-yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573

env/Lib/site-packages/PyYAML-6.0.2.dist-info/WHEEL DELETED Viewed

@@ -1,5 +0,0 @@
-Wheel-Version: 1.0
-Generator: bdist_wheel (0.44.0)
-Root-Is-Purelib: false
-Tag: cp312-cp312-win_amd64

env/Lib/site-packages/PyYAML-6.0.2.dist-info/top_level.txt DELETED Viewed

	@@ -1,2 +0,0 @@
1	- _yaml
2	- yaml

env/Lib/site-packages/_yaml/__init__.py DELETED Viewed

@@ -1,33 +0,0 @@
-# This is a stub package designed to roughly emulate the _yaml
-# extension module, which previously existed as a standalone module
-# and has been moved into the `yaml` package namespace.
-# It does not perfectly mimic its old counterpart, but should get
-# close enough for anyone who's relying on it even when they shouldn't.
-import yaml
-# in some circumstances, the yaml module we imported may be from a different version, so we need
-# to tread carefully when poking at it here (it may not have the attributes we expect)
-if not getattr(yaml, '__with_libyaml__', False):
-    from sys import version_info
-    exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
-    raise exc("No module named '_yaml'")
-else:
-    from yaml._yaml import *
-    import warnings
-    warnings.warn(
-        'The _yaml extension module is now located at yaml._yaml'
-        ' and its location is subject to change.  To use the'
-        ' LibYAML-based parser and emitter, import from `yaml`:'
-        ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
-        DeprecationWarning
-    )
-    del warnings
-    # Don't `del yaml` here because yaml is actually an existing
-    # namespace member of _yaml.
-__name__ = '_yaml'
-# If the module is top-level (i.e. not a part of any specific package)
-# then the attribute should be set to ''.
-# https://docs.python.org/3.8/library/types.html
-__package__ = ''

env/Lib/site-packages/certifi-2025.1.31.dist-info/INSTALLER DELETED Viewed

	@@ -1 +0,0 @@
1	- pip

env/Lib/site-packages/certifi-2025.1.31.dist-info/LICENSE DELETED Viewed

@@ -1,20 +0,0 @@
-This package contains a modified version of ca-bundle.crt:
-ca-bundle.crt -- Bundle of CA Root Certificates
-This is a bundle of X.509 certificates of public Certificate Authorities
-(CA). These were automatically extracted from Mozilla's root certificates
-file (certdata.txt).  This file can be found in the mozilla source tree:
-https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
-It contains the certificates in PEM format and therefore
-can be directly used with curl / libcurl / php_curl, or with
-an Apache+mod_ssl webserver for SSL client authentication.
-Just configure this file as the SSLCACertificateFile.#
-***** BEGIN LICENSE BLOCK *****
-This Source Code Form is subject to the terms of the Mozilla Public License,
-v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
-one at http://mozilla.org/MPL/2.0/.
-***** END LICENSE BLOCK *****
-@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $

env/Lib/site-packages/certifi-2025.1.31.dist-info/METADATA DELETED Viewed

@@ -1,77 +0,0 @@
-Metadata-Version: 2.2
-Name: certifi
-Version: 2025.1.31
-Summary: Python package for providing Mozilla's CA Bundle.
-Home-page: https://github.com/certifi/python-certifi
-Author: Kenneth Reitz
-Author-email: [email protected]
-License: MPL-2.0
-Project-URL: Source, https://github.com/certifi/python-certifi
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
-Classifier: Natural Language :: English
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.6
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Requires-Python: >=3.6
-License-File: LICENSE
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: home-page
-Dynamic: license
-Dynamic: project-url
-Dynamic: requires-python
-Dynamic: summary
-Certifi: Python SSL Certificates
-================================
-Certifi provides Mozilla's carefully curated collection of Root Certificates for
-validating the trustworthiness of SSL certificates while verifying the identity
-of TLS hosts. It has been extracted from the `Requests`_ project.
-Installation
-------------
-``certifi`` is available on PyPI. Simply install it with ``pip``::
-    $ pip install certifi
-Usage
------
-To reference the installed certificate authority (CA) bundle, you can use the
-built-in function::
-    >>> import certifi
-    >>> certifi.where()
-    '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
-Or from the command line::
-    $ python -m certifi
-    /usr/local/lib/python3.7/site-packages/certifi/cacert.pem
-Enjoy!
-.. _`Requests`: https://requests.readthedocs.io/en/master/
-Addition/Removal of Certificates
---------------------------------
-Certifi does not support any addition/removal or other modification of the
-CA trust store content. This project is intended to provide a reliable and
-highly portable root of trust to python deployments. Look to upstream projects
-for methods to use alternate trust.

env/Lib/site-packages/certifi-2025.1.31.dist-info/RECORD DELETED Viewed

@@ -1,14 +0,0 @@
-certifi-2025.1.31.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-certifi-2025.1.31.dist-info/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
-certifi-2025.1.31.dist-info/METADATA,sha256=t5kcT5aGu0dQ6_psUNZYTqnC0uCRnponewm3uYjeHbg,2451
-certifi-2025.1.31.dist-info/RECORD,,
-certifi-2025.1.31.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-certifi-2025.1.31.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
-certifi/__init__.py,sha256=neIaAf7BM36ygmQCmy-ZsSyjnvjWghFeu13wwEAnjj0,94
-certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
-certifi/__pycache__/__init__.cpython-312.pyc,,
-certifi/__pycache__/__main__.cpython-312.pyc,,
-certifi/__pycache__/core.cpython-312.pyc,,
-certifi/cacert.pem,sha256=xVsh-Qf3-G1IrdCTVS-1ZRdJ_1-GBQjMu0I9bB-9gMc,297255
-certifi/core.py,sha256=qRDDFyXVJwTB_EmoGppaXU_R9qCZvhl-EzxPMuV3nTA,4426
-certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

env/Lib/site-packages/certifi-2025.1.31.dist-info/WHEEL DELETED Viewed

@@ -1,5 +0,0 @@
-Wheel-Version: 1.0
-Generator: setuptools (75.8.0)
-Root-Is-Purelib: true
-Tag: py3-none-any

env/Lib/site-packages/certifi-2025.1.31.dist-info/top_level.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- certifi

env/Lib/site-packages/certifi/__init__.py DELETED Viewed

@@ -1,4 +0,0 @@
-from .core import contents, where
-__all__ = ["contents", "where"]
-__version__ = "2025.01.31"

env/Lib/site-packages/certifi/__main__.py DELETED Viewed

@@ -1,12 +0,0 @@
-import argparse
-from certifi import contents, where
-parser = argparse.ArgumentParser()
-parser.add_argument("-c", "--contents", action="store_true")
-args = parser.parse_args()
-if args.contents:
-    print(contents())
-else:
-    print(where())

env/Lib/site-packages/certifi/cacert.pem DELETED Viewed

The diff for this file is too large to render. See raw diff

env/Lib/site-packages/certifi/core.py DELETED Viewed

@@ -1,114 +0,0 @@
-"""
-certifi.py
-~~~~~~~~~~
-This module returns the installation location of cacert.pem or its contents.
-"""
-import sys
-import atexit
-def exit_cacert_ctx() -> None:
-    _CACERT_CTX.__exit__(None, None, None)  # type: ignore[union-attr]
-if sys.version_info >= (3, 11):
-    from importlib.resources import as_file, files
-    _CACERT_CTX = None
-    _CACERT_PATH = None
-    def where() -> str:
-        # This is slightly terrible, but we want to delay extracting the file
-        # in cases where we're inside of a zipimport situation until someone
-        # actually calls where(), but we don't want to re-extract the file
-        # on every call of where(), so we'll do it once then store it in a
-        # global variable.
-        global _CACERT_CTX
-        global _CACERT_PATH
-        if _CACERT_PATH is None:
-            # This is slightly janky, the importlib.resources API wants you to
-            # manage the cleanup of this file, so it doesn't actually return a
-            # path, it returns a context manager that will give you the path
-            # when you enter it and will do any cleanup when you leave it. In
-            # the common case of not needing a temporary file, it will just
-            # return the file system location and the __exit__() is a no-op.
-            #
-            # We also have to hold onto the actual context manager, because
-            # it will do the cleanup whenever it gets garbage collected, so
-            # we will also store that at the global level as well.
-            _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
-            _CACERT_PATH = str(_CACERT_CTX.__enter__())
-            atexit.register(exit_cacert_ctx)
-        return _CACERT_PATH
-    def contents() -> str:
-        return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")
-elif sys.version_info >= (3, 7):
-    from importlib.resources import path as get_path, read_text
-    _CACERT_CTX = None
-    _CACERT_PATH = None
-    def where() -> str:
-        # This is slightly terrible, but we want to delay extracting the
-        # file in cases where we're inside of a zipimport situation until
-        # someone actually calls where(), but we don't want to re-extract
-        # the file on every call of where(), so we'll do it once then store
-        # it in a global variable.
-        global _CACERT_CTX
-        global _CACERT_PATH
-        if _CACERT_PATH is None:
-            # This is slightly janky, the importlib.resources API wants you
-            # to manage the cleanup of this file, so it doesn't actually
-            # return a path, it returns a context manager that will give
-            # you the path when you enter it and will do any cleanup when
-            # you leave it. In the common case of not needing a temporary
-            # file, it will just return the file system location and the
-            # __exit__() is a no-op.
-            #
-            # We also have to hold onto the actual context manager, because
-            # it will do the cleanup whenever it gets garbage collected, so
-            # we will also store that at the global level as well.
-            _CACERT_CTX = get_path("certifi", "cacert.pem")
-            _CACERT_PATH = str(_CACERT_CTX.__enter__())
-            atexit.register(exit_cacert_ctx)
-        return _CACERT_PATH
-    def contents() -> str:
-        return read_text("certifi", "cacert.pem", encoding="ascii")
-else:
-    import os
-    import types
-    from typing import Union
-    Package = Union[types.ModuleType, str]
-    Resource = Union[str, "os.PathLike"]
-    # This fallback will work for Python versions prior to 3.7 that lack the
-    # importlib.resources module but relies on the existing `where` function
-    # so won't address issues with environments like PyOxidizer that don't set
-    # __file__ on modules.
-    def read_text(
-        package: Package,
-        resource: Resource,
-        encoding: str = 'utf-8',
-        errors: str = 'strict'
-    ) -> str:
-        with open(where(), encoding=encoding) as data:
-            return data.read()
-    # If we don't have importlib.resources, then we will just do the old logic
-    # of assuming we're on the filesystem and munge the path directly.
-    def where() -> str:
-        f = os.path.dirname(__file__)
-        return os.path.join(f, "cacert.pem")
-    def contents() -> str:
-        return read_text("certifi", "cacert.pem", encoding="ascii")

env/Lib/site-packages/certifi/py.typed DELETED Viewed

File without changes

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER DELETED Viewed

	@@ -1 +0,0 @@
1	- pip

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE DELETED Viewed

@@ -1,21 +0,0 @@
-MIT License
-Copyright (c) 2025 TAHRI Ahmed R.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/METADATA DELETED Viewed

@@ -1,721 +0,0 @@
-Metadata-Version: 2.1
-Name: charset-normalizer
-Version: 3.4.1
-Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
-Author-email: "Ahmed R. TAHRI" <[email protected]>
-Maintainer-email: "Ahmed R. TAHRI" <[email protected]>
-License: MIT
-Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
-Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
-Project-URL: Code, https://github.com/jawah/charset_normalizer
-Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
-Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Classifier: Programming Language :: Python :: 3.12
-Classifier: Programming Language :: Python :: 3.13
-Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: Implementation :: CPython
-Classifier: Programming Language :: Python :: Implementation :: PyPy
-Classifier: Topic :: Text Processing :: Linguistic
-Classifier: Topic :: Utilities
-Classifier: Typing :: Typed
-Requires-Python: >=3.7
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Provides-Extra: unicode-backport
-<h1 align="center">Charset Detection, for Everyone 👋</h1>
-<p align="center">
-  <sup>The Real First Universal Charset Detector</sup><br>
-  <a href="https://pypi.org/project/charset-normalizer">
-    <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
-  </a>
-  <a href="https://pepy.tech/project/charset-normalizer/">
-    <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
-  </a>
-  <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
-    <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
-  </a>
-</p>
-<p align="center">
-  <sup><i>Featured Packages</i></sup><br>
-  <a href="https://github.com/jawah/niquests">
-   <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Best_HTTP_Client-cyan">
-  </a>
-  <a href="https://github.com/jawah/wassima">
-   <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Killer-cyan">
-  </a>
-</p>
-<p align="center">
-  <sup><i>In other language (unofficial port - by the community)</i></sup><br>
-  <a href="https://github.com/nickspring/charset-normalizer-rs">
-   <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
-  </a>
-</p>
-> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
-> I'm trying to resolve the issue by taking a new approach.
-> All IANA character set names for which the Python core library provides codecs are supported.
-<p align="center">
-  >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
-</p>
-This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
-| Feature                                          | [Chardet](https://github.com/chardet/chardet) |                                         Charset Normalizer                                         | [cChardet](https://github.com/PyYoshi/cChardet) |
-|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
-| `Fast`                                           |                       ❌                       |                                                 ✅                                                  |                        ✅                        |
-| `Universal**`                                    |                       ❌                       |                                                 ✅                                                  |                        ❌                        |
-| `Reliable` **without** distinguishable standards |                       ❌                       |                                                 ✅                                                  |                        ✅                        |
-| `Reliable` **with** distinguishable standards    |                       ✅                       |                                                 ✅                                                  |                        ✅                        |
-| `License`                                        |           LGPL-2.1<br>_restrictive_           |                                                MIT                                                 |            MPL-1.1<br>_restrictive_             |
-| `Native Python`                                  |                       ✅                       |                                                 ✅                                                  |                        ❌                        |
-| `Detect spoken language`                         |                       ❌                       |                                                 ✅                                                  |                       N/A                       |
-| `UnicodeDecodeError Safety`                      |                       ❌                       |                                                 ✅                                                  |                        ❌                        |
-| `Whl Size (min)`                                 |                   193.6 kB                    |                                               42 kB                                                |                     ~200 kB                     |
-| `Supported Encoding`                             |                      33                       | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) |                       40                        |
-<p align="center">
-<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
-</p>
-*\*\* : They are clearly using specific code for a specific encoding even if covering most of the used ones*<br>
-## ⚡ Performance
-This package offers better performance than its counterpart Chardet. Here are some numbers.
-| Package                                       | Accuracy | Mean per file (ms) | File per sec (est) |
-|-----------------------------------------------|:--------:|:------------------:|:------------------:|
-| [chardet](https://github.com/chardet/chardet) |   86 %   |       63 ms        |    16 file/sec     |
-| charset-normalizer                            | **98 %** |     **10 ms**      |    100 file/sec    |
-| Package                                       | 99th percentile | 95th percentile | 50th percentile |
-|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
-| [chardet](https://github.com/chardet/chardet) |     265 ms      |      71 ms      |      7 ms       |
-| charset-normalizer                            |     100 ms      |      50 ms      |      5 ms       |
-_updated as of december 2024 using CPython 3.12_
-Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
-> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
-> And yes, these results might change at any time. The dataset can be updated to include more files.
-> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
-> Keep in mind that the stats are generous and that Chardet's accuracy vs ours is measured using Chardet's initial capability
-> (e.g. Supported Encoding). Challenge them if you want.
-## ✨ Installation
-Using pip:
-```sh
-pip install charset-normalizer -U
-```
-## 🚀 Basic Usage
-### CLI
-This package comes with a CLI.
-```
-usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
-                  file [file ...]
-The Real First Universal Charset Detector. Discover originating encoding used
-on text file. Normalize text to unicode.
-positional arguments:
-  files                 File(s) to be analysed
-optional arguments:
-  -h, --help            show this help message and exit
-  -v, --verbose         Display complementary information about file if any.
-                        Stdout will contain logs about the detection process.
-  -a, --with-alternative
-                        Output complementary possibilities if any. Top-level
-                        JSON WILL be a list.
-  -n, --normalize       Permit to normalize input file. If not set, program
-                        does not write anything.
-  -m, --minimal         Only output the charset detected to STDOUT. Disabling
-                        JSON output.
-  -r, --replace         Replace file when trying to normalize it instead of
-                        creating a new one.
-  -f, --force           Replace file without asking if you are sure, use this
-                        flag with caution.
-  -t THRESHOLD, --threshold THRESHOLD
-                        Define a custom maximum amount of chaos allowed in
-                        decoded content. 0. <= chaos <= 1.
-  --version             Show version information and exit.
-```
-```bash
-normalizer ./data/sample.1.fr.srt
-```
-or
-```bash
-python -m charset_normalizer ./data/sample.1.fr.srt
-```
-🎉 Since version 1.4.0 the CLI produces easily usable stdout results in JSON format.
-```json
-{
-    "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
-    "encoding": "cp1252",
-    "encoding_aliases": [
-        "1252",
-        "windows_1252"
-    ],
-    "alternative_encodings": [
-        "cp1254",
-        "cp1256",
-        "cp1258",
-        "iso8859_14",
-        "iso8859_15",
-        "iso8859_16",
-        "iso8859_3",
-        "iso8859_9",
-        "latin_1",
-        "mbcs"
-    ],
-    "language": "French",
-    "alphabets": [
-        "Basic Latin",
-        "Latin-1 Supplement"
-    ],
-    "has_sig_or_bom": false,
-    "chaos": 0.149,
-    "coherence": 97.152,
-    "unicode_path": null,
-    "is_preferred": true
-}
-```
-### Python
-*Just print out normalized text*
-```python
-from charset_normalizer import from_path
-results = from_path('./my_subtitle.srt')
-print(str(results.best()))
-```
-*Upgrade your code without effort*
-```python
-from charset_normalizer import detect
-```
-The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
-See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
-## 😇 Why
-When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
-reliable alternative using a completely different method. Also! I never back down on a good challenge!
-I **don't care** about the **originating charset** encoding, because **two different tables** can
-produce **two identical rendered strings.**
-What I want is to get readable text, the best I can.
-In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
-Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's goal is to convert a raw file in an unknown encoding to Unicode.
-## 🍰 How
-  - Discard all charset encoding table that could not fit the binary content.
-  - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
-  - Extract matches with the lowest mess detected.
-  - Additionally, we measure coherence / probe for a language.
-**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
-*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
-**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
- I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
- improve or rewrite it.
-*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
-that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
-## ⚡ Known limitations
-  - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
-  - Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
-## ⚠️ About Python EOLs
-**If you are running:**
-- Python >=2.7,<3.5: Unsupported
-- Python 3.5: charset-normalizer < 2.1
-- Python 3.6: charset-normalizer < 3.1
-- Python 3.7: charset-normalizer < 4.0
-Upgrade your Python interpreter as soon as possible.
-## 👤 Contributing
-Contributions, issues and feature requests are very much welcome.<br />
-Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
-## 📝 License
-Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
-This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
-Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
-## 💼 For Enterprise
-Professional support for charset-normalizer is available as part of the [Tidelift
-Subscription][1]. Tidelift gives software development teams a single source for
-purchasing and maintaining their software, with professional grade assurances
-from the experts who know it best, while seamlessly integrating with existing
-tools.
-[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
-[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
-# Changelog
-All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
-## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
-### Changed
-- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
-- Enforce annotation delayed loading for a simpler and consistent types in the project.
-- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
-### Added
-- pre-commit configuration.
-- noxfile.
-### Removed
-- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
-- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
-- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
-- Unused `utils.range_scan` function.
-### Fixed
-- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
-- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
-## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
-### Added
-- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
-- Support for Python 3.13 (#512)
-### Fixed
-- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
-- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
-- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
-## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
-### Fixed
-- Unintentional memory usage regression when using large payload that match several encoding (#376)
-- Regression on some detection case showcased in the documentation (#371)
-### Added
-- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
-## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
-### Changed
-- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
-- Improved the general detection reliability based on reports from the community
-## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
-### Added
-- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
-- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
-### Removed
-- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
-- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
-### Changed
-- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
-- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
-### Fixed
-- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
-## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
-### Changed
-- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
-- Minor improvement over the global detection reliability
-### Added
-- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
-- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
-- Explicit support for Python 3.12
-### Fixed
-- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
-## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
-### Added
-- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
-### Removed
-- Support for Python 3.6 (PR #260)
-### Changed
-- Optional speedup provided by mypy/c 1.0.1
-## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
-### Fixed
-- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
-### Changed
-- Speedup provided by mypy/c 0.990 on Python >= 3.7
-## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
-### Added
-- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
-- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
-- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
-- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
-### Changed
-- Build with static metadata using 'build' frontend
-- Make the language detection stricter
-- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
-### Fixed
-- CLI with opt --normalize fail when using full path for files
-- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
-- Sphinx warnings when generating the documentation
-### Removed
-- Coherence detector no longer return 'Simple English' instead return 'English'
-- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
-- Breaking: Method `first()` and `best()` from CharsetMatch
-- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
-- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
-- Breaking: Top-level function `normalize`
-- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
-- Support for the backport `unicodedata2`
-## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
-### Added
-- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
-- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
-- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
-### Changed
-- Build with static metadata using 'build' frontend
-- Make the language detection stricter
-### Fixed
-- CLI with opt --normalize fail when using full path for files
-- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
-### Removed
-- Coherence detector no longer return 'Simple English' instead return 'English'
-- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
-## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
-### Added
-- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
-### Removed
-- Breaking: Method `first()` and `best()` from CharsetMatch
-- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
-### Fixed
-- Sphinx warnings when generating the documentation
-## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
-### Changed
-- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
-### Removed
-- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
-- Breaking: Top-level function `normalize`
-- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
-- Support for the backport `unicodedata2`
-## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
-### Deprecated
-- Function `normalize` scheduled for removal in 3.0
-### Changed
-- Removed useless call to decode in fn is_unprintable (#206)
-### Fixed
-- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
-## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
-### Added
-- Output the Unicode table version when running the CLI with `--version` (PR #194)
-### Changed
-- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
-- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
-### Fixed
-- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
-- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
-### Removed
-- Support for Python 3.5 (PR #192)
-### Deprecated
-- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
-## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
-### Fixed
-- ASCII miss-detection on rare cases (PR #170)
-## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
-### Added
-- Explicit support for Python 3.11 (PR #164)
-### Changed
-- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
-## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
-### Fixed
-- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
-### Changed
-- Skipping the language-detection (CD) on ASCII (PR #155)
-## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
-### Changed
-- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
-### Fixed
-- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
-## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
-### Changed
-- Improvement over Vietnamese detection (PR #126)
-- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
-- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
-- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
-- Code style as refactored by Sourcery-AI (PR #131)
-- Minor adjustment on the MD around european words (PR #133)
-- Remove and replace SRTs from assets / tests (PR #139)
-- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
-- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
-### Fixed
-- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
-- Avoid using too insignificant chunk (PR #137)
-### Added
-- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
-- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
-## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
-### Added
-- Add support for Kazakh (Cyrillic) language detection (PR #109)
-### Changed
-- Further, improve inferring the language from a given single-byte code page (PR #112)
-- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
-- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
-- Various detection improvement (MD+CD) (PR #117)
-### Removed
-- Remove redundant logging entry about detected language(s) (PR #115)
-### Fixed
-- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
-## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
-### Fixed
-- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
-- Fix CLI crash when using --minimal output in certain cases (PR #103)
-### Changed
-- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
-## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
-### Changed
-- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
-- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
-- The Unicode detection is slightly improved (PR #93)
-- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
-### Removed
-- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
-### Fixed
-- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
-- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
-- The MANIFEST.in was not exhaustive (PR #78)
-## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
-### Fixed
-- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
-- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
-- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
-- Submatch factoring could be wrong in rare edge cases (PR #72)
-- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
-- Fix line endings from CRLF to LF for certain project files (PR #67)
-### Changed
-- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
-- Allow fallback on specified encoding if any (PR #71)
-## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
-### Changed
-- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
-- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
-## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
-### Fixed
-- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
-### Changed
-- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
-## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
-### Fixed
-- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
-- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
-- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
-- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
-### Changed
-- Public function normalize default args values were not aligned with from_bytes (PR #53)
-### Added
-- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
-## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
-### Changed
-- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
-- Accent has been made on UTF-8 detection, should perform rather instantaneous.
-- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
-- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
-- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+
-- utf_7 detection has been reinstated.
-### Removed
-- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
-- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
-- The exception hook on UnicodeDecodeError has been removed.
-### Deprecated
-- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
-### Fixed
-- The CLI output used the relative path of the file(s). Should be absolute.
-## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
-### Fixed
-- Logger configuration/usage no longer conflict with others (PR #44)
-## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
-### Removed
-- Using standard logging instead of using the package loguru.
-- Dropping nose test framework in favor of the maintained pytest.
-- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
-- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
-- Stop support for UTF-7 that does not contain a SIG.
-- Dropping PrettyTable, replaced with pure JSON output in CLI.
-### Fixed
-- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
-- Not searching properly for the BOM when trying utf32/16 parent codec.
-### Changed
-- Improving the package final size by compressing frequencies.json.
-- Huge improvement over the largest payloads.
-### Added
-- CLI now produces JSON consumable output.
-- Return ASCII if given sequences fit. Given reasonable confidence.
-## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
-### Fixed
-- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
-## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
-### Fixed
-- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
-## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
-### Fixed
-- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
-## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
-### Changed
-- Amend the previous release to allow prettytable 2.0 (PR #35)
-## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
-### Fixed
-- Fix error while using the package with a python pre-release interpreter (PR #33)
-### Changed
-- Dependencies refactoring, constraints revised.
-### Added
-- Add python 3.9 and 3.10 to the supported interpreters
-MIT License
-Copyright (c) 2025 TAHRI Ahmed R.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/RECORD DELETED Viewed

@@ -1,35 +0,0 @@
-../../Scripts/normalizer.exe,sha256=aGyf7WAVLi4gHrr8F-d9-4fQG9ifpfMEXEvLwyt8KjI,108411
-charset_normalizer-3.4.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-charset_normalizer-3.4.1.dist-info/LICENSE,sha256=GFd0hdNwTxpHne2OVzwJds_tMV_S_ReYP6mI2kwvcNE,1092
-charset_normalizer-3.4.1.dist-info/METADATA,sha256=0_fAC3DknimRZusm6kkP4ylPD0JVzBq5mKHWLNBJM6w,36034
-charset_normalizer-3.4.1.dist-info/RECORD,,
-charset_normalizer-3.4.1.dist-info/WHEEL,sha256=pWXrJbnZSH-J-PhYmKs2XNn4DHCPNBYq965vsBJBFvA,101
-charset_normalizer-3.4.1.dist-info/entry_points.txt,sha256=8C-Y3iXIfyXQ83Tpir2B8t-XLJYpxF5xbb38d_js-h4,65
-charset_normalizer-3.4.1.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
-charset_normalizer/__init__.py,sha256=0NT8MHi7SKq3juMqYfOdrkzjisK0L73lneNHH4qaUAs,1638
-charset_normalizer/__main__.py,sha256=2sj_BS6H0sU25C1bMqz9DVwa6kOK9lchSEbSU-_iu7M,115
-charset_normalizer/__pycache__/__init__.cpython-312.pyc,,
-charset_normalizer/__pycache__/__main__.cpython-312.pyc,,
-charset_normalizer/__pycache__/api.cpython-312.pyc,,
-charset_normalizer/__pycache__/cd.cpython-312.pyc,,
-charset_normalizer/__pycache__/constant.cpython-312.pyc,,
-charset_normalizer/__pycache__/legacy.cpython-312.pyc,,
-charset_normalizer/__pycache__/md.cpython-312.pyc,,
-charset_normalizer/__pycache__/models.cpython-312.pyc,,
-charset_normalizer/__pycache__/utils.cpython-312.pyc,,
-charset_normalizer/__pycache__/version.cpython-312.pyc,,
-charset_normalizer/api.py,sha256=2a0p2Gnhbdo9O6C04CNxTSN23fIbgOF20nxb0pWPNFM,23285
-charset_normalizer/cd.py,sha256=uq8nVxRpR6Guc16ACvOWtL8KO3w7vYaCh8hHisuOyTg,12917
-charset_normalizer/cli/__init__.py,sha256=d9MUx-1V_qD3x9igIy4JT4oC5CU0yjulk7QyZWeRFhg,144
-charset_normalizer/cli/__main__.py,sha256=lZ89qRWun7FRxX0qm1GhK-m0DH0i048yiMAX1mVIuRg,10731
-charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc,,
-charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc,,
-charset_normalizer/constant.py,sha256=7OKYi28cJjZxIcX3lQCwfK9ijoOgaVEbERww7SqqNSY,42475
-charset_normalizer/legacy.py,sha256=v8An1aAQHUu036UWOhyIaDGkirZ0t4hfNVlyje5KInU,2394
-charset_normalizer/md.cp312-win_amd64.pyd,sha256=XBGy--IKda7c3iBfvw_dovocqb2RSucmVtxvtlG_3tA,10752
-charset_normalizer/md.py,sha256=e452fhwIAguEUr3FJzG7QZvFgXI-dVLOh_M1ZUiFI6U,20666
-charset_normalizer/md__mypyc.cp312-win_amd64.pyd,sha256=_-jWSji0BgBVvrIHbmabYQNMBF4-xTusdO5mu6P8JsA,125440
-charset_normalizer/models.py,sha256=ZR2PE-fqf6dASZfqdE5Uhkmr0o1MciSdXOjuNqwkmvg,12754
-charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-charset_normalizer/utils.py,sha256=oH9Q3WcAMwmsSB7uM8uDozz9DXnkYecbkTNbdnMbgzI,12410
-charset_normalizer/version.py,sha256=7_thI7FzRQxEsbtUYwrJs3FCFWF666mw74H8mggPRR0,123

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL DELETED Viewed

@@ -1,5 +0,0 @@
-Wheel-Version: 1.0
-Generator: setuptools (75.6.0)
-Root-Is-Purelib: false
-Tag: cp312-cp312-win_amd64

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt DELETED Viewed

	@@ -1,2 +0,0 @@
1	- [console_scripts]
2	- normalizer = charset_normalizer:cli.cli_detect

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- charset_normalizer

env/Lib/site-packages/charset_normalizer/__init__.py DELETED Viewed

@@ -1,48 +0,0 @@
-"""
-Charset-Normalizer
-~~~~~~~~~~~~~~
-The Real First Universal Charset Detector.
-A library that helps you read text from an unknown charset encoding.
-Motivated by chardet, this package tries to resolve the issue by taking a new approach.
-All IANA character set names for which the Python core library provides codecs are supported.
-Basic usage:
-   >>> from charset_normalizer import from_bytes
-   >>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
-   >>> best_guess = results.best()
-   >>> str(best_guess)
-   'Bсеки човек има право на образование. Oбразованието!'
-Other methods and usages are available - see the full documentation
-at <https://github.com/Ousret/charset_normalizer>.
-:copyright: (c) 2021 by Ahmed TAHRI
-:license: MIT, see LICENSE for more details.
-"""
-from __future__ import annotations
-import logging
-from .api import from_bytes, from_fp, from_path, is_binary
-from .legacy import detect
-from .models import CharsetMatch, CharsetMatches
-from .utils import set_logging_handler
-from .version import VERSION, __version__
-__all__ = (  # names listed here are the ones imported above from the sibling submodules
-    "from_fp",
-    "from_path",
-    "from_bytes",
-    "is_binary",
-    "detect",
-    "CharsetMatch",
-    "CharsetMatches",
-    "__version__",
-    "VERSION",
-    "set_logging_handler",
-)
-# Attach a NullHandler to the top-level "charset_normalizer" logger by default, as recommended for libraries:
-# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
-logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())

env/Lib/site-packages/charset_normalizer/__main__.py DELETED Viewed

@@ -1,6 +0,0 @@
-from __future__ import annotations
-from .cli import cli_detect
-if __name__ == "__main__":  # true only when this module is executed as a script, not when imported
-    cli_detect()

env/Lib/site-packages/charset_normalizer/api.py DELETED Viewed

@@ -1,668 +0,0 @@
-from __future__ import annotations
-import logging
-from os import PathLike
-from typing import BinaryIO
-from .cd import (
-    coherence_ratio,
-    encoding_languages,
-    mb_encoding_languages,
-    merge_coherence_ratios,
-)
-from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
-from .md import mess_ratio
-from .models import CharsetMatch, CharsetMatches
-from .utils import (
-    any_specified_encoding,
-    cut_sequence_chunks,
-    iana_name,
-    identify_sig_or_bom,
-    is_cp_similar,
-    is_multi_byte_encoding,
-    should_strip_sig_or_bom,
-)
-logger = logging.getLogger("charset_normalizer")  # same logger name the package uses for its NullHandler
-explain_handler = logging.StreamHandler()  # added to / removed from `logger` by from_bytes when explain=True
-explain_handler.setFormatter(
-    logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
-)
-def from_bytes(
-    sequences: bytes | bytearray,
-    steps: int = 5,
-    chunk_size: int = 512,
-    threshold: float = 0.2,
-    cp_isolation: list[str] | None = None,
-    cp_exclusion: list[str] | None = None,
-    preemptive_behaviour: bool = True,
-    explain: bool = False,
-    language_threshold: float = 0.1,
-    enable_fallback: bool = True,
-) -> CharsetMatches:
-    """
-    Given a raw bytes sequence, return the best possible charset(s) usable to render str objects.
-    If there are no results, it is a strong indicator that the source is binary/not text.
-    By default, the process will extract 5 blocks of 512 bytes each to assess the mess and coherence of a given sequence.
-    And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.
-    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritizes a particular code page
-    but never takes it for granted. Can improve the performance.
-    You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
-    purpose.
-    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
-    By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
-    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
-    Custom logging format and handler can be set manually.
-
-    :param sequences: payload to analyse; anything other than bytes/bytearray raises TypeError.
-    :param steps: number of chunks to probe; forced to 1 when the payload is not longer than chunk_size * steps.
-    :param chunk_size: size of each probed chunk; forced to the payload length in that same case.
-    :param threshold: an encoding is discarded once its mean mess ratio reaches this value.
-    :param cp_isolation: when given, only these encodings (normalized through iana_name) are tested.
-    :param cp_exclusion: when given, these encodings (normalized through iana_name) are never tested.
-    :param preemptive_behaviour: when True, an encoding reported by any_specified_encoding is tried first.
-    :param explain: when True, explain_handler is attached and the logger level set to TRACE for the call;
-        every return path of this function detaches the handler again.
-    :param language_threshold: forwarded to coherence_ratio for each probed chunk.
-    :param enable_fallback: when True, ascii / utf_8 / the specified encoding that decoded fine but were judged
-        too messy are kept aside and returned if nothing else matched.
-    :return: a CharsetMatches container; an empty payload yields a single utf_8 match.
-    """
-    if not isinstance(sequences, (bytearray, bytes)):
-        raise TypeError(
-            "Expected object of type bytes or bytearray, got: {}".format(
-                type(sequences)
-            )
-        )
-    if explain:
-        previous_logger_level: int = logger.level
-        logger.addHandler(explain_handler)
-        logger.setLevel(TRACE)
-    length: int = len(sequences)
-    if length == 0:
-        logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
-        if explain:  # Defensive: ensure exit path clean handler
-            logger.removeHandler(explain_handler)
-            logger.setLevel(previous_logger_level or logging.WARNING)  # NOTE(review): other exits restore previous_logger_level unchanged - confirm the WARNING default is intended
-        return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])
-    if cp_isolation is not None:
-        logger.log(
-            TRACE,
-            "cp_isolation is set. use this flag for debugging purpose. "
-            "limited list of encoding allowed : %s.",
-            ", ".join(cp_isolation),
-        )
-        cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
-    else:
-        cp_isolation = []
-    if cp_exclusion is not None:
-        logger.log(
-            TRACE,
-            "cp_exclusion is set. use this flag for debugging purpose. "
-            "limited list of encoding excluded : %s.",
-            ", ".join(cp_exclusion),
-        )
-        cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
-    else:
-        cp_exclusion = []
-    if length <= (chunk_size * steps):
-        logger.log(
-            TRACE,
-            "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
-            steps,
-            chunk_size,
-            length,
-        )
-        steps = 1
-        chunk_size = length
-    if steps > 1 and length / steps < chunk_size:
-        chunk_size = int(length / steps)
-    is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
-    is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE
-    if is_too_small_sequence:
-        logger.log(
-            TRACE,
-            "Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
-                length
-            ),
-        )
-    elif is_too_large_sequence:
-        logger.log(
-            TRACE,
-            "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
-                length
-            ),
-        )
-    prioritized_encodings: list[str] = []
-    specified_encoding: str | None = (
-        any_specified_encoding(sequences) if preemptive_behaviour else None
-    )
-    if specified_encoding is not None:
-        prioritized_encodings.append(specified_encoding)
-        logger.log(
-            TRACE,
-            "Detected declarative mark in sequence. Priority +1 given for %s.",
-            specified_encoding,
-        )
-    tested: set[str] = set()
-    tested_but_hard_failure: list[str] = []
-    tested_but_soft_failure: list[str] = []
-    fallback_ascii: CharsetMatch | None = None
-    fallback_u8: CharsetMatch | None = None
-    fallback_specified: CharsetMatch | None = None
-    results: CharsetMatches = CharsetMatches()
-    early_stop_results: CharsetMatches = CharsetMatches()
-    sig_encoding, sig_payload = identify_sig_or_bom(sequences)
-    if sig_encoding is not None:
-        prioritized_encodings.append(sig_encoding)
-        logger.log(
-            TRACE,
-            "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
-            len(sig_payload),
-            sig_encoding,
-        )
-    prioritized_encodings.append("ascii")  # ascii and utf_8 are always tried ahead of the IANA_SUPPORTED list
-    if "utf_8" not in prioritized_encodings:
-        prioritized_encodings.append("utf_8")
-    for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
-        if cp_isolation and encoding_iana not in cp_isolation:
-            continue
-        if cp_exclusion and encoding_iana in cp_exclusion:
-            continue
-        if encoding_iana in tested:
-            continue
-        tested.add(encoding_iana)
-        decoded_payload: str | None = None
-        bom_or_sig_available: bool = sig_encoding == encoding_iana
-        strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
-            encoding_iana
-        )
-        if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
-            logger.log(
-                TRACE,
-                "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
-                encoding_iana,
-            )
-            continue
-        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
-            logger.log(
-                TRACE,
-                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
-                encoding_iana,
-            )
-            continue
-        try:
-            is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
-        except (ModuleNotFoundError, ImportError):
-            logger.log(
-                TRACE,
-                "Encoding %s does not provide an IncrementalDecoder",
-                encoding_iana,
-            )
-            continue
-        try:
-            if is_too_large_sequence and is_multi_byte_decoder is False:
-                str(  # result discarded: only checks that the leading slice decodes, decoded_payload stays None
-                    (
-                        sequences[: int(50e4)]
-                        if strip_sig_or_bom is False
-                        else sequences[len(sig_payload) : int(50e4)]
-                    ),
-                    encoding=encoding_iana,
-                )
-            else:
-                decoded_payload = str(
-                    (
-                        sequences
-                        if strip_sig_or_bom is False
-                        else sequences[len(sig_payload) :]
-                    ),
-                    encoding=encoding_iana,
-                )
-        except (UnicodeDecodeError, LookupError) as e:
-            if not isinstance(e, LookupError):
-                logger.log(
-                    TRACE,
-                    "Code page %s does not fit given bytes sequence at ALL. %s",
-                    encoding_iana,
-                    str(e),
-                )
-            tested_but_hard_failure.append(encoding_iana)
-            continue
-        similar_soft_failure_test: bool = False
-        for encoding_soft_failed in tested_but_soft_failure:
-            if is_cp_similar(encoding_iana, encoding_soft_failed):
-                similar_soft_failure_test = True
-                break
-        if similar_soft_failure_test:
-            logger.log(
-                TRACE,
-                "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
-                encoding_iana,
-                encoding_soft_failed,
-            )
-            continue
-        r_ = range(  # start offsets of the chunks to probe, skipping the SIG/BOM bytes when one was detected
-            0 if not bom_or_sig_available else len(sig_payload),
-            length,
-            int(length / steps),
-        )
-        multi_byte_bonus: bool = (
-            is_multi_byte_decoder
-            and decoded_payload is not None
-            and len(decoded_payload) < length
-        )
-        if multi_byte_bonus:
-            logger.log(
-                TRACE,
-                "Code page %s is a multi byte encoding table and it appear that at least one character "
-                "was encoded using n-bytes.",
-                encoding_iana,
-            )
-        max_chunk_gave_up: int = int(len(r_) / 4)  # tolerate a quarter of the chunks being too messy...
-        max_chunk_gave_up = max(max_chunk_gave_up, 2)  # ...but never fewer than 2
-        early_stop_count: int = 0
-        lazy_str_hard_failure = False
-        md_chunks: list[str] = []
-        md_ratios = []
-        try:
-            for chunk in cut_sequence_chunks(
-                sequences,
-                encoding_iana,
-                r_,
-                chunk_size,
-                bom_or_sig_available,
-                strip_sig_or_bom,
-                sig_payload,
-                is_multi_byte_decoder,
-                decoded_payload,
-            ):
-                md_chunks.append(chunk)
-                md_ratios.append(
-                    mess_ratio(
-                        chunk,
-                        threshold,
-                        explain is True and 1 <= len(cp_isolation) <= 2,
-                    )
-                )
-                if md_ratios[-1] >= threshold:
-                    early_stop_count += 1
-                if (early_stop_count >= max_chunk_gave_up) or (
-                    bom_or_sig_available and strip_sig_or_bom is False
-                ):
-                    break
-        except (
-            UnicodeDecodeError
-        ) as e:  # Lazy str loading may have missed something there
-            logger.log(
-                TRACE,
-                "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
-                encoding_iana,
-                str(e),
-            )
-            early_stop_count = max_chunk_gave_up  # guarantees the exclusion branch below is taken
-            lazy_str_hard_failure = True
-        # We might want to check the sequence again with the whole content
-        # Only if initial MD tests passes
-        if (
-            not lazy_str_hard_failure
-            and is_too_large_sequence
-            and not is_multi_byte_decoder
-        ):
-            try:
-                sequences[int(50e3) :].decode(encoding_iana, errors="strict")  # NOTE(review): the lazy check above decoded up to int(50e4) but this starts at int(50e3) - confirm the offset
-            except UnicodeDecodeError as e:
-                logger.log(
-                    TRACE,
-                    "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
-                    encoding_iana,
-                    str(e),
-                )
-                tested_but_hard_failure.append(encoding_iana)
-                continue
-        mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
-        if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
-            tested_but_soft_failure.append(encoding_iana)
-            logger.log(
-                TRACE,
-                "%s was excluded because of initial chaos probing. Gave up %i time(s). "
-                "Computed mean chaos is %f %%.",
-                encoding_iana,
-                early_stop_count,
-                round(mean_mess_ratio * 100, ndigits=3),
-            )
-            # Preparing those fallbacks in case we got nothing.
-            if (
-                enable_fallback
-                and encoding_iana in ["ascii", "utf_8", specified_encoding]
-                and not lazy_str_hard_failure
-            ):
-                fallback_entry = CharsetMatch(
-                    sequences,
-                    encoding_iana,
-                    threshold,
-                    False,
-                    [],
-                    decoded_payload,
-                    preemptive_declaration=specified_encoding,
-                )
-                if encoding_iana == specified_encoding:
-                    fallback_specified = fallback_entry
-                elif encoding_iana == "ascii":
-                    fallback_ascii = fallback_entry
-                else:
-                    fallback_u8 = fallback_entry
-            continue
-        logger.log(
-            TRACE,
-            "%s passed initial chaos probing. Mean measured chaos is %f %%",
-            encoding_iana,
-            round(mean_mess_ratio * 100, ndigits=3),
-        )
-        if not is_multi_byte_decoder:
-            target_languages: list[str] = encoding_languages(encoding_iana)
-        else:
-            target_languages = mb_encoding_languages(encoding_iana)
-        if target_languages:
-            logger.log(
-                TRACE,
-                "{} should target any language(s) of {}".format(
-                    encoding_iana, str(target_languages)
-                ),
-            )
-        cd_ratios = []
-        # We shall skip the CD when its about ASCII
-        # Most of the time its not relevant to run "language-detection" on it.
-        if encoding_iana != "ascii":
-            for chunk in md_chunks:
-                chunk_languages = coherence_ratio(
-                    chunk,
-                    language_threshold,
-                    ",".join(target_languages) if target_languages else None,
-                )
-                cd_ratios.append(chunk_languages)
-        cd_ratios_merged = merge_coherence_ratios(cd_ratios)
-        if cd_ratios_merged:
-            logger.log(
-                TRACE,
-                "We detected language {} using {}".format(
-                    cd_ratios_merged, encoding_iana
-                ),
-            )
-        current_match = CharsetMatch(
-            sequences,
-            encoding_iana,
-            mean_mess_ratio,
-            bom_or_sig_available,
-            cd_ratios_merged,
-            (
-                decoded_payload
-                if (
-                    is_too_large_sequence is False
-                    or encoding_iana in [specified_encoding, "ascii", "utf_8"]
-                )
-                else None
-            ),
-            preemptive_declaration=specified_encoding,
-        )
-        results.append(current_match)
-        if (
-            encoding_iana in [specified_encoding, "ascii", "utf_8"]
-            and mean_mess_ratio < 0.1
-        ):
-            # If md says nothing to worry about, then... stop immediately!
-            if mean_mess_ratio == 0.0:
-                logger.debug(
-                    "Encoding detection: %s is most likely the one.",
-                    current_match.encoding,
-                )
-                if explain:  # Defensive: ensure exit path clean handler
-                    logger.removeHandler(explain_handler)
-                    logger.setLevel(previous_logger_level)
-                return CharsetMatches([current_match])
-            early_stop_results.append(current_match)
-        if (
-            len(early_stop_results)
-            and (specified_encoding is None or specified_encoding in tested)
-            and "ascii" in tested
-            and "utf_8" in tested
-        ):
-            probable_result: CharsetMatch = early_stop_results.best()  # type: ignore[assignment]
-            logger.debug(
-                "Encoding detection: %s is most likely the one.",
-                probable_result.encoding,
-            )
-            if explain:  # Defensive: ensure exit path clean handler
-                logger.removeHandler(explain_handler)
-                logger.setLevel(previous_logger_level)
-            return CharsetMatches([probable_result])
-        if encoding_iana == sig_encoding:
-            logger.debug(
-                "Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
-                "the beginning of the sequence.",
-                encoding_iana,
-            )
-            if explain:  # Defensive: ensure exit path clean handler
-                logger.removeHandler(explain_handler)
-                logger.setLevel(previous_logger_level)
-            return CharsetMatches([results[encoding_iana]])
-    if len(results) == 0:
-        if fallback_u8 or fallback_ascii or fallback_specified:
-            logger.log(
-                TRACE,
-                "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
-            )
-        if fallback_specified:
-            logger.debug(
-                "Encoding detection: %s will be used as a fallback match",
-                fallback_specified.encoding,
-            )
-            results.append(fallback_specified)
-        elif (
-            (fallback_u8 and fallback_ascii is None)
-            or (
-                fallback_u8
-                and fallback_ascii
-                and fallback_u8.fingerprint != fallback_ascii.fingerprint
-            )
-            or (fallback_u8 is not None)
-        ):
-            logger.debug("Encoding detection: utf_8 will be used as a fallback match")
-            results.append(fallback_u8)
-        elif fallback_ascii:
-            logger.debug("Encoding detection: ascii will be used as a fallback match")
-            results.append(fallback_ascii)
-    if results:
-        logger.debug(
-            "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
-            results.best().encoding,  # type: ignore
-            len(results) - 1,
-        )
-    else:
-        logger.debug("Encoding detection: Unable to determine any suitable charset.")
-    if explain:
-        logger.removeHandler(explain_handler)
-        logger.setLevel(previous_logger_level)
-    return results
-def from_fp(
-    fp: BinaryIO,
-    steps: int = 5,
-    chunk_size: int = 512,
-    threshold: float = 0.20,
-    cp_isolation: list[str] | None = None,
-    cp_exclusion: list[str] | None = None,
-    preemptive_behaviour: bool = True,
-    explain: bool = False,
-    language_threshold: float = 0.1,
-    enable_fallback: bool = True,
-) -> CharsetMatches:
-    """
-    Same thing than the function from_bytes but using a file pointer that is already ready.
-    Will not close the file pointer.
-    """
-    return from_bytes(
-        fp.read(),
-        steps,
-        chunk_size,
-        threshold,
-        cp_isolation,
-        cp_exclusion,
-        preemptive_behaviour,
-        explain,
-        language_threshold,
-        enable_fallback,
-    )
-def from_path(
-    path: str | bytes | PathLike,  # type: ignore[type-arg]
-    steps: int = 5,
-    chunk_size: int = 512,
-    threshold: float = 0.20,
-    cp_isolation: list[str] | None = None,
-    cp_exclusion: list[str] | None = None,
-    preemptive_behaviour: bool = True,
-    explain: bool = False,
-    language_threshold: float = 0.1,
-    enable_fallback: bool = True,
-) -> CharsetMatches:
-    """
-    Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
-    Can raise IOError.
-    """
-    with open(path, "rb") as fp:
-        return from_fp(
-            fp,
-            steps,
-            chunk_size,
-            threshold,
-            cp_isolation,
-            cp_exclusion,
-            preemptive_behaviour,
-            explain,
-            language_threshold,
-            enable_fallback,
-        )
-def is_binary(
-    fp_or_path_or_payload: PathLike | str | BinaryIO | bytes,  # type: ignore[type-arg]
-    steps: int = 5,
-    chunk_size: int = 512,
-    threshold: float = 0.20,
-    cp_isolation: list[str] | None = None,
-    cp_exclusion: list[str] | None = None,
-    preemptive_behaviour: bool = True,
-    explain: bool = False,
-    language_threshold: float = 0.1,
-    enable_fallback: bool = False,
-) -> bool:
-    """
-    Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
-    Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
-    are disabled to be stricter around ASCII-compatible but unlikely to be a string.
-    """
-    if isinstance(fp_or_path_or_payload, (str, PathLike)):
-        guesses = from_path(
-            fp_or_path_or_payload,
-            steps=steps,
-            chunk_size=chunk_size,
-            threshold=threshold,
-            cp_isolation=cp_isolation,
-            cp_exclusion=cp_exclusion,
-            preemptive_behaviour=preemptive_behaviour,
-            explain=explain,
-            language_threshold=language_threshold,
-            enable_fallback=enable_fallback,
-        )
-    elif isinstance(
-        fp_or_path_or_payload,
-        (
-            bytes,
-            bytearray,
-        ),
-    ):
-        guesses = from_bytes(
-            fp_or_path_or_payload,
-            steps=steps,
-            chunk_size=chunk_size,
-            threshold=threshold,
-            cp_isolation=cp_isolation,
-            cp_exclusion=cp_exclusion,
-            preemptive_behaviour=preemptive_behaviour,
-            explain=explain,
-            language_threshold=language_threshold,
-            enable_fallback=enable_fallback,
-        )
-    else:
-        guesses = from_fp(
-            fp_or_path_or_payload,
-            steps=steps,
-            chunk_size=chunk_size,
-            threshold=threshold,
-            cp_isolation=cp_isolation,
-            cp_exclusion=cp_exclusion,
-            preemptive_behaviour=preemptive_behaviour,
-            explain=explain,
-            language_threshold=language_threshold,
-            enable_fallback=enable_fallback,
-        )
-    return not guesses

env/Lib/site-packages/charset_normalizer/cd.py DELETED Viewed

@@ -1,395 +0,0 @@
-from __future__ import annotations
-import importlib
-from codecs import IncrementalDecoder
-from collections import Counter
-from functools import lru_cache
-from typing import Counter as TypeCounter
-from .constant import (
-    FREQUENCIES,
-    KO_NAMES,
-    LANGUAGE_SUPPORTED_COUNT,
-    TOO_SMALL_SEQUENCE,
-    ZH_NAMES,
-)
-from .md import is_suspiciously_successive_range
-from .models import CoherenceMatches
-from .utils import (
-    is_accentuated,
-    is_latin,
-    is_multi_byte_encoding,
-    is_unicode_range_secondary,
-    unicode_range,
-)
-def encoding_unicode_range(iana_name: str) -> list[str]:
-    """
-    Return associated unicode ranges in a single byte code page.
-    """
-    if is_multi_byte_encoding(iana_name):
-        raise OSError("Function not supported on multi-byte code page")
-    decoder = importlib.import_module(f"encodings.{iana_name}").IncrementalDecoder
-    p: IncrementalDecoder = decoder(errors="ignore")
-    seen_ranges: dict[str, int] = {}
-    character_count: int = 0
-    for i in range(0x40, 0xFF):
-        chunk: str = p.decode(bytes([i]))
-        if chunk:
-            character_range: str | None = unicode_range(chunk)
-            if character_range is None:
-                continue
-            if is_unicode_range_secondary(character_range) is False:
-                if character_range not in seen_ranges:
-                    seen_ranges[character_range] = 0
-                seen_ranges[character_range] += 1
-                character_count += 1
-    return sorted(
-        [
-            character_range
-            for character_range in seen_ranges
-            if seen_ranges[character_range] / character_count >= 0.15
-        ]
-    )
-def unicode_range_languages(primary_range: str) -> list[str]:
-    """
-    Return inferred languages used with a unicode range.
-    """
-    languages: list[str] = []
-    for language, characters in FREQUENCIES.items():
-        for character in characters:
-            if unicode_range(character) == primary_range:
-                languages.append(language)
-                break
-    return languages
-@lru_cache()
-def encoding_languages(iana_name: str) -> list[str]:
-    """
-    Single-byte encoding language association. Some code page are heavily linked to particular language(s).
-    This function does the correspondence.
-    """
-    unicode_ranges: list[str] = encoding_unicode_range(iana_name)
-    primary_range: str | None = None
-    for specified_range in unicode_ranges:
-        if "Latin" not in specified_range:
-            primary_range = specified_range
-            break
-    if primary_range is None:
-        return ["Latin Based"]
-    return unicode_range_languages(primary_range)
-@lru_cache()
-def mb_encoding_languages(iana_name: str) -> list[str]:
-    """
-    Multi-byte encoding language association. Some code page are heavily linked to particular language(s).
-    This function does the correspondence.
-    """
-    if (
-        iana_name.startswith("shift_")
-        or iana_name.startswith("iso2022_jp")
-        or iana_name.startswith("euc_j")
-        or iana_name == "cp932"
-    ):
-        return ["Japanese"]
-    if iana_name.startswith("gb") or iana_name in ZH_NAMES:
-        return ["Chinese"]
-    if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
-        return ["Korean"]
-    return []
-@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
-def get_target_features(language: str) -> tuple[bool, bool]:
-    """
-    Determine main aspects from a supported language if it contains accents and if is pure Latin.
-    """
-    target_have_accents: bool = False
-    target_pure_latin: bool = True
-    for character in FREQUENCIES[language]:
-        if not target_have_accents and is_accentuated(character):
-            target_have_accents = True
-        if target_pure_latin and is_latin(character) is False:
-            target_pure_latin = False
-    return target_have_accents, target_pure_latin
-def alphabet_languages(
-    characters: list[str], ignore_non_latin: bool = False
-) -> list[str]:
-    """
-    Return associated languages associated to given characters.
-    """
-    languages: list[tuple[str, float]] = []
-    source_have_accents = any(is_accentuated(character) for character in characters)
-    for language, language_characters in FREQUENCIES.items():
-        target_have_accents, target_pure_latin = get_target_features(language)
-        if ignore_non_latin and target_pure_latin is False:
-            continue
-        if target_have_accents is False and source_have_accents:
-            continue
-        character_count: int = len(language_characters)
-        character_match_count: int = len(
-            [c for c in language_characters if c in characters]
-        )
-        ratio: float = character_match_count / character_count
-        if ratio >= 0.2:
-            languages.append((language, ratio))
-    languages = sorted(languages, key=lambda x: x[1], reverse=True)
-    return [compatible_language[0] for compatible_language in languages]
-def characters_popularity_compare(
-    language: str, ordered_characters: list[str]
-) -> float:
-    """
-    Determine if a ordered characters list (by occurrence from most appearance to rarest) match a particular language.
-    The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
-    Beware that is function is not strict on the match in order to ease the detection. (Meaning close match is 1.)
-    """
-    if language not in FREQUENCIES:
-        raise ValueError(f"{language} not available")
-    character_approved_count: int = 0
-    FREQUENCIES_language_set = set(FREQUENCIES[language])
-    ordered_characters_count: int = len(ordered_characters)
-    target_language_characters_count: int = len(FREQUENCIES[language])
-    large_alphabet: bool = target_language_characters_count > 26
-    for character, character_rank in zip(
-        ordered_characters, range(0, ordered_characters_count)
-    ):
-        if character not in FREQUENCIES_language_set:
-            continue
-        character_rank_in_language: int = FREQUENCIES[language].index(character)
-        expected_projection_ratio: float = (
-            target_language_characters_count / ordered_characters_count
-        )
-        character_rank_projection: int = int(character_rank * expected_projection_ratio)
-        if (
-            large_alphabet is False
-            and abs(character_rank_projection - character_rank_in_language) > 4
-        ):
-            continue
-        if (
-            large_alphabet is True
-            and abs(character_rank_projection - character_rank_in_language)
-            < target_language_characters_count / 3
-        ):
-            character_approved_count += 1
-            continue
-        characters_before_source: list[str] = FREQUENCIES[language][
-            0:character_rank_in_language
-        ]
-        characters_after_source: list[str] = FREQUENCIES[language][
-            character_rank_in_language:
-        ]
-        characters_before: list[str] = ordered_characters[0:character_rank]
-        characters_after: list[str] = ordered_characters[character_rank:]
-        before_match_count: int = len(
-            set(characters_before) & set(characters_before_source)
-        )
-        after_match_count: int = len(
-            set(characters_after) & set(characters_after_source)
-        )
-        if len(characters_before_source) == 0 and before_match_count <= 4:
-            character_approved_count += 1
-            continue
-        if len(characters_after_source) == 0 and after_match_count <= 4:
-            character_approved_count += 1
-            continue
-        if (
-            before_match_count / len(characters_before_source) >= 0.4
-            or after_match_count / len(characters_after_source) >= 0.4
-        ):
-            character_approved_count += 1
-            continue
-    return character_approved_count / len(ordered_characters)
-def alpha_unicode_split(decoded_sequence: str) -> list[str]:
-    """
-    Given a decoded text sequence, return a list of str. Unicode range / alphabet separation.
-    Ex. a text containing English/Latin with a bit a Hebrew will return two items in the resulting list;
-    One containing the latin letters and the other hebrew.
-    """
-    layers: dict[str, str] = {}
-    for character in decoded_sequence:
-        if character.isalpha() is False:
-            continue
-        character_range: str | None = unicode_range(character)
-        if character_range is None:
-            continue
-        layer_target_range: str | None = None
-        for discovered_range in layers:
-            if (
-                is_suspiciously_successive_range(discovered_range, character_range)
-                is False
-            ):
-                layer_target_range = discovered_range
-                break
-        if layer_target_range is None:
-            layer_target_range = character_range
-        if layer_target_range not in layers:
-            layers[layer_target_range] = character.lower()
-            continue
-        layers[layer_target_range] += character.lower()
-    return list(layers.values())
-def merge_coherence_ratios(results: list[CoherenceMatches]) -> CoherenceMatches:
-    """
-    This function merge results previously given by the function coherence_ratio.
-    The return type is the same as coherence_ratio.
-    """
-    per_language_ratios: dict[str, list[float]] = {}
-    for result in results:
-        for sub_result in result:
-            language, ratio = sub_result
-            if language not in per_language_ratios:
-                per_language_ratios[language] = [ratio]
-                continue
-            per_language_ratios[language].append(ratio)
-    merge = [
-        (
-            language,
-            round(
-                sum(per_language_ratios[language]) / len(per_language_ratios[language]),
-                4,
-            ),
-        )
-        for language in per_language_ratios
-    ]
-    return sorted(merge, key=lambda x: x[1], reverse=True)
-def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
-    """
-    We shall NOT return "English—" in CoherenceMatches because it is an alternative
-    of "English". This function only keeps the best match and remove the em-dash in it.
-    """
-    index_results: dict[str, list[float]] = dict()
-    for result in results:
-        language, ratio = result
-        no_em_name: str = language.replace("—", "")
-        if no_em_name not in index_results:
-            index_results[no_em_name] = []
-        index_results[no_em_name].append(ratio)
-    if any(len(index_results[e]) > 1 for e in index_results):
-        filtered_results: CoherenceMatches = []
-        for language in index_results:
-            filtered_results.append((language, max(index_results[language])))
-        return filtered_results
-    return results
-@lru_cache(maxsize=2048)
-def coherence_ratio(
-    decoded_sequence: str, threshold: float = 0.1, lg_inclusion: str | None = None
-) -> CoherenceMatches:
-    """
-    Detect ANY language that can be identified in given sequence. The sequence will be analysed by layers.
-    A layer = Character extraction by alphabets/ranges.
-    """
-    results: list[tuple[str, float]] = []
-    ignore_non_latin: bool = False
-    sufficient_match_count: int = 0
-    lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else []
-    if "Latin Based" in lg_inclusion_list:
-        ignore_non_latin = True
-        lg_inclusion_list.remove("Latin Based")
-    for layer in alpha_unicode_split(decoded_sequence):
-        sequence_frequencies: TypeCounter[str] = Counter(layer)
-        most_common = sequence_frequencies.most_common()
-        character_count: int = sum(o for c, o in most_common)
-        if character_count <= TOO_SMALL_SEQUENCE:
-            continue
-        popular_character_ordered: list[str] = [c for c, o in most_common]
-        for language in lg_inclusion_list or alphabet_languages(
-            popular_character_ordered, ignore_non_latin
-        ):
-            ratio: float = characters_popularity_compare(
-                language, popular_character_ordered
-            )
-            if ratio < threshold:
-                continue
-            elif ratio >= 0.8:
-                sufficient_match_count += 1
-            results.append((language, round(ratio, 4)))
-            if sufficient_match_count >= 3:
-                break
-    return sorted(
-        filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True
-    )

env/Lib/site-packages/charset_normalizer/cli/__init__.py DELETED Viewed

@@ -1,8 +0,0 @@
-from __future__ import annotations
-from .__main__ import cli_detect, query_yes_no
-__all__ = (
-    "cli_detect",
-    "query_yes_no",
-)

env/Lib/site-packages/charset_normalizer/cli/__main__.py DELETED Viewed

@@ -1,321 +0,0 @@
-from __future__ import annotations
-import argparse
-import sys
-from json import dumps
-from os.path import abspath, basename, dirname, join, realpath
-from platform import python_version
-from unicodedata import unidata_version
-import charset_normalizer.md as md_module
-from charset_normalizer import from_fp
-from charset_normalizer.models import CliDetectionResult
-from charset_normalizer.version import __version__
-def query_yes_no(question: str, default: str = "yes") -> bool:
-    """Ask a yes/no question via input() and return their answer.
-    "question" is a string that is presented to the user.
-    "default" is the presumed answer if the user just hits <Enter>.
-        It must be "yes" (the default), "no" or None (meaning
-        an answer is required of the user).
-    The "answer" return value is True for "yes" or False for "no".
-    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
-    """
-    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
-    if default is None:
-        prompt = " [y/n] "
-    elif default == "yes":
-        prompt = " [Y/n] "
-    elif default == "no":
-        prompt = " [y/N] "
-    else:
-        raise ValueError("invalid default answer: '%s'" % default)
-    while True:
-        sys.stdout.write(question + prompt)
-        choice = input().lower()
-        if default is not None and choice == "":
-            return valid[default]
-        elif choice in valid:
-            return valid[choice]
-        else:
-            sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
-def cli_detect(argv: list[str] | None = None) -> int:
-    """
-    CLI assistant using ARGV and ArgumentParser
-    :param argv:
-    :return: 0 if everything is fine, anything else equal trouble
-    """
-    parser = argparse.ArgumentParser(
-        description="The Real First Universal Charset Detector. "
-        "Discover originating encoding used on text file. "
-        "Normalize text to unicode."
-    )
-    parser.add_argument(
-        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
-    )
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action="store_true",
-        default=False,
-        dest="verbose",
-        help="Display complementary information about file if any. "
-        "Stdout will contain logs about the detection process.",
-    )
-    parser.add_argument(
-        "-a",
-        "--with-alternative",
-        action="store_true",
-        default=False,
-        dest="alternatives",
-        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
-    )
-    parser.add_argument(
-        "-n",
-        "--normalize",
-        action="store_true",
-        default=False,
-        dest="normalize",
-        help="Permit to normalize input file. If not set, program does not write anything.",
-    )
-    parser.add_argument(
-        "-m",
-        "--minimal",
-        action="store_true",
-        default=False,
-        dest="minimal",
-        help="Only output the charset detected to STDOUT. Disabling JSON output.",
-    )
-    parser.add_argument(
-        "-r",
-        "--replace",
-        action="store_true",
-        default=False,
-        dest="replace",
-        help="Replace file when trying to normalize it instead of creating a new one.",
-    )
-    parser.add_argument(
-        "-f",
-        "--force",
-        action="store_true",
-        default=False,
-        dest="force",
-        help="Replace file without asking if you are sure, use this flag with caution.",
-    )
-    parser.add_argument(
-        "-i",
-        "--no-preemptive",
-        action="store_true",
-        default=False,
-        dest="no_preemptive",
-        help="Disable looking at a charset declaration to hint the detector.",
-    )
-    parser.add_argument(
-        "-t",
-        "--threshold",
-        action="store",
-        default=0.2,
-        type=float,
-        dest="threshold",
-        help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
-    )
-    parser.add_argument(
-        "--version",
-        action="version",
-        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
-            __version__,
-            python_version(),
-            unidata_version,
-            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
-        ),
-        help="Show version information and exit.",
-    )
-    args = parser.parse_args(argv)
-    if args.replace is True and args.normalize is False:
-        if args.files:
-            for my_file in args.files:
-                my_file.close()
-        print("Use --replace in addition of --normalize only.", file=sys.stderr)
-        return 1
-    if args.force is True and args.replace is False:
-        if args.files:
-            for my_file in args.files:
-                my_file.close()
-        print("Use --force in addition of --replace only.", file=sys.stderr)
-        return 1
-    if args.threshold < 0.0 or args.threshold > 1.0:
-        if args.files:
-            for my_file in args.files:
-                my_file.close()
-        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
-        return 1
-    x_ = []
-    for my_file in args.files:
-        matches = from_fp(
-            my_file,
-            threshold=args.threshold,
-            explain=args.verbose,
-            preemptive_behaviour=args.no_preemptive is False,
-        )
-        best_guess = matches.best()
-        if best_guess is None:
-            print(
-                'Unable to identify originating encoding for "{}". {}'.format(
-                    my_file.name,
-                    (
-                        "Maybe try increasing maximum amount of chaos."
-                        if args.threshold < 1.0
-                        else ""
-                    ),
-                ),
-                file=sys.stderr,
-            )
-            x_.append(
-                CliDetectionResult(
-                    abspath(my_file.name),
-                    None,
-                    [],
-                    [],
-                    "Unknown",
-                    [],
-                    False,
-                    1.0,
-                    0.0,
-                    None,
-                    True,
-                )
-            )
-        else:
-            x_.append(
-                CliDetectionResult(
-                    abspath(my_file.name),
-                    best_guess.encoding,
-                    best_guess.encoding_aliases,
-                    [
-                        cp
-                        for cp in best_guess.could_be_from_charset
-                        if cp != best_guess.encoding
-                    ],
-                    best_guess.language,
-                    best_guess.alphabets,
-                    best_guess.bom,
-                    best_guess.percent_chaos,
-                    best_guess.percent_coherence,
-                    None,
-                    True,
-                )
-            )
-            if len(matches) > 1 and args.alternatives:
-                for el in matches:
-                    if el != best_guess:
-                        x_.append(
-                            CliDetectionResult(
-                                abspath(my_file.name),
-                                el.encoding,
-                                el.encoding_aliases,
-                                [
-                                    cp
-                                    for cp in el.could_be_from_charset
-                                    if cp != el.encoding
-                                ],
-                                el.language,
-                                el.alphabets,
-                                el.bom,
-                                el.percent_chaos,
-                                el.percent_coherence,
-                                None,
-                                False,
-                            )
-                        )
-            if args.normalize is True:
-                if best_guess.encoding.startswith("utf") is True:
-                    print(
-                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
-                            my_file.name
-                        ),
-                        file=sys.stderr,
-                    )
-                    if my_file.closed is False:
-                        my_file.close()
-                    continue
-                dir_path = dirname(realpath(my_file.name))
-                file_name = basename(realpath(my_file.name))
-                o_: list[str] = file_name.split(".")
-                if args.replace is False:
-                    o_.insert(-1, best_guess.encoding)
-                    if my_file.closed is False:
-                        my_file.close()
-                elif (
-                    args.force is False
-                    and query_yes_no(
-                        'Are you sure to normalize "{}" by replacing it ?'.format(
-                            my_file.name
-                        ),
-                        "no",
-                    )
-                    is False
-                ):
-                    if my_file.closed is False:
-                        my_file.close()
-                    continue
-                try:
-                    x_[0].unicode_path = join(dir_path, ".".join(o_))
-                    with open(x_[0].unicode_path, "wb") as fp:
-                        fp.write(best_guess.output())
-                except OSError as e:
-                    print(str(e), file=sys.stderr)
-                    if my_file.closed is False:
-                        my_file.close()
-                    return 2
-        if my_file.closed is False:
-            my_file.close()
-    if args.minimal is False:
-        print(
-            dumps(
-                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
-                ensure_ascii=True,
-                indent=4,
-            )
-        )
-    else:
-        for my_file in args.files:
-            print(
-                ", ".join(
-                    [
-                        el.encoding or "undefined"
-                        for el in x_
-                        if el.path == abspath(my_file.name)
-                    ]
-                )
-            )
-    return 0
-if __name__ == "__main__":
-    cli_detect()

env/Lib/site-packages/charset_normalizer/constant.py DELETED Viewed

@@ -1,1998 +0,0 @@
-from __future__ import annotations
-from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
-from encodings.aliases import aliases
-from re import IGNORECASE
-from re import compile as re_compile
-# Contain for each eligible encoding a list of/item bytes SIG/BOM
-ENCODING_MARKS: dict[str, bytes | list[bytes]] = {
-    "utf_8": BOM_UTF8,
-    "utf_7": [
-        b"\x2b\x2f\x76\x38",
-        b"\x2b\x2f\x76\x39",
-        b"\x2b\x2f\x76\x2b",
-        b"\x2b\x2f\x76\x2f",
-        b"\x2b\x2f\x76\x38\x2d",
-    ],
-    "gb18030": b"\x84\x31\x95\x33",
-    "utf_32": [BOM_UTF32_BE, BOM_UTF32_LE],
-    "utf_16": [BOM_UTF16_BE, BOM_UTF16_LE],
-}
-TOO_SMALL_SEQUENCE: int = 32
-TOO_BIG_SEQUENCE: int = int(10e6)
-UTF8_MAXIMAL_ALLOCATION: int = 1_112_064
-# Up-to-date Unicode ucd/15.0.0
-UNICODE_RANGES_COMBINED: dict[str, range] = {
-    "Control character": range(32),
-    "Basic Latin": range(32, 128),
-    "Latin-1 Supplement": range(128, 256),
-    "Latin Extended-A": range(256, 384),
-    "Latin Extended-B": range(384, 592),
-    "IPA Extensions": range(592, 688),
-    "Spacing Modifier Letters": range(688, 768),
-    "Combining Diacritical Marks": range(768, 880),
-    "Greek and Coptic": range(880, 1024),
-    "Cyrillic": range(1024, 1280),
-    "Cyrillic Supplement": range(1280, 1328),
-    "Armenian": range(1328, 1424),
-    "Hebrew": range(1424, 1536),
-    "Arabic": range(1536, 1792),
-    "Syriac": range(1792, 1872),
-    "Arabic Supplement": range(1872, 1920),
-    "Thaana": range(1920, 1984),
-    "NKo": range(1984, 2048),
-    "Samaritan": range(2048, 2112),
-    "Mandaic": range(2112, 2144),
-    "Syriac Supplement": range(2144, 2160),
-    "Arabic Extended-B": range(2160, 2208),
-    "Arabic Extended-A": range(2208, 2304),
-    "Devanagari": range(2304, 2432),
-    "Bengali": range(2432, 2560),
-    "Gurmukhi": range(2560, 2688),
-    "Gujarati": range(2688, 2816),
-    "Oriya": range(2816, 2944),
-    "Tamil": range(2944, 3072),
-    "Telugu": range(3072, 3200),
-    "Kannada": range(3200, 3328),
-    "Malayalam": range(3328, 3456),
-    "Sinhala": range(3456, 3584),
-    "Thai": range(3584, 3712),
-    "Lao": range(3712, 3840),
-    "Tibetan": range(3840, 4096),
-    "Myanmar": range(4096, 4256),
-    "Georgian": range(4256, 4352),
-    "Hangul Jamo": range(4352, 4608),
-    "Ethiopic": range(4608, 4992),
-    "Ethiopic Supplement": range(4992, 5024),
-    "Cherokee": range(5024, 5120),
-    "Unified Canadian Aboriginal Syllabics": range(5120, 5760),
-    "Ogham": range(5760, 5792),
-    "Runic": range(5792, 5888),
-    "Tagalog": range(5888, 5920),
-    "Hanunoo": range(5920, 5952),
-    "Buhid": range(5952, 5984),
-    "Tagbanwa": range(5984, 6016),
-    "Khmer": range(6016, 6144),
-    "Mongolian": range(6144, 6320),
-    "Unified Canadian Aboriginal Syllabics Extended": range(6320, 6400),
-    "Limbu": range(6400, 6480),
-    "Tai Le": range(6480, 6528),
-    "New Tai Lue": range(6528, 6624),
-    "Khmer Symbols": range(6624, 6656),
-    "Buginese": range(6656, 6688),
-    "Tai Tham": range(6688, 6832),
-    "Combining Diacritical Marks Extended": range(6832, 6912),
-    "Balinese": range(6912, 7040),
-    "Sundanese": range(7040, 7104),
-    "Batak": range(7104, 7168),
-    "Lepcha": range(7168, 7248),
-    "Ol Chiki": range(7248, 7296),
-    "Cyrillic Extended-C": range(7296, 7312),
-    "Georgian Extended": range(7312, 7360),
-    "Sundanese Supplement": range(7360, 7376),
-    "Vedic Extensions": range(7376, 7424),
-    "Phonetic Extensions": range(7424, 7552),
-    "Phonetic Extensions Supplement": range(7552, 7616),
-    "Combining Diacritical Marks Supplement": range(7616, 7680),
-    "Latin Extended Additional": range(7680, 7936),
-    "Greek Extended": range(7936, 8192),
-    "General Punctuation": range(8192, 8304),
-    "Superscripts and Subscripts": range(8304, 8352),
-    "Currency Symbols": range(8352, 8400),
-    "Combining Diacritical Marks for Symbols": range(8400, 8448),
-    "Letterlike Symbols": range(8448, 8528),
-    "Number Forms": range(8528, 8592),
-    "Arrows": range(8592, 8704),
-    "Mathematical Operators": range(8704, 8960),
-    "Miscellaneous Technical": range(8960, 9216),
-    "Control Pictures": range(9216, 9280),
-    "Optical Character Recognition": range(9280, 9312),
-    "Enclosed Alphanumerics": range(9312, 9472),
-    "Box Drawing": range(9472, 9600),
-    "Block Elements": range(9600, 9632),
-    "Geometric Shapes": range(9632, 9728),
-    "Miscellaneous Symbols": range(9728, 9984),
-    "Dingbats": range(9984, 10176),
-    "Miscellaneous Mathematical Symbols-A": range(10176, 10224),
-    "Supplemental Arrows-A": range(10224, 10240),
-    "Braille Patterns": range(10240, 10496),
-    "Supplemental Arrows-B": range(10496, 10624),
-    "Miscellaneous Mathematical Symbols-B": range(10624, 10752),
-    "Supplemental Mathematical Operators": range(10752, 11008),
-    "Miscellaneous Symbols and Arrows": range(11008, 11264),
-    "Glagolitic": range(11264, 11360),
-    "Latin Extended-C": range(11360, 11392),
-    "Coptic": range(11392, 11520),
-    "Georgian Supplement": range(11520, 11568),
-    "Tifinagh": range(11568, 11648),
-    "Ethiopic Extended": range(11648, 11744),
-    "Cyrillic Extended-A": range(11744, 11776),
-    "Supplemental Punctuation": range(11776, 11904),
-    "CJK Radicals Supplement": range(11904, 12032),
-    "Kangxi Radicals": range(12032, 12256),
-    "Ideographic Description Characters": range(12272, 12288),
-    "CJK Symbols and Punctuation": range(12288, 12352),
-    "Hiragana": range(12352, 12448),
-    "Katakana": range(12448, 12544),
-    "Bopomofo": range(12544, 12592),
-    "Hangul Compatibility Jamo": range(12592, 12688),
-    "Kanbun": range(12688, 12704),
-    "Bopomofo Extended": range(12704, 12736),
-    "CJK Strokes": range(12736, 12784),
-    "Katakana Phonetic Extensions": range(12784, 12800),
-    "Enclosed CJK Letters and Months": range(12800, 13056),
-    "CJK Compatibility": range(13056, 13312),
-    "CJK Unified Ideographs Extension A": range(13312, 19904),
-    "Yijing Hexagram Symbols": range(19904, 19968),
-    "CJK Unified Ideographs": range(19968, 40960),
-    "Yi Syllables": range(40960, 42128),
-    "Yi Radicals": range(42128, 42192),
-    "Lisu": range(42192, 42240),
-    "Vai": range(42240, 42560),
-    "Cyrillic Extended-B": range(42560, 42656),
-    "Bamum": range(42656, 42752),
-    "Modifier Tone Letters": range(42752, 42784),
-    "Latin Extended-D": range(42784, 43008),
-    "Syloti Nagri": range(43008, 43056),
-    "Common Indic Number Forms": range(43056, 43072),
-    "Phags-pa": range(43072, 43136),
-    "Saurashtra": range(43136, 43232),
-    "Devanagari Extended": range(43232, 43264),
-    "Kayah Li": range(43264, 43312),
-    "Rejang": range(43312, 43360),
-    "Hangul Jamo Extended-A": range(43360, 43392),
-    "Javanese": range(43392, 43488),
-    "Myanmar Extended-B": range(43488, 43520),
-    "Cham": range(43520, 43616),
-    "Myanmar Extended-A": range(43616, 43648),
-    "Tai Viet": range(43648, 43744),
-    "Meetei Mayek Extensions": range(43744, 43776),
-    "Ethiopic Extended-A": range(43776, 43824),
-    "Latin Extended-E": range(43824, 43888),
-    "Cherokee Supplement": range(43888, 43968),
-    "Meetei Mayek": range(43968, 44032),
-    "Hangul Syllables": range(44032, 55216),
-    "Hangul Jamo Extended-B": range(55216, 55296),
-    "High Surrogates": range(55296, 56192),
-    "High Private Use Surrogates": range(56192, 56320),
-    "Low Surrogates": range(56320, 57344),
-    "Private Use Area": range(57344, 63744),
-    "CJK Compatibility Ideographs": range(63744, 64256),
-    "Alphabetic Presentation Forms": range(64256, 64336),
-    "Arabic Presentation Forms-A": range(64336, 65024),
-    "Variation Selectors": range(65024, 65040),
-    "Vertical Forms": range(65040, 65056),
-    "Combining Half Marks": range(65056, 65072),
-    "CJK Compatibility Forms": range(65072, 65104),
-    "Small Form Variants": range(65104, 65136),
-    "Arabic Presentation Forms-B": range(65136, 65280),
-    "Halfwidth and Fullwidth Forms": range(65280, 65520),
-    "Specials": range(65520, 65536),
-    "Linear B Syllabary": range(65536, 65664),
-    "Linear B Ideograms": range(65664, 65792),
-    "Aegean Numbers": range(65792, 65856),
-    "Ancient Greek Numbers": range(65856, 65936),
-    "Ancient Symbols": range(65936, 66000),
-    "Phaistos Disc": range(66000, 66048),
-    "Lycian": range(66176, 66208),
-    "Carian": range(66208, 66272),
-    "Coptic Epact Numbers": range(66272, 66304),
-    "Old Italic": range(66304, 66352),
-    "Gothic": range(66352, 66384),
-    "Old Permic": range(66384, 66432),
-    "Ugaritic": range(66432, 66464),
-    "Old Persian": range(66464, 66528),
-    "Deseret": range(66560, 66640),
-    "Shavian": range(66640, 66688),
-    "Osmanya": range(66688, 66736),
-    "Osage": range(66736, 66816),
-    "Elbasan": range(66816, 66864),
-    "Caucasian Albanian": range(66864, 66928),
-    "Vithkuqi": range(66928, 67008),
-    "Linear A": range(67072, 67456),
-    "Latin Extended-F": range(67456, 67520),
-    "Cypriot Syllabary": range(67584, 67648),
-    "Imperial Aramaic": range(67648, 67680),
-    "Palmyrene": range(67680, 67712),
-    "Nabataean": range(67712, 67760),
-    "Hatran": range(67808, 67840),
-    "Phoenician": range(67840, 67872),
-    "Lydian": range(67872, 67904),
-    "Meroitic Hieroglyphs": range(67968, 68000),
-    "Meroitic Cursive": range(68000, 68096),
-    "Kharoshthi": range(68096, 68192),
-    "Old South Arabian": range(68192, 68224),
-    "Old North Arabian": range(68224, 68256),
-    "Manichaean": range(68288, 68352),
-    "Avestan": range(68352, 68416),
-    "Inscriptional Parthian": range(68416, 68448),
-    "Inscriptional Pahlavi": range(68448, 68480),
-    "Psalter Pahlavi": range(68480, 68528),
-    "Old Turkic": range(68608, 68688),
-    "Old Hungarian": range(68736, 68864),
-    "Hanifi Rohingya": range(68864, 68928),
-    "Rumi Numeral Symbols": range(69216, 69248),
-    "Yezidi": range(69248, 69312),
-    "Arabic Extended-C": range(69312, 69376),
-    "Old Sogdian": range(69376, 69424),
-    "Sogdian": range(69424, 69488),
-    "Old Uyghur": range(69488, 69552),
-    "Chorasmian": range(69552, 69600),
-    "Elymaic": range(69600, 69632),
-    "Brahmi": range(69632, 69760),
-    "Kaithi": range(69760, 69840),
-    "Sora Sompeng": range(69840, 69888),
-    "Chakma": range(69888, 69968),
-    "Mahajani": range(69968, 70016),
-    "Sharada": range(70016, 70112),
-    "Sinhala Archaic Numbers": range(70112, 70144),
-    "Khojki": range(70144, 70224),
-    "Multani": range(70272, 70320),
-    "Khudawadi": range(70320, 70400),
-    "Grantha": range(70400, 70528),
-    "Newa": range(70656, 70784),
-    "Tirhuta": range(70784, 70880),
-    "Siddham": range(71040, 71168),
-    "Modi": range(71168, 71264),
-    "Mongolian Supplement": range(71264, 71296),
-    "Takri": range(71296, 71376),
-    "Ahom": range(71424, 71504),
-    "Dogra": range(71680, 71760),
-    "Warang Citi": range(71840, 71936),
-    "Dives Akuru": range(71936, 72032),
-    "Nandinagari": range(72096, 72192),
-    "Zanabazar Square": range(72192, 72272),
-    "Soyombo": range(72272, 72368),
-    "Unified Canadian Aboriginal Syllabics Extended-A": range(72368, 72384),
-    "Pau Cin Hau": range(72384, 72448),
-    "Devanagari Extended-A": range(72448, 72544),
-    "Bhaiksuki": range(72704, 72816),
-    "Marchen": range(72816, 72896),
-    "Masaram Gondi": range(72960, 73056),
-    "Gunjala Gondi": range(73056, 73136),
-    "Makasar": range(73440, 73472),
-    "Kawi": range(73472, 73568),
-    "Lisu Supplement": range(73648, 73664),
-    "Tamil Supplement": range(73664, 73728),
-    "Cuneiform": range(73728, 74752),
-    "Cuneiform Numbers and Punctuation": range(74752, 74880),
-    "Early Dynastic Cuneiform": range(74880, 75088),
-    "Cypro-Minoan": range(77712, 77824),
-    "Egyptian Hieroglyphs": range(77824, 78896),
-    "Egyptian Hieroglyph Format Controls": range(78896, 78944),
-    "Anatolian Hieroglyphs": range(82944, 83584),
-    "Bamum Supplement": range(92160, 92736),
-    "Mro": range(92736, 92784),
-    "Tangsa": range(92784, 92880),
-    "Bassa Vah": range(92880, 92928),
-    "Pahawh Hmong": range(92928, 93072),
-    "Medefaidrin": range(93760, 93856),
-    "Miao": range(93952, 94112),
-    "Ideographic Symbols and Punctuation": range(94176, 94208),
-    "Tangut": range(94208, 100352),
-    "Tangut Components": range(100352, 101120),
-    "Khitan Small Script": range(101120, 101632),
-    "Tangut Supplement": range(101632, 101760),
-    "Kana Extended-B": range(110576, 110592),
-    "Kana Supplement": range(110592, 110848),
-    "Kana Extended-A": range(110848, 110896),
-    "Small Kana Extension": range(110896, 110960),
-    "Nushu": range(110960, 111360),
-    "Duployan": range(113664, 113824),
-    "Shorthand Format Controls": range(113824, 113840),
-    "Znamenny Musical Notation": range(118528, 118736),
-    "Byzantine Musical Symbols": range(118784, 119040),
-    "Musical Symbols": range(119040, 119296),
-    "Ancient Greek Musical Notation": range(119296, 119376),
-    "Kaktovik Numerals": range(119488, 119520),
-    "Mayan Numerals": range(119520, 119552),
-    "Tai Xuan Jing Symbols": range(119552, 119648),
-    "Counting Rod Numerals": range(119648, 119680),
-    "Mathematical Alphanumeric Symbols": range(119808, 120832),
-    "Sutton SignWriting": range(120832, 121520),
-    "Latin Extended-G": range(122624, 122880),
-    "Glagolitic Supplement": range(122880, 122928),
-    "Cyrillic Extended-D": range(122928, 123024),
-    "Nyiakeng Puachue Hmong": range(123136, 123216),
-    "Toto": range(123536, 123584),
-    "Wancho": range(123584, 123648),
-    "Nag Mundari": range(124112, 124160),
-    "Ethiopic Extended-B": range(124896, 124928),
-    "Mende Kikakui": range(124928, 125152),
-    "Adlam": range(125184, 125280),
-    "Indic Siyaq Numbers": range(126064, 126144),
-    "Ottoman Siyaq Numbers": range(126208, 126288),
-    "Arabic Mathematical Alphabetic Symbols": range(126464, 126720),
-    "Mahjong Tiles": range(126976, 127024),
-    "Domino Tiles": range(127024, 127136),
-    "Playing Cards": range(127136, 127232),
-    "Enclosed Alphanumeric Supplement": range(127232, 127488),
-    "Enclosed Ideographic Supplement": range(127488, 127744),
-    "Miscellaneous Symbols and Pictographs": range(127744, 128512),
-    "Emoticons range(Emoji)": range(128512, 128592),
-    "Ornamental Dingbats": range(128592, 128640),
-    "Transport and Map Symbols": range(128640, 128768),
-    "Alchemical Symbols": range(128768, 128896),
-    "Geometric Shapes Extended": range(128896, 129024),
-    "Supplemental Arrows-C": range(129024, 129280),
-    "Supplemental Symbols and Pictographs": range(129280, 129536),
-    "Chess Symbols": range(129536, 129648),
-    "Symbols and Pictographs Extended-A": range(129648, 129792),
-    "Symbols for Legacy Computing": range(129792, 130048),
-    "CJK Unified Ideographs Extension B": range(131072, 173792),
-    "CJK Unified Ideographs Extension C": range(173824, 177984),
-    "CJK Unified Ideographs Extension D": range(177984, 178208),
-    "CJK Unified Ideographs Extension E": range(178208, 183984),
-    "CJK Unified Ideographs Extension F": range(183984, 191472),
-    "CJK Compatibility Ideographs Supplement": range(194560, 195104),
-    "CJK Unified Ideographs Extension G": range(196608, 201552),
-    "CJK Unified Ideographs Extension H": range(201552, 205744),
-    "Tags": range(917504, 917632),
-    "Variation Selectors Supplement": range(917760, 918000),
-    "Supplementary Private Use Area-A": range(983040, 1048576),
-    "Supplementary Private Use Area-B": range(1048576, 1114112),
-}
-UNICODE_SECONDARY_RANGE_KEYWORD: list[str] = [
-    "Supplement",
-    "Extended",
-    "Extensions",
-    "Modifier",
-    "Marks",
-    "Punctuation",
-    "Symbols",
-    "Forms",
-    "Operators",
-    "Miscellaneous",
-    "Drawing",
-    "Block",
-    "Shapes",
-    "Supplemental",
-    "Tags",
-]
-RE_POSSIBLE_ENCODING_INDICATION = re_compile(
-    r"(?:(?:encoding)|(?:charset)|(?:coding))(?:[\:= ]{1,10})(?:[\"\']?)([a-zA-Z0-9\-_]+)(?:[\"\']?)",
-    IGNORECASE,
-)
-IANA_NO_ALIASES = [
-    "cp720",
-    "cp737",
-    "cp856",
-    "cp874",
-    "cp875",
-    "cp1006",
-    "koi8_r",
-    "koi8_t",
-    "koi8_u",
-]
-IANA_SUPPORTED: list[str] = sorted(
-    filter(
-        lambda x: x.endswith("_codec") is False
-        and x not in {"rot_13", "tactis", "mbcs"},
-        list(set(aliases.values())) + IANA_NO_ALIASES,
-    )
-)
-IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED)
-# pre-computed code page that are similar using the function cp_similarity.
-IANA_SUPPORTED_SIMILAR: dict[str, list[str]] = {
-    "cp037": ["cp1026", "cp1140", "cp273", "cp500"],
-    "cp1026": ["cp037", "cp1140", "cp273", "cp500"],
-    "cp1125": ["cp866"],
-    "cp1140": ["cp037", "cp1026", "cp273", "cp500"],
-    "cp1250": ["iso8859_2"],
-    "cp1251": ["kz1048", "ptcp154"],
-    "cp1252": ["iso8859_15", "iso8859_9", "latin_1"],
-    "cp1253": ["iso8859_7"],
-    "cp1254": ["iso8859_15", "iso8859_9", "latin_1"],
-    "cp1257": ["iso8859_13"],
-    "cp273": ["cp037", "cp1026", "cp1140", "cp500"],
-    "cp437": ["cp850", "cp858", "cp860", "cp861", "cp862", "cp863", "cp865"],
-    "cp500": ["cp037", "cp1026", "cp1140", "cp273"],
-    "cp850": ["cp437", "cp857", "cp858", "cp865"],
-    "cp857": ["cp850", "cp858", "cp865"],
-    "cp858": ["cp437", "cp850", "cp857", "cp865"],
-    "cp860": ["cp437", "cp861", "cp862", "cp863", "cp865"],
-    "cp861": ["cp437", "cp860", "cp862", "cp863", "cp865"],
-    "cp862": ["cp437", "cp860", "cp861", "cp863", "cp865"],
-    "cp863": ["cp437", "cp860", "cp861", "cp862", "cp865"],
-    "cp865": ["cp437", "cp850", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863"],
-    "cp866": ["cp1125"],
-    "iso8859_10": ["iso8859_14", "iso8859_15", "iso8859_4", "iso8859_9", "latin_1"],
-    "iso8859_11": ["tis_620"],
-    "iso8859_13": ["cp1257"],
-    "iso8859_14": [
-        "iso8859_10",
-        "iso8859_15",
-        "iso8859_16",
-        "iso8859_3",
-        "iso8859_9",
-        "latin_1",
-    ],
-    "iso8859_15": [
-        "cp1252",
-        "cp1254",
-        "iso8859_10",
-        "iso8859_14",
-        "iso8859_16",
-        "iso8859_3",
-        "iso8859_9",
-        "latin_1",
-    ],
-    "iso8859_16": [
-        "iso8859_14",
-        "iso8859_15",
-        "iso8859_2",
-        "iso8859_3",
-        "iso8859_9",
-        "latin_1",
-    ],
-    "iso8859_2": ["cp1250", "iso8859_16", "iso8859_4"],
-    "iso8859_3": ["iso8859_14", "iso8859_15", "iso8859_16", "iso8859_9", "latin_1"],
-    "iso8859_4": ["iso8859_10", "iso8859_2", "iso8859_9", "latin_1"],
-    "iso8859_7": ["cp1253"],
-    "iso8859_9": [
-        "cp1252",
-        "cp1254",
-        "cp1258",
-        "iso8859_10",
-        "iso8859_14",
-        "iso8859_15",
-        "iso8859_16",
-        "iso8859_3",
-        "iso8859_4",
-        "latin_1",
-    ],
-    "kz1048": ["cp1251", "ptcp154"],
-    "latin_1": [
-        "cp1252",
-        "cp1254",
-        "cp1258",
-        "iso8859_10",
-        "iso8859_14",
-        "iso8859_15",
-        "iso8859_16",
-        "iso8859_3",
-        "iso8859_4",
-        "iso8859_9",
-    ],
-    "mac_iceland": ["mac_roman", "mac_turkish"],
-    "mac_roman": ["mac_iceland", "mac_turkish"],
-    "mac_turkish": ["mac_iceland", "mac_roman"],
-    "ptcp154": ["cp1251", "kz1048"],
-    "tis_620": ["iso8859_11"],
-}
-CHARDET_CORRESPONDENCE: dict[str, str] = {
-    "iso2022_kr": "ISO-2022-KR",
-    "iso2022_jp": "ISO-2022-JP",
-    "euc_kr": "EUC-KR",
-    "tis_620": "TIS-620",
-    "utf_32": "UTF-32",
-    "euc_jp": "EUC-JP",
-    "koi8_r": "KOI8-R",
-    "iso8859_1": "ISO-8859-1",
-    "iso8859_2": "ISO-8859-2",
-    "iso8859_5": "ISO-8859-5",
-    "iso8859_6": "ISO-8859-6",
-    "iso8859_7": "ISO-8859-7",
-    "iso8859_8": "ISO-8859-8",
-    "utf_16": "UTF-16",
-    "cp855": "IBM855",
-    "mac_cyrillic": "MacCyrillic",
-    "gb2312": "GB2312",
-    "gb18030": "GB18030",
-    "cp932": "CP932",
-    "cp866": "IBM866",
-    "utf_8": "utf-8",
-    "utf_8_sig": "UTF-8-SIG",
-    "shift_jis": "SHIFT_JIS",
-    "big5": "Big5",
-    "cp1250": "windows-1250",
-    "cp1251": "windows-1251",
-    "cp1252": "Windows-1252",
-    "cp1253": "windows-1253",
-    "cp1255": "windows-1255",
-    "cp1256": "windows-1256",
-    "cp1254": "Windows-1254",
-    "cp949": "CP949",
-}
-COMMON_SAFE_ASCII_CHARACTERS: set[str] = {
-    "<",
-    ">",
-    "=",
-    ":",
-    "/",
-    "&",
-    ";",
-    "{",
-    "}",
-    "[",
-    "]",
-    ",",
-    "|",
-    '"',
-    "-",
-    "(",
-    ")",
-}
-KO_NAMES: set[str] = {"johab", "cp949", "euc_kr"}
-ZH_NAMES: set[str] = {"big5", "cp950", "big5hkscs", "hz"}
-# Logging LEVEL below DEBUG
-TRACE: int = 5
-# Language label that contain the em dash "—"
-# character are to be considered alternative seq to origin
-FREQUENCIES: dict[str, list[str]] = {
-    "English": [
-        "e",
-        "a",
-        "t",
-        "i",
-        "o",
-        "n",
-        "s",
-        "r",
-        "h",
-        "l",
-        "d",
-        "c",
-        "u",
-        "m",
-        "f",
-        "p",
-        "g",
-        "w",
-        "y",
-        "b",
-        "v",
-        "k",
-        "x",
-        "j",
-        "z",
-        "q",
-    ],
-    "English—": [
-        "e",
-        "a",
-        "t",
-        "i",
-        "o",
-        "n",
-        "s",
-        "r",
-        "h",
-        "l",
-        "d",
-        "c",
-        "m",
-        "u",
-        "f",
-        "p",
-        "g",
-        "w",
-        "b",
-        "y",
-        "v",
-        "k",
-        "j",
-        "x",
-        "z",
-        "q",
-    ],
-    "German": [
-        "e",
-        "n",
-        "i",
-        "r",
-        "s",
-        "t",
-        "a",
-        "d",
-        "h",
-        "u",
-        "l",
-        "g",
-        "o",
-        "c",
-        "m",
-        "b",
-        "f",
-        "k",
-        "w",
-        "z",
-        "p",
-        "v",
-        "ü",
-        "ä",
-        "ö",
-        "j",
-    ],
-    "French": [
-        "e",
-        "a",
-        "s",
-        "n",
-        "i",
-        "t",
-        "r",
-        "l",
-        "u",
-        "o",
-        "d",
-        "c",
-        "p",
-        "m",
-        "é",
-        "v",
-        "g",
-        "f",
-        "b",
-        "h",
-        "q",
-        "à",
-        "x",
-        "è",
-        "y",
-        "j",
-    ],
-    "Dutch": [
-        "e",
-        "n",
-        "a",
-        "i",
-        "r",
-        "t",
-        "o",
-        "d",
-        "s",
-        "l",
-        "g",
-        "h",
-        "v",
-        "m",
-        "u",
-        "k",
-        "c",
-        "p",
-        "b",
-        "w",
-        "j",
-        "z",
-        "f",
-        "y",
-        "x",
-        "ë",
-    ],
-    "Italian": [
-        "e",
-        "i",
-        "a",
-        "o",
-        "n",
-        "l",
-        "t",
-        "r",
-        "s",
-        "c",
-        "d",
-        "u",
-        "p",
-        "m",
-        "g",
-        "v",
-        "f",
-        "b",
-        "z",
-        "h",
-        "q",
-        "è",
-        "à",
-        "k",
-        "y",
-        "ò",
-    ],
-    "Polish": [
-        "a",
-        "i",
-        "o",
-        "e",
-        "n",
-        "r",
-        "z",
-        "w",
-        "s",
-        "c",
-        "t",
-        "k",
-        "y",
-        "d",
-        "p",
-        "m",
-        "u",
-        "l",
-        "j",
-        "ł",
-        "g",
-        "b",
-        "h",
-        "ą",
-        "ę",
-        "ó",
-    ],
-    "Spanish": [
-        "e",
-        "a",
-        "o",
-        "n",
-        "s",
-        "r",
-        "i",
-        "l",
-        "d",
-        "t",
-        "c",
-        "u",
-        "m",
-        "p",
-        "b",
-        "g",
-        "v",
-        "f",
-        "y",
-        "ó",
-        "h",
-        "q",
-        "í",
-        "j",
-        "z",
-        "á",
-    ],
-    "Russian": [
-        "о",
-        "а",
-        "е",
-        "и",
-        "н",
-        "с",
-        "т",
-        "р",
-        "в",
-        "л",
-        "к",
-        "м",
-        "д",
-        "п",
-        "у",
-        "г",
-        "я",
-        "ы",
-        "з",
-        "б",
-        "й",
-        "ь",
-        "ч",
-        "х",
-        "ж",
-        "ц",
-    ],
-    # Jap-Kanji
-    "Japanese": [
-        "人",
-        "一",
-        "大",
-        "亅",
-        "丁",
-        "丨",
-        "竹",
-        "笑",
-        "口",
-        "日",
-        "今",
-        "二",
-        "彳",
-        "行",
-        "十",
-        "土",
-        "丶",
-        "寸",
-        "寺",
-        "時",
-        "乙",
-        "丿",
-        "乂",
-        "气",
-        "気",
-        "冂",
-        "巾",
-        "亠",
-        "市",
-        "目",
-        "儿",
-        "見",
-        "八",
-        "小",
-        "凵",
-        "県",
-        "月",
-        "彐",
-        "門",
-        "間",
-        "木",
-        "東",
-        "山",
-        "出",
-        "本",
-        "中",
-        "刀",
-        "分",
-        "耳",
-        "又",
-        "取",
-        "最",
-        "言",
-        "田",
-        "心",
-        "思",
-        "刂",
-        "前",
-        "京",
-        "尹",
-        "事",
-        "生",
-        "厶",
-        "云",
-        "会",
-        "未",
-        "来",
-        "白",
-        "冫",
-        "楽",
-        "灬",
-        "馬",
-        "尸",
-        "尺",
-        "駅",
-        "明",
-        "耂",
-        "者",
-        "了",
-        "阝",
-        "都",
-        "高",
-        "卜",
-        "占",
-        "厂",
-        "广",
-        "店",
-        "子",
-        "申",
-        "奄",
-        "亻",
-        "俺",
-        "上",
-        "方",
-        "冖",
-        "学",
-        "衣",
-        "艮",
-        "食",
-        "自",
-    ],
-    # Jap-Katakana
-    "Japanese—": [
-        "ー",
-        "ン",
-        "ス",
-        "・",
-        "ル",
-        "ト",
-        "リ",
-        "イ",
-        "ア",
-        "ラ",
-        "ッ",
-        "ク",
-        "ド",
-        "シ",
-        "レ",
-        "ジ",
-        "タ",
-        "フ",
-        "ロ",
-        "カ",
-        "テ",
-        "マ",
-        "ィ",
-        "グ",
-        "バ",
-        "ム",
-        "プ",
-        "オ",
-        "コ",
-        "デ",
-        "ニ",
-        "ウ",
-        "メ",
-        "サ",
-        "ビ",
-        "ナ",
-        "ブ",
-        "ャ",
-        "エ",
-        "ュ",
-        "チ",
-        "キ",
-        "ズ",
-        "ダ",
-        "パ",
-        "ミ",
-        "ェ",
-        "ョ",
-        "ハ",
-        "セ",
-        "ベ",
-        "ガ",
-        "モ",
-        "ツ",
-        "ネ",
-        "ボ",
-        "ソ",
-        "ノ",
-        "ァ",
-        "ヴ",
-        "ワ",
-        "ポ",
-        "ペ",
-        "ピ",
-        "ケ",
-        "ゴ",
-        "ギ",
-        "ザ",
-        "ホ",
-        "ゲ",
-        "ォ",
-        "ヤ",
-        "ヒ",
-        "ユ",
-        "ヨ",
-        "ヘ",
-        "ゼ",
-        "ヌ",
-        "ゥ",
-        "ゾ",
-        "ヶ",
-        "ヂ",
-        "ヲ",
-        "ヅ",
-        "ヵ",
-        "ヱ",
-        "ヰ",
-        "ヮ",
-        "ヽ",
-        "゠",
-        "ヾ",
-        "ヷ",
-        "ヿ",
-        "ヸ",
-        "ヹ",
-        "ヺ",
-    ],
-    # Jap-Hiragana
-    "Japanese——": [
-        "の",
-        "に",
-        "る",
-        "た",
-        "と",
-        "は",
-        "し",
-        "い",
-        "を",
-        "で",
-        "て",
-        "が",
-        "な",
-        "れ",
-        "か",
-        "ら",
-        "さ",
-        "っ",
-        "り",
-        "す",
-        "あ",
-        "も",
-        "こ",
-        "ま",
-        "う",
-        "く",
-        "よ",
-        "き",
-        "ん",
-        "め",
-        "お",
-        "け",
-        "そ",
-        "つ",
-        "だ",
-        "や",
-        "え",
-        "ど",
-        "わ",
-        "ち",
-        "み",
-        "せ",
-        "じ",
-        "ば",
-        "へ",
-        "び",
-        "ず",
-        "ろ",
-        "ほ",
-        "げ",
-        "む",
-        "べ",
-        "ひ",
-        "ょ",
-        "ゆ",
-        "ぶ",
-        "ご",
-        "ゃ",
-        "ね",
-        "ふ",
-        "ぐ",
-        "ぎ",
-        "ぼ",
-        "ゅ",
-        "づ",
-        "ざ",
-        "ぞ",
-        "ぬ",
-        "ぜ",
-        "ぱ",
-        "ぽ",
-        "ぷ",
-        "ぴ",
-        "ぃ",
-        "ぁ",
-        "ぇ",
-        "ぺ",
-        "ゞ",
-        "ぢ",
-        "ぉ",
-        "ぅ",
-        "ゐ",
-        "ゝ",
-        "ゑ",
-        "゛",
-        "゜",
-        "ゎ",
-        "ゔ",
-        "゚",
-        "ゟ",
-        "゙",
-        "ゕ",
-        "ゖ",
-    ],
-    "Portuguese": [
-        "a",
-        "e",
-        "o",
-        "s",
-        "i",
-        "r",
-        "d",
-        "n",
-        "t",
-        "m",
-        "u",
-        "c",
-        "l",
-        "p",
-        "g",
-        "v",
-        "b",
-        "f",
-        "h",
-        "ã",
-        "q",
-        "é",
-        "ç",
-        "á",
-        "z",
-        "í",
-    ],
-    "Swedish": [
-        "e",
-        "a",
-        "n",
-        "r",
-        "t",
-        "s",
-        "i",
-        "l",
-        "d",
-        "o",
-        "m",
-        "k",
-        "g",
-        "v",
-        "h",
-        "f",
-        "u",
-        "p",
-        "ä",
-        "c",
-        "b",
-        "ö",
-        "å",
-        "y",
-        "j",
-        "x",
-    ],
-    "Chinese": [
-        "的",
-        "一",
-        "是",
-        "不",
-        "了",
-        "在",
-        "人",
-        "有",
-        "我",
-        "他",
-        "这",
-        "个",
-        "们",
-        "中",
-        "来",
-        "上",
-        "大",
-        "为",
-        "和",
-        "国",
-        "地",
-        "到",
-        "以",
-        "说",
-        "时",
-        "要",
-        "就",
-        "出",
-        "会",
-        "可",
-        "也",
-        "你",
-        "对",
-        "生",
-        "能",
-        "而",
-        "子",
-        "那",
-        "得",
-        "于",
-        "着",
-        "下",
-        "自",
-        "之",
-        "年",
-        "过",
-        "发",
-        "后",
-        "作",
-        "里",
-        "用",
-        "道",
-        "行",
-        "所",
-        "然",
-        "家",
-        "种",
-        "事",
-        "成",
-        "方",
-        "多",
-        "经",
-        "么",
-        "去",
-        "法",
-        "学",
-        "如",
-        "都",
-        "同",
-        "现",
-        "当",
-        "没",
-        "动",
-        "面",
-        "起",
-        "看",
-        "定",
-        "天",
-        "分",
-        "还",
-        "进",
-        "好",
-        "小",
-        "部",
-        "其",
-        "些",
-        "主",
-        "样",
-        "理",
-        "心",
-        "她",
-        "本",
-        "前",
-        "开",
-        "但",
-        "因",
-        "只",
-        "从",
-        "想",
-        "实",
-    ],
-    "Ukrainian": [
-        "о",
-        "а",
-        "н",
-        "і",
-        "и",
-        "р",
-        "в",
-        "т",
-        "е",
-        "с",
-        "к",
-        "л",
-        "у",
-        "д",
-        "м",
-        "п",
-        "з",
-        "я",
-        "ь",
-        "б",
-        "г",
-        "й",
-        "ч",
-        "х",
-        "ц",
-        "ї",
-    ],
-    "Norwegian": [
-        "e",
-        "r",
-        "n",
-        "t",
-        "a",
-        "s",
-        "i",
-        "o",
-        "l",
-        "d",
-        "g",
-        "k",
-        "m",
-        "v",
-        "f",
-        "p",
-        "u",
-        "b",
-        "h",
-        "å",
-        "y",
-        "j",
-        "ø",
-        "c",
-        "æ",
-        "w",
-    ],
-    "Finnish": [
-        "a",
-        "i",
-        "n",
-        "t",
-        "e",
-        "s",
-        "l",
-        "o",
-        "u",
-        "k",
-        "ä",
-        "m",
-        "r",
-        "v",
-        "j",
-        "h",
-        "p",
-        "y",
-        "d",
-        "ö",
-        "g",
-        "c",
-        "b",
-        "f",
-        "w",
-        "z",
-    ],
-    "Vietnamese": [
-        "n",
-        "h",
-        "t",
-        "i",
-        "c",
-        "g",
-        "a",
-        "o",
-        "u",
-        "m",
-        "l",
-        "r",
-        "à",
-        "đ",
-        "s",
-        "e",
-        "v",
-        "p",
-        "b",
-        "y",
-        "ư",
-        "d",
-        "á",
-        "k",
-        "ộ",
-        "ế",
-    ],
-    "Czech": [
-        "o",
-        "e",
-        "a",
-        "n",
-        "t",
-        "s",
-        "i",
-        "l",
-        "v",
-        "r",
-        "k",
-        "d",
-        "u",
-        "m",
-        "p",
-        "í",
-        "c",
-        "h",
-        "z",
-        "á",
-        "y",
-        "j",
-        "b",
-        "ě",
-        "é",
-        "ř",
-    ],
-    "Hungarian": [
-        "e",
-        "a",
-        "t",
-        "l",
-        "s",
-        "n",
-        "k",
-        "r",
-        "i",
-        "o",
-        "z",
-        "á",
-        "é",
-        "g",
-        "m",
-        "b",
-        "y",
-        "v",
-        "d",
-        "h",
-        "u",
-        "p",
-        "j",
-        "ö",
-        "f",
-        "c",
-    ],
-    "Korean": [
-        "이",
-        "다",
-        "에",
-        "의",
-        "는",
-        "로",
-        "하",
-        "을",
-        "가",
-        "고",
-        "지",
-        "서",
-        "한",
-        "은",
-        "기",
-        "으",
-        "년",
-        "대",
-        "사",
-        "시",
-        "를",
-        "리",
-        "도",
-        "인",
-        "스",
-        "일",
-    ],
-    "Indonesian": [
-        "a",
-        "n",
-        "e",
-        "i",
-        "r",
-        "t",
-        "u",
-        "s",
-        "d",
-        "k",
-        "m",
-        "l",
-        "g",
-        "p",
-        "b",
-        "o",
-        "h",
-        "y",
-        "j",
-        "c",
-        "w",
-        "f",
-        "v",
-        "z",
-        "x",
-        "q",
-    ],
-    "Turkish": [
-        "a",
-        "e",
-        "i",
-        "n",
-        "r",
-        "l",
-        "ı",
-        "k",
-        "d",
-        "t",
-        "s",
-        "m",
-        "y",
-        "u",
-        "o",
-        "b",
-        "ü",
-        "ş",
-        "v",
-        "g",
-        "z",
-        "h",
-        "c",
-        "p",
-        "ç",
-        "ğ",
-    ],
-    "Romanian": [
-        "e",
-        "i",
-        "a",
-        "r",
-        "n",
-        "t",
-        "u",
-        "l",
-        "o",
-        "c",
-        "s",
-        "d",
-        "p",
-        "m",
-        "ă",
-        "f",
-        "v",
-        "î",
-        "g",
-        "b",
-        "ș",
-        "ț",
-        "z",
-        "h",
-        "â",
-        "j",
-    ],
-    "Farsi": [
-        "ا",
-        "ی",
-        "ر",
-        "د",
-        "ن",
-        "ه",
-        "و",
-        "م",
-        "ت",
-        "ب",
-        "س",
-        "ل",
-        "ک",
-        "ش",
-        "ز",
-        "ف",
-        "گ",
-        "ع",
-        "خ",
-        "ق",
-        "ج",
-        "آ",
-        "پ",
-        "ح",
-        "ط",
-        "ص",
-    ],
-    "Arabic": [
-        "ا",
-        "ل",
-        "ي",
-        "م",
-        "و",
-        "ن",
-        "ر",
-        "ت",
-        "ب",
-        "ة",
-        "ع",
-        "د",
-        "س",
-        "ف",
-        "ه",
-        "ك",
-        "ق",
-        "أ",
-        "ح",
-        "ج",
-        "ش",
-        "ط",
-        "ص",
-        "ى",
-        "خ",
-        "إ",
-    ],
-    "Danish": [
-        "e",
-        "r",
-        "n",
-        "t",
-        "a",
-        "i",
-        "s",
-        "d",
-        "l",
-        "o",
-        "g",
-        "m",
-        "k",
-        "f",
-        "v",
-        "u",
-        "b",
-        "h",
-        "p",
-        "å",
-        "y",
-        "ø",
-        "æ",
-        "c",
-        "j",
-        "w",
-    ],
-    "Serbian": [
-        "а",
-        "и",
-        "о",
-        "е",
-        "н",
-        "р",
-        "с",
-        "у",
-        "т",
-        "к",
-        "ј",
-        "в",
-        "д",
-        "м",
-        "п",
-        "л",
-        "г",
-        "з",
-        "б",
-        "a",
-        "i",
-        "e",
-        "o",
-        "n",
-        "ц",
-        "ш",
-    ],
-    "Lithuanian": [
-        "i",
-        "a",
-        "s",
-        "o",
-        "r",
-        "e",
-        "t",
-        "n",
-        "u",
-        "k",
-        "m",
-        "l",
-        "p",
-        "v",
-        "d",
-        "j",
-        "g",
-        "ė",
-        "b",
-        "y",
-        "ų",
-        "š",
-        "ž",
-        "c",
-        "ą",
-        "į",
-    ],
-    "Slovene": [
-        "e",
-        "a",
-        "i",
-        "o",
-        "n",
-        "r",
-        "s",
-        "l",
-        "t",
-        "j",
-        "v",
-        "k",
-        "d",
-        "p",
-        "m",
-        "u",
-        "z",
-        "b",
-        "g",
-        "h",
-        "č",
-        "c",
-        "š",
-        "ž",
-        "f",
-        "y",
-    ],
-    "Slovak": [
-        "o",
-        "a",
-        "e",
-        "n",
-        "i",
-        "r",
-        "v",
-        "t",
-        "s",
-        "l",
-        "k",
-        "d",
-        "m",
-        "p",
-        "u",
-        "c",
-        "h",
-        "j",
-        "b",
-        "z",
-        "á",
-        "y",
-        "ý",
-        "í",
-        "č",
-        "é",
-    ],
-    "Hebrew": [
-        "י",
-        "ו",
-        "ה",
-        "ל",
-        "ר",
-        "ב",
-        "ת",
-        "מ",
-        "א",
-        "ש",
-        "נ",
-        "ע",
-        "ם",
-        "ד",
-        "ק",
-        "ח",
-        "פ",
-        "ס",
-        "כ",
-        "ג",
-        "ט",
-        "צ",
-        "ן",
-        "ז",
-        "ך",
-    ],
-    "Bulgarian": [
-        "а",
-        "и",
-        "о",
-        "е",
-        "н",
-        "т",
-        "р",
-        "с",
-        "в",
-        "л",
-        "к",
-        "д",
-        "п",
-        "м",
-        "з",
-        "г",
-        "я",
-        "ъ",
-        "у",
-        "б",
-        "ч",
-        "ц",
-        "й",
-        "ж",
-        "щ",
-        "х",
-    ],
-    "Croatian": [
-        "a",
-        "i",
-        "o",
-        "e",
-        "n",
-        "r",
-        "j",
-        "s",
-        "t",
-        "u",
-        "k",
-        "l",
-        "v",
-        "d",
-        "m",
-        "p",
-        "g",
-        "z",
-        "b",
-        "c",
-        "č",
-        "h",
-        "š",
-        "ž",
-        "ć",
-        "f",
-    ],
-    "Hindi": [
-        "क",
-        "र",
-        "स",
-        "न",
-        "त",
-        "म",
-        "ह",
-        "प",
-        "य",
-        "ल",
-        "व",
-        "ज",
-        "द",
-        "ग",
-        "ब",
-        "श",
-        "ट",
-        "अ",
-        "ए",
-        "थ",
-        "भ",
-        "ड",
-        "च",
-        "ध",
-        "ष",
-        "इ",
-    ],
-    "Estonian": [
-        "a",
-        "i",
-        "e",
-        "s",
-        "t",
-        "l",
-        "u",
-        "n",
-        "o",
-        "k",
-        "r",
-        "d",
-        "m",
-        "v",
-        "g",
-        "p",
-        "j",
-        "h",
-        "ä",
-        "b",
-        "õ",
-        "ü",
-        "f",
-        "c",
-        "ö",
-        "y",
-    ],
-    "Thai": [
-        "า",
-        "น",
-        "ร",
-        "อ",
-        "ก",
-        "เ",
-        "ง",
-        "ม",
-        "ย",
-        "ล",
-        "ว",
-        "ด",
-        "ท",
-        "ส",
-        "ต",
-        "ะ",
-        "ป",
-        "บ",
-        "ค",
-        "ห",
-        "แ",
-        "จ",
-        "พ",
-        "ช",
-        "ข",
-        "ใ",
-    ],
-    "Greek": [
-        "α",
-        "τ",
-        "ο",
-        "ι",
-        "ε",
-        "ν",
-        "ρ",
-        "σ",
-        "κ",
-        "η",
-        "π",
-        "ς",
-        "υ",
-        "μ",
-        "λ",
-        "ί",
-        "ό",
-        "ά",
-        "γ",
-        "έ",
-        "δ",
-        "ή",
-        "ω",
-        "χ",
-        "θ",
-        "ύ",
-    ],
-    "Tamil": [
-        "க",
-        "த",
-        "ப",
-        "ட",
-        "ர",
-        "ம",
-        "ல",
-        "ன",
-        "வ",
-        "ற",
-        "ய",
-        "ள",
-        "ச",
-        "ந",
-        "இ",
-        "ண",
-        "அ",
-        "ஆ",
-        "ழ",
-        "ங",
-        "எ",
-        "உ",
-        "ஒ",
-        "ஸ",
-    ],
-    "Kazakh": [
-        "а",
-        "ы",
-        "е",
-        "н",
-        "т",
-        "р",
-        "л",
-        "і",
-        "д",
-        "с",
-        "м",
-        "қ",
-        "к",
-        "о",
-        "б",
-        "и",
-        "у",
-        "ғ",
-        "ж",
-        "ң",
-        "з",
-        "ш",
-        "й",
-        "п",
-        "г",
-        "ө",
-    ],
-}
-LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)

env/Lib/site-packages/charset_normalizer/legacy.py DELETED Viewed

@@ -1,66 +0,0 @@
-from __future__ import annotations
-from typing import TYPE_CHECKING, Any
-from warnings import warn
-from .api import from_bytes
-from .constant import CHARDET_CORRESPONDENCE
-# TODO: remove this check when dropping Python 3.7 support
-if TYPE_CHECKING:
-    from typing_extensions import TypedDict
-    class ResultDict(TypedDict):
-        encoding: str | None
-        language: str
-        confidence: float | None
-def detect(
-    byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
-) -> ResultDict:
-    """
-    chardet legacy method
-    Detect the encoding of the given byte string. It should be mostly backward-compatible.
-    Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it)
-    This function is deprecated and should be used to migrate your project easily, consult the documentation for
-    further information. Not planned for removal.
-    :param byte_str:     The byte sequence to examine.
-    :param should_rename_legacy:  Should we rename legacy encodings
-                                  to their more modern equivalents?
-    """
-    if len(kwargs):
-        warn(
-            f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()"
-        )
-    if not isinstance(byte_str, (bytearray, bytes)):
-        raise TypeError(  # pragma: nocover
-            "Expected object of type bytes or bytearray, got: " "{}".format(
-                type(byte_str)
-            )
-        )
-    if isinstance(byte_str, bytearray):
-        byte_str = bytes(byte_str)
-    r = from_bytes(byte_str).best()
-    encoding = r.encoding if r is not None else None
-    language = r.language if r is not None and r.language != "Unknown" else ""
-    confidence = 1.0 - r.chaos if r is not None else None
-    # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
-    # but chardet does return 'utf-8-sig' and it is a valid codec name.
-    if r is not None and encoding == "utf_8" and r.bom:
-        encoding += "_sig"
-    if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
-        encoding = CHARDET_CORRESPONDENCE[encoding]
-    return {
-        "encoding": encoding,
-        "language": language,
-        "confidence": confidence,
-    }

env/Lib/site-packages/charset_normalizer/md.py DELETED Viewed

@@ -1,630 +0,0 @@
-from __future__ import annotations
-from functools import lru_cache
-from logging import getLogger
-from .constant import (
-    COMMON_SAFE_ASCII_CHARACTERS,
-    TRACE,
-    UNICODE_SECONDARY_RANGE_KEYWORD,
-)
-from .utils import (
-    is_accentuated,
-    is_arabic,
-    is_arabic_isolated_form,
-    is_case_variable,
-    is_cjk,
-    is_emoticon,
-    is_hangul,
-    is_hiragana,
-    is_katakana,
-    is_latin,
-    is_punctuation,
-    is_separator,
-    is_symbol,
-    is_thai,
-    is_unprintable,
-    remove_accent,
-    unicode_range,
-)
-class MessDetectorPlugin:
-    """
-    Base abstract class used for mess detection plugins.
-    All detectors MUST extend and implement given methods.
-    """
-    def eligible(self, character: str) -> bool:
-        """
-        Determine if given character should be fed in.
-        """
-        raise NotImplementedError  # pragma: nocover
-    def feed(self, character: str) -> None:
-        """
-        The main routine to be executed upon character.
-        Insert the logic in witch the text would be considered chaotic.
-        """
-        raise NotImplementedError  # pragma: nocover
-    def reset(self) -> None:  # pragma: no cover
-        """
-        Permit to reset the plugin to the initial state.
-        """
-        raise NotImplementedError
-    @property
-    def ratio(self) -> float:
-        """
-        Compute the chaos ratio based on what your feed() has seen.
-        Must NOT be lower than 0.; No restriction gt 0.
-        """
-        raise NotImplementedError  # pragma: nocover
-class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
-    def __init__(self) -> None:
-        self._punctuation_count: int = 0
-        self._symbol_count: int = 0
-        self._character_count: int = 0
-        self._last_printable_char: str | None = None
-        self._frenzy_symbol_in_word: bool = False
-    def eligible(self, character: str) -> bool:
-        return character.isprintable()
-    def feed(self, character: str) -> None:
-        self._character_count += 1
-        if (
-            character != self._last_printable_char
-            and character not in COMMON_SAFE_ASCII_CHARACTERS
-        ):
-            if is_punctuation(character):
-                self._punctuation_count += 1
-            elif (
-                character.isdigit() is False
-                and is_symbol(character)
-                and is_emoticon(character) is False
-            ):
-                self._symbol_count += 2
-        self._last_printable_char = character
-    def reset(self) -> None:  # Abstract
-        self._punctuation_count = 0
-        self._character_count = 0
-        self._symbol_count = 0
-    @property
-    def ratio(self) -> float:
-        if self._character_count == 0:
-            return 0.0
-        ratio_of_punctuation: float = (
-            self._punctuation_count + self._symbol_count
-        ) / self._character_count
-        return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0
-class TooManyAccentuatedPlugin(MessDetectorPlugin):
-    def __init__(self) -> None:
-        self._character_count: int = 0
-        self._accentuated_count: int = 0
-    def eligible(self, character: str) -> bool:
-        return character.isalpha()
-    def feed(self, character: str) -> None:
-        self._character_count += 1
-        if is_accentuated(character):
-            self._accentuated_count += 1
-    def reset(self) -> None:  # Abstract
-        self._character_count = 0
-        self._accentuated_count = 0
-    @property
-    def ratio(self) -> float:
-        if self._character_count < 8:
-            return 0.0
-        ratio_of_accentuation: float = self._accentuated_count / self._character_count
-        return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0
-class UnprintablePlugin(MessDetectorPlugin):
-    def __init__(self) -> None:
-        self._unprintable_count: int = 0
-        self._character_count: int = 0
-    def eligible(self, character: str) -> bool:
-        return True
-    def feed(self, character: str) -> None:
-        if is_unprintable(character):
-            self._unprintable_count += 1
-        self._character_count += 1
-    def reset(self) -> None:  # Abstract
-        self._unprintable_count = 0
-    @property
-    def ratio(self) -> float:
-        if self._character_count == 0:
-            return 0.0
-        return (self._unprintable_count * 8) / self._character_count
-class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
-    def __init__(self) -> None:
-        self._successive_count: int = 0
-        self._character_count: int = 0
-        self._last_latin_character: str | None = None
-    def eligible(self, character: str) -> bool:
-        return character.isalpha() and is_latin(character)
-    def feed(self, character: str) -> None:
-        self._character_count += 1
-        if (
-            self._last_latin_character is not None
-            and is_accentuated(character)
-            and is_accentuated(self._last_latin_character)
-        ):
-            if character.isupper() and self._last_latin_character.isupper():
-                self._successive_count += 1
-            # Worse if its the same char duplicated with different accent.
-            if remove_accent(character) == remove_accent(self._last_latin_character):
-                self._successive_count += 1
-        self._last_latin_character = character
-    def reset(self) -> None:  # Abstract
-        self._successive_count = 0
-        self._character_count = 0
-        self._last_latin_character = None
-    @property
-    def ratio(self) -> float:
-        if self._character_count == 0:
-            return 0.0
-        return (self._successive_count * 2) / self._character_count
-class SuspiciousRange(MessDetectorPlugin):
-    def __init__(self) -> None:
-        self._suspicious_successive_range_count: int = 0
-        self._character_count: int = 0
-        self._last_printable_seen: str | None = None
-    def eligible(self, character: str) -> bool:
-        return character.isprintable()
-    def feed(self, character: str) -> None:
-        self._character_count += 1
-        if (
-            character.isspace()
-            or is_punctuation(character)
-            or character in COMMON_SAFE_ASCII_CHARACTERS
-        ):
-            self._last_printable_seen = None
-            return
-        if self._last_printable_seen is None:
-            self._last_printable_seen = character
-            return
-        unicode_range_a: str | None = unicode_range(self._last_printable_seen)
-        unicode_range_b: str | None = unicode_range(character)
-        if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
-            self._suspicious_successive_range_count += 1
-        self._last_printable_seen = character
-    def reset(self) -> None:  # Abstract
-        self._character_count = 0
-        self._suspicious_successive_range_count = 0
-        self._last_printable_seen = None
-    @property
-    def ratio(self) -> float:
-        if self._character_count <= 13:
-            return 0.0
-        ratio_of_suspicious_range_usage: float = (
-            self._suspicious_successive_range_count * 2
-        ) / self._character_count
-        return ratio_of_suspicious_range_usage
-class SuperWeirdWordPlugin(MessDetectorPlugin):
-    def __init__(self) -> None:
-        self._word_count: int = 0
-        self._bad_word_count: int = 0
-        self._foreign_long_count: int = 0
-        self._is_current_word_bad: bool = False
-        self._foreign_long_watch: bool = False
-        self._character_count: int = 0
-        self._bad_character_count: int = 0
-        self._buffer: str = ""
-        self._buffer_accent_count: int = 0
-        self._buffer_glyph_count: int = 0
-    def eligible(self, character: str) -> bool:
-        return True
-    def feed(self, character: str) -> None:
-        if character.isalpha():
-            self._buffer += character
-            if is_accentuated(character):
-                self._buffer_accent_count += 1
-            if (
-                self._foreign_long_watch is False
-                and (is_latin(character) is False or is_accentuated(character))
-                and is_cjk(character) is False
-                and is_hangul(character) is False
-                and is_katakana(character) is False
-                and is_hiragana(character) is False
-                and is_thai(character) is False
-            ):
-                self._foreign_long_watch = True
-            if (
-                is_cjk(character)
-                or is_hangul(character)
-                or is_katakana(character)
-                or is_hiragana(character)
-                or is_thai(character)
-            ):
-                self._buffer_glyph_count += 1
-            return
-        if not self._buffer:
-            return
-        if (
-            character.isspace() or is_punctuation(character) or is_separator(character)
-        ) and self._buffer:
-            self._word_count += 1
-            buffer_length: int = len(self._buffer)
-            self._character_count += buffer_length
-            if buffer_length >= 4:
-                if self._buffer_accent_count / buffer_length >= 0.5:
-                    self._is_current_word_bad = True
-                # Word/Buffer ending with an upper case accentuated letter are so rare,
-                # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
-                elif (
-                    is_accentuated(self._buffer[-1])
-                    and self._buffer[-1].isupper()
-                    and all(_.isupper() for _ in self._buffer) is False
-                ):
-                    self._foreign_long_count += 1
-                    self._is_current_word_bad = True
-                elif self._buffer_glyph_count == 1:
-                    self._is_current_word_bad = True
-                    self._foreign_long_count += 1
-            if buffer_length >= 24 and self._foreign_long_watch:
-                camel_case_dst = [
-                    i
-                    for c, i in zip(self._buffer, range(0, buffer_length))
-                    if c.isupper()
-                ]
-                probable_camel_cased: bool = False
-                if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
-                    probable_camel_cased = True
-                if not probable_camel_cased:
-                    self._foreign_long_count += 1
-                    self._is_current_word_bad = True
-            if self._is_current_word_bad:
-                self._bad_word_count += 1
-                self._bad_character_count += len(self._buffer)
-                self._is_current_word_bad = False
-            self._foreign_long_watch = False
-            self._buffer = ""
-            self._buffer_accent_count = 0
-            self._buffer_glyph_count = 0
-        elif (
-            character not in {"<", ">", "-", "=", "~", "|", "_"}
-            and character.isdigit() is False
-            and is_symbol(character)
-        ):
-            self._is_current_word_bad = True
-            self._buffer += character
-    def reset(self) -> None:  # Abstract
-        self._buffer = ""
-        self._is_current_word_bad = False
-        self._foreign_long_watch = False
-        self._bad_word_count = 0
-        self._word_count = 0
-        self._character_count = 0
-        self._bad_character_count = 0
-        self._foreign_long_count = 0
-    @property
-    def ratio(self) -> float:
-        if self._word_count <= 10 and self._foreign_long_count == 0:
-            return 0.0
-        return self._bad_character_count / self._character_count
-class CjkInvalidStopPlugin(MessDetectorPlugin):
-    """
-    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
-    can be easily detected. Searching for the overuse of '丅' and '丄'.
-    """
-    def __init__(self) -> None:
-        self._wrong_stop_count: int = 0
-        self._cjk_character_count: int = 0
-    def eligible(self, character: str) -> bool:
-        return True
-    def feed(self, character: str) -> None:
-        if character in {"丅", "丄"}:
-            self._wrong_stop_count += 1
-            return
-        if is_cjk(character):
-            self._cjk_character_count += 1
-    def reset(self) -> None:  # Abstract
-        self._wrong_stop_count = 0
-        self._cjk_character_count = 0
-    @property
-    def ratio(self) -> float:
-        if self._cjk_character_count < 16:
-            return 0.0
-        return self._wrong_stop_count / self._cjk_character_count
-class ArchaicUpperLowerPlugin(MessDetectorPlugin):
-    def __init__(self) -> None:
-        self._buf: bool = False
-        self._character_count_since_last_sep: int = 0
-        self._successive_upper_lower_count: int = 0
-        self._successive_upper_lower_count_final: int = 0
-        self._character_count: int = 0
-        self._last_alpha_seen: str | None = None
-        self._current_ascii_only: bool = True
-    def eligible(self, character: str) -> bool:
-        return True
-    def feed(self, character: str) -> None:
-        is_concerned = character.isalpha() and is_case_variable(character)
-        chunk_sep = is_concerned is False
-        if chunk_sep and self._character_count_since_last_sep > 0:
-            if (
-                self._character_count_since_last_sep <= 64
-                and character.isdigit() is False
-                and self._current_ascii_only is False
-            ):
-                self._successive_upper_lower_count_final += (
-                    self._successive_upper_lower_count
-                )
-            self._successive_upper_lower_count = 0
-            self._character_count_since_last_sep = 0
-            self._last_alpha_seen = None
-            self._buf = False
-            self._character_count += 1
-            self._current_ascii_only = True
-            return
-        if self._current_ascii_only is True and character.isascii() is False:
-            self._current_ascii_only = False
-        if self._last_alpha_seen is not None:
-            if (character.isupper() and self._last_alpha_seen.islower()) or (
-                character.islower() and self._last_alpha_seen.isupper()
-            ):
-                if self._buf is True:
-                    self._successive_upper_lower_count += 2
-                    self._buf = False
-                else:
-                    self._buf = True
-            else:
-                self._buf = False
-        self._character_count += 1
-        self._character_count_since_last_sep += 1
-        self._last_alpha_seen = character
-    def reset(self) -> None:  # Abstract
-        self._character_count = 0
-        self._character_count_since_last_sep = 0
-        self._successive_upper_lower_count = 0
-        self._successive_upper_lower_count_final = 0
-        self._last_alpha_seen = None
-        self._buf = False
-        self._current_ascii_only = True
-    @property
-    def ratio(self) -> float:
-        if self._character_count == 0:
-            return 0.0
-        return self._successive_upper_lower_count_final / self._character_count
-class ArabicIsolatedFormPlugin(MessDetectorPlugin):
-    def __init__(self) -> None:
-        self._character_count: int = 0
-        self._isolated_form_count: int = 0
-    def reset(self) -> None:  # Abstract
-        self._character_count = 0
-        self._isolated_form_count = 0
-    def eligible(self, character: str) -> bool:
-        return is_arabic(character)
-    def feed(self, character: str) -> None:
-        self._character_count += 1
-        if is_arabic_isolated_form(character):
-            self._isolated_form_count += 1
-    @property
-    def ratio(self) -> float:
-        if self._character_count < 8:
-            return 0.0
-        isolated_form_usage: float = self._isolated_form_count / self._character_count
-        return isolated_form_usage
-@lru_cache(maxsize=1024)
-def is_suspiciously_successive_range(
-    unicode_range_a: str | None, unicode_range_b: str | None
-) -> bool:
-    """
-    Determine if two Unicode range seen next to each other can be considered as suspicious.
-    """
-    if unicode_range_a is None or unicode_range_b is None:
-        return True
-    if unicode_range_a == unicode_range_b:
-        return False
-    if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
-        return False
-    if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
-        return False
-    # Latin characters can be accompanied with a combining diacritical mark
-    # eg. Vietnamese.
-    if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
-        "Combining" in unicode_range_a or "Combining" in unicode_range_b
-    ):
-        return False
-    keywords_range_a, keywords_range_b = (
-        unicode_range_a.split(" "),
-        unicode_range_b.split(" "),
-    )
-    for el in keywords_range_a:
-        if el in UNICODE_SECONDARY_RANGE_KEYWORD:
-            continue
-        if el in keywords_range_b:
-            return False
-    # Japanese Exception
-    range_a_jp_chars, range_b_jp_chars = (
-        unicode_range_a
-        in (
-            "Hiragana",
-            "Katakana",
-        ),
-        unicode_range_b in ("Hiragana", "Katakana"),
-    )
-    if (range_a_jp_chars or range_b_jp_chars) and (
-        "CJK" in unicode_range_a or "CJK" in unicode_range_b
-    ):
-        return False
-    if range_a_jp_chars and range_b_jp_chars:
-        return False
-    if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
-        if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
-            return False
-        if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
-            return False
-    # Chinese/Japanese use dedicated range for punctuation and/or separators.
-    if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
-        unicode_range_a in ["Katakana", "Hiragana"]
-        and unicode_range_b in ["Katakana", "Hiragana"]
-    ):
-        if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
-            return False
-        if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
-            return False
-        if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
-            return False
-    return True
-@lru_cache(maxsize=2048)
-def mess_ratio(
-    decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
-) -> float:
-    """
-    Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
-    """
-    detectors: list[MessDetectorPlugin] = [
-        md_class() for md_class in MessDetectorPlugin.__subclasses__()
-    ]
-    length: int = len(decoded_sequence) + 1
-    mean_mess_ratio: float = 0.0
-    if length < 512:
-        intermediary_mean_mess_ratio_calc: int = 32
-    elif length <= 1024:
-        intermediary_mean_mess_ratio_calc = 64
-    else:
-        intermediary_mean_mess_ratio_calc = 128
-    for character, index in zip(decoded_sequence + "\n", range(length)):
-        for detector in detectors:
-            if detector.eligible(character):
-                detector.feed(character)
-        if (
-            index > 0 and index % intermediary_mean_mess_ratio_calc == 0
-        ) or index == length - 1:
-            mean_mess_ratio = sum(dt.ratio for dt in detectors)
-            if mean_mess_ratio >= maximum_threshold:
-                break
-    if debug:
-        logger = getLogger("charset_normalizer")
-        logger.log(
-            TRACE,
-            "Mess-detector extended-analysis start. "
-            f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
-            f"maximum_threshold={maximum_threshold}",
-        )
-        if len(decoded_sequence) > 16:
-            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
-            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")
-        for dt in detectors:
-            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")
-    return round(mean_mess_ratio, 3)

env/Lib/site-packages/charset_normalizer/models.py DELETED Viewed

@@ -1,360 +0,0 @@
-from __future__ import annotations
-from encodings.aliases import aliases
-from hashlib import sha256
-from json import dumps
-from re import sub
-from typing import Any, Iterator, List, Tuple
-from .constant import RE_POSSIBLE_ENCODING_INDICATION, TOO_BIG_SEQUENCE
-from .utils import iana_name, is_multi_byte_encoding, unicode_range
-class CharsetMatch:
-    def __init__(
-        self,
-        payload: bytes,
-        guessed_encoding: str,
-        mean_mess_ratio: float,
-        has_sig_or_bom: bool,
-        languages: CoherenceMatches,
-        decoded_payload: str | None = None,
-        preemptive_declaration: str | None = None,
-    ):
-        self._payload: bytes = payload
-        self._encoding: str = guessed_encoding
-        self._mean_mess_ratio: float = mean_mess_ratio
-        self._languages: CoherenceMatches = languages
-        self._has_sig_or_bom: bool = has_sig_or_bom
-        self._unicode_ranges: list[str] | None = None
-        self._leaves: list[CharsetMatch] = []
-        self._mean_coherence_ratio: float = 0.0
-        self._output_payload: bytes | None = None
-        self._output_encoding: str | None = None
-        self._string: str | None = decoded_payload
-        self._preemptive_declaration: str | None = preemptive_declaration
-    def __eq__(self, other: object) -> bool:
-        if not isinstance(other, CharsetMatch):
-            if isinstance(other, str):
-                return iana_name(other) == self.encoding
-            return False
-        return self.encoding == other.encoding and self.fingerprint == other.fingerprint
-    def __lt__(self, other: object) -> bool:
-        """
-        Implemented to make sorted available upon CharsetMatches items.
-        """
-        if not isinstance(other, CharsetMatch):
-            raise ValueError
-        chaos_difference: float = abs(self.chaos - other.chaos)
-        coherence_difference: float = abs(self.coherence - other.coherence)
-        # Below 1% difference --> Use Coherence
-        if chaos_difference < 0.01 and coherence_difference > 0.02:
-            return self.coherence > other.coherence
-        elif chaos_difference < 0.01 and coherence_difference <= 0.02:
-            # When having a difficult decision, use the result that decoded as many multi-byte as possible.
-            # preserve RAM usage!
-            if len(self._payload) >= TOO_BIG_SEQUENCE:
-                return self.chaos < other.chaos
-            return self.multi_byte_usage > other.multi_byte_usage
-        return self.chaos < other.chaos
-    @property
-    def multi_byte_usage(self) -> float:
-        return 1.0 - (len(str(self)) / len(self.raw))
-    def __str__(self) -> str:
-        # Lazy Str Loading
-        if self._string is None:
-            self._string = str(self._payload, self._encoding, "strict")
-        return self._string
-    def __repr__(self) -> str:
-        return f"<CharsetMatch '{self.encoding}' bytes({self.fingerprint})>"
-    def add_submatch(self, other: CharsetMatch) -> None:
-        if not isinstance(other, CharsetMatch) or other == self:
-            raise ValueError(
-                "Unable to add instance <{}> as a submatch of a CharsetMatch".format(
-                    other.__class__
-                )
-            )
-        other._string = None  # Unload RAM usage; dirty trick.
-        self._leaves.append(other)
-    @property
-    def encoding(self) -> str:
-        return self._encoding
-    @property
-    def encoding_aliases(self) -> list[str]:
-        """
-        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
-        """
-        also_known_as: list[str] = []
-        for u, p in aliases.items():
-            if self.encoding == u:
-                also_known_as.append(p)
-            elif self.encoding == p:
-                also_known_as.append(u)
-        return also_known_as
-    @property
-    def bom(self) -> bool:
-        return self._has_sig_or_bom
-    @property
-    def byte_order_mark(self) -> bool:
-        return self._has_sig_or_bom
-    @property
-    def languages(self) -> list[str]:
-        """
-        Return the complete list of possible languages found in decoded sequence.
-        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
-        """
-        return [e[0] for e in self._languages]
-    @property
-    def language(self) -> str:
-        """
-        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
-        "Unknown".
-        """
-        if not self._languages:
-            # Trying to infer the language based on the given encoding
-            # Its either English or we should not pronounce ourselves in certain cases.
-            if "ascii" in self.could_be_from_charset:
-                return "English"
-            # doing it there to avoid circular import
-            from charset_normalizer.cd import encoding_languages, mb_encoding_languages
-            languages = (
-                mb_encoding_languages(self.encoding)
-                if is_multi_byte_encoding(self.encoding)
-                else encoding_languages(self.encoding)
-            )
-            if len(languages) == 0 or "Latin Based" in languages:
-                return "Unknown"
-            return languages[0]
-        return self._languages[0][0]
-    @property
-    def chaos(self) -> float:
-        return self._mean_mess_ratio
-    @property
-    def coherence(self) -> float:
-        if not self._languages:
-            return 0.0
-        return self._languages[0][1]
-    @property
-    def percent_chaos(self) -> float:
-        return round(self.chaos * 100, ndigits=3)
-    @property
-    def percent_coherence(self) -> float:
-        return round(self.coherence * 100, ndigits=3)
-    @property
-    def raw(self) -> bytes:
-        """
-        Original untouched bytes.
-        """
-        return self._payload
-    @property
-    def submatch(self) -> list[CharsetMatch]:
-        return self._leaves
-    @property
-    def has_submatch(self) -> bool:
-        return len(self._leaves) > 0
-    @property
-    def alphabets(self) -> list[str]:
-        if self._unicode_ranges is not None:
-            return self._unicode_ranges
-        # list detected ranges
-        detected_ranges: list[str | None] = [unicode_range(char) for char in str(self)]
-        # filter and sort
-        self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
-        return self._unicode_ranges
-    @property
-    def could_be_from_charset(self) -> list[str]:
-        """
-        The complete list of encoding that output the exact SAME str result and therefore could be the originating
-        encoding.
-        This list does include the encoding available in property 'encoding'.
-        """
-        return [self._encoding] + [m.encoding for m in self._leaves]
-    def output(self, encoding: str = "utf_8") -> bytes:
-        """
-        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
-        Any errors will be simply ignored by the encoder NOT replaced.
-        """
-        if self._output_encoding is None or self._output_encoding != encoding:
-            self._output_encoding = encoding
-            decoded_string = str(self)
-            if (
-                self._preemptive_declaration is not None
-                and self._preemptive_declaration.lower()
-                not in ["utf-8", "utf8", "utf_8"]
-            ):
-                patched_header = sub(
-                    RE_POSSIBLE_ENCODING_INDICATION,
-                    lambda m: m.string[m.span()[0] : m.span()[1]].replace(
-                        m.groups()[0],
-                        iana_name(self._output_encoding).replace("_", "-"),  # type: ignore[arg-type]
-                    ),
-                    decoded_string[:8192],
-                    count=1,
-                )
-                decoded_string = patched_header + decoded_string[8192:]
-            self._output_payload = decoded_string.encode(encoding, "replace")
-        return self._output_payload  # type: ignore
-    @property
-    def fingerprint(self) -> str:
-        """
-        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
-        """
-        return sha256(self.output()).hexdigest()
-class CharsetMatches:
-    """
-    Container with every CharsetMatch items ordered by default from most probable to the less one.
-    Act like a list(iterable) but does not implements all related methods.
-    """
-    def __init__(self, results: list[CharsetMatch] | None = None):
-        self._results: list[CharsetMatch] = sorted(results) if results else []
-    def __iter__(self) -> Iterator[CharsetMatch]:
-        yield from self._results
-    def __getitem__(self, item: int | str) -> CharsetMatch:
-        """
-        Retrieve a single item either by its position or encoding name (alias may be used here).
-        Raise KeyError upon invalid index or encoding not present in results.
-        """
-        if isinstance(item, int):
-            return self._results[item]
-        if isinstance(item, str):
-            item = iana_name(item, False)
-            for result in self._results:
-                if item in result.could_be_from_charset:
-                    return result
-        raise KeyError
-    def __len__(self) -> int:
-        return len(self._results)
-    def __bool__(self) -> bool:
-        return len(self._results) > 0
-    def append(self, item: CharsetMatch) -> None:
-        """
-        Insert a single match. Will be inserted accordingly to preserve sort.
-        Can be inserted as a submatch.
-        """
-        if not isinstance(item, CharsetMatch):
-            raise ValueError(
-                "Cannot append instance '{}' to CharsetMatches".format(
-                    str(item.__class__)
-                )
-            )
-        # We should disable the submatch factoring when the input file is too heavy (conserve RAM usage)
-        if len(item.raw) < TOO_BIG_SEQUENCE:
-            for match in self._results:
-                if match.fingerprint == item.fingerprint and match.chaos == item.chaos:
-                    match.add_submatch(item)
-                    return
-        self._results.append(item)
-        self._results = sorted(self._results)
-    def best(self) -> CharsetMatch | None:
-        """
-        Simply return the first match. Strict equivalent to matches[0].
-        """
-        if not self._results:
-            return None
-        return self._results[0]
-    def first(self) -> CharsetMatch | None:
-        """
-        Redundant method, call the method best(). Kept for BC reasons.
-        """
-        return self.best()
-CoherenceMatch = Tuple[str, float]
-CoherenceMatches = List[CoherenceMatch]
-class CliDetectionResult:
-    def __init__(
-        self,
-        path: str,
-        encoding: str | None,
-        encoding_aliases: list[str],
-        alternative_encodings: list[str],
-        language: str,
-        alphabets: list[str],
-        has_sig_or_bom: bool,
-        chaos: float,
-        coherence: float,
-        unicode_path: str | None,
-        is_preferred: bool,
-    ):
-        self.path: str = path
-        self.unicode_path: str | None = unicode_path
-        self.encoding: str | None = encoding
-        self.encoding_aliases: list[str] = encoding_aliases
-        self.alternative_encodings: list[str] = alternative_encodings
-        self.language: str = language
-        self.alphabets: list[str] = alphabets
-        self.has_sig_or_bom: bool = has_sig_or_bom
-        self.chaos: float = chaos
-        self.coherence: float = coherence
-        self.is_preferred: bool = is_preferred
-    @property
-    def __dict__(self) -> dict[str, Any]:  # type: ignore
-        return {
-            "path": self.path,
-            "encoding": self.encoding,
-            "encoding_aliases": self.encoding_aliases,
-            "alternative_encodings": self.alternative_encodings,
-            "language": self.language,
-            "alphabets": self.alphabets,
-            "has_sig_or_bom": self.has_sig_or_bom,
-            "chaos": self.chaos,
-            "coherence": self.coherence,
-            "unicode_path": self.unicode_path,
-            "is_preferred": self.is_preferred,
-        }
-    def to_json(self) -> str:
-        return dumps(self.__dict__, ensure_ascii=True, indent=4)

env/Lib/site-packages/charset_normalizer/py.typed DELETED Viewed

File without changes

env/Lib/site-packages/charset_normalizer/utils.py DELETED Viewed

@@ -1,408 +0,0 @@
-from __future__ import annotations
-import importlib
-import logging
-import unicodedata
-from codecs import IncrementalDecoder
-from encodings.aliases import aliases
-from functools import lru_cache
-from re import findall
-from typing import Generator
-from _multibytecodec import (  # type: ignore[import-not-found,import]
-    MultibyteIncrementalDecoder,
-)
-from .constant import (
-    ENCODING_MARKS,
-    IANA_SUPPORTED_SIMILAR,
-    RE_POSSIBLE_ENCODING_INDICATION,
-    UNICODE_RANGES_COMBINED,
-    UNICODE_SECONDARY_RANGE_KEYWORD,
-    UTF8_MAXIMAL_ALLOCATION,
-)
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_accentuated(character: str) -> bool:
-    try:
-        description: str = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return (
-        "WITH GRAVE" in description
-        or "WITH ACUTE" in description
-        or "WITH CEDILLA" in description
-        or "WITH DIAERESIS" in description
-        or "WITH CIRCUMFLEX" in description
-        or "WITH TILDE" in description
-        or "WITH MACRON" in description
-        or "WITH RING ABOVE" in description
-    )
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def remove_accent(character: str) -> str:
-    decomposed: str = unicodedata.decomposition(character)
-    if not decomposed:
-        return character
-    codes: list[str] = decomposed.split(" ")
-    return chr(int(codes[0], 16))
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def unicode_range(character: str) -> str | None:
-    """
-    Retrieve the Unicode range official name from a single character.
-    """
-    character_ord: int = ord(character)
-    for range_name, ord_range in UNICODE_RANGES_COMBINED.items():
-        if character_ord in ord_range:
-            return range_name
-    return None
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_latin(character: str) -> bool:
-    try:
-        description: str = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return "LATIN" in description
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_punctuation(character: str) -> bool:
-    character_category: str = unicodedata.category(character)
-    if "P" in character_category:
-        return True
-    character_range: str | None = unicode_range(character)
-    if character_range is None:
-        return False
-    return "Punctuation" in character_range
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_symbol(character: str) -> bool:
-    character_category: str = unicodedata.category(character)
-    if "S" in character_category or "N" in character_category:
-        return True
-    character_range: str | None = unicode_range(character)
-    if character_range is None:
-        return False
-    return "Forms" in character_range and character_category != "Lo"
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_emoticon(character: str) -> bool:
-    character_range: str | None = unicode_range(character)
-    if character_range is None:
-        return False
-    return "Emoticons" in character_range or "Pictographs" in character_range
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_separator(character: str) -> bool:
-    if character.isspace() or character in {"｜", "+", "<", ">"}:
-        return True
-    character_category: str = unicodedata.category(character)
-    return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_case_variable(character: str) -> bool:
-    return character.islower() != character.isupper()
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_cjk(character: str) -> bool:
-    try:
-        character_name = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return "CJK" in character_name
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_hiragana(character: str) -> bool:
-    try:
-        character_name = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return "HIRAGANA" in character_name
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_katakana(character: str) -> bool:
-    try:
-        character_name = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return "KATAKANA" in character_name
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_hangul(character: str) -> bool:
-    try:
-        character_name = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return "HANGUL" in character_name
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_thai(character: str) -> bool:
-    try:
-        character_name = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return "THAI" in character_name
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_arabic(character: str) -> bool:
-    try:
-        character_name = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return "ARABIC" in character_name
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_arabic_isolated_form(character: str) -> bool:
-    try:
-        character_name = unicodedata.name(character)
-    except ValueError:  # Defensive: unicode database outdated?
-        return False
-    return "ARABIC" in character_name and "ISOLATED FORM" in character_name
-@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
-def is_unicode_range_secondary(range_name: str) -> bool:
-    return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD)
-@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
-def is_unprintable(character: str) -> bool:
-    return (
-        character.isspace() is False  # includes \n \t \r \v
-        and character.isprintable() is False
-        and character != "\x1a"  # Why? Its the ASCII substitute character.
-        and character != "\ufeff"  # bug discovered in Python,
-        # Zero Width No-Break Space located in 	Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space.
-    )
-def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> str | None:
-    """
-    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
-    """
-    if not isinstance(sequence, bytes):
-        raise TypeError
-    seq_len: int = len(sequence)
-    results: list[str] = findall(
-        RE_POSSIBLE_ENCODING_INDICATION,
-        sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"),
-    )
-    if len(results) == 0:
-        return None
-    for specified_encoding in results:
-        specified_encoding = specified_encoding.lower().replace("-", "_")
-        encoding_alias: str
-        encoding_iana: str
-        for encoding_alias, encoding_iana in aliases.items():
-            if encoding_alias == specified_encoding:
-                return encoding_iana
-            if encoding_iana == specified_encoding:
-                return encoding_iana
-    return None
-@lru_cache(maxsize=128)
-def is_multi_byte_encoding(name: str) -> bool:
-    """
-    Verify is a specific encoding is a multi byte one based on it IANA name
-    """
-    return name in {
-        "utf_8",
-        "utf_8_sig",
-        "utf_16",
-        "utf_16_be",
-        "utf_16_le",
-        "utf_32",
-        "utf_32_le",
-        "utf_32_be",
-        "utf_7",
-    } or issubclass(
-        importlib.import_module(f"encodings.{name}").IncrementalDecoder,
-        MultibyteIncrementalDecoder,
-    )
-def identify_sig_or_bom(sequence: bytes) -> tuple[str | None, bytes]:
-    """
-    Identify and extract SIG/BOM in given sequence.
-    """
-    for iana_encoding in ENCODING_MARKS:
-        marks: bytes | list[bytes] = ENCODING_MARKS[iana_encoding]
-        if isinstance(marks, bytes):
-            marks = [marks]
-        for mark in marks:
-            if sequence.startswith(mark):
-                return iana_encoding, mark
-    return None, b""
-def should_strip_sig_or_bom(iana_encoding: str) -> bool:
-    return iana_encoding not in {"utf_16", "utf_32"}
-def iana_name(cp_name: str, strict: bool = True) -> str:
-    """Returns the Python normalized encoding name (Not the IANA official name)."""
-    cp_name = cp_name.lower().replace("-", "_")
-    encoding_alias: str
-    encoding_iana: str
-    for encoding_alias, encoding_iana in aliases.items():
-        if cp_name in [encoding_alias, encoding_iana]:
-            return encoding_iana
-    if strict:
-        raise ValueError(f"Unable to retrieve IANA for '{cp_name}'")
-    return cp_name
-def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
-    if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
-        return 0.0
-    decoder_a = importlib.import_module(f"encodings.{iana_name_a}").IncrementalDecoder
-    decoder_b = importlib.import_module(f"encodings.{iana_name_b}").IncrementalDecoder
-    id_a: IncrementalDecoder = decoder_a(errors="ignore")
-    id_b: IncrementalDecoder = decoder_b(errors="ignore")
-    character_match_count: int = 0
-    for i in range(255):
-        to_be_decoded: bytes = bytes([i])
-        if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded):
-            character_match_count += 1
-    return character_match_count / 254
-def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool:
-    """
-    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
-    the function cp_similarity.
-    """
-    return (
-        iana_name_a in IANA_SUPPORTED_SIMILAR
-        and iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a]
-    )
-def set_logging_handler(
-    name: str = "charset_normalizer",
-    level: int = logging.INFO,
-    format_string: str = "%(asctime)s | %(levelname)s | %(message)s",
-) -> None:
-    logger = logging.getLogger(name)
-    logger.setLevel(level)
-    handler = logging.StreamHandler()
-    handler.setFormatter(logging.Formatter(format_string))
-    logger.addHandler(handler)
-def cut_sequence_chunks(
-    sequences: bytes,
-    encoding_iana: str,
-    offsets: range,
-    chunk_size: int,
-    bom_or_sig_available: bool,
-    strip_sig_or_bom: bool,
-    sig_payload: bytes,
-    is_multi_byte_decoder: bool,
-    decoded_payload: str | None = None,
-) -> Generator[str, None, None]:
-    if decoded_payload and is_multi_byte_decoder is False:
-        for i in offsets:
-            chunk = decoded_payload[i : i + chunk_size]
-            if not chunk:
-                break
-            yield chunk
-    else:
-        for i in offsets:
-            chunk_end = i + chunk_size
-            if chunk_end > len(sequences) + 8:
-                continue
-            cut_sequence = sequences[i : i + chunk_size]
-            if bom_or_sig_available and strip_sig_or_bom is False:
-                cut_sequence = sig_payload + cut_sequence
-            chunk = cut_sequence.decode(
-                encoding_iana,
-                errors="ignore" if is_multi_byte_decoder else "strict",
-            )
-            # multi-byte bad cutting detector and adjustment
-            # not the cleanest way to perform that fix but clever enough for now.
-            if is_multi_byte_decoder and i > 0:
-                chunk_partial_size_chk: int = min(chunk_size, 16)
-                if (
-                    decoded_payload
-                    and chunk[:chunk_partial_size_chk] not in decoded_payload
-                ):
-                    for j in range(i, i - 4, -1):
-                        cut_sequence = sequences[j:chunk_end]
-                        if bom_or_sig_available and strip_sig_or_bom is False:
-                            cut_sequence = sig_payload + cut_sequence
-                        chunk = cut_sequence.decode(encoding_iana, errors="ignore")
-                        if chunk[:chunk_partial_size_chk] in decoded_payload:
-                            break
-            yield chunk

env/Lib/site-packages/charset_normalizer/version.py DELETED Viewed

@@ -1,8 +0,0 @@
-"""
-Expose version
-"""
-from __future__ import annotations
-__version__ = "3.4.1"
-VERSION = __version__.split(".")

env/Lib/site-packages/colorama-0.4.6.dist-info/INSTALLER DELETED Viewed

	@@ -1 +0,0 @@
1	- pip

env/Lib/site-packages/colorama-0.4.6.dist-info/METADATA DELETED Viewed

@@ -1,441 +0,0 @@
-Metadata-Version: 2.1
-Name: colorama
-Version: 0.4.6
-Summary: Cross-platform colored terminal text.
-Project-URL: Homepage, https://github.com/tartley/colorama
-Author-email: Jonathan Hartley <[email protected]>
-License-File: LICENSE.txt
-Keywords: ansi,color,colour,crossplatform,terminal,text,windows,xplatform
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Environment :: Console
-Classifier: Intended Audience :: Developers
-Classifier: License :: OSI Approved :: BSD License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 2
-Classifier: Programming Language :: Python :: 2.7
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: Implementation :: CPython
-Classifier: Programming Language :: Python :: Implementation :: PyPy
-Classifier: Topic :: Terminals
-Requires-Python: !=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7
-Description-Content-Type: text/x-rst
-.. image:: https://img.shields.io/pypi/v/colorama.svg
-    :target: https://pypi.org/project/colorama/
-    :alt: Latest Version
-.. image:: https://img.shields.io/pypi/pyversions/colorama.svg
-    :target: https://pypi.org/project/colorama/
-    :alt: Supported Python versions
-.. image:: https://github.com/tartley/colorama/actions/workflows/test.yml/badge.svg
-    :target: https://github.com/tartley/colorama/actions/workflows/test.yml
-    :alt: Build Status
-Colorama
-========
-Makes ANSI escape character sequences (for producing colored terminal text and
-cursor positioning) work under MS Windows.
-.. |donate| image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_SM.gif
-  :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=2MZ9D2GMLYCUJ&item_name=Colorama&currency_code=USD
-  :alt: Donate with Paypal
-`PyPI for releases <https://pypi.org/project/colorama/>`_ |
-`Github for source <https://github.com/tartley/colorama>`_ |
-`Colorama for enterprise on Tidelift <https://github.com/tartley/colorama/blob/master/ENTERPRISE.md>`_
-If you find Colorama useful, please |donate| to the authors. Thank you!
-Installation
-------------
-Tested on CPython 2.7, 3.7, 3.8, 3.9 and 3.10 and Pypy 2.7 and 3.8.
-No requirements other than the standard library.
-.. code-block:: bash
-    pip install colorama
-    # or
-    conda install -c anaconda colorama
-Description
------------
-ANSI escape character sequences have long been used to produce colored terminal
-text and cursor positioning on Unix and Macs. Colorama makes this work on
-Windows, too, by wrapping ``stdout``, stripping ANSI sequences it finds (which
-would appear as gobbledygook in the output), and converting them into the
-appropriate win32 calls to modify the state of the terminal. On other platforms,
-Colorama does nothing.
-This has the upshot of providing a simple cross-platform API for printing
-colored terminal text from Python, and has the happy side-effect that existing
-applications or libraries which use ANSI sequences to produce colored output on
-Linux or Macs can now also work on Windows, simply by calling
-``colorama.just_fix_windows_console()`` (since v0.4.6) or ``colorama.init()``
-(all versions, but may have other side-effects – see below).
-An alternative approach is to install ``ansi.sys`` on Windows machines, which
-provides the same behaviour for all applications running in terminals. Colorama
-is intended for situations where that isn't easy (e.g., maybe your app doesn't
-have an installer.)
-Demo scripts in the source code repository print some colored text using
-ANSI sequences. Compare their output under Gnome-terminal's built in ANSI
-handling, versus on Windows Command-Prompt using Colorama:
-.. image:: https://github.com/tartley/colorama/raw/master/screenshots/ubuntu-demo.png
-    :width: 661
-    :height: 357
-    :alt: ANSI sequences on Ubuntu under gnome-terminal.
-.. image:: https://github.com/tartley/colorama/raw/master/screenshots/windows-demo.png
-    :width: 668
-    :height: 325
-    :alt: Same ANSI sequences on Windows, using Colorama.
-These screenshots show that, on Windows, Colorama does not support ANSI 'dim
-text'; it looks the same as 'normal text'.
-Usage
------
-Initialisation
-..............
-If the only thing you want from Colorama is to get ANSI escapes to work on
-Windows, then run:
-.. code-block:: python
-    from colorama import just_fix_windows_console
-    just_fix_windows_console()
-If you're on a recent version of Windows 10 or better, and your stdout/stderr
-are pointing to a Windows console, then this will flip the magic configuration
-switch to enable Windows' built-in ANSI support.
-If you're on an older version of Windows, and your stdout/stderr are pointing to
-a Windows console, then this will wrap ``sys.stdout`` and/or ``sys.stderr`` in a
-magic file object that intercepts ANSI escape sequences and issues the
-appropriate Win32 calls to emulate them.
-In all other circumstances, it does nothing whatsoever. Basically the idea is
-that this makes Windows act like Unix with respect to ANSI escape handling.
-It's safe to call this function multiple times. It's safe to call this function
-on non-Windows platforms, but it won't do anything. It's safe to call this
-function when one or both of your stdout/stderr are redirected to a file – it
-won't do anything to those streams.
-Alternatively, you can use the older interface with more features (but also more
-potential footguns):
-.. code-block:: python
-    from colorama import init
-    init()
-This does the same thing as ``just_fix_windows_console``, except for the
-following differences:
-- It's not safe to call ``init`` multiple times; you can end up with multiple
-  layers of wrapping and broken ANSI support.
-- Colorama will apply a heuristic to guess whether stdout/stderr support ANSI,
-  and if it thinks they don't, then it will wrap ``sys.stdout`` and
-  ``sys.stderr`` in a magic file object that strips out ANSI escape sequences
-  before printing them. This happens on all platforms, and can be convenient if
-  you want to write your code to emit ANSI escape sequences unconditionally, and
-  let Colorama decide whether they should actually be output. But note that
-  Colorama's heuristic is not particularly clever.
-- ``init`` also accepts explicit keyword args to enable/disable various
-  functionality – see below.
-To stop using Colorama before your program exits, simply call ``deinit()``.
-This will restore ``stdout`` and ``stderr`` to their original values, so that
-Colorama is disabled. To resume using Colorama again, call ``reinit()``; it is
-cheaper than calling ``init()`` again (but does the same thing).
-Most users should depend on ``colorama >= 0.4.6``, and use
-``just_fix_windows_console``. The old ``init`` interface will be supported
-indefinitely for backwards compatibility, but we don't plan to fix any issues
-with it, also for backwards compatibility.
-Colored Output
-..............
-Cross-platform printing of colored text can then be done using Colorama's
-constant shorthand for ANSI escape sequences. These are deliberately
-rudimentary, see below.
-.. code-block:: python
-    from colorama import Fore, Back, Style
-    print(Fore.RED + 'some red text')
-    print(Back.GREEN + 'and with a green background')
-    print(Style.DIM + 'and in dim text')
-    print(Style.RESET_ALL)
-    print('back to normal now')
-...or simply by manually printing ANSI sequences from your own code:
-.. code-block:: python
-    print('\033[31m' + 'some red text')
-    print('\033[39m') # and reset to default color
-...or, Colorama can be used in conjunction with existing ANSI libraries
-such as the venerable `Termcolor <https://pypi.org/project/termcolor/>`_
-the fabulous `Blessings <https://pypi.org/project/blessings/>`_,
-or the incredible `_Rich <https://pypi.org/project/rich/>`_.
-If you wish Colorama's Fore, Back and Style constants were more capable,
-then consider using one of the above highly capable libraries to generate
-colors, etc, and use Colorama just for its primary purpose: to convert
-those ANSI sequences to also work on Windows:
-SIMILARLY, do not send PRs adding the generation of new ANSI types to Colorama.
-We are only interested in converting ANSI codes to win32 API calls, not
-shortcuts like the above to generate ANSI characters.
-.. code-block:: python
-    from colorama import just_fix_windows_console
-    from termcolor import colored
-    # use Colorama to make Termcolor work on Windows too
-    just_fix_windows_console()
-    # then use Termcolor for all colored text output
-    print(colored('Hello, World!', 'green', 'on_red'))
-Available formatting constants are::
-    Fore: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET.
-    Back: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET.
-    Style: DIM, NORMAL, BRIGHT, RESET_ALL
-``Style.RESET_ALL`` resets foreground, background, and brightness. Colorama will
-perform this reset automatically on program exit.
-These are fairly well supported, but not part of the standard::
-    Fore: LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX, LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX
-    Back: LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX, LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX
-Cursor Positioning
-..................
-ANSI codes to reposition the cursor are supported. See ``demos/demo06.py`` for
-an example of how to generate them.
-Init Keyword Args
-.................
-``init()`` accepts some ``**kwargs`` to override default behaviour.
-init(autoreset=False):
-    If you find yourself repeatedly sending reset sequences to turn off color
-    changes at the end of every print, then ``init(autoreset=True)`` will
-    automate that:
-    .. code-block:: python
-        from colorama import init
-        init(autoreset=True)
-        print(Fore.RED + 'some red text')
-        print('automatically back to default color again')
-init(strip=None):
-    Pass ``True`` or ``False`` to override whether ANSI codes should be
-    stripped from the output. The default behaviour is to strip if on Windows
-    or if output is redirected (not a tty).
-init(convert=None):
-    Pass ``True`` or ``False`` to override whether to convert ANSI codes in the
-    output into win32 calls. The default behaviour is to convert if on Windows
-    and output is to a tty (terminal).
-init(wrap=True):
-    On Windows, Colorama works by replacing ``sys.stdout`` and ``sys.stderr``
-    with proxy objects, which override the ``.write()`` method to do their work.
-    If this wrapping causes you problems, then this can be disabled by passing
-    ``init(wrap=False)``. The default behaviour is to wrap if ``autoreset`` or
-    ``strip`` or ``convert`` are True.
-    When wrapping is disabled, colored printing on non-Windows platforms will
-    continue to work as normal. To do cross-platform colored output, you can
-    use Colorama's ``AnsiToWin32`` proxy directly:
-    .. code-block:: python
-        import sys
-        from colorama import init, AnsiToWin32
-        init(wrap=False)
-        stream = AnsiToWin32(sys.stderr).stream
-        # Python 2
-        print >>stream, Fore.BLUE + 'blue text on stderr'
-        # Python 3
-        print(Fore.BLUE + 'blue text on stderr', file=stream)
-Recognised ANSI Sequences
-.........................
-ANSI sequences generally take the form::
-    ESC [ <param> ; <param> ... <command>
-Where ``<param>`` is an integer, and ``<command>`` is a single letter. Zero or
-more params are passed to a ``<command>``. If no params are passed, it is
-generally synonymous with passing a single zero. No spaces exist in the
-sequence; they have been inserted here simply to read more easily.
-The only ANSI sequences that Colorama converts into win32 calls are::
-    ESC [ 0 m       # reset all (colors and brightness)
-    ESC [ 1 m       # bright
-    ESC [ 2 m       # dim (looks same as normal brightness)
-    ESC [ 22 m      # normal brightness
-    # FOREGROUND:
-    ESC [ 30 m      # black
-    ESC [ 31 m      # red
-    ESC [ 32 m      # green
-    ESC [ 33 m      # yellow
-    ESC [ 34 m      # blue
-    ESC [ 35 m      # magenta
-    ESC [ 36 m      # cyan
-    ESC [ 37 m      # white
-    ESC [ 39 m      # reset
-    # BACKGROUND
-    ESC [ 40 m      # black
-    ESC [ 41 m      # red
-    ESC [ 42 m      # green
-    ESC [ 43 m      # yellow
-    ESC [ 44 m      # blue
-    ESC [ 45 m      # magenta
-    ESC [ 46 m      # cyan
-    ESC [ 47 m      # white
-    ESC [ 49 m      # reset
-    # cursor positioning
-    ESC [ y;x H     # position cursor at x across, y down
-    ESC [ y;x f     # position cursor at x across, y down
-    ESC [ n A       # move cursor n lines up
-    ESC [ n B       # move cursor n lines down
-    ESC [ n C       # move cursor n characters forward
-    ESC [ n D       # move cursor n characters backward
-    # clear the screen
-    ESC [ mode J    # clear the screen
-    # clear the line
-    ESC [ mode K    # clear the line
-Multiple numeric params to the ``'m'`` command can be combined into a single
-sequence::
-    ESC [ 36 ; 45 ; 1 m     # bright cyan text on magenta background
-All other ANSI sequences of the form ``ESC [ <param> ; <param> ... <command>``
-are silently stripped from the output on Windows.
-Any other form of ANSI sequence, such as single-character codes or alternative
-initial characters, are not recognised or stripped. It would be cool to add
-them though. Let me know if it would be useful for you, via the Issues on
-GitHub.
-Status & Known Problems
------------------------
-I've personally only tested it on Windows XP (CMD, Console2), Ubuntu
-(gnome-terminal, xterm), and OS X.
-Some valid ANSI sequences aren't recognised.
-If you're hacking on the code, see `README-hacking.md`_. ESPECIALLY, see the
-explanation there of why we do not want PRs that allow Colorama to generate new
-types of ANSI codes.
-See outstanding issues and wish-list:
-https://github.com/tartley/colorama/issues
-If anything doesn't work for you, or doesn't do what you expected or hoped for,
-I'd love to hear about it on that issues list, would be delighted by patches,
-and would be happy to grant commit access to anyone who submits a working patch
-or two.
-.. _README-hacking.md: README-hacking.md
-License
--------
-Copyright Jonathan Hartley & Arnon Yaari, 2013-2020. BSD 3-Clause license; see
-LICENSE file.
-Professional support
---------------------
-.. |tideliftlogo| image:: https://cdn2.hubspot.net/hubfs/4008838/website/logos/logos_for_download/Tidelift_primary-shorthand-logo.png
-   :alt: Tidelift
-   :target: https://tidelift.com/subscription/pkg/pypi-colorama?utm_source=pypi-colorama&utm_medium=referral&utm_campaign=readme
-.. list-table::
-   :widths: 10 100
-   * - |tideliftlogo|
-     - Professional support for colorama is available as part of the
-       `Tidelift Subscription`_.
-       Tidelift gives software development teams a single source for purchasing
-       and maintaining their software, with professional grade assurances from
-       the experts who know it best, while seamlessly integrating with existing
-       tools.
-.. _Tidelift Subscription: https://tidelift.com/subscription/pkg/pypi-colorama?utm_source=pypi-colorama&utm_medium=referral&utm_campaign=readme
-Thanks
-------
-See the CHANGELOG for more thanks!
-* Marc Schlaich (schlamar) for a ``setup.py`` fix for Python2.5.
-* Marc Abramowitz, reported & fixed a crash on exit with closed ``stdout``,
-  providing a solution to issue #7's setuptools/distutils debate,
-  and other fixes.
-* User 'eryksun', for guidance on correctly instantiating ``ctypes.windll``.
-* Matthew McCormick for politely pointing out a longstanding crash on non-Win.
-* Ben Hoyt, for a magnificent fix under 64-bit Windows.
-* Jesse at Empty Square for submitting a fix for examples in the README.
-* User 'jamessp', an observant documentation fix for cursor positioning.
-* User 'vaal1239', Dave Mckee & Lackner Kristof for a tiny but much-needed Win7
-  fix.
-* Julien Stuyck, for wisely suggesting Python3 compatible updates to README.
-* Daniel Griffith for multiple fabulous patches.
-* Oscar Lesta for a valuable fix to stop ANSI chars being sent to non-tty
-  output.
-* Roger Binns, for many suggestions, valuable feedback, & bug reports.
-* Tim Golden for thought and much appreciated feedback on the initial idea.
-* User 'Zearin' for updates to the README file.
-* John Szakmeister for adding support for light colors
-* Charles Merriam for adding documentation to demos
-* Jurko for a fix on 64-bit Windows CPython2.5 w/o ctypes
-* Florian Bruhin for a fix when stdout or stderr are None
-* Thomas Weininger for fixing ValueError on Windows
-* Remi Rampin for better Github integration and fixes to the README file
-* Simeon Visser for closing a file handle using 'with' and updating classifiers
-  to include Python 3.3 and 3.4
-* Andy Neff for fixing RESET of LIGHT_EX colors.
-* Jonathan Hartley for the initial idea and implementation.

env/Lib/site-packages/colorama-0.4.6.dist-info/RECORD DELETED Viewed

@@ -1,31 +0,0 @@
-colorama-0.4.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
-colorama-0.4.6.dist-info/METADATA,sha256=e67SnrUMOym9sz_4TjF3vxvAV4T3aF7NyqRHHH3YEMw,17158
-colorama-0.4.6.dist-info/RECORD,,
-colorama-0.4.6.dist-info/WHEEL,sha256=cdcF4Fbd0FPtw2EMIOwH-3rSOTUdTCeOSXRMD1iLUb8,105
-colorama-0.4.6.dist-info/licenses/LICENSE.txt,sha256=ysNcAmhuXQSlpxQL-zs25zrtSWZW6JEQLkKIhteTAxg,1491
-colorama/__init__.py,sha256=wePQA4U20tKgYARySLEC047ucNX-g8pRLpYBuiHlLb8,266
-colorama/__pycache__/__init__.cpython-312.pyc,,
-colorama/__pycache__/ansi.cpython-312.pyc,,
-colorama/__pycache__/ansitowin32.cpython-312.pyc,,
-colorama/__pycache__/initialise.cpython-312.pyc,,
-colorama/__pycache__/win32.cpython-312.pyc,,
-colorama/__pycache__/winterm.cpython-312.pyc,,
-colorama/ansi.py,sha256=Top4EeEuaQdBWdteKMEcGOTeKeF19Q-Wo_6_Cj5kOzQ,2522
-colorama/ansitowin32.py,sha256=vPNYa3OZbxjbuFyaVo0Tmhmy1FZ1lKMWCnT7odXpItk,11128
-colorama/initialise.py,sha256=-hIny86ClXo39ixh5iSCfUIa2f_h_bgKRDW7gqs-KLU,3325
-colorama/tests/__init__.py,sha256=MkgPAEzGQd-Rq0w0PZXSX2LadRWhUECcisJY8lSrm4Q,75
-colorama/tests/__pycache__/__init__.cpython-312.pyc,,
-colorama/tests/__pycache__/ansi_test.cpython-312.pyc,,
-colorama/tests/__pycache__/ansitowin32_test.cpython-312.pyc,,
-colorama/tests/__pycache__/initialise_test.cpython-312.pyc,,
-colorama/tests/__pycache__/isatty_test.cpython-312.pyc,,
-colorama/tests/__pycache__/utils.cpython-312.pyc,,
-colorama/tests/__pycache__/winterm_test.cpython-312.pyc,,
-colorama/tests/ansi_test.py,sha256=FeViDrUINIZcr505PAxvU4AjXz1asEiALs9GXMhwRaE,2839
-colorama/tests/ansitowin32_test.py,sha256=RN7AIhMJ5EqDsYaCjVo-o4u8JzDD4ukJbmevWKS70rY,10678
-colorama/tests/initialise_test.py,sha256=BbPy-XfyHwJ6zKozuQOvNvQZzsx9vdb_0bYXn7hsBTc,6741
-colorama/tests/isatty_test.py,sha256=Pg26LRpv0yQDB5Ac-sxgVXG7hsA1NYvapFgApZfYzZg,1866
-colorama/tests/utils.py,sha256=1IIRylG39z5-dzq09R_ngufxyPZxgldNbrxKxUGwGKE,1079
-colorama/tests/winterm_test.py,sha256=qoWFPEjym5gm2RuMwpf3pOis3a5r_PJZFCzK254JL8A,3709
-colorama/win32.py,sha256=YQOKwMTwtGBbsY4dL5HYTvwTeP9wIQra5MvPNddpxZs,6181
-colorama/winterm.py,sha256=XCQFDHjPi6AHYNdZwy0tA02H-Jh48Jp-HvCjeLeLp3U,7134

env/Lib/site-packages/colorama-0.4.6.dist-info/WHEEL DELETED Viewed

@@ -1,5 +0,0 @@
-Wheel-Version: 1.0
-Generator: hatchling 1.11.1
-Root-Is-Purelib: true
-Tag: py2-none-any
-Tag: py3-none-any

env/Lib/site-packages/colorama-0.4.6.dist-info/licenses/LICENSE.txt DELETED Viewed

@@ -1,27 +0,0 @@
-Copyright (c) 2010 Jonathan Hartley
-All rights reserved.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-* Neither the name of the copyright holders, nor those of its contributors
-  may be used to endorse or promote products derived from this software without
-  specific prior written permission.
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

env/Lib/site-packages/colorama/__init__.py DELETED Viewed

@@ -1,7 +0,0 @@
-# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
-from .initialise import init, deinit, reinit, colorama_text, just_fix_windows_console
-from .ansi import Fore, Back, Style, Cursor
-from .ansitowin32 import AnsiToWin32
-__version__ = '0.4.6'

env/Lib/site-packages/colorama/ansi.py DELETED Viewed

@@ -1,102 +0,0 @@
-# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
-'''
-This module generates ANSI character codes to printing colors to terminals.
-See: http://en.wikipedia.org/wiki/ANSI_escape_code
-'''
-CSI = '\033['
-OSC = '\033]'
-BEL = '\a'
-def code_to_chars(code):
-    return CSI + str(code) + 'm'
-def set_title(title):
-    return OSC + '2;' + title + BEL
-def clear_screen(mode=2):
-    return CSI + str(mode) + 'J'
-def clear_line(mode=2):
-    return CSI + str(mode) + 'K'
-class AnsiCodes(object):
-    def __init__(self):
-        # the subclasses declare class attributes which are numbers.
-        # Upon instantiation we define instance attributes, which are the same
-        # as the class attributes but wrapped with the ANSI escape sequence
-        for name in dir(self):
-            if not name.startswith('_'):
-                value = getattr(self, name)
-                setattr(self, name, code_to_chars(value))
-class AnsiCursor(object):
-    def UP(self, n=1):
-        return CSI + str(n) + 'A'
-    def DOWN(self, n=1):
-        return CSI + str(n) + 'B'
-    def FORWARD(self, n=1):
-        return CSI + str(n) + 'C'
-    def BACK(self, n=1):
-        return CSI + str(n) + 'D'
-    def POS(self, x=1, y=1):
-        return CSI + str(y) + ';' + str(x) + 'H'
-class AnsiFore(AnsiCodes):
-    BLACK           = 30
-    RED             = 31
-    GREEN           = 32
-    YELLOW          = 33
-    BLUE            = 34
-    MAGENTA         = 35
-    CYAN            = 36
-    WHITE           = 37
-    RESET           = 39
-    # These are fairly well supported, but not part of the standard.
-    LIGHTBLACK_EX   = 90
-    LIGHTRED_EX     = 91
-    LIGHTGREEN_EX   = 92
-    LIGHTYELLOW_EX  = 93
-    LIGHTBLUE_EX    = 94
-    LIGHTMAGENTA_EX = 95
-    LIGHTCYAN_EX    = 96
-    LIGHTWHITE_EX   = 97
-class AnsiBack(AnsiCodes):
-    BLACK           = 40
-    RED             = 41
-    GREEN           = 42
-    YELLOW          = 43
-    BLUE            = 44
-    MAGENTA         = 45
-    CYAN            = 46
-    WHITE           = 47
-    RESET           = 49
-    # These are fairly well supported, but not part of the standard.
-    LIGHTBLACK_EX   = 100
-    LIGHTRED_EX     = 101
-    LIGHTGREEN_EX   = 102
-    LIGHTYELLOW_EX  = 103
-    LIGHTBLUE_EX    = 104
-    LIGHTMAGENTA_EX = 105
-    LIGHTCYAN_EX    = 106
-    LIGHTWHITE_EX   = 107
-class AnsiStyle(AnsiCodes):
-    BRIGHT    = 1
-    DIM       = 2
-    NORMAL    = 22
-    RESET_ALL = 0
-Fore   = AnsiFore()
-Back   = AnsiBack()
-Style  = AnsiStyle()
-Cursor = AnsiCursor()

env/Lib/site-packages/colorama/ansitowin32.py DELETED Viewed

@@ -1,277 +0,0 @@
-# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
-import re
-import sys
-import os
-from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style, BEL
-from .winterm import enable_vt_processing, WinTerm, WinColor, WinStyle
-from .win32 import windll, winapi_test
-winterm = None
-if windll is not None:
-    winterm = WinTerm()
-class StreamWrapper(object):
-    '''
-    Wraps a stream (such as stdout), acting as a transparent proxy for all
-    attribute access apart from method 'write()', which is delegated to our
-    Converter instance.
-    '''
-    def __init__(self, wrapped, converter):
-        # double-underscore everything to prevent clashes with names of
-        # attributes on the wrapped stream object.
-        self.__wrapped = wrapped
-        self.__convertor = converter
-    def __getattr__(self, name):
-        return getattr(self.__wrapped, name)
-    def __enter__(self, *args, **kwargs):
-        # special method lookup bypasses __getattr__/__getattribute__, see
-        # https://stackoverflow.com/questions/12632894/why-doesnt-getattr-work-with-exit
-        # thus, contextlib magic methods are not proxied via __getattr__
-        return self.__wrapped.__enter__(*args, **kwargs)
-    def __exit__(self, *args, **kwargs):
-        return self.__wrapped.__exit__(*args, **kwargs)
-    def __setstate__(self, state):
-        self.__dict__ = state
-    def __getstate__(self):
-        return self.__dict__
-    def write(self, text):
-        self.__convertor.write(text)
-    def isatty(self):
-        stream = self.__wrapped
-        if 'PYCHARM_HOSTED' in os.environ:
-            if stream is not None and (stream is sys.__stdout__ or stream is sys.__stderr__):
-                return True
-        try:
-            stream_isatty = stream.isatty
-        except AttributeError:
-            return False
-        else:
-            return stream_isatty()
-    @property
-    def closed(self):
-        stream = self.__wrapped
-        try:
-            return stream.closed
-        # AttributeError in the case that the stream doesn't support being closed
-        # ValueError for the case that the stream has already been detached when atexit runs
-        except (AttributeError, ValueError):
-            return True
-class AnsiToWin32(object):
-    '''
-    Implements a 'write()' method which, on Windows, will strip ANSI character
-    sequences from the text, and if outputting to a tty, will convert them into
-    win32 function calls.
-    '''
-    ANSI_CSI_RE = re.compile('\001?\033\\[((?:\\d|;)*)([a-zA-Z])\002?')   # Control Sequence Introducer
-    ANSI_OSC_RE = re.compile('\001?\033\\]([^\a]*)(\a)\002?')             # Operating System Command
-    def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
-        # The wrapped stream (normally sys.stdout or sys.stderr)
-        self.wrapped = wrapped
-        # should we reset colors to defaults after every .write()
-        self.autoreset = autoreset
-        # create the proxy wrapping our output stream
-        self.stream = StreamWrapper(wrapped, self)
-        on_windows = os.name == 'nt'
-        # We test if the WinAPI works, because even if we are on Windows
-        # we may be using a terminal that doesn't support the WinAPI
-        # (e.g. Cygwin Terminal). In this case it's up to the terminal
-        # to support the ANSI codes.
-        conversion_supported = on_windows and winapi_test()
-        try:
-            fd = wrapped.fileno()
-        except Exception:
-            fd = -1
-        system_has_native_ansi = not on_windows or enable_vt_processing(fd)
-        have_tty = not self.stream.closed and self.stream.isatty()
-        need_conversion = conversion_supported and not system_has_native_ansi
-        # should we strip ANSI sequences from our output?
-        if strip is None:
-            strip = need_conversion or not have_tty
-        self.strip = strip
-        # should we should convert ANSI sequences into win32 calls?
-        if convert is None:
-            convert = need_conversion and have_tty
-        self.convert = convert
-        # dict of ansi codes to win32 functions and parameters
-        self.win32_calls = self.get_win32_calls()
-        # are we wrapping stderr?
-        self.on_stderr = self.wrapped is sys.stderr
-    def should_wrap(self):
-        '''
-        True if this class is actually needed. If false, then the output
-        stream will not be affected, nor will win32 calls be issued, so
-        wrapping stdout is not actually required. This will generally be
-        False on non-Windows platforms, unless optional functionality like
-        autoreset has been requested using kwargs to init()
-        '''
-        return self.convert or self.strip or self.autoreset
-    def get_win32_calls(self):
-        if self.convert and winterm:
-            return {
-                AnsiStyle.RESET_ALL: (winterm.reset_all, ),
-                AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
-                AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
-                AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
-                AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
-                AnsiFore.RED: (winterm.fore, WinColor.RED),
-                AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
-                AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
-                AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
-                AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
-                AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
-                AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
-                AnsiFore.RESET: (winterm.fore, ),
-                AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True),
-                AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True),
-                AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True),
-                AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True),
-                AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True),
-                AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True),
-                AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True),
-                AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True),
-                AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
-                AnsiBack.RED: (winterm.back, WinColor.RED),
-                AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
-                AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
-                AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
-                AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
-                AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
-                AnsiBack.WHITE: (winterm.back, WinColor.GREY),
-                AnsiBack.RESET: (winterm.back, ),
-                AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True),
-                AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True),
-                AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True),
-                AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True),
-                AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True),
-                AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True),
-                AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True),
-                AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True),
-            }
-        return dict()
-    def write(self, text):
-        if self.strip or self.convert:
-            self.write_and_convert(text)
-        else:
-            self.wrapped.write(text)
-            self.wrapped.flush()
-        if self.autoreset:
-            self.reset_all()
-    def reset_all(self):
-        if self.convert:
-            self.call_win32('m', (0,))
-        elif not self.strip and not self.stream.closed:
-            self.wrapped.write(Style.RESET_ALL)
-    def write_and_convert(self, text):
-        '''
-        Write the given text to our wrapped stream, stripping any ANSI
-        sequences from the text, and optionally converting them into win32
-        calls.
-        '''
-        cursor = 0
-        text = self.convert_osc(text)
-        for match in self.ANSI_CSI_RE.finditer(text):
-            start, end = match.span()
-            self.write_plain_text(text, cursor, start)
-            self.convert_ansi(*match.groups())
-            cursor = end
-        self.write_plain_text(text, cursor, len(text))
-    def write_plain_text(self, text, start, end):
-        if start < end:
-            self.wrapped.write(text[start:end])
-            self.wrapped.flush()
-    def convert_ansi(self, paramstring, command):
-        if self.convert:
-            params = self.extract_params(command, paramstring)
-            self.call_win32(command, params)
-    def extract_params(self, command, paramstring):
-        if command in 'Hf':
-            params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';'))
-            while len(params) < 2:
-                # defaults:
-                params = params + (1,)
-        else:
-            params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0)
-            if len(params) == 0:
-                # defaults:
-                if command in 'JKm':
-                    params = (0,)
-                elif command in 'ABCD':
-                    params = (1,)
-        return params
-    def call_win32(self, command, params):
-        if command == 'm':
-            for param in params:
-                if param in self.win32_calls:
-                    func_args = self.win32_calls[param]
-                    func = func_args[0]
-                    args = func_args[1:]
-                    kwargs = dict(on_stderr=self.on_stderr)
-                    func(*args, **kwargs)
-        elif command in 'J':
-            winterm.erase_screen(params[0], on_stderr=self.on_stderr)
-        elif command in 'K':
-            winterm.erase_line(params[0], on_stderr=self.on_stderr)
-        elif command in 'Hf':     # cursor position - absolute
-            winterm.set_cursor_position(params, on_stderr=self.on_stderr)
-        elif command in 'ABCD':   # cursor position - relative
-            n = params[0]
-            # A - up, B - down, C - forward, D - back
-            x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command]
-            winterm.cursor_adjust(x, y, on_stderr=self.on_stderr)
-    def convert_osc(self, text):
-        for match in self.ANSI_OSC_RE.finditer(text):
-            start, end = match.span()
-            text = text[:start] + text[end:]
-            paramstring, command = match.groups()
-            if command == BEL:
-                if paramstring.count(";") == 1:
-                    params = paramstring.split(";")
-                    # 0 - change title and icon (we will only change title)
-                    # 1 - change icon (we don't support this)
-                    # 2 - change title
-                    if params[0] in '02':
-                        winterm.set_title(params[1])
-        return text
-    def flush(self):
-        self.wrapped.flush()

env/Lib/site-packages/colorama/initialise.py DELETED Viewed

@@ -1,121 +0,0 @@
-# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
-import atexit
-import contextlib
-import sys
-from .ansitowin32 import AnsiToWin32
-def _wipe_internal_state_for_tests():
-    global orig_stdout, orig_stderr
-    orig_stdout = None
-    orig_stderr = None
-    global wrapped_stdout, wrapped_stderr
-    wrapped_stdout = None
-    wrapped_stderr = None
-    global atexit_done
-    atexit_done = False
-    global fixed_windows_console
-    fixed_windows_console = False
-    try:
-        # no-op if it wasn't registered
-        atexit.unregister(reset_all)
-    except AttributeError:
-        # python 2: no atexit.unregister. Oh well, we did our best.
-        pass
-def reset_all():
-    if AnsiToWin32 is not None:    # Issue #74: objects might become None at exit
-        AnsiToWin32(orig_stdout).reset_all()
-def init(autoreset=False, convert=None, strip=None, wrap=True):
-    if not wrap and any([autoreset, convert, strip]):
-        raise ValueError('wrap=False conflicts with any other arg=True')
-    global wrapped_stdout, wrapped_stderr
-    global orig_stdout, orig_stderr
-    orig_stdout = sys.stdout
-    orig_stderr = sys.stderr
-    if sys.stdout is None:
-        wrapped_stdout = None
-    else:
-        sys.stdout = wrapped_stdout = \
-            wrap_stream(orig_stdout, convert, strip, autoreset, wrap)
-    if sys.stderr is None:
-        wrapped_stderr = None
-    else:
-        sys.stderr = wrapped_stderr = \
-            wrap_stream(orig_stderr, convert, strip, autoreset, wrap)
-    global atexit_done
-    if not atexit_done:
-        atexit.register(reset_all)
-        atexit_done = True
-def deinit():
-    if orig_stdout is not None:
-        sys.stdout = orig_stdout
-    if orig_stderr is not None:
-        sys.stderr = orig_stderr
-def just_fix_windows_console():
-    global fixed_windows_console
-    if sys.platform != "win32":
-        return
-    if fixed_windows_console:
-        return
-    if wrapped_stdout is not None or wrapped_stderr is not None:
-        # Someone already ran init() and it did stuff, so we won't second-guess them
-        return
-    # On newer versions of Windows, AnsiToWin32.__init__ will implicitly enable the
-    # native ANSI support in the console as a side-effect. We only need to actually
-    # replace sys.stdout/stderr if we're in the old-style conversion mode.
-    new_stdout = AnsiToWin32(sys.stdout, convert=None, strip=None, autoreset=False)
-    if new_stdout.convert:
-        sys.stdout = new_stdout
-    new_stderr = AnsiToWin32(sys.stderr, convert=None, strip=None, autoreset=False)
-    if new_stderr.convert:
-        sys.stderr = new_stderr
-    fixed_windows_console = True
-@contextlib.contextmanager
-def colorama_text(*args, **kwargs):
-    init(*args, **kwargs)
-    try:
-        yield
-    finally:
-        deinit()
-def reinit():
-    if wrapped_stdout is not None:
-        sys.stdout = wrapped_stdout
-    if wrapped_stderr is not None:
-        sys.stderr = wrapped_stderr
-def wrap_stream(stream, convert, strip, autoreset, wrap):
-    if wrap:
-        wrapper = AnsiToWin32(stream,
-            convert=convert, strip=strip, autoreset=autoreset)
-        if wrapper.should_wrap():
-            stream = wrapper.stream
-    return stream
-# Use this for initial setup as well, to reduce code duplication
-_wipe_internal_state_for_tests()

env/Lib/site-packages/colorama/tests/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.

env/Lib/site-packages/colorama/tests/ansi_test.py DELETED Viewed

@@ -1,76 +0,0 @@
-# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
-import sys
-from unittest import TestCase, main
-from ..ansi import Back, Fore, Style
-from ..ansitowin32 import AnsiToWin32
-stdout_orig = sys.stdout
-stderr_orig = sys.stderr
-class AnsiTest(TestCase):
-    def setUp(self):
-        # sanity check: stdout should be a file or StringIO object.
-        # It will only be AnsiToWin32 if init() has previously wrapped it
-        self.assertNotEqual(type(sys.stdout), AnsiToWin32)
-        self.assertNotEqual(type(sys.stderr), AnsiToWin32)
-    def tearDown(self):
-        sys.stdout = stdout_orig
-        sys.stderr = stderr_orig
-    def testForeAttributes(self):
-        self.assertEqual(Fore.BLACK, '\033[30m')
-        self.assertEqual(Fore.RED, '\033[31m')
-        self.assertEqual(Fore.GREEN, '\033[32m')
-        self.assertEqual(Fore.YELLOW, '\033[33m')
-        self.assertEqual(Fore.BLUE, '\033[34m')
-        self.assertEqual(Fore.MAGENTA, '\033[35m')
-        self.assertEqual(Fore.CYAN, '\033[36m')
-        self.assertEqual(Fore.WHITE, '\033[37m')
-        self.assertEqual(Fore.RESET, '\033[39m')
-        # Check the light, extended versions.
-        self.assertEqual(Fore.LIGHTBLACK_EX, '\033[90m')
-        self.assertEqual(Fore.LIGHTRED_EX, '\033[91m')
-        self.assertEqual(Fore.LIGHTGREEN_EX, '\033[92m')
-        self.assertEqual(Fore.LIGHTYELLOW_EX, '\033[93m')
-        self.assertEqual(Fore.LIGHTBLUE_EX, '\033[94m')
-        self.assertEqual(Fore.LIGHTMAGENTA_EX, '\033[95m')
-        self.assertEqual(Fore.LIGHTCYAN_EX, '\033[96m')
-        self.assertEqual(Fore.LIGHTWHITE_EX, '\033[97m')
-    def testBackAttributes(self):
-        self.assertEqual(Back.BLACK, '\033[40m')
-        self.assertEqual(Back.RED, '\033[41m')
-        self.assertEqual(Back.GREEN, '\033[42m')
-        self.assertEqual(Back.YELLOW, '\033[43m')
-        self.assertEqual(Back.BLUE, '\033[44m')
-        self.assertEqual(Back.MAGENTA, '\033[45m')
-        self.assertEqual(Back.CYAN, '\033[46m')
-        self.assertEqual(Back.WHITE, '\033[47m')
-        self.assertEqual(Back.RESET, '\033[49m')
-        # Check the light, extended versions.
-        self.assertEqual(Back.LIGHTBLACK_EX, '\033[100m')
-        self.assertEqual(Back.LIGHTRED_EX, '\033[101m')
-        self.assertEqual(Back.LIGHTGREEN_EX, '\033[102m')
-        self.assertEqual(Back.LIGHTYELLOW_EX, '\033[103m')
-        self.assertEqual(Back.LIGHTBLUE_EX, '\033[104m')
-        self.assertEqual(Back.LIGHTMAGENTA_EX, '\033[105m')
-        self.assertEqual(Back.LIGHTCYAN_EX, '\033[106m')
-        self.assertEqual(Back.LIGHTWHITE_EX, '\033[107m')
-    def testStyleAttributes(self):
-        self.assertEqual(Style.DIM, '\033[2m')
-        self.assertEqual(Style.NORMAL, '\033[22m')
-        self.assertEqual(Style.BRIGHT, '\033[1m')
-if __name__ == '__main__':
-    main()

env/Lib/site-packages/colorama/tests/ansitowin32_test.py DELETED Viewed

@@ -1,294 +0,0 @@
-# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
-from io import StringIO, TextIOWrapper
-from unittest import TestCase, main
-try:
-    from contextlib import ExitStack
-except ImportError:
-    # python 2
-    from contextlib2 import ExitStack
-try:
-    from unittest.mock import MagicMock, Mock, patch
-except ImportError:
-    from mock import MagicMock, Mock, patch
-from ..ansitowin32 import AnsiToWin32, StreamWrapper
-from ..win32 import ENABLE_VIRTUAL_TERMINAL_PROCESSING
-from .utils import osname
-class StreamWrapperTest(TestCase):
-    def testIsAProxy(self):
-        mockStream = Mock()
-        wrapper = StreamWrapper(mockStream, None)
-        self.assertTrue( wrapper.random_attr is mockStream.random_attr )
-    def testDelegatesWrite(self):
-        mockStream = Mock()
-        mockConverter = Mock()
-        wrapper = StreamWrapper(mockStream, mockConverter)
-        wrapper.write('hello')
-        self.assertTrue(mockConverter.write.call_args, (('hello',), {}))
-    def testDelegatesContext(self):
-        mockConverter = Mock()
-        s = StringIO()
-        with StreamWrapper(s, mockConverter) as fp:
-            fp.write(u'hello')
-        self.assertTrue(s.closed)
-    def testProxyNoContextManager(self):
-        mockStream = MagicMock()
-        mockStream.__enter__.side_effect = AttributeError()
-        mockConverter = Mock()
-        with self.assertRaises(AttributeError) as excinfo:
-            with StreamWrapper(mockStream, mockConverter) as wrapper:
-                wrapper.write('hello')
-    def test_closed_shouldnt_raise_on_closed_stream(self):
-        stream = StringIO()
-        stream.close()
-        wrapper = StreamWrapper(stream, None)
-        self.assertEqual(wrapper.closed, True)
-    def test_closed_shouldnt_raise_on_detached_stream(self):
-        stream = TextIOWrapper(StringIO())
-        stream.detach()
-        wrapper = StreamWrapper(stream, None)
-        self.assertEqual(wrapper.closed, True)
-class AnsiToWin32Test(TestCase):
-    def testInit(self):
-        mockStdout = Mock()
-        auto = Mock()
-        stream = AnsiToWin32(mockStdout, autoreset=auto)
-        self.assertEqual(stream.wrapped, mockStdout)
-        self.assertEqual(stream.autoreset, auto)
-    @patch('colorama.ansitowin32.winterm', None)
-    @patch('colorama.ansitowin32.winapi_test', lambda *_: True)
-    def testStripIsTrueOnWindows(self):
-        with osname('nt'):
-            mockStdout = Mock()
-            stream = AnsiToWin32(mockStdout)
-            self.assertTrue(stream.strip)
-    def testStripIsFalseOffWindows(self):
-        with osname('posix'):
-            mockStdout = Mock(closed=False)
-            stream = AnsiToWin32(mockStdout)
-            self.assertFalse(stream.strip)
-    def testWriteStripsAnsi(self):
-        mockStdout = Mock()
-        stream = AnsiToWin32(mockStdout)
-        stream.wrapped = Mock()
-        stream.write_and_convert = Mock()
-        stream.strip = True
-        stream.write('abc')
-        self.assertFalse(stream.wrapped.write.called)
-        self.assertEqual(stream.write_and_convert.call_args, (('abc',), {}))
-    def testWriteDoesNotStripAnsi(self):
-        mockStdout = Mock()
-        stream = AnsiToWin32(mockStdout)
-        stream.wrapped = Mock()
-        stream.write_and_convert = Mock()
-        stream.strip = False
-        stream.convert = False
-        stream.write('abc')
-        self.assertFalse(stream.write_and_convert.called)
-        self.assertEqual(stream.wrapped.write.call_args, (('abc',), {}))
-    def assert_autoresets(self, convert, autoreset=True):
-        stream = AnsiToWin32(Mock())
-        stream.convert = convert
-        stream.reset_all = Mock()
-        stream.autoreset = autoreset
-        stream.winterm = Mock()
-        stream.write('abc')
-        self.assertEqual(stream.reset_all.called, autoreset)
-    def testWriteAutoresets(self):
-        self.assert_autoresets(convert=True)
-        self.assert_autoresets(convert=False)
-        self.assert_autoresets(convert=True, autoreset=False)
-        self.assert_autoresets(convert=False, autoreset=False)
-    def testWriteAndConvertWritesPlainText(self):
-        stream = AnsiToWin32(Mock())
-        stream.write_and_convert( 'abc' )
-        self.assertEqual( stream.wrapped.write.call_args, (('abc',), {}) )
-    def testWriteAndConvertStripsAllValidAnsi(self):
-        stream = AnsiToWin32(Mock())
-        stream.call_win32 = Mock()
-        data = [
-            'abc\033[mdef',
-            'abc\033[0mdef',
-            'abc\033[2mdef',
-            'abc\033[02mdef',
-            'abc\033[002mdef',
-            'abc\033[40mdef',
-            'abc\033[040mdef',
-            'abc\033[0;1mdef',
-            'abc\033[40;50mdef',
-            'abc\033[50;30;40mdef',
-            'abc\033[Adef',
-            'abc\033[0Gdef',
-            'abc\033[1;20;128Hdef',
-        ]
-        for datum in data:
-            stream.wrapped.write.reset_mock()
-            stream.write_and_convert( datum )
-            self.assertEqual(
-               [args[0] for args in stream.wrapped.write.call_args_list],
-               [ ('abc',), ('def',) ]
-            )
-    def testWriteAndConvertSkipsEmptySnippets(self):
-        stream = AnsiToWin32(Mock())
-        stream.call_win32 = Mock()
-        stream.write_and_convert( '\033[40m\033[41m' )
-        self.assertFalse( stream.wrapped.write.called )
-    def testWriteAndConvertCallsWin32WithParamsAndCommand(self):
-        stream = AnsiToWin32(Mock())
-        stream.convert = True
-        stream.call_win32 = Mock()
-        stream.extract_params = Mock(return_value='params')
-        data = {
-            'abc\033[adef':         ('a', 'params'),
-            'abc\033[;;bdef':       ('b', 'params'),
-            'abc\033[0cdef':        ('c', 'params'),
-            'abc\033[;;0;;Gdef':    ('G', 'params'),
-            'abc\033[1;20;128Hdef': ('H', 'params'),
-        }
-        for datum, expected in data.items():
-            stream.call_win32.reset_mock()
-            stream.write_and_convert( datum )
-            self.assertEqual( stream.call_win32.call_args[0], expected )
-    def test_reset_all_shouldnt_raise_on_closed_orig_stdout(self):
-        stream = StringIO()
-        converter = AnsiToWin32(stream)
-        stream.close()
-        converter.reset_all()
-    def test_wrap_shouldnt_raise_on_closed_orig_stdout(self):
-        stream = StringIO()
-        stream.close()
-        with \
-            patch("colorama.ansitowin32.os.name", "nt"), \
-            patch("colorama.ansitowin32.winapi_test", lambda: True):
-                converter = AnsiToWin32(stream)
-        self.assertTrue(converter.strip)
-        self.assertFalse(converter.convert)
-    def test_wrap_shouldnt_raise_on_missing_closed_attr(self):
-        with \
-            patch("colorama.ansitowin32.os.name", "nt"), \
-            patch("colorama.ansitowin32.winapi_test", lambda: True):
-                converter = AnsiToWin32(object())
-        self.assertTrue(converter.strip)
-        self.assertFalse(converter.convert)
-    def testExtractParams(self):
-        stream = AnsiToWin32(Mock())
-        data = {
-            '':               (0,),
-            ';;':             (0,),
-            '2':              (2,),
-            ';;002;;':        (2,),
-            '0;1':            (0, 1),
-            ';;003;;456;;':   (3, 456),
-            '11;22;33;44;55': (11, 22, 33, 44, 55),
-        }
-        for datum, expected in data.items():
-            self.assertEqual(stream.extract_params('m', datum), expected)
-    def testCallWin32UsesLookup(self):
-        listener = Mock()
-        stream = AnsiToWin32(listener)
-        stream.win32_calls = {
-            1: (lambda *_, **__: listener(11),),
-            2: (lambda *_, **__: listener(22),),
-            3: (lambda *_, **__: listener(33),),
-        }
-        stream.call_win32('m', (3, 1, 99, 2))
-        self.assertEqual(
-            [a[0][0] for a in listener.call_args_list],
-            [33, 11, 22] )
-    def test_osc_codes(self):
-        mockStdout = Mock()
-        stream = AnsiToWin32(mockStdout, convert=True)
-        with patch('colorama.ansitowin32.winterm') as winterm:
-            data = [
-                '\033]0\x07',                      # missing arguments
-                '\033]0;foo\x08',                  # wrong OSC command
-                '\033]0;colorama_test_title\x07',  # should work
-                '\033]1;colorama_test_title\x07',  # wrong set command
-                '\033]2;colorama_test_title\x07',  # should work
-                '\033]' + ';' * 64 + '\x08',       # see issue #247
-            ]
-            for code in data:
-                stream.write(code)
-            self.assertEqual(winterm.set_title.call_count, 2)
-    def test_native_windows_ansi(self):
-        with ExitStack() as stack:
-            def p(a, b):
-                stack.enter_context(patch(a, b, create=True))
-            # Pretend to be on Windows
-            p("colorama.ansitowin32.os.name", "nt")
-            p("colorama.ansitowin32.winapi_test", lambda: True)
-            p("colorama.win32.winapi_test", lambda: True)
-            p("colorama.winterm.win32.windll", "non-None")
-            p("colorama.winterm.get_osfhandle", lambda _: 1234)
-            # Pretend that our mock stream has native ANSI support
-            p(
-                "colorama.winterm.win32.GetConsoleMode",
-                lambda _: ENABLE_VIRTUAL_TERMINAL_PROCESSING,
-            )
-            SetConsoleMode = Mock()
-            p("colorama.winterm.win32.SetConsoleMode", SetConsoleMode)
-            stdout = Mock()
-            stdout.closed = False
-            stdout.isatty.return_value = True
-            stdout.fileno.return_value = 1
-            # Our fake console says it has native vt support, so AnsiToWin32 should
-            # enable that support and do nothing else.
-            stream = AnsiToWin32(stdout)
-            SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING)
-            self.assertFalse(stream.strip)
-            self.assertFalse(stream.convert)
-            self.assertFalse(stream.should_wrap())
-            # Now let's pretend we're on an old Windows console, that doesn't have
-            # native ANSI support.
-            p("colorama.winterm.win32.GetConsoleMode", lambda _: 0)
-            SetConsoleMode = Mock()
-            p("colorama.winterm.win32.SetConsoleMode", SetConsoleMode)
-            stream = AnsiToWin32(stdout)
-            SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING)
-            self.assertTrue(stream.strip)
-            self.assertTrue(stream.convert)
-            self.assertTrue(stream.should_wrap())
-if __name__ == '__main__':
-    main()