Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .dockerignore +4 -0
- .gitattributes +20 -35
- .gitignore +46 -0
- Dockerfile +17 -0
- app.py +112 -0
- brain_ai.py +113 -0
- code.ipynb +1758 -0
- demo.html +66 -0
- env/Lib/site-packages/PyYAML-6.0.2.dist-info/INSTALLER +1 -0
- env/Lib/site-packages/PyYAML-6.0.2.dist-info/LICENSE +20 -0
- env/Lib/site-packages/PyYAML-6.0.2.dist-info/METADATA +46 -0
- env/Lib/site-packages/PyYAML-6.0.2.dist-info/RECORD +43 -0
- env/Lib/site-packages/PyYAML-6.0.2.dist-info/WHEEL +5 -0
- env/Lib/site-packages/PyYAML-6.0.2.dist-info/top_level.txt +2 -0
- env/Lib/site-packages/_yaml/__init__.py +33 -0
- env/Lib/site-packages/certifi-2025.1.31.dist-info/INSTALLER +1 -0
- env/Lib/site-packages/certifi-2025.1.31.dist-info/LICENSE +20 -0
- env/Lib/site-packages/certifi-2025.1.31.dist-info/METADATA +77 -0
- env/Lib/site-packages/certifi-2025.1.31.dist-info/RECORD +14 -0
- env/Lib/site-packages/certifi-2025.1.31.dist-info/WHEEL +5 -0
- env/Lib/site-packages/certifi-2025.1.31.dist-info/top_level.txt +1 -0
- env/Lib/site-packages/certifi/__init__.py +4 -0
- env/Lib/site-packages/certifi/__main__.py +12 -0
- env/Lib/site-packages/certifi/cacert.pem +0 -0
- env/Lib/site-packages/certifi/core.py +114 -0
- env/Lib/site-packages/certifi/py.typed +0 -0
- env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER +1 -0
- env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE +21 -0
- env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/METADATA +721 -0
- env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/RECORD +35 -0
- env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL +5 -0
- env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt +2 -0
- env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt +1 -0
- env/Lib/site-packages/charset_normalizer/__init__.py +48 -0
- env/Lib/site-packages/charset_normalizer/__main__.py +6 -0
- env/Lib/site-packages/charset_normalizer/api.py +668 -0
- env/Lib/site-packages/charset_normalizer/cd.py +395 -0
- env/Lib/site-packages/charset_normalizer/cli/__init__.py +8 -0
- env/Lib/site-packages/charset_normalizer/cli/__main__.py +321 -0
- env/Lib/site-packages/charset_normalizer/constant.py +1998 -0
- env/Lib/site-packages/charset_normalizer/legacy.py +66 -0
- env/Lib/site-packages/charset_normalizer/md.py +630 -0
- env/Lib/site-packages/charset_normalizer/models.py +360 -0
- env/Lib/site-packages/charset_normalizer/py.typed +0 -0
- env/Lib/site-packages/charset_normalizer/utils.py +408 -0
- env/Lib/site-packages/charset_normalizer/version.py +8 -0
- env/Lib/site-packages/colorama-0.4.6.dist-info/INSTALLER +1 -0
- env/Lib/site-packages/colorama-0.4.6.dist-info/METADATA +441 -0
- env/Lib/site-packages/colorama-0.4.6.dist-info/RECORD +31 -0
- env/Lib/site-packages/colorama-0.4.6.dist-info/WHEEL +5 -0
.dockerignore
ADDED
@@ -0,0 +1,4 @@
__pycache__
.git
.vscode
venv
.gitattributes
CHANGED
@@ -1,35 +1,20 @@
- (old lines 1-20: removed; their content is not shown in this view)
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ # Auto detect text files and perform LF normalization
+ * text=auto
+ env/Lib/site-packages/pip/_vendor/distlib/t64-arm.exe filter=lfs diff=lfs merge=lfs -text
+ env/Lib/site-packages/pip/_vendor/distlib/t64.exe filter=lfs diff=lfs merge=lfs -text
+ env/Lib/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text
+ env/Lib/site-packages/pip/_vendor/distlib/w64.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/dotenv.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/huggingface-cli.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/normalizer.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/pip.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/pip3.12.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/pip3.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/python.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/pythonw.exe filter=lfs diff=lfs merge=lfs -text
+ env/Scripts/tqdm.exe filter=lfs diff=lfs merge=lfs -text
+ static/progress/epoch_1_batch_0.png filter=lfs diff=lfs merge=lfs -text
+ static/progress/epoch_2_batch_0.png filter=lfs diff=lfs merge=lfs -text
+ static/progress/epoch_3_batch_0.png filter=lfs diff=lfs merge=lfs -text
+ static/progress/epoch_4_batch_0.png filter=lfs diff=lfs merge=lfs -text
+ static/progress/epoch_5_batch_0.png filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,46 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Virtual environment
venv/
.env
*.env

# Logs & Debugging
*.log
logs/
debug.log

# System Files
.DS_Store
Thumbs.db

# Python dependencies
Pipfile
Pipfile.lock
requirements.txt

# IDE & Editor Specific
.vscode/
.idea/
*.iml

# Compiled Python packages
*.egg
*.egg-info/
dist/
build/

# Hugging Face Cache
.huggingface/

# Node.js & Frontend files
node_modules/
npm-debug.log
yarn-error.log

# Flask Specific
instance/
config.py
Dockerfile
ADDED
@@ -0,0 +1,17 @@
# Use an official Python runtime as a parent image
FROM python:3.9

# Set the working directory in the container
WORKDIR /BrainAI

# Copy the current directory contents into the container
COPY . /BrainAI

# Install any needed dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Expose the application port (default Flask port)
EXPOSE 5000

# Run the application
CMD ["python", "app.py"]
app.py
ADDED
@@ -0,0 +1,112 @@
import os
import cv2
import numpy as np
from flask import Flask, render_template, request, redirect, url_for, jsonify
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
import tensorflow as tf

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'static/uploads'
app.config['ALLOWED_EXTENSIONS'] = {'png', 'jpg', 'jpeg'}

class Sampling(tf.keras.layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# Load models with explicit TensorFlow context
with tf.init_scope():
    ct_to_mri_model = load_model('models/ct_to_mri_epoch_39.h5',
                                 custom_objects={'Sampling': Sampling})
    mri_to_ct_model = load_model('models/mri_to_ct_epoch_39.h5',
                                 custom_objects={'Sampling': Sampling})

def allowed_file(filename):
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']

def process_image(image_path, model):
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError("Could not load image")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (256, 256))
    img = img.astype(np.float32) / 255.0
    img = np.expand_dims(img, axis=0)

    prediction = model.predict(img)
    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]

    prediction = np.squeeze(prediction)
    prediction = (prediction * 255).astype(np.uint8)
    return prediction

def clean_uploads():
    upload_dir = app.config['UPLOAD_FOLDER']
    for filename in os.listdir(upload_dir):
        file_path = os.path.join(upload_dir, filename)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            print(f'Error deleting {file_path}: {e}')

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/try_now')
def try_now():
    return render_template('try_now.html')

@app.route('/samples')
def samples():
    return render_template('samples.html')

@app.route('/model_info')
def model_info():
    return render_template('model_info.html')

@app.route('/translate', methods=['POST'])
def translate():
    clean_uploads()  # Clean previous uploads

    if 'file' not in request.files:
        return redirect(request.url)

    file = request.files['file']
    if file.filename == '':
        return redirect(request.url)

    if file and allowed_file(file.filename):
        # Save original image
        upload_path = os.path.join(app.config['UPLOAD_FOLDER'], 'original.png')
        file.save(upload_path)

        # Choose model based on translation direction
        direction = request.form.get('direction')
        model = ct_to_mri_model if direction == 'ct_to_mri' else mri_to_ct_model

        try:
            result = process_image(upload_path, model)
            result_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.png')
            plt.imsave(result_path, result)

            return render_template('result.html',
                                   original=url_for('static', filename='uploads/original.png'),
                                   result=url_for('static', filename='uploads/result.png'))
        except Exception as e:
            return f"Error processing image: {str(e)}"

    return redirect(url_for('try_now'))

if __name__ == '__main__':
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    app.run(host='0.0.0.0', port=5000, debug=False)  # Set debug=False for production
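As a usage illustration (an added sketch, not part of the uploaded files), a minimal client for the /translate endpoint defined above, assuming the Flask app is running locally on port 5000 and that ct_scan.png is a hypothetical input image:

import requests

# 'file' and 'direction' match the form fields read by the /translate route;
# direction is either 'ct_to_mri' or 'mri_to_ct'.
with open("ct_scan.png", "rb") as f:
    resp = requests.post(
        "http://localhost:5000/translate",
        files={"file": ("ct_scan.png", f, "image/png")},
        data={"direction": "ct_to_mri"},
    )

print(resp.status_code)   # the route responds with the rendered result.html page
print(resp.text[:200])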
brain_ai.py
ADDED
@@ -0,0 +1,113 @@
import streamlit as st
import time
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    AIMessagePromptTemplate,
    ChatPromptTemplate
)

st.title("🧠 BrainAI")
st.caption("🚀 Your own AI Neurologist with SuperPowers!!")

# Common user query suggestions
suggestions = [
    "What are the early symptoms of a brain tumor?",
    "How is a brain tumor diagnosed?",
    "What are the treatment options for brain tumors?",
    "Can a brain tumor be non-cancerous?",
    "What lifestyle changes can help manage brain tumors?"
]

# Display suggestions in rows and keep them fixed at the top
# st.write("### 💡 Common Questions")
suggestion_container = st.container()
with suggestion_container:
    for query in suggestions:
        if st.button(query, key=query):
            st.session_state["user_input"] = query
            st.rerun()

# Initiate chat engine
llm_engine = ChatOllama(
    model="deepseek-r1:1.5b",
    base_url="http://localhost:11434",
    temperature=0.3
)

# System prompt
system_prompt = SystemMessagePromptTemplate.from_template("""
You are BrainAI, an AI-powered neurologist assistant designed to provide non-emergency guidance, education,
and support for neurological health. Your expertise includes brain anatomy, neurological disorders (e.g.,
epilepsy, Alzheimer’s, brain tumors, migraines), symptoms, diagnostics, and general brain health tips.
Always prioritize ethical guidelines, clarify your limitations, and emphasize consulting a licensed professional
for personal care. Answer only in English language.
""")

# Session management
if "message_log" not in st.session_state:
    st.session_state.message_log = [{"role": "assistant", "content": "Hello! How can I assist you with brain health today?"}]

# Chat container
chat_container = st.container()

# Display messages with animation
def display_text_with_animation(text):
    message_placeholder = st.empty()
    displayed_text = ""
    for char in text:
        displayed_text += char
        message_placeholder.markdown(displayed_text)
        time.sleep(0.01)

with chat_container:
    for message in st.session_state.message_log:
        with st.chat_message(message["role"]):
            if "<think>" in message["content"]:
                parts = message["content"].split("</think>")
                think_content = parts[0].replace("<think>", "").strip()
                actual_response = parts[-1].strip()

                with st.expander("🔍 View AI's Thinking Process"):
                    st.markdown(f"*Internal Analysis:*\n{think_content}")

                display_text_with_animation(actual_response)
            else:
                display_text_with_animation(message["content"])

# Chat input
user_query = st.chat_input(" Ask anything about brain health ...")

# If a suggestion was selected, use it as the input
if "user_input" in st.session_state:
    user_query = st.session_state["user_input"]
    del st.session_state["user_input"]

def generate_ai_response(prompt_chain):
    processing_pipeline = prompt_chain | llm_engine | StrOutputParser()
    return processing_pipeline.invoke({})

def build_prompt_chain():
    prompt_sequence = [system_prompt]
    for msg in st.session_state.message_log:
        if msg["role"] == "user":
            prompt_sequence.append(HumanMessagePromptTemplate.from_template(msg["content"]))
        elif msg["role"] == "assistant":
            prompt_sequence.append(AIMessagePromptTemplate.from_template(msg["content"]))
    return ChatPromptTemplate.from_messages(prompt_sequence)

if user_query:
    st.session_state.message_log.append({"role": "user", "content": user_query})

    with st.spinner("🧠 Thinking ..."):
        prompt_chain = build_prompt_chain()
        raw_response = generate_ai_response(prompt_chain)

    st.session_state.message_log.append({
        "role": "assistant",
        "content": raw_response
    })

    st.rerun()
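For context (an added sketch, not part of the repository), the same LangChain pieces used in brain_ai.py can be exercised outside Streamlit, assuming an Ollama server is running locally with the deepseek-r1:1.5b model pulled:

from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)

# Same engine settings as brain_ai.py.
llm_engine = ChatOllama(model="deepseek-r1:1.5b", base_url="http://localhost:11434", temperature=0.3)

prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template("You are BrainAI, an AI-powered neurologist assistant."),
    HumanMessagePromptTemplate.from_template("What are the early symptoms of a brain tumor?"),
])

# Same pipeline shape as generate_ai_response(): prompt | model | string parser.
chain = prompt | llm_engine | StrOutputParser()
print(chain.invoke({}))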
code.ipynb
ADDED
@@ -0,0 +1,1758 @@
Notebook contents, cell by cell:

[markdown cell]
# Image to Image Translation

[code cell 1]
!pip install tensorflow==2.15.0
(output: "Requirement already satisfied" messages for tensorflow==2.15.0, tensorflow-intel==2.15.0 and their dependencies — grpcio, tensorflow-io-gcs-filesystem, numpy, packaging, tensorflow-estimator, libclang, six, absl-py, setuptools, h5py, gast, flatbuffers, google-pasta, termcolor, astunparse, typing-extensions, tensorboard, keras, protobuf, ml-dtypes, wrapt, opt-einsum, wheel, requests, tensorboard-data-server, werkzeug, markdown, google-auth, google-auth-oauthlib, cachetools, pyasn1-modules, rsa, requests-oauthlib, charset-normalizer, idna, certifi, urllib3, MarkupSafe, pyasn1, oauthlib — all already present in the local virtual environment; pip also notes that release 25.0.1 is available, current 23.0.1)

[code cell 2]
!pip install tensorflow-probability==0.23.0
(output: "Requirement already satisfied" messages for tensorflow-probability==0.23.0 and its dependencies — six, decorator, cloudpickle, gast, numpy, absl-py, dm-tree, attrs, wrapt — plus the same pip upgrade notice)

[markdown cell]
1️⃣ Import Necessary Libraries

1. TensorFlow/Keras for building and training deep learning models.
2. NumPy for numerical operations.
3. Matplotlib for visualizing the results.
4. OpenCV/PIL for image processing.
5. TensorFlow Addons for additional loss functions and layers (e.g., InstanceNorm).
6. TensorFlow Datasets (or custom loaders) to load CT & MRI images.

[code cell 3]
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np
import cv2
import pathlib
import matplotlib.pyplot as plt
import tensorflow_probability as tfp

tfd = tfp.distributions
(output: TensorFlow deprecation warnings for tf.losses.sparse_softmax_cross_entropy, tf.logging.TaskLevelStatusMessage, and tf.control_flow_v2_enabled, emitted while importing Keras and TensorFlow Probability)

[markdown cell]
2️⃣ Configuration (Hyperparameters)

This step defines the key settings for training.

Image size: the input image dimensions.
Latent dimension: the size of the encoded representation in the VAE.
Learning rate: how fast the model updates its weights.
Batch size & epochs: training parameters.

[code cell 4]
IMAGE_SHAPE = (256, 256, 3)
LATENT_DIM = 256
FILTERS = 16
KERNEL = 3
LEARNING_RATE = 0.0001
WEIGHT_DECAY = 6e-8
BATCH_SIZE = 1
EPOCHS = 10

[markdown cell]
===================== Architecture Components =====================

[markdown cell]
3️⃣ Sampling Layer for the Variational Autoencoder (VAE)

The sampling layer is a crucial part of the VAE: it is where we sample from the latent space.

🔹 What we need:
The encoder outputs μ (the mean) and log σ² (the log variance).
This layer samples from a normal distribution using the reparameterization trick.

[code cell]
class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

[markdown cell]
It inherits from layers.Layer, so it is a custom layer that can be used like any other Keras layer.

📍 inputs is a tuple containing:
    z_mean: the mean vector output by the encoder.
    z_log_var: the log-variance vector output by the encoder.
The call method unpacks these into two separate variables.

📍 Why log variance? Instead of the variance σ² we work with log(σ²) because:
    Numerical stability: the log prevents exploding/vanishing gradients.
    Easier optimization: exp(log(σ²) / 2) keeps the standard deviation always positive.

📍 tf.shape gives:
    batch: the number of samples in the batch.
    dim: the size of the latent space (e.g., 128 if LATENT_DIM = 128).

📍 epsilon is drawn from a standard normal distribution 𝒩(0, 1) with shape (batch, dim), so every sample gets a unique noise vector. Why do we need epsilon? Instead of using z_mean directly, we add controlled randomness so the VAE learns a smooth latent space.

Reparameterization trick:
    The latent space follows a normal distribution: z ∼ 𝒩(μ, σ²).
    A sample is drawn from this distribution: z = μ + σ · ε, where ε ∼ 𝒩(0, 1).
    Since z_log_var = log(σ²), we compute σ = exp(0.5 · log(σ²)) = exp(0.5 · z_log_var).
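As a quick numeric check of the reparameterization step described above (an added illustration, not one of the notebook's cells), the same z = μ + exp(0.5 · log σ²) · ε recipe in plain NumPy; the chosen μ and σ values are arbitrary:

import numpy as np

rng = np.random.default_rng(0)
mu, log_var = 1.5, np.log(0.25)            # target distribution N(1.5, 0.25), i.e. sigma = 0.5

eps = rng.standard_normal(100_000)         # epsilon ~ N(0, 1)
z = mu + np.exp(0.5 * log_var) * eps       # reparameterized samples

print(round(z.mean(), 3), round(z.std(), 3))   # close to 1.5 and 0.5, i.e. mu and sigma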
290 |
+
{
|
291 |
+
"cell_type": "markdown",
|
292 |
+
"metadata": {},
|
293 |
+
"source": [
|
294 |
+
"*🔹 Residual Block in Detail*\n",
|
295 |
+
"\n",
|
296 |
+
"This function defines a residual block, a key building block inspired by ResNet (Residual Networks). Residual blocks help in training deep neural networks efficiently by allowing gradient flow through skip connections.\n",
|
297 |
+
"\n",
|
298 |
+
"inputs: The input tensor (features from the previous layer).\n",
|
299 |
+
"\n",
|
300 |
+
"filters: The number of filters (channels) in the convolution layers.\n",
|
301 |
+
"\n",
|
302 |
+
"use_norm: Whether to apply Group Normalization (helps stabilize training)\n",
|
303 |
+
"\n",
|
304 |
+
"Step 1️⃣: First Convolution + Activation :\n",
|
305 |
+
"\n",
|
306 |
+
" Applies a 2D Convolution (Conv2D) with filters filters.\n",
|
307 |
+
"\n",
|
308 |
+
" KERNEL (not defined in this function) should be the kernel size (e.g., 3x3 or 5x5).\n",
|
309 |
+
"\n",
|
310 |
+
" padding='same': Ensures the output size is the same as the input.\n",
|
311 |
+
"\n",
|
312 |
+
" Leaky ReLU activation (alpha=0.2):\n",
|
313 |
+
"\n",
|
314 |
+
" Helps avoid dead neurons (better than regular ReLU).\n",
|
315 |
+
" \n",
|
316 |
+
" Allows a small gradient flow for negative values.\n",
|
317 |
+
"\n",
|
318 |
+
"Step 2️⃣: Group Normalization (Optional)\n",
|
319 |
+
"\n",
|
320 |
+
"Step 3️⃣: Second Convolution + Activation\n",
|
321 |
+
"\n",
|
322 |
+
" Applies another Conv2D layer with the same number of filters.\n",
|
323 |
+
"\n",
|
324 |
+
" Uses LeakyReLU again for better gradient flow.\n",
|
325 |
+
"\n",
|
326 |
+
" Why two convolutions?\n",
|
327 |
+
"\n",
|
328 |
+
" The first convolution learns low-level features.\n",
|
329 |
+
" \n",
|
330 |
+
" The second convolution refines the learned features.\n",
|
331 |
+
"\n",
|
332 |
+
"\n",
|
333 |
+
"Step 5️⃣: Shortcut Connection (Skip Connection)\n",
|
334 |
+
"\n",
|
335 |
+
" The original input is passed through a 1x1 convolution.\n",
|
336 |
+
"\n",
|
337 |
+
" This matches the number of filters with the residual output.\n",
|
338 |
+
"\n",
|
339 |
+
" Why 1x1 convolution?\n",
|
340 |
+
"\n",
|
341 |
+
" Ensures the shortcut has the same number of filters as x.\n",
|
342 |
+
"\n",
|
343 |
+
" Helps in adjusting dimensions when the number of channels changes.\n",
|
344 |
+
"\n",
|
345 |
+
"Step 6️⃣: Merge Shortcut & Residual Path\n",
|
346 |
+
"\n",
|
347 |
+
" Merges the shortcut and residual path using element-wise maximum.\n",
|
348 |
+
" \n",
|
349 |
+
" Why maximum() instead of addition (+)?\n",
|
350 |
+
"\n",
|
351 |
+
" Prevents negative values, which can help improve training stability.\n",
|
352 |
+
" \n",
|
353 |
+
" Focuses on stronger features from either the residual or shortcut path.\n",
|
354 |
+
"\n",
|
355 |
+
"\n",
|
356 |
+
"\n",
|
357 |
+
"\n",
|
358 |
+
"\n",
|
359 |
+
"\n",
|
360 |
+
"\n",
|
361 |
+
"\n"
|
362 |
+
]
|
363 |
+
},
|
364 |
+
{
|
365 |
+
"cell_type": "code",
|
366 |
+
"execution_count": 5,
|
367 |
+
"metadata": {},
|
368 |
+
"outputs": [],
|
369 |
+
"source": [
|
370 |
+
"def residual_block(inputs, filters, use_norm=True):\n",
|
371 |
+
" x = layers.Conv2D(filters, KERNEL, padding='same')(inputs)\n",
|
372 |
+
" x = layers.LeakyReLU(alpha=0.2)(x)\n",
|
373 |
+
" if use_norm:\n",
|
374 |
+
" x = layers.GroupNormalization(groups=1)(x)\n",
|
375 |
+
" x = layers.Conv2D(filters, KERNEL, padding='same')(x)\n",
|
376 |
+
" x = layers.LeakyReLU(alpha=0.2)(x)\n",
|
377 |
+
" if use_norm:\n",
|
378 |
+
" x = layers.GroupNormalization(groups=1)(x)\n",
|
379 |
+
" shortcut = layers.Conv2D(filters, 1, padding='same')(inputs)\n",
|
380 |
+
" return layers.maximum([x, shortcut])"
|
381 |
+
]
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"cell_type": "markdown",
|
385 |
+
"metadata": {},
|
386 |
+
"source": [
|
387 |
+
"* 1️⃣ Encoder and Decoder Block*\n",
|
388 |
+
"\n",
|
389 |
+
"*Encoder*\n",
|
390 |
+
"\n",
|
391 |
+
"1️⃣ Pass Input Through Residual Block\n",
|
392 |
+
"\n",
|
393 |
+
"Uses a residual block (previously defined).\n",
|
394 |
+
"\n",
|
395 |
+
"Extracts important features while keeping the original information.\n",
|
396 |
+
"\n",
|
397 |
+
"Helps prevent vanishing gradients and allows deep networks to train effectively.\n",
|
398 |
+
"\n",
|
399 |
+
"2️⃣ Store the Skip Connection\n",
|
400 |
+
"\n",
|
401 |
+
"The skip connection stores the output of the residual block.\n",
|
402 |
+
"\n",
|
403 |
+
"It will be used later in the decoder to restore lost details.\n",
|
404 |
+
"\n",
|
405 |
+
"3️⃣ Downsampling (Reduce Spatial Size)\n",
|
406 |
+
"\n",
|
407 |
+
"Applies Max Pooling to reduce the spatial size (height & width).\n",
|
408 |
+
"\n",
|
409 |
+
"Why?\n",
|
410 |
+
"\n",
|
411 |
+
"Reduces computation.\n",
|
412 |
+
"\n",
|
413 |
+
"Forces the model to learn high-level features instead of pixel details.\n",
|
414 |
+
"\n",
|
415 |
+
"4️⃣ Return Downsampled Output & Skip Connection\n",
|
416 |
+
"\n",
|
417 |
+
"Outputs:\n",
|
418 |
+
"\n",
|
419 |
+
"x: The downsampled feature map.\n",
|
420 |
+
"\n",
|
421 |
+
"skip: The saved feature map (used later in the decoder).\n",
|
422 |
+
"\n",
|
423 |
+
"\n",
|
424 |
+
"🔥 2️⃣ Decoder Block\n",
|
425 |
+
"\n",
|
426 |
+
"1️⃣ Upsampling (Increase Spatial Size):\n",
|
427 |
+
"\n",
|
428 |
+
"Uses Conv2DTranspose (transposed convolution, aka deconvolution).\n",
|
429 |
+
"\n",
|
430 |
+
"Upsamples the input by a factor of 2 (increases spatial size).\n",
|
431 |
+
"\n",
|
432 |
+
"Why?\n",
|
433 |
+
"\n",
|
434 |
+
"Increases resolution to match the original input image.\n",
|
435 |
+
"\n",
|
436 |
+
"2️⃣ Merge Skip Connection\n",
|
437 |
+
"\n",
|
438 |
+
"Combines the upsampled output with the skip connection.\n",
|
439 |
+
"\n",
|
440 |
+
"Uses element-wise maximum instead of addition.\n",
|
441 |
+
"\n",
|
442 |
+
"Why?\n",
|
443 |
+
"\n",
|
444 |
+
"Ensures the model focuses on the most important features.\n",
|
445 |
+
"\n",
|
446 |
+
"Prevents loss of key information during encoding.\n",
|
447 |
+
"\n",
|
448 |
+
"3️⃣ Apply a Residual Block\n",
|
449 |
+
"\n",
|
450 |
+
"Uses a residual block to refine the upsampled output.\n",
|
451 |
+
"\n",
|
452 |
+
"Helps recover lost details and maintain stability.\n",
|
453 |
+
"\n",
|
454 |
+
"4️⃣ Return the Processed Output\n",
|
455 |
+
"\n",
|
456 |
+
"Returns the final feature map after upsampling and refinement.\n",
|
457 |
+
"\n",
|
458 |
+
"\n"
|
459 |
+
]
|
460 |
+
},
|
461 |
+
{
|
462 |
+
"cell_type": "code",
|
463 |
+
"execution_count": 6,
|
464 |
+
"metadata": {},
|
465 |
+
"outputs": [],
|
466 |
+
"source": [
|
467 |
+
"def encoder_block(inputs, filters, use_norm=True):\n",
|
468 |
+
" x = residual_block(inputs, filters, use_norm)\n",
|
469 |
+
" skip = x\n",
|
470 |
+
" x = layers.MaxPooling2D()(x)\n",
|
471 |
+
" return x, skip\n",
|
472 |
+
"\n",
|
473 |
+
"def decoder_block(inputs, skip, filters, use_norm=True):\n",
|
474 |
+
" x = layers.Conv2DTranspose(filters, KERNEL, strides=2, padding='same')(inputs)\n",
|
475 |
+
" x = layers.maximum([x, skip])\n",
|
476 |
+
" x = residual_block(x, filters, use_norm)\n",
|
477 |
+
" return x\n"
|
478 |
+
]
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"cell_type": "markdown",
|
482 |
+
"metadata": {},
|
483 |
+
"source": [
|
484 |
+
"* ===================== Generator =====================*"
|
485 |
+
]
|
486 |
+
},
|
487 |
+
{
|
488 |
+
"cell_type": "markdown",
|
489 |
+
"metadata": {},
|
490 |
+
"source": [
|
491 |
+
"This function builds the generator model for a Variational Autoencoder (VAE) with a CycleGAN architecture. The generator is responsible for converting a CT scan into an MRI image (or vice versa) by learning to map the two domains.\n",
|
492 |
+
"\n",
|
493 |
+
".\n",
|
494 |
+
"\n",
|
495 |
+
"🛠️ What This Function Does?\n",
|
496 |
+
"\n",
|
497 |
+
"It encodes an input image into a latent space.\n",
|
498 |
+
"\n",
|
499 |
+
"It applies variational sampling to introduce a probabilistic distribution.\n",
|
500 |
+
"\n",
|
501 |
+
"It decodes the latent representation back into an image.\n",
|
502 |
+
"\n",
|
503 |
+
"Uses skip connections to retain features across layers.\n",
|
504 |
+
"\n",
|
505 |
+
"1️⃣ Input Layer\n",
|
506 |
+
"\n",
|
507 |
+
"Defines the input tensor with the given IMAGE_SHAPE (e.g., (256, 256, 3), for RGB images).\n",
|
508 |
+
"\n",
|
509 |
+
"2️⃣ Encoder: Downsampling the Image\n",
|
510 |
+
"\n",
|
511 |
+
" Each encoder block halves the spatial resolution but doubles the filters.\n",
|
512 |
+
"\n",
|
513 |
+
" Stores skip connections (s1, s2, ..., s7) for later use in the decoder.\n",
|
514 |
+
"\n",
|
515 |
+
" After e7, the image is highly compressed into a feature map.\n",
|
516 |
+
"\n",
|
517 |
+
"3️⃣ Latent Space (Variational Sampling)\n",
|
518 |
+
"\n",
|
519 |
+
" Flattens the feature map into a 1D vector.\n",
|
520 |
+
"\n",
|
521 |
+
" Uses two dense layers to compute:\n",
|
522 |
+
"\n",
|
523 |
+
" z_mean → The mean of the latent distribution.\n",
|
524 |
+
"\n",
|
525 |
+
" z_log_var → The logarithm of the variance.\n",
|
526 |
+
"\n",
|
527 |
+
" Uses reparameterization trick (Sampling layer) to ensure backpropagation works in VAE.\n",
|
528 |
+
"\n",
|
529 |
+
"4️⃣ Reshape for Decoder\n",
|
530 |
+
"\n",
|
531 |
+
" Expands z into a 2x2 feature map to match e7 dimensions.\n",
|
532 |
+
"\n",
|
533 |
+
" Prepares the latent vector for decoding.\n",
|
534 |
+
"\n",
|
535 |
+
"5️⃣ Decoder: Upsampling the Image\n",
|
536 |
+
"\n",
|
537 |
+
" Each decoder block upsamples the feature map back to the original size.\n",
|
538 |
+
"\n",
|
539 |
+
" Uses skip connections (s1, s2, ..., s7) to restore spatial information.\n",
|
540 |
+
"\n",
|
541 |
+
" Mirrors the encoder process but in reverse.\n",
|
542 |
+
"\n",
|
543 |
+
"6️⃣ Final Output Layer\n",
|
544 |
+
"\n",
|
545 |
+
" Uses a Conv2D layer to produce the final RGB image.\n",
|
546 |
+
" \n",
|
547 |
+
" Applies sigmoid activation to ensure pixel values remain between [0,1].\n",
|
548 |
+
"\n",
|
549 |
+
"\n",
|
550 |
+
"\n",
|
551 |
+
"\n",
|
552 |
+
"\n",
|
553 |
+
"\n"
|
554 |
+
]
|
555 |
+
},
|
556 |
+
{
|
557 |
+
"cell_type": "code",
|
558 |
+
"execution_count": 7,
|
559 |
+
"metadata": {},
|
560 |
+
"outputs": [],
|
561 |
+
"source": [
|
562 |
+
"def build_generator(name):\n",
|
563 |
+
" inputs = layers.Input(IMAGE_SHAPE)\n",
|
564 |
+
" \n",
|
565 |
+
" # Encoder\n",
|
566 |
+
" e1, s1 = encoder_block(inputs, FILTERS)\n",
|
567 |
+
" e2, s2 = encoder_block(e1, FILTERS*2)\n",
|
568 |
+
" e3, s3 = encoder_block(e2, FILTERS*4)\n",
|
569 |
+
" e4, s4 = encoder_block(e3, FILTERS*8)\n",
|
570 |
+
" e5, s5 = encoder_block(e4, FILTERS*16)\n",
|
571 |
+
" e6, s6 = encoder_block(e5, FILTERS*32)\n",
|
572 |
+
" e7, s7 = encoder_block(e6, FILTERS*64)\n",
|
573 |
+
" \n",
|
574 |
+
" # Latent Space\n",
|
575 |
+
" x = layers.Flatten()(e7)\n",
|
576 |
+
" z_mean = layers.Dense(LATENT_DIM, name=f\"z_mean_{name.split('_')[-1]}\")(x)\n",
|
577 |
+
" z_log_var = layers.Dense(LATENT_DIM, name=f\"z_log_var_{name.split('_')[-1]}\")(x)\n",
|
578 |
+
" z = Sampling()([z_mean, z_log_var])\n",
|
579 |
+
" \n",
|
580 |
+
" # Reshape for decoder\n",
|
581 |
+
" x = layers.Dense(2 * 2 * FILTERS*64)(z)\n",
|
582 |
+
" x = layers.Reshape((2, 2, FILTERS*64))(x)\n",
|
583 |
+
" \n",
|
584 |
+
" # Decoder\n",
|
585 |
+
" d0 = decoder_block(x, s7, FILTERS*64)\n",
|
586 |
+
" d1 = decoder_block(d0, s6, FILTERS*32)\n",
|
587 |
+
" d2 = decoder_block(d1, s5, FILTERS*16)\n",
|
588 |
+
" d3 = decoder_block(d2, s4, FILTERS*8)\n",
|
589 |
+
" d4 = decoder_block(d3, s3, FILTERS*4)\n",
|
590 |
+
" d5 = decoder_block(d4, s2, FILTERS*2)\n",
|
591 |
+
" d6 = decoder_block(d5, s1, FILTERS)\n",
|
592 |
+
" \n",
|
593 |
+
" outputs = layers.Conv2D(3, KERNEL, activation='sigmoid', padding='same')(d6)\n",
|
594 |
+
" return Model(inputs, [outputs, z_mean, z_log_var], name=name)"
|
595 |
+
]
|
596 |
+
},
|
597 |
+
{
|
598 |
+
"cell_type": "markdown",
|
599 |
+
"metadata": {},
|
600 |
+
"source": [
|
601 |
+
"\n",
|
602 |
+
"*===================== Discriminator =====================*\n",
|
603 |
+
"\n",
|
604 |
+
"\n",
|
605 |
+
"This function constructs the discriminator in a Generative Adversarial Network (GAN). The discriminator’s role is to classify an image as real or fake by extracting hierarchical features and making multi-scale predictions.\n",
|
606 |
+
"\n",
|
607 |
+
"What Does This Function Do?\n",
|
608 |
+
"\n",
|
609 |
+
" Extracts features from the input image using convolutional layers.\n",
|
610 |
+
"\n",
|
611 |
+
" Downsamples the image through multiple layers to capture both local and global features.\n",
|
612 |
+
"\n",
|
613 |
+
" Generates multiple outputs from different feature scales for better discrimination.\n",
|
614 |
+
"\n",
|
615 |
+
"1️⃣ Input Layer \n",
|
616 |
+
"\n",
|
617 |
+
" Defines the input tensor with a shape of IMAGE_SHAPE (e.g., (256, 256, 3) for RGB images).\n",
|
618 |
+
"\n",
|
619 |
+
" This means the discriminator takes an image as input.\n",
|
620 |
+
"\n",
|
621 |
+
"2️⃣ Feature Extraction\n",
|
622 |
+
"\n",
|
623 |
+
" x = inputs initializes x as the input image.\n",
|
624 |
+
"\n",
|
625 |
+
" features = [] creates a list to store intermediate feature map\n",
|
626 |
+
"\n",
|
627 |
+
"3️⃣ Initial Convolution\n",
|
628 |
+
"\n",
|
629 |
+
" Applies a convolutional layer (Conv2D) with FILTERS (e.g., 64 filters) to extract basic edges and textures.\n",
|
630 |
+
"\n",
|
631 |
+
" Uses LeakyReLU activation (alpha=0.2) instead of ReLU to allow small gradients for negative values.\n",
|
632 |
+
"\n",
|
633 |
+
" Stores the feature map in features.\n",
|
634 |
+
"\n",
|
635 |
+
"4️⃣ Downsampling Blocks (Feature Hierarchy)\n",
|
636 |
+
"\n",
|
637 |
+
" Defines filter_sizes, increasing filter count at each stage to learn complex features.\n",
|
638 |
+
"\n",
|
639 |
+
" Uses a loop to pass x through multiple encoder_block layers:\n",
|
640 |
+
"\n",
|
641 |
+
" Each encoder_block downsamples the feature map (reducing spatial size).\n",
|
642 |
+
"\n",
|
643 |
+
" Each block doubles the number of filters to capture more detailed features.\n",
|
644 |
+
"\n",
|
645 |
+
" Stores all extracted feature maps in features.\n",
|
646 |
+
"\n",
|
647 |
+
"5️⃣ Multi-Scale Outputs (Final Classification Layers)\n",
|
648 |
+
"\n",
|
649 |
+
" The discriminator does not produce a single output; it uses multiple feature scales.\n",
|
650 |
+
"\n",
|
651 |
+
" Extracts the last 4 feature maps (features[-4:]) to classify at different resolutions.\n",
|
652 |
+
"\n",
|
653 |
+
" Each feature map is passed through a final Conv2D layer with 1 filter to predict real vs fake scores.\n",
|
654 |
+
"\n",
|
655 |
+
" Stores the outputs in outputs.\n",
|
656 |
+
"\n",
|
657 |
+
"6️⃣ Return the Discriminator Model\n",
|
658 |
+
"\n",
|
659 |
+
" Creates a Keras Model that takes an image as input and outputs multiple classification scores.\n",
|
660 |
+
"\n",
|
661 |
+
" This helps in making fine-grained real/fake decisions.\n"
|
662 |
+
]
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"cell_type": "code",
|
666 |
+
"execution_count": 9,
|
667 |
+
"metadata": {},
|
668 |
+
"outputs": [],
|
669 |
+
"source": [
|
670 |
+
"def build_discriminator(name):\n",
|
671 |
+
" inputs = layers.Input(IMAGE_SHAPE)\n",
|
672 |
+
" \n",
|
673 |
+
" # Feature extraction\n",
|
674 |
+
" x = inputs\n",
|
675 |
+
" features = []\n",
|
676 |
+
" \n",
|
677 |
+
" # Initial convolution\n",
|
678 |
+
" x = layers.Conv2D(FILTERS, KERNEL, padding='same')(x)\n",
|
679 |
+
" x = layers.LeakyReLU(alpha=0.2)(x)\n",
|
680 |
+
" features.append(x)\n",
|
681 |
+
" \n",
|
682 |
+
" # Downsampling blocks\n",
|
683 |
+
" filter_sizes = [FILTERS*2, FILTERS*4, FILTERS*8, FILTERS*16, FILTERS*32, FILTERS*64]\n",
|
684 |
+
" for filters in filter_sizes:\n",
|
685 |
+
" x, _ = encoder_block(x, filters, use_norm=False)\n",
|
686 |
+
" features.append(x)\n",
|
687 |
+
" \n",
|
688 |
+
" # Multi-scale outputs\n",
|
689 |
+
" outputs = []\n",
|
690 |
+
" for i, feature in enumerate(features[-4:]):\n",
|
691 |
+
" out = layers.Conv2D(1, KERNEL, padding='same')(feature)\n",
|
692 |
+
" outputs.append(out)\n",
|
693 |
+
" \n",
|
694 |
+
" return Model(inputs, outputs, name=name)\n"
|
695 |
+
]
|
696 |
+
},
|
697 |
+
{
|
698 |
+
"cell_type": "markdown",
|
699 |
+
"metadata": {},
|
700 |
+
"source": [
|
701 |
+
"*===================== Data Loading =====================*\n",
|
702 |
+
"\n"
|
703 |
+
]
|
704 |
+
},
|
705 |
+
{
|
706 |
+
"cell_type": "code",
|
707 |
+
"execution_count": 10,
|
708 |
+
"metadata": {},
|
709 |
+
"outputs": [],
|
710 |
+
"source": [
|
711 |
+
"def load_images(path):\n",
|
712 |
+
" images = []\n",
|
713 |
+
" for p in pathlib.Path(path).glob('*.*'):\n",
|
714 |
+
" try:\n",
|
715 |
+
" img = cv2.imread(str(p))\n",
|
716 |
+
" if img is not None:\n",
|
717 |
+
" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
|
718 |
+
" img = cv2.resize(img, IMAGE_SHAPE[:2])\n",
|
719 |
+
" img = img.astype(np.float32) / 255.0\n",
|
720 |
+
" images.append(img)\n",
|
721 |
+
" except Exception as e:\n",
|
722 |
+
" print(f\"Error loading image {p}: {e}\")\n",
|
723 |
+
" return np.array(images)"
|
724 |
+
]
|
725 |
+
},
|
726 |
+
{
|
727 |
+
"cell_type": "markdown",
|
728 |
+
"metadata": {},
|
729 |
+
"source": [
|
730 |
+
"This function is responsible for loading and balancing two different medical imaging datasets: CT scans and MRI scans. The goal is to ensure that both datasets contain the same number of images to avoid class imbalance in training.\n",
|
731 |
+
"\n",
|
732 |
+
"\n",
|
733 |
+
"📌 What Does This Function Do?\n",
|
734 |
+
"\n",
|
735 |
+
"Loads CT scans from the given directory.\n",
|
736 |
+
"Loads MRI scans from the given directory.\n",
|
737 |
+
"Finds the smaller dataset (CT or MRI) and trims the larger one to match its size.\n",
|
738 |
+
"Returns balanced datasets with the same number of images.\n",
|
739 |
+
"\n",
|
740 |
+
"1️⃣ Loading CT Scans:\n",
|
741 |
+
"\n",
|
742 |
+
" Prints \"Loading CT scans...\" to inform the user.\n",
|
743 |
+
"\n",
|
744 |
+
" Calls load_images(ct_path), a function (likely defined elsewhere) that reads images from the directory specified by ct_path.\n",
|
745 |
+
"\n",
|
746 |
+
" Stores the loaded images in ct_scans.\n",
|
747 |
+
"\n",
|
748 |
+
"2️⃣ Loading MRI Scans\n",
|
749 |
+
"\n",
|
750 |
+
" Prints \"Loading MRI scans...\" to indicate MRI loading.\n",
|
751 |
+
"\n",
|
752 |
+
" Calls load_images(mri_path), which loads images from mri_path.\n",
|
753 |
+
"\n",
|
754 |
+
" Stores the MRI images in mri_scans.\n",
|
755 |
+
"\n",
|
756 |
+
"3️⃣ Balancing the Datasets\n",
|
757 |
+
"\n",
|
758 |
+
" Computes the minimum length between the two datasets.\n",
|
759 |
+
"\n",
|
760 |
+
" Ensures that the dataset with more images is trimmed to match the smaller one.\n",
|
761 |
+
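"\n",
"A minimal usage sketch of this loader (the function itself appears in the next cell; the directory names here are illustrative, and the real Kaggle paths are used in the main training loop further down):\n",
"\n",
"```python\n",
"# Both arrays come back with the same length; each image is 256x256x3, scaled to [0, 1]\n",
"ct_scans, mri_scans = load_and_balance_datasets('Dataset/images/trainA',\n",
"                                                'Dataset/images/trainB')\n",
"print(ct_scans.shape, mri_scans.shape)\n",
"```\n",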
"\n",
|
762 |
+
" Computes the minimum length between the two datasets.\n",
|
763 |
+
"\n",
|
764 |
+
" Ensures that the dataset with more images is trimmed to match the smaller one.\n",
|
765 |
+
"\n",
|
766 |
+
"\n",
|
767 |
+
"\n",
|
768 |
+
"\n",
|
769 |
+
"\n",
|
770 |
+
"\n",
|
771 |
+
"\n",
|
772 |
+
"\n",
|
773 |
+
" \n",
|
774 |
+
"\n"
|
775 |
+
]
|
776 |
+
},
|
777 |
+
{
|
778 |
+
"cell_type": "code",
|
779 |
+
"execution_count": 11,
|
780 |
+
"metadata": {},
|
781 |
+
"outputs": [],
|
782 |
+
"source": [
|
783 |
+
"\n",
|
784 |
+
"def load_and_balance_datasets(ct_path, mri_path):\n",
|
785 |
+
" print(\"Loading CT scans...\")\n",
|
786 |
+
" ct_scans = load_images(ct_path)\n",
|
787 |
+
" print(\"Loading MRI scans...\")\n",
|
788 |
+
" mri_scans = load_images(mri_path)\n",
|
789 |
+
" \n",
|
790 |
+
" min_length = min(len(ct_scans), len(mri_scans))\n",
|
791 |
+
" ct_scans = ct_scans[:min_length]\n",
|
792 |
+
" mri_scans = mri_scans[:min_length]\n",
|
793 |
+
" \n",
|
794 |
+
" print(f\"Balanced datasets to {min_length} images each\")\n",
|
795 |
+
" return ct_scans, mri_scans"
|
796 |
+
]
|
797 |
+
},
|
798 |
+
{
|
799 |
+
"cell_type": "markdown",
|
800 |
+
"metadata": {},
|
801 |
+
"source": [
|
802 |
+
"*Training Setup - Detailed Explanation*\n",
|
803 |
+
"\n",
|
804 |
+
"This block of code sets up the models and optimizers required for training a CycleGAN for CT ↔ MRI image translation. Let’s break it down step by step.\n",
|
805 |
+
"\n",
|
806 |
+
"📌 What Does This Code Do?\n",
|
807 |
+
" Builds the generator models (CT → MRI and MRI → CT).\n",
|
808 |
+
"\n",
|
809 |
+
" Builds the discriminator models for CT and MRI.\n",
|
810 |
+
"\n",
|
811 |
+
" Creates optimizers for training the generators and discriminators.\n",
|
812 |
+
"\n",
|
813 |
+
" Initializes model variables (trainable parameters for both generators and discriminators).\n",
|
814 |
+
"\n",
|
815 |
+
" Builds optimizers using the trainable variables.\n",
|
816 |
+
"\n",
|
817 |
+
"1️⃣ Building the Generator Models\n",
|
818 |
+
"\n",
|
819 |
+
" build_generator(name): This function (explained earlier) builds a U-Net-based Variational Autoencoder (VAE) generator.\n",
|
820 |
+
"\n",
|
821 |
+
" g_ct_mri: The generator that converts CT scans → MRI images.\n",
|
822 |
+
"\n",
|
823 |
+
" g_mri_ct: The generator that converts MRI images → CT scans\n",
|
824 |
+
"\n",
|
825 |
+
"2️⃣ Building the Discriminator Models\n",
|
826 |
+
"\n",
|
827 |
+
" build_discriminator(name): This function (explained earlier) builds the discriminators to differentiate real and fake images.\n",
|
828 |
+
"\n",
|
829 |
+
" d_ct: The discriminator that distinguishes real CT scans from fake ones.\n",
|
830 |
+
"\n",
|
831 |
+
" d_mri: The discriminator that distinguishes real MRI scans from fake ones.\n",
|
832 |
+
"\n",
|
833 |
+
"\n",
|
834 |
+
"3️⃣ Creating Optimizers\n",
|
835 |
+
"\n",
|
836 |
+
" g_opt: Optimizer for training both generators.\n",
|
837 |
+
"\n",
|
838 |
+
" d_opt: Optimizer for training both discriminators.\n",
|
839 |
+
"\n",
|
840 |
+
" Uses RMSprop as the optimizer.\n",
|
841 |
+
"\n",
|
842 |
+
" The learning rate (LEARNING_RATE) controls the step size for updates.\n",
|
843 |
+
"\n",
|
844 |
+
" Weight decay (WEIGHT_DECAY) prevents overfitting by penalizing large weights.\n",
|
845 |
+
"\n",
|
846 |
+
"4️⃣ Initializing Model Variables\n",
|
847 |
+
"\n",
|
848 |
+
" g_vars: Stores all trainable variables (weights & biases) of both generators.\n",
|
849 |
+
" d_vars: Stores all trainable variables of both discriminators.\n",
|
850 |
+
"\n",
|
851 |
+
" 📝 Why store trainable variables separately?\n",
|
852 |
+
"\n",
|
853 |
+
" Since generators and discriminators have separate losses, they need to be updated separately.\n",
|
854 |
+
"\n",
|
855 |
+
"5️⃣ Building Optimizers with Model Variables\n",
|
856 |
+
"\n",
|
857 |
+
" g_opt.build(g_vars): Tells TensorFlow that g_opt will optimize generator variables.\n",
|
858 |
+
"\n",
|
859 |
+
" d_opt.build(d_vars): Tells TensorFlow that d_opt will optimize discriminator variables.\n",
|
860 |
+
"\n",
|
861 |
+
" 📝 Why explicitly build the optimizers?\n",
|
862 |
+
"\n",
|
863 |
+
" In Eager Execution mode, TensorFlow automatically tracks variables.\n",
|
864 |
+
" \n",
|
865 |
+
" However, explicitly calling build() can help with performance optimization.\n",
|
866 |
+
"\n",
|
867 |
+
"\n"
|
868 |
+
]
|
869 |
+
},
|
870 |
+
{
|
871 |
+
"cell_type": "code",
|
872 |
+
"execution_count": 12,
|
873 |
+
"metadata": {},
|
874 |
+
"outputs": [
|
875 |
+
{
|
876 |
+
"name": "stdout",
|
877 |
+
"output_type": "stream",
|
878 |
+
"text": [
|
879 |
+
"WARNING:tensorflow:From d:\\VS CODE\\Web Dev\\Projects\\Image2Image\\image\\lib\\site-packages\\keras\\src\\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.\n",
|
880 |
+
"\n",
|
881 |
+
"WARNING:tensorflow:From d:\\VS CODE\\Web Dev\\Projects\\Image2Image\\image\\lib\\site-packages\\keras\\src\\layers\\pooling\\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.\n",
|
882 |
+
"\n"
|
883 |
+
]
|
884 |
+
},
|
885 |
+
{
|
886 |
+
"ename": "NameError",
|
887 |
+
"evalue": "name 'Sampling' is not defined",
|
888 |
+
"output_type": "error",
|
889 |
+
"traceback": [
|
890 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
891 |
+
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
892 |
+
"Cell \u001b[1;32mIn[12], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# ===================== Training Setup =====================\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# Build models\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m g_ct_mri \u001b[38;5;241m=\u001b[39m \u001b[43mbuild_generator\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mCT_to_MRI\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4\u001b[0m g_mri_ct \u001b[38;5;241m=\u001b[39m build_generator(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mMRI_to_CT\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 5\u001b[0m d_ct \u001b[38;5;241m=\u001b[39m build_discriminator(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mD_CT\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
|
893 |
+
"Cell \u001b[1;32mIn[7], line 17\u001b[0m, in \u001b[0;36mbuild_generator\u001b[1;34m(name)\u001b[0m\n\u001b[0;32m 15\u001b[0m z_mean \u001b[38;5;241m=\u001b[39m layers\u001b[38;5;241m.\u001b[39mDense(LATENT_DIM, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mz_mean_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)(x)\n\u001b[0;32m 16\u001b[0m z_log_var \u001b[38;5;241m=\u001b[39m layers\u001b[38;5;241m.\u001b[39mDense(LATENT_DIM, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mz_log_var_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)(x)\n\u001b[1;32m---> 17\u001b[0m z \u001b[38;5;241m=\u001b[39m \u001b[43mSampling\u001b[49m()([z_mean, z_log_var])\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# Reshape for decoder\u001b[39;00m\n\u001b[0;32m 20\u001b[0m x \u001b[38;5;241m=\u001b[39m layers\u001b[38;5;241m.\u001b[39mDense(\u001b[38;5;241m2\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m2\u001b[39m \u001b[38;5;241m*\u001b[39m FILTERS\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m64\u001b[39m)(z)\n",
|
894 |
+
"\u001b[1;31mNameError\u001b[0m: name 'Sampling' is not defined"
|
895 |
+
]
|
896 |
+
}
|
897 |
+
],
|
898 |
+
"source": [
|
899 |
+
"# ===================== Training Setup =====================\n",
|
900 |
+
"# Build models\n",
|
901 |
+
"g_ct_mri = build_generator('CT_to_MRI')\n",
|
902 |
+
"g_mri_ct = build_generator('MRI_to_CT')\n",
|
903 |
+
"d_ct = build_discriminator('D_CT')\n",
|
904 |
+
"d_mri = build_discriminator('D_MRI')\n",
|
905 |
+
"\n",
|
906 |
+
"# Create optimizers\n",
|
907 |
+
"g_opt = tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY)\n",
|
908 |
+
"d_opt = tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY)\n",
|
909 |
+
"\n",
|
910 |
+
"# Initialize model variables\n",
|
911 |
+
"g_vars = g_ct_mri.trainable_variables + g_mri_ct.trainable_variables\n",
|
912 |
+
"d_vars = d_ct.trainable_variables + d_mri.trainable_variables\n",
|
913 |
+
"\n",
|
914 |
+
"# Build optimizers\n",
|
915 |
+
"g_opt.build(g_vars)\n",
|
916 |
+
"d_opt.build(d_vars)"
|
917 |
+
]
|
918 |
+
},
|
919 |
+
{
|
920 |
+
"cell_type": "markdown",
|
921 |
+
"metadata": {},
|
922 |
+
"source": [
|
923 |
+
"*Explanation of train_step Function in CycleGAN with Variational Autoencoder (VAE)*\n",
|
924 |
+
"\n",
|
925 |
+
"This function performs one training step for the CycleGAN with VAE-style latent representations. It does the following:\n",
|
926 |
+
"\n",
|
927 |
+
"Generates fake images using the generators.\n",
|
928 |
+
"\n",
|
929 |
+
"Evaluates the fake and real images using the discriminators.\n",
|
930 |
+
"\n",
|
931 |
+
"Computes the loss functions for both generators and discriminators.\n",
|
932 |
+
"\n",
|
933 |
+
"Computes gradients and updates the model parameters.\n",
|
934 |
+
"\n",
|
935 |
+
"1️⃣ Forward Pass - Generate Fake Images\n",
|
936 |
+
"\n",
|
937 |
+
" g_ct_mri(real_ct): Translates CT → Fake MRI and produces:\n",
|
938 |
+
" \n",
|
939 |
+
" fake_mri: The generated MRI image.\n",
|
940 |
+
" z_mean_fwd, z_log_var_fwd: Latent variables (from the Variational Autoencoder).\n",
|
941 |
+
" g_mri_ct(real_mri): Translates MRI → Fake CT with similar outputs.\n",
|
942 |
+
"\n",
|
943 |
+
" 📝 Why store z_mean and z_log_var?\n",
|
944 |
+
"\n",
|
945 |
+
" These come from the VAE latent space and are used for the KL divergence loss.\n",
|
946 |
+
"\n",
|
947 |
+
"2️⃣ Compute Discriminator Outputs\n",
|
948 |
+
"\n",
|
949 |
+
" d_ct(real_ct): Discriminator’s prediction for real CT images.\n",
|
950 |
+
"\n",
|
951 |
+
" d_ct(fake_ct): Discriminator’s prediction for fake CT images.\n",
|
952 |
+
"\n",
|
953 |
+
" d_mri(real_mri): Discriminator’s prediction for real MRI images.\n",
|
954 |
+
"\n",
|
955 |
+
" d_mri(fake_mri): Discriminator’s prediction for fake MRI images.\n",
|
956 |
+
"\n",
|
957 |
+
" 📝 Goal of Discriminators?\n",
|
958 |
+
"\n",
|
959 |
+
"\n",
|
960 |
+
" Real images should be classified close to 1.\n",
|
961 |
+
"\n",
|
962 |
+
" Fake images should be classified close to 0.\n",
|
963 |
+
"\n",
|
964 |
+
"3️⃣ Compute Discriminator Losses\n",
|
965 |
+
"\n",
|
966 |
+
" Uses Least Squares GAN (LSGAN) loss:\n",
|
967 |
+
"\n",
|
968 |
+
" For real images: (real - 1)^2 → Encourages real images to be classified as 1.\n",
|
969 |
+
"\n",
|
970 |
+
" For fake images: fake^2 → Encourages fake images to be classified as 0.\n",
|
971 |
+
"\n",
|
972 |
+
" sum([...]): If there are multiple output layers in the discriminator, we sum their losses.\n",
|
973 |
+
"\n",
|
974 |
+
" 📝 Why LSGAN loss?\n",
|
975 |
+
"\n",
|
976 |
+
" Helps stabilize training compared to standard GAN loss.\n",
|
977 |
+
"\n",
|
978 |
+
"\n",
|
979 |
+
"4️⃣ Cycle Consistency Loss (CycleGAN Component)\n",
|
980 |
+
"\n",
|
981 |
+
" cycled_ct = g_mri_ct(fake_mri): The fake MRI is translated back to CT.\n",
|
982 |
+
"\n",
|
983 |
+
" cycled_mri = g_ct_mri(fake_ct): The fake CT is translated back to MRI.\n",
|
984 |
+
"\n",
|
985 |
+
" 📝 Why cycle consistency?\n",
|
986 |
+
"\n",
|
987 |
+
" The network should learn round-trip consistency:\n",
|
988 |
+
"\n",
|
989 |
+
" CT → Fake MRI → CT (should look like original CT)\n",
|
990 |
+
"\n",
|
991 |
+
" MRI → Fake CT → MRI (should look like original MRI)\n",
|
992 |
+
"\n",
|
993 |
+
"5️⃣ KL Divergence Loss (VAE Component)\n",
|
994 |
+
"\n",
|
995 |
+
" This is the KL divergence loss from VAE:\n",
|
996 |
+
"\n",
|
997 |
+
" Encourages the latent space to follow a Gaussian distribution.\n",
|
998 |
+
"\n",
|
999 |
+
" Prevents mode collapse.\n",
|
1000 |
+
"\n",
|
1001 |
+
" 📝 Why add KL divergence loss?\n",
|
1002 |
+
"\n",
|
1003 |
+
" Regularizes the latent space so the generator produces diverse outputs.\n",
|
1004 |
+
"\n",
|
1005 |
+
"6️⃣ Compute Generator Losses\n",
|
1006 |
+
"\n",
|
1007 |
+
" The generator wants fake images to be classified as real (1), so we use:\n",
|
1008 |
+
"\n",
|
1009 |
+
" (fake - 1)^2 → Fake images should be close to 1.\n",
|
1010 |
+
"\n",
|
1011 |
+
" Cycle consistency loss: L1 loss (|original - reconstructed|).\n",
|
1012 |
+
"\n",
|
1013 |
+
" Encourages faithful reconstructions.\n",
|
1014 |
+
"\n",
|
1015 |
+
"\n",
|
1016 |
+
" Final generator loss combines:\n",
|
1017 |
+
"\n",
|
1018 |
+
" Adversarial loss (GAN loss).\n",
|
1019 |
+
"\n",
|
1020 |
+
" Cycle consistency loss (weighted by 10 for stronger enforcement).\n",
|
1021 |
+
"\n",
|
1022 |
+
" KL divergence loss (weighted by 0.5).\n",
|
1023 |
+
"\n",
|
1024 |
+
" \n",
|
1025 |
+
"7️⃣ Compute Total Discriminator Loss\n",
|
1026 |
+
"\n",
|
1027 |
+
"Adds both discriminator losses.\n",
|
1028 |
+
"\n",
|
1029 |
+
"8️⃣ Compute Gradients & Update Model Parameters\n",
|
1030 |
+
"\n",
|
1031 |
+
" Computes gradients of discriminator loss (d_total_loss).\n",
|
1032 |
+
"\n",
|
1033 |
+
" Updates discriminator weights (d_vars).\n",
|
1034 |
+
"\n",
|
1035 |
+
" Computes gradients of generator loss (g_total_loss).\n",
|
1036 |
+
"\n",
|
1037 |
+
" Updates generator weights (g_vars).\n",
|
1038 |
+
"\n",
|
1039 |
+
"📝 Why use tf.GradientTape(persistent=True)?\n",
|
1040 |
+
"\n",
|
1041 |
+
" We need gradients twice (once for discriminators, once for generators).\n",
|
1042 |
+
"\n",
|
1043 |
+
"\n"
|
1044 |
+
]
|
1045 |
+
},
|
1046 |
+
{
|
1047 |
+
"cell_type": "code",
|
1048 |
+
"execution_count": null,
|
1049 |
+
"metadata": {},
|
1050 |
+
"outputs": [],
|
1051 |
+
"source": [
|
1052 |
+
"import os\n",
|
1053 |
+
"@tf.function\n",
|
1054 |
+
"def train_step(real_ct, real_mri):\n",
|
1055 |
+
" with tf.GradientTape(persistent=True) as tape:\n",
|
1056 |
+
" # Forward passes\n",
|
1057 |
+
" fake_mri, z_mean_fwd, z_log_var_fwd = g_ct_mri(real_ct, training=True)\n",
|
1058 |
+
" fake_ct, z_mean_bwd, z_log_var_bwd = g_mri_ct(real_mri, training=True)\n",
|
1059 |
+
" \n",
|
1060 |
+
" # Discriminator outputs\n",
|
1061 |
+
" d_real_ct = d_ct(real_ct, training=True)\n",
|
1062 |
+
" d_fake_ct = d_ct(fake_ct, training=True)\n",
|
1063 |
+
" d_real_mri = d_mri(real_mri, training=True)\n",
|
1064 |
+
" d_fake_mri = d_mri(fake_mri, training=True)\n",
|
1065 |
+
" \n",
|
1066 |
+
" # Discriminator losses\n",
|
1067 |
+
" d_ct_loss = sum([tf.reduce_mean((real - 1)**2) + tf.reduce_mean(fake**2) \n",
|
1068 |
+
" for real, fake in zip(d_real_ct, d_fake_ct)])\n",
|
1069 |
+
" d_mri_loss = sum([tf.reduce_mean((real - 1)**2) + tf.reduce_mean(fake**2) \n",
|
1070 |
+
" for real, fake in zip(d_real_mri, d_fake_mri)])\n",
|
1071 |
+
" \n",
|
1072 |
+
" # Cycle consistency\n",
|
1073 |
+
" cycled_ct, _, _ = g_mri_ct(fake_mri, training=True)\n",
|
1074 |
+
" cycled_mri, _, _ = g_ct_mri(fake_ct, training=True)\n",
|
1075 |
+
" \n",
|
1076 |
+
" # KL Divergence\n",
|
1077 |
+
" kl_fwd = -0.5 * tf.reduce_mean(1 + z_log_var_fwd - tf.square(z_mean_fwd) - tf.exp(z_log_var_fwd))\n",
|
1078 |
+
" kl_bwd = -0.5 * tf.reduce_mean(1 + z_log_var_bwd - tf.square(z_mean_bwd) - tf.exp(z_log_var_bwd))\n",
|
1079 |
+
" \n",
|
1080 |
+
" # Generator losses\n",
|
1081 |
+
" g_adv_loss = sum([tf.reduce_mean((fake - 1)**2) for fake in d_fake_mri + d_fake_ct])\n",
|
1082 |
+
" g_cycle_loss = (tf.reduce_mean(tf.abs(real_ct - cycled_ct)) + \n",
|
1083 |
+
" tf.reduce_mean(tf.abs(real_mri - cycled_mri)))\n",
|
1084 |
+
" g_total_loss = g_adv_loss + 10 * g_cycle_loss + 0.5 * (kl_fwd + kl_bwd)\n",
|
1085 |
+
" \n",
|
1086 |
+
" # Total discriminator loss\n",
|
1087 |
+
" d_total_loss = d_ct_loss + d_mri_loss\n",
|
1088 |
+
" \n",
|
1089 |
+
" # Update discriminators\n",
|
1090 |
+
" d_grads = tape.gradient(d_total_loss, d_vars)\n",
|
1091 |
+
" d_opt.apply_gradients(zip(d_grads, d_vars))\n",
|
1092 |
+
" \n",
|
1093 |
+
" # Update generators\n",
|
1094 |
+
" g_grads = tape.gradient(g_total_loss, g_vars)\n",
|
1095 |
+
" g_opt.apply_gradients(zip(g_grads, g_vars))\n",
|
1096 |
+
" \n",
|
1097 |
+
" return {\n",
|
1098 |
+
" 'd_ct': d_ct_loss,\n",
|
1099 |
+
" 'd_mri': d_mri_loss,\n",
|
1100 |
+
" 'g_total': g_total_loss,\n",
|
1101 |
+
" 'fake_mri': fake_mri,\n",
|
1102 |
+
" 'fake_ct': fake_ct\n",
|
1103 |
+
" }"
|
1104 |
+
]
|
1105 |
+
},
|
1106 |
+
{
|
1107 |
+
"cell_type": "markdown",
|
1108 |
+
"metadata": {},
|
1109 |
+
"source": [
|
1110 |
+
"This code defines the main training loop for a CycleGAN-based model that translates between CT and MRI images. It consists of data preparation, training iteration, progress tracking, and model saving. Below is a step-by-step breakdown:\n",
|
1111 |
+
"\n",
|
1112 |
+
"\n",
|
1113 |
+
"1. Create Progress Directory\n",
|
1114 |
+
"\n",
|
1115 |
+
"The script creates a directory named progress/ inside Kaggle's working directory.\n",
|
1116 |
+
"\n",
|
1117 |
+
"This directory will store progress images showing how well the model is learning over time.\n",
|
1118 |
+
"\n",
|
1119 |
+
"2. Load and Balance the Datasets\n",
|
1120 |
+
"\n",
|
1121 |
+
"Calls load_and_balance_datasets() to load CT and MRI images from the dataset folders.\n",
|
1122 |
+
"\n",
|
1123 |
+
"Ensures both datasets have the same number of images by truncating the larger set.\n",
|
1124 |
+
"\n",
|
1125 |
+
"3. Create TensorFlow Dataset for Training\n",
|
1126 |
+
"\n",
|
1127 |
+
"Creates a TensorFlow dataset from the loaded images.\n",
|
1128 |
+
"\n",
|
1129 |
+
"Shuffles the dataset to introduce randomness and prevent overfitting.\n",
|
1130 |
+
"\n",
|
1131 |
+
"Batches the dataset to process multiple images in parallel during training.\n",
|
1132 |
+
"\n",
|
1133 |
+
"\n",
|
1134 |
+
"4. Training Loop\n",
|
1135 |
+
"\n",
|
1136 |
+
"Starts iterating over epochs (EPOCHS defines the total number of passes over the dataset).\n",
|
1137 |
+
"\n",
|
1138 |
+
"Iterates through mini-batches of CT and MRI scans using train_dataset.\n",
|
1139 |
+
"\n",
|
1140 |
+
"5. Train the Model (Forward & Backward Pass)\n",
|
1141 |
+
"\n",
|
1142 |
+
"Calls train_step(ct_batch, mri_batch), which:\n",
|
1143 |
+
"\n",
|
1144 |
+
" Generates fake MRI from CT (G_CT→MRI) and fake CT from MRI (G_MRI→CT).\n",
|
1145 |
+
"\n",
|
1146 |
+
" Passes both real and fake images through the discriminators (D_CT and D_MRI).\n",
|
1147 |
+
"\n",
|
1148 |
+
" Computes adversarial losses, cycle consistency loss, and KL divergence.\n",
|
1149 |
+
"\n",
|
1150 |
+
" Updates the discriminators (D_CT, D_MRI) and generators (G_CT→MRI, G_MRI→CT).\n",
|
1151 |
+
"\n",
|
1152 |
+
"Stores the loss values (d_ct_loss, d_mri_loss, g_total_loss) and the generated images.\n",
|
1153 |
+
"\n",
|
1154 |
+
"6. Print Losses for Monitoring\n",
|
1155 |
+
"\n",
|
1156 |
+
"Every 10 batches, prints:\n",
|
1157 |
+
"\n",
|
1158 |
+
"D_CT: Discriminator loss for CT.\n",
|
1159 |
+
"\n",
|
1160 |
+
"D_MRI: Discriminator loss for MRI.\n",
|
1161 |
+
"\n",
|
1162 |
+
"G: Total generator loss.\n",
|
1163 |
+
"\n",
|
1164 |
+
"This helps monitor model performance during training.\n",
|
1165 |
+
"\n",
|
1166 |
+
"7. Save Sample Images for Progress Tracking\n",
|
1167 |
+
"\n",
|
1168 |
+
"Every 100 batches, saves progress images to progress/.\n",
|
1169 |
+
"\n",
|
1170 |
+
"Displays real CT & MRI images alongside their fake counterparts generated by the model.\n",
|
1171 |
+
"\n",
|
1172 |
+
"Helps visually track improvements in image quality over time.\n",
|
1173 |
+
"\n"
|
1174 |
+
]
|
1175 |
+
},
|
1176 |
+
{
|
1177 |
+
"cell_type": "code",
|
1178 |
+
"execution_count": 14,
|
1179 |
+
"metadata": {},
|
1180 |
+
"outputs": [
|
1181 |
+
{
|
1182 |
+
"ename": "NameError",
|
1183 |
+
"evalue": "name 'os' is not defined",
|
1184 |
+
"output_type": "error",
|
1185 |
+
"traceback": [
|
1186 |
+
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
1187 |
+
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
|
1188 |
+
"Cell \u001b[1;32mIn[14], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# ===================== Main Training Loop =====================\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# Create progress directory if it doesn't exist\u001b[39;00m\n\u001b[0;32m 3\u001b[0m progress_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/kaggle/working/progress\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[43mos\u001b[49m\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(progress_dir):\n\u001b[0;32m 5\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(progress_dir)\n\u001b[0;32m 7\u001b[0m \u001b[38;5;66;03m# Load and prepare data\u001b[39;00m\n",
|
1189 |
+
"\u001b[1;31mNameError\u001b[0m: name 'os' is not defined"
|
1190 |
+
]
|
1191 |
+
}
|
1192 |
+
],
|
1193 |
+
"source": [
|
1194 |
+
"\n",
|
1195 |
+
"# ===================== Main Training Loop =====================\n",
|
1196 |
+
"# Create progress directory if it doesn't exist\n",
|
1197 |
+
"progress_dir = '/kaggle/working/progress'\n",
|
1198 |
+
"if not os.path.exists(progress_dir):\n",
|
1199 |
+
" os.makedirs(progress_dir)\n",
|
1200 |
+
"\n",
|
1201 |
+
"# Load and prepare data\n",
|
1202 |
+
"print(\"Loading datasets...\")\n",
|
1203 |
+
"ct_scans, mri_scans = load_and_balance_datasets('/kaggle/input/ct-to-mri-cgan/Dataset/images/trainA', \n",
|
1204 |
+
" '/kaggle/input/ct-to-mri-cgan/Dataset/images/trainB')\n",
|
1205 |
+
"\n",
|
1206 |
+
"# Create TensorFlow dataset\n",
|
1207 |
+
"train_dataset = tf.data.Dataset.from_tensor_slices((ct_scans, mri_scans))\n",
|
1208 |
+
"train_dataset = train_dataset.shuffle(buffer_size=len(ct_scans)).batch(BATCH_SIZE)\n",
|
1209 |
+
"# Training loop\n",
|
1210 |
+
"print(\"Starting training...\")\n",
|
1211 |
+
"for epoch in range(EPOCHS):\n",
|
1212 |
+
" for batch_idx, (ct_batch, mri_batch) in enumerate(train_dataset):\n",
|
1213 |
+
" results = train_step(ct_batch, mri_batch)\n",
|
1214 |
+
" \n",
|
1215 |
+
" if batch_idx % 10 == 0:\n",
|
1216 |
+
" print(f\"Epoch {epoch}, Batch {batch_idx}: \"\n",
|
1217 |
+
" f\"D_CT={float(results['d_ct']):.4f}, \"\n",
|
1218 |
+
" f\"D_MRI={float(results['d_mri']):.4f}, \"\n",
|
1219 |
+
" f\"G={float(results['g_total']):.4f}\")\n",
|
1220 |
+
" \n",
|
1221 |
+
" # Save sample images every 100 batches\n",
|
1222 |
+
" if batch_idx % 100 == 0:\n",
|
1223 |
+
" fig, axes = plt.subplots(2, 2, figsize=(10, 10))\n",
|
1224 |
+
" \n",
|
1225 |
+
" # Real CT and Fake MRI\n",
|
1226 |
+
" axes[0,0].imshow(ct_batch[0].numpy())\n",
|
1227 |
+
" axes[0,0].set_title(\"Real CT\")\n",
|
1228 |
+
" axes[0,0].axis('off')\n",
|
1229 |
+
" \n",
|
1230 |
+
" axes[0,1].imshow(results['fake_mri'][0].numpy())\n",
|
1231 |
+
" axes[0,1].set_title(\"Fake MRI\")\n",
|
1232 |
+
" axes[0,1].axis('off')\n",
|
1233 |
+
" \n",
|
1234 |
+
" # Real MRI and Fake CT\n",
|
1235 |
+
" axes[1,0].imshow(mri_batch[0].numpy())\n",
|
1236 |
+
" axes[1,0].set_title(\"Real MRI\")\n",
|
1237 |
+
" axes[1,0].axis('off')\n",
|
1238 |
+
" \n",
|
1239 |
+
" axes[1,1].imshow(results['fake_ct'][0].numpy())\n",
|
1240 |
+
" axes[1,1].set_title(\"Fake CT\")\n",
|
1241 |
+
" axes[1,1].axis('off')\n",
|
1242 |
+
" \n",
|
1243 |
+
" plt.tight_layout()\n",
|
1244 |
+
" plt.savefig(f'progress/epoch_{epoch}_batch_{batch_idx}.png')\n",
|
1245 |
+
" plt.close()\n",
|
1246 |
+
" \n",
|
1247 |
+
" # Save models after each epoch\n",
|
1248 |
+
" save_models(g_ct_mri, g_mri_ct, epoch)"
|
1249 |
+
]
|
1250 |
+
},
|
1251 |
+
{
|
1252 |
+
"cell_type": "code",
|
1253 |
+
"execution_count": null,
|
1254 |
+
"metadata": {},
|
1255 |
+
"outputs": [],
|
1256 |
+
"source": [
|
1257 |
+
"\n",
|
1258 |
+
"def translate_image(model_path, image_path, output_path, mode='ct_to_mri'):\n",
|
1259 |
+
" \"\"\"\n",
|
1260 |
+
" Translate a single image using the trained model\n",
|
1261 |
+
" \n",
|
1262 |
+
" Parameters:\n",
|
1263 |
+
" model_path: Path to the saved model\n",
|
1264 |
+
" image_path: Path to the input image\n",
|
1265 |
+
" output_path: Path to save the translated image\n",
|
1266 |
+
" mode: 'ct_to_mri' or 'mri_to_ct'\n",
|
1267 |
+
" \"\"\"\n",
|
1268 |
+
" # Load model\n",
|
1269 |
+
" print(f\"Loading model from {model_path}\")\n",
|
1270 |
+
" model = tf.keras.models.load_model(model_path, \n",
|
1271 |
+
" custom_objects={'Sampling': Sampling})\n",
|
1272 |
+
" \n",
|
1273 |
+
" # Load and preprocess image\n",
|
1274 |
+
" input_image = load_and_preprocess_image(image_path)\n",
|
1275 |
+
" \n",
|
1276 |
+
" # Generate translation\n",
|
1277 |
+
" print(\"Generating translation...\")\n",
|
1278 |
+
" translated_image, _, _ = model(input_image, training=False)\n",
|
1279 |
+
" \n",
|
1280 |
+
" # Convert to numpy and denormalize\n",
|
1281 |
+
" translated_image = translated_image.numpy()[0] * 255\n",
|
1282 |
+
" translated_image = translated_image.astype(np.uint8)\n",
|
1283 |
+
" \n",
|
1284 |
+
" # Save the result\n",
|
1285 |
+
" print(f\"Saving translated image to {output_path}\")\n",
|
1286 |
+
" plt.figure(figsize=(10, 5))\n",
|
1287 |
+
" \n",
|
1288 |
+
" plt.subplot(1, 2, 1)\n",
|
1289 |
+
" plt.title(\"Input Image\")\n",
|
1290 |
+
" plt.imshow(input_image[0])\n",
|
1291 |
+
" plt.axis('off')\n",
|
1292 |
+
" \n",
|
1293 |
+
" plt.subplot(1, 2, 2)\n",
|
1294 |
+
" plt.title(\"Translated Image\")\n",
|
1295 |
+
" plt.imshow(translated_image)\n",
|
1296 |
+
" plt.axis('off')\n",
|
1297 |
+
" \n",
|
1298 |
+
" plt.tight_layout()\n",
|
1299 |
+
" plt.savefig(output_path)\n",
|
1300 |
+
" plt.close()\n",
|
1301 |
+
" \n",
|
1302 |
+
" return translated_image\n",
|
1303 |
+
"'''\n",
|
1304 |
+
"# Example usage of the translation function\n",
|
1305 |
+
"def example_translation():\n",
|
1306 |
+
" \"\"\"Example of how to use the translation function\"\"\"\n",
|
1307 |
+
" # Paths\n",
|
1308 |
+
" ct_to_mri_model = 'saved_models/ct_to_mri_epoch_1000'\n",
|
1309 |
+
" mri_to_ct_model = 'saved_models/mri_to_ct_epoch_1000'\n",
|
1310 |
+
" \n",
|
1311 |
+
" # CT to MRI translation\n",
|
1312 |
+
" input_ct = 'path/to/your/ct_image.jpg'\n",
|
1313 |
+
" output_mri = 'results/translated_mri.png'\n",
|
1314 |
+
" translated_mri = translate_image(ct_to_mri_model, input_ct, output_mri, \n",
|
1315 |
+
" mode='ct_to_mri')\n",
|
1316 |
+
" \n",
|
1317 |
+
" # MRI to CT translation\n",
|
1318 |
+
" input_mri = 'path/to/your/mri_image.jpg'\n",
|
1319 |
+
" output_ct = 'results/translated_ct.png'\n",
|
1320 |
+
" translated_ct = translate_image(mri_to_ct_model, input_mri, output_ct, \n",
|
1321 |
+
" mode='mri_to_ct')'''"
|
1322 |
+
]
|
1323 |
+
},
|
1324 |
+
{
|
1325 |
+
"cell_type": "markdown",
|
1326 |
+
"metadata": {},
|
1327 |
+
"source": [
|
1328 |
+
"*Complete code in Single Block*"
|
1329 |
+
]
|
1330 |
+
},
|
1331 |
+
{
|
1332 |
+
"cell_type": "code",
|
1333 |
+
"execution_count": null,
|
1334 |
+
"metadata": {},
|
1335 |
+
"outputs": [],
|
1336 |
+
"source": [
|
1337 |
+
"import tensorflow as tf\n",
|
1338 |
+
"from tensorflow.keras import layers, Model\n",
|
1339 |
+
"import numpy as np\n",
|
1340 |
+
"import cv2\n",
|
1341 |
+
"import pathlib\n",
|
1342 |
+
"import matplotlib.pyplot as plt\n",
|
1343 |
+
"import tensorflow_probability as tfp\n",
|
1344 |
+
"\n",
|
1345 |
+
"tfd = tfp.distributions\n",
|
1346 |
+
"\n",
|
1347 |
+
"# ===================== Configuration =====================\n",
|
1348 |
+
"IMAGE_SHAPE = (256, 256, 3)\n",
|
1349 |
+
"LATENT_DIM = 256\n",
|
1350 |
+
"FILTERS = 16\n",
|
1351 |
+
"KERNEL = 3\n",
|
1352 |
+
"LEARNING_RATE = 0.0001\n",
|
1353 |
+
"WEIGHT_DECAY = 6e-8\n",
|
1354 |
+
"BATCH_SIZE = 1\n",
|
1355 |
+
"EPOCHS = 10\n",
|
1356 |
+
"\n",
|
1357 |
+
"# ===================== Architecture Components =====================\n",
|
1358 |
+
"class Sampling(layers.Layer):\n",
|
1359 |
+
" def call(self, inputs):\n",
|
1360 |
+
" z_mean, z_log_var = inputs\n",
|
1361 |
+
" batch = tf.shape(z_mean)[0]\n",
|
1362 |
+
" dim = tf.shape(z_mean)[1]\n",
|
1363 |
+
" epsilon = tf.random.normal(shape=(batch, dim))\n",
|
1364 |
+
" return z_mean + tf.exp(0.5 * z_log_var) * epsilon\n",
|
1365 |
+
"\n",
|
1366 |
+
"def residual_block(inputs, filters, use_norm=True):\n",
|
1367 |
+
" x = layers.Conv2D(filters, KERNEL, padding='same')(inputs)\n",
|
1368 |
+
" x = layers.LeakyReLU(alpha=0.2)(x)\n",
|
1369 |
+
" if use_norm:\n",
|
1370 |
+
" x = layers.GroupNormalization(groups=1)(x)\n",
|
1371 |
+
" x = layers.Conv2D(filters, KERNEL, padding='same')(x)\n",
|
1372 |
+
" x = layers.LeakyReLU(alpha=0.2)(x)\n",
|
1373 |
+
" if use_norm:\n",
|
1374 |
+
" x = layers.GroupNormalization(groups=1)(x)\n",
|
1375 |
+
" shortcut = layers.Conv2D(filters, 1, padding='same')(inputs)\n",
|
1376 |
+
" return layers.maximum([x, shortcut])\n",
|
1377 |
+
"\n",
|
1378 |
+
"def encoder_block(inputs, filters, use_norm=True):\n",
|
1379 |
+
" x = residual_block(inputs, filters, use_norm)\n",
|
1380 |
+
" skip = x\n",
|
1381 |
+
" x = layers.MaxPooling2D()(x)\n",
|
1382 |
+
" return x, skip\n",
|
1383 |
+
"\n",
|
1384 |
+
"def decoder_block(inputs, skip, filters, use_norm=True):\n",
|
1385 |
+
" x = layers.Conv2DTranspose(filters, KERNEL, strides=2, padding='same')(inputs)\n",
|
1386 |
+
" x = layers.maximum([x, skip])\n",
|
1387 |
+
" x = residual_block(x, filters, use_norm)\n",
|
1388 |
+
" return x\n",
|
1389 |
+
"\n",
|
1390 |
+
"# ===================== Generator =====================\n",
|
1391 |
+
"def build_generator(name):\n",
|
1392 |
+
" inputs = layers.Input(IMAGE_SHAPE)\n",
|
1393 |
+
" \n",
|
1394 |
+
" # Encoder\n",
|
1395 |
+
" e1, s1 = encoder_block(inputs, FILTERS)\n",
|
1396 |
+
" e2, s2 = encoder_block(e1, FILTERS*2)\n",
|
1397 |
+
" e3, s3 = encoder_block(e2, FILTERS*4)\n",
|
1398 |
+
" e4, s4 = encoder_block(e3, FILTERS*8)\n",
|
1399 |
+
" e5, s5 = encoder_block(e4, FILTERS*16)\n",
|
1400 |
+
" e6, s6 = encoder_block(e5, FILTERS*32)\n",
|
1401 |
+
" e7, s7 = encoder_block(e6, FILTERS*64)\n",
|
1402 |
+
" \n",
|
1403 |
+
" # Latent Space\n",
|
1404 |
+
" x = layers.Flatten()(e7)\n",
|
1405 |
+
" z_mean = layers.Dense(LATENT_DIM, name=f\"z_mean_{name.split('_')[-1]}\")(x)\n",
|
1406 |
+
" z_log_var = layers.Dense(LATENT_DIM, name=f\"z_log_var_{name.split('_')[-1]}\")(x)\n",
|
1407 |
+
" z = Sampling()([z_mean, z_log_var])\n",
|
1408 |
+
" \n",
|
1409 |
+
" # Reshape for decoder\n",
|
1410 |
+
" x = layers.Dense(2 * 2 * FILTERS*64)(z)\n",
|
1411 |
+
" x = layers.Reshape((2, 2, FILTERS*64))(x)\n",
|
1412 |
+
" \n",
|
1413 |
+
" # Decoder\n",
|
1414 |
+
" d0 = decoder_block(x, s7, FILTERS*64)\n",
|
1415 |
+
" d1 = decoder_block(d0, s6, FILTERS*32)\n",
|
1416 |
+
" d2 = decoder_block(d1, s5, FILTERS*16)\n",
|
1417 |
+
" d3 = decoder_block(d2, s4, FILTERS*8)\n",
|
1418 |
+
" d4 = decoder_block(d3, s3, FILTERS*4)\n",
|
1419 |
+
" d5 = decoder_block(d4, s2, FILTERS*2)\n",
|
1420 |
+
" d6 = decoder_block(d5, s1, FILTERS)\n",
|
1421 |
+
" \n",
|
1422 |
+
" outputs = layers.Conv2D(3, KERNEL, activation='sigmoid', padding='same')(d6)\n",
|
1423 |
+
" return Model(inputs, [outputs, z_mean, z_log_var], name=name)\n",
|
1424 |
+
"\n",
|
1425 |
+
"# ===================== Discriminator =====================\n",
|
1426 |
+
"def build_discriminator(name):\n",
|
1427 |
+
" inputs = layers.Input(IMAGE_SHAPE)\n",
|
1428 |
+
" \n",
|
1429 |
+
" # Feature extraction\n",
|
1430 |
+
" x = inputs\n",
|
1431 |
+
" features = []\n",
|
1432 |
+
" \n",
|
1433 |
+
" # Initial convolution\n",
|
1434 |
+
" x = layers.Conv2D(FILTERS, KERNEL, padding='same')(x)\n",
|
1435 |
+
" x = layers.LeakyReLU(alpha=0.2)(x)\n",
|
1436 |
+
" features.append(x)\n",
|
1437 |
+
" \n",
|
1438 |
+
" # Downsampling blocks\n",
|
1439 |
+
" filter_sizes = [FILTERS*2, FILTERS*4, FILTERS*8, FILTERS*16, FILTERS*32, FILTERS*64]\n",
|
1440 |
+
" for filters in filter_sizes:\n",
|
1441 |
+
" x, _ = encoder_block(x, filters, use_norm=False)\n",
|
1442 |
+
" features.append(x)\n",
|
1443 |
+
" \n",
|
1444 |
+
" # Multi-scale outputs\n",
|
1445 |
+
" outputs = []\n",
|
1446 |
+
" for i, feature in enumerate(features[-4:]):\n",
|
1447 |
+
" out = layers.Conv2D(1, KERNEL, padding='same')(feature)\n",
|
1448 |
+
" outputs.append(out)\n",
|
1449 |
+
" \n",
|
1450 |
+
" return Model(inputs, outputs, name=name)\n",
|
1451 |
+
"\n",
|
1452 |
+
"# ===================== Data Loading =====================\n",
|
1453 |
+
"def load_images(path):\n",
|
1454 |
+
" images = []\n",
|
1455 |
+
" for p in pathlib.Path(path).glob('*.*'):\n",
|
1456 |
+
" try:\n",
|
1457 |
+
" img = cv2.imread(str(p))\n",
|
1458 |
+
" if img is not None:\n",
|
1459 |
+
" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
|
1460 |
+
" img = cv2.resize(img, IMAGE_SHAPE[:2])\n",
|
1461 |
+
" img = img.astype(np.float32) / 255.0\n",
|
1462 |
+
" images.append(img)\n",
|
1463 |
+
" except Exception as e:\n",
|
1464 |
+
" print(f\"Error loading image {p}: {e}\")\n",
|
1465 |
+
" return np.array(images)\n",
|
1466 |
+
"\n",
|
1467 |
+
"def load_and_balance_datasets(ct_path, mri_path):\n",
|
1468 |
+
" print(\"Loading CT scans...\")\n",
|
1469 |
+
" ct_scans = load_images(ct_path)\n",
|
1470 |
+
" print(\"Loading MRI scans...\")\n",
|
1471 |
+
" mri_scans = load_images(mri_path)\n",
|
1472 |
+
" \n",
|
1473 |
+
" min_length = min(len(ct_scans), len(mri_scans))\n",
|
1474 |
+
" ct_scans = ct_scans[:min_length]\n",
|
1475 |
+
" mri_scans = mri_scans[:min_length]\n",
|
1476 |
+
" \n",
|
1477 |
+
" print(f\"Balanced datasets to {min_length} images each\")\n",
|
1478 |
+
" return ct_scans, mri_scans\n",
|
1479 |
+
"\n",
|
1480 |
+
"# ===================== Training Setup =====================\n",
|
1481 |
+
"# Build models\n",
|
1482 |
+
"g_ct_mri = build_generator('CT_to_MRI')\n",
|
1483 |
+
"g_mri_ct = build_generator('MRI_to_CT')\n",
|
1484 |
+
"d_ct = build_discriminator('D_CT')\n",
|
1485 |
+
"d_mri = build_discriminator('D_MRI')\n",
|
1486 |
+
"\n",
|
1487 |
+
"# Create optimizers\n",
|
1488 |
+
"g_opt = tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY)\n",
|
1489 |
+
"d_opt = tf.keras.optimizers.RMSprop(learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY)\n",
|
1490 |
+
"\n",
|
1491 |
+
"# Initialize model variables\n",
|
1492 |
+
"g_vars = g_ct_mri.trainable_variables + g_mri_ct.trainable_variables\n",
|
1493 |
+
"d_vars = d_ct.trainable_variables + d_mri.trainable_variables\n",
|
1494 |
+
"\n",
|
1495 |
+
"# Build optimizers\n",
|
1496 |
+
"g_opt.build(g_vars)\n",
|
1497 |
+
"d_opt.build(d_vars)\n",
|
1498 |
+
"\n",
|
1499 |
+
"# ===================== Training Function =====================\n",
|
1500 |
+
"@tf.function\n",
|
1501 |
+
"def train_step(real_ct, real_mri):\n",
|
1502 |
+
" with tf.GradientTape(persistent=True) as tape:\n",
|
1503 |
+
" # Forward passes\n",
|
1504 |
+
" fake_mri, z_mean_fwd, z_log_var_fwd = g_ct_mri(real_ct, training=True)\n",
|
1505 |
+
" fake_ct, z_mean_bwd, z_log_var_bwd = g_mri_ct(real_mri, training=True)\n",
|
1506 |
+
" \n",
|
1507 |
+
" # Discriminator outputs\n",
|
1508 |
+
" d_real_ct = d_ct(real_ct, training=True)\n",
|
1509 |
+
" d_fake_ct = d_ct(fake_ct, training=True)\n",
|
1510 |
+
" d_real_mri = d_mri(real_mri, training=True)\n",
|
1511 |
+
" d_fake_mri = d_mri(fake_mri, training=True)\n",
|
1512 |
+
" \n",
|
1513 |
+
" # Discriminator losses\n",
|
1514 |
+
" d_ct_loss = sum([tf.reduce_mean((real - 1)**2) + tf.reduce_mean(fake**2) \n",
|
1515 |
+
" for real, fake in zip(d_real_ct, d_fake_ct)])\n",
|
1516 |
+
" d_mri_loss = sum([tf.reduce_mean((real - 1)**2) + tf.reduce_mean(fake**2) \n",
|
1517 |
+
" for real, fake in zip(d_real_mri, d_fake_mri)])\n",
|
1518 |
+
" \n",
|
1519 |
+
" # Cycle consistency\n",
|
1520 |
+
" cycled_ct, _, _ = g_mri_ct(fake_mri, training=True)\n",
|
1521 |
+
" cycled_mri, _, _ = g_ct_mri(fake_ct, training=True)\n",
|
1522 |
+
" \n",
|
1523 |
+
" # KL Divergence\n",
|
1524 |
+
" kl_fwd = -0.5 * tf.reduce_mean(1 + z_log_var_fwd - tf.square(z_mean_fwd) - tf.exp(z_log_var_fwd))\n",
|
1525 |
+
" kl_bwd = -0.5 * tf.reduce_mean(1 + z_log_var_bwd - tf.square(z_mean_bwd) - tf.exp(z_log_var_bwd))\n",
|
1526 |
+
" \n",
|
1527 |
+
" # Generator losses\n",
|
1528 |
+
" g_adv_loss = sum([tf.reduce_mean((fake - 1)**2) for fake in d_fake_mri + d_fake_ct])\n",
|
1529 |
+
" g_cycle_loss = (tf.reduce_mean(tf.abs(real_ct - cycled_ct)) + \n",
|
1530 |
+
" tf.reduce_mean(tf.abs(real_mri - cycled_mri)))\n",
|
1531 |
+
" g_total_loss = g_adv_loss + 10 * g_cycle_loss + 0.5 * (kl_fwd + kl_bwd)\n",
|
1532 |
+
" \n",
|
1533 |
+
" # Total discriminator loss\n",
|
1534 |
+
" d_total_loss = d_ct_loss + d_mri_loss\n",
|
1535 |
+
" \n",
|
1536 |
+
" # Update discriminators\n",
|
1537 |
+
" d_grads = tape.gradient(d_total_loss, d_vars)\n",
|
1538 |
+
" d_opt.apply_gradients(zip(d_grads, d_vars))\n",
|
1539 |
+
" \n",
|
1540 |
+
" # Update generators\n",
|
1541 |
+
" g_grads = tape.gradient(g_total_loss, g_vars)\n",
|
1542 |
+
" g_opt.apply_gradients(zip(g_grads, g_vars))\n",
|
1543 |
+
" \n",
|
1544 |
+
" return {\n",
|
1545 |
+
" 'd_ct': d_ct_loss,\n",
|
1546 |
+
" 'd_mri': d_mri_loss,\n",
|
1547 |
+
" 'g_total': g_total_loss,\n",
|
1548 |
+
" 'fake_mri': fake_mri,\n",
|
1549 |
+
" 'fake_ct': fake_ct\n",
|
1550 |
+
" }\n",
|
1551 |
+
"\n",
|
1552 |
+
"\n",
|
1553 |
+
"\n",
|
1554 |
+
"import os\n",
|
1555 |
+
"\n",
|
1556 |
+
"def save_models(g_ct_mri, g_mri_ct, epoch, model_dir='/kaggle/working/saved_models'):\n",
|
1557 |
+
" \"\"\"Save models in HDF5 format after each epoch\"\"\"\n",
|
1558 |
+
" if not os.path.exists(model_dir):\n",
|
1559 |
+
" os.makedirs(model_dir)\n",
|
1560 |
+
" \n",
|
1561 |
+
" # Save as .h5 files\n",
|
1562 |
+
" ct_path = os.path.join(model_dir, f'ct_to_mri_epoch_{epoch}.h5')\n",
|
1563 |
+
" mri_path = os.path.join(model_dir, f'mri_to_ct_epoch_{epoch}.h5')\n",
|
1564 |
+
" \n",
|
1565 |
+
" g_ct_mri.save(ct_path)\n",
|
1566 |
+
" g_mri_ct.save(mri_path)\n",
|
1567 |
+
" print(f\"Models saved: {ct_path} and {mri_path}\")\n",
|
1568 |
+
"\n",
|
1569 |
+
"\n",
|
1570 |
+
"# ===================== Main Training Loop =====================\n",
|
1571 |
+
"# Create progress directory if it doesn't exist\n",
|
1572 |
+
"progress_dir = '/kaggle/working/progress'\n",
|
1573 |
+
"if not os.path.exists(progress_dir):\n",
|
1574 |
+
" os.makedirs(progress_dir)\n",
|
1575 |
+
"\n",
|
1576 |
+
"# Load and prepare data\n",
|
1577 |
+
"print(\"Loading datasets...\")\n",
|
1578 |
+
"ct_scans, mri_scans = load_and_balance_datasets('/kaggle/input/ct-to-mri-cgan/Dataset/images/trainA', \n",
|
1579 |
+
" '/kaggle/input/ct-to-mri-cgan/Dataset/images/trainB')\n",
|
1580 |
+
"\n",
|
1581 |
+
"# Create TensorFlow dataset\n",
|
1582 |
+
"train_dataset = tf.data.Dataset.from_tensor_slices((ct_scans, mri_scans))\n",
|
1583 |
+
"train_dataset = train_dataset.shuffle(buffer_size=len(ct_scans)).batch(BATCH_SIZE)\n",
|
1584 |
+
"# Training loop\n",
|
1585 |
+
"print(\"Starting training...\")\n",
|
1586 |
+
"for epoch in range(EPOCHS):\n",
|
1587 |
+
" for batch_idx, (ct_batch, mri_batch) in enumerate(train_dataset):\n",
|
1588 |
+
" results = train_step(ct_batch, mri_batch)\n",
|
1589 |
+
" \n",
|
1590 |
+
" if batch_idx % 10 == 0:\n",
|
1591 |
+
" print(f\"Epoch {epoch}, Batch {batch_idx}: \"\n",
|
1592 |
+
" f\"D_CT={float(results['d_ct']):.4f}, \"\n",
|
1593 |
+
" f\"D_MRI={float(results['d_mri']):.4f}, \"\n",
|
1594 |
+
" f\"G={float(results['g_total']):.4f}\")\n",
|
1595 |
+
" \n",
|
1596 |
+
" # Save sample images every 100 batches\n",
|
1597 |
+
" if batch_idx % 100 == 0:\n",
|
1598 |
+
" fig, axes = plt.subplots(2, 2, figsize=(10, 10))\n",
|
1599 |
+
" \n",
|
1600 |
+
" # Real CT and Fake MRI\n",
|
1601 |
+
" axes[0,0].imshow(ct_batch[0].numpy())\n",
|
1602 |
+
" axes[0,0].set_title(\"Real CT\")\n",
|
1603 |
+
" axes[0,0].axis('off')\n",
|
1604 |
+
" \n",
|
1605 |
+
" axes[0,1].imshow(results['fake_mri'][0].numpy())\n",
|
1606 |
+
" axes[0,1].set_title(\"Fake MRI\")\n",
|
1607 |
+
" axes[0,1].axis('off')\n",
|
1608 |
+
" \n",
|
1609 |
+
" # Real MRI and Fake CT\n",
|
1610 |
+
" axes[1,0].imshow(mri_batch[0].numpy())\n",
|
1611 |
+
" axes[1,0].set_title(\"Real MRI\")\n",
|
1612 |
+
" axes[1,0].axis('off')\n",
|
1613 |
+
" \n",
|
1614 |
+
" axes[1,1].imshow(results['fake_ct'][0].numpy())\n",
|
1615 |
+
" axes[1,1].set_title(\"Fake CT\")\n",
|
1616 |
+
" axes[1,1].axis('off')\n",
|
1617 |
+
" \n",
|
1618 |
+
" plt.tight_layout()\n",
|
1619 |
+
" plt.savefig(f'progress/epoch_{epoch}_batch_{batch_idx}.png')\n",
|
1620 |
+
" plt.close()\n",
|
1621 |
+
" \n",
|
1622 |
+
" # Save models after each epoch\n",
|
1623 |
+
" save_models(g_ct_mri, g_mri_ct, epoch)\n",
|
1624 |
+
"\n",
|
1625 |
+
"def load_and_preprocess_image(image_path):\n",
|
1626 |
+
" \"\"\"Load and preprocess a single image for inference\"\"\"\n",
|
1627 |
+
" # Read image\n",
|
1628 |
+
" img = cv2.imread(image_path)\n",
|
1629 |
+
" if img is None:\n",
|
1630 |
+
" raise ValueError(f\"Could not load image from {image_path}\")\n",
|
1631 |
+
" \n",
|
1632 |
+
" # Convert BGR to RGB\n",
|
1633 |
+
" img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
|
1634 |
+
" \n",
|
1635 |
+
" # Resize to model's input size\n",
|
1636 |
+
" img = cv2.resize(img, (256, 256))\n",
|
1637 |
+
" \n",
|
1638 |
+
" # Normalize to [0, 1]\n",
|
1639 |
+
" img = img.astype(np.float32) / 255.0\n",
|
1640 |
+
" \n",
|
1641 |
+
" # Add batch dimension\n",
|
1642 |
+
" img = np.expand_dims(img, axis=0)\n",
|
1643 |
+
" \n",
|
1644 |
+
" return img\n",
|
1645 |
+
"\n",
|
1646 |
+
"def translate_image(model_path, image_path, output_path, mode='ct_to_mri'):\n",
|
1647 |
+
" \"\"\"\n",
|
1648 |
+
" Translate a single image using the trained model\n",
|
1649 |
+
" \n",
|
1650 |
+
" Parameters:\n",
|
1651 |
+
" model_path: Path to the saved model\n",
|
1652 |
+
" image_path: Path to the input image\n",
|
1653 |
+
" output_path: Path to save the translated image\n",
|
1654 |
+
" mode: 'ct_to_mri' or 'mri_to_ct'\n",
|
1655 |
+
" \"\"\"\n",
|
1656 |
+
" # Load model\n",
|
1657 |
+
" print(f\"Loading model from {model_path}\")\n",
|
1658 |
+
" model = tf.keras.models.load_model(model_path, \n",
|
1659 |
+
" custom_objects={'Sampling': Sampling})\n",
|
1660 |
+
" \n",
|
1661 |
+
" # Load and preprocess image\n",
|
1662 |
+
" input_image = load_and_preprocess_image(image_path)\n",
|
1663 |
+
" \n",
|
1664 |
+
" # Generate translation\n",
|
1665 |
+
" print(\"Generating translation...\")\n",
|
1666 |
+
" translated_image, _, _ = model(input_image, training=False)\n",
|
1667 |
+
" \n",
|
1668 |
+
" # Convert to numpy and denormalize\n",
|
1669 |
+
" translated_image = translated_image.numpy()[0] * 255\n",
|
1670 |
+
" translated_image = translated_image.astype(np.uint8)\n",
|
1671 |
+
" \n",
|
1672 |
+
" # Save the result\n",
|
1673 |
+
" print(f\"Saving translated image to {output_path}\")\n",
|
1674 |
+
" plt.figure(figsize=(10, 5))\n",
|
1675 |
+
" \n",
|
1676 |
+
" plt.subplot(1, 2, 1)\n",
|
1677 |
+
" plt.title(\"Input Image\")\n",
|
1678 |
+
" plt.imshow(input_image[0])\n",
|
1679 |
+
" plt.axis('off')\n",
|
1680 |
+
" \n",
|
1681 |
+
" plt.subplot(1, 2, 2)\n",
|
1682 |
+
" plt.title(\"Translated Image\")\n",
|
1683 |
+
" plt.imshow(translated_image)\n",
|
1684 |
+
" plt.axis('off')\n",
|
1685 |
+
" \n",
|
1686 |
+
" plt.tight_layout()\n",
|
1687 |
+
" plt.savefig(output_path)\n",
|
1688 |
+
" plt.close()\n",
|
1689 |
+
" \n",
|
1690 |
+
" return translated_image\n",
|
1691 |
+
"'''\n",
|
1692 |
+
"# Example usage of the translation function\n",
|
1693 |
+
"def example_translation():\n",
|
1694 |
+
" \"\"\"Example of how to use the translation function\"\"\"\n",
|
1695 |
+
" # Paths\n",
|
1696 |
+
" ct_to_mri_model = 'saved_models/ct_to_mri_epoch_1000'\n",
|
1697 |
+
" mri_to_ct_model = 'saved_models/mri_to_ct_epoch_1000'\n",
|
1698 |
+
" \n",
|
1699 |
+
" # CT to MRI translation\n",
|
1700 |
+
" input_ct = 'path/to/your/ct_image.jpg'\n",
|
1701 |
+
" output_mri = 'results/translated_mri.png'\n",
|
1702 |
+
" translated_mri = translate_image(ct_to_mri_model, input_ct, output_mri, \n",
|
1703 |
+
" mode='ct_to_mri')\n",
|
1704 |
+
" \n",
|
1705 |
+
" # MRI to CT translation\n",
|
1706 |
+
" input_mri = 'path/to/your/mri_image.jpg'\n",
|
1707 |
+
" output_ct = 'results/translated_ct.png'\n",
|
1708 |
+
" translated_ct = translate_image(mri_to_ct_model, input_mri, output_ct, \n",
|
1709 |
+
" mode='mri_to_ct')'''"
|
1710 |
+
]
|
1711 |
+
},
|
1712 |
+
{
|
1713 |
+
"cell_type": "markdown",
|
1714 |
+
"metadata": {},
|
1715 |
+
"source": []
|
1716 |
+
},
|
1717 |
+
{
|
1718 |
+
"cell_type": "markdown",
|
1719 |
+
"metadata": {},
|
1720 |
+
"source": []
|
1721 |
+
},
|
1722 |
+
{
|
1723 |
+
"cell_type": "code",
|
1724 |
+
"execution_count": null,
|
1725 |
+
"metadata": {},
|
1726 |
+
"outputs": [],
|
1727 |
+
"source": []
|
1728 |
+
},
|
1729 |
+
{
|
1730 |
+
"cell_type": "code",
|
1731 |
+
"execution_count": null,
|
1732 |
+
"metadata": {},
|
1733 |
+
"outputs": [],
|
1734 |
+
"source": []
|
1735 |
+
}
|
1736 |
+
],
|
1737 |
+
"metadata": {
|
1738 |
+
"kernelspec": {
|
1739 |
+
"display_name": "image",
|
1740 |
+
"language": "python",
|
1741 |
+
"name": "python3"
|
1742 |
+
},
|
1743 |
+
"language_info": {
|
1744 |
+
"codemirror_mode": {
|
1745 |
+
"name": "ipython",
|
1746 |
+
"version": 3
|
1747 |
+
},
|
1748 |
+
"file_extension": ".py",
|
1749 |
+
"mimetype": "text/x-python",
|
1750 |
+
"name": "python",
|
1751 |
+
"nbconvert_exporter": "python",
|
1752 |
+
"pygments_lexer": "ipython3",
|
1753 |
+
"version": "3.10.11"
|
1754 |
+
}
|
1755 |
+
},
|
1756 |
+
"nbformat": 4,
|
1757 |
+
"nbformat_minor": 2
|
1758 |
+
}
|
demo.html
ADDED
@@ -0,0 +1,66 @@
1 |
+
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
+
<title>Simple CSS Page</title>
|
7 |
+
<style>
|
8 |
+
body {
|
9 |
+
font-family: Arial, sans-serif;
|
10 |
+
margin: 0;
|
11 |
+
padding: 0;
|
12 |
+
text-align: center;
|
13 |
+
background-color: #f4f4f4;
|
14 |
+
}
|
15 |
+
|
16 |
+
header {
|
17 |
+
background-color: #3498db;
|
18 |
+
color: white;
|
19 |
+
padding: 20px;
|
20 |
+
font-size: 24px;
|
21 |
+
|
22 |
+
}
|
23 |
+
|
24 |
+
section {
|
25 |
+
margin: 20px auto;
|
26 |
+
padding: 20px;
|
27 |
+
background-color: white;
|
28 |
+
border-radius: 10px;
|
29 |
+
|
30 |
+
max-width: 600px;
|
31 |
+
}
|
32 |
+
|
33 |
+
button {
|
34 |
+
background-color: #2ecc71;
|
35 |
+
color: white;
|
36 |
+
padding: 10px 20px;
|
37 |
+
border: none;
|
38 |
+
border-radius: 5px;
|
39 |
+
cursor: pointer;
|
40 |
+
font-size: 16px;
|
41 |
+
transition: background-color 0.3s ease;
|
42 |
+
}
|
43 |
+
|
44 |
+
button:hover {
|
45 |
+
background-color: #27ae60;
|
46 |
+
}
|
47 |
+
|
48 |
+
footer {
|
49 |
+
background-color: #333;
|
50 |
+
color: white;
|
51 |
+
padding: 10px;
|
52 |
+
margin-top: 20px;
|
53 |
+
font-size: 14px;
|
54 |
+
}
|
55 |
+
</style>
|
56 |
+
</head>
|
57 |
+
<body>
|
58 |
+
<header>Welcome to My Simple CSS Page</header>
|
59 |
+
<section>
|
60 |
+
<h2>Styled Elements</h2>
|
61 |
+
<p>This is a simple webpage demonstrating basic CSS styling.</p>
|
62 |
+
<button>Click Me</button>
|
63 |
+
</section>
|
64 |
+
<footer>© 2025 My Simple Page. All rights reserved.</footer>
|
65 |
+
</body>
|
66 |
+
</html>
|
env/Lib/site-packages/PyYAML-6.0.2.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
1 |
+
pip
|
env/Lib/site-packages/PyYAML-6.0.2.dist-info/LICENSE
ADDED
@@ -0,0 +1,20 @@
1 |
+
Copyright (c) 2017-2021 Ingy döt Net
|
2 |
+
Copyright (c) 2006-2016 Kirill Simonov
|
3 |
+
|
4 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
5 |
+
this software and associated documentation files (the "Software"), to deal in
|
6 |
+
the Software without restriction, including without limitation the rights to
|
7 |
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
8 |
+
of the Software, and to permit persons to whom the Software is furnished to do
|
9 |
+
so, subject to the following conditions:
|
10 |
+
|
11 |
+
The above copyright notice and this permission notice shall be included in all
|
12 |
+
copies or substantial portions of the Software.
|
13 |
+
|
14 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
15 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
16 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
17 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
18 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
19 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
20 |
+
SOFTWARE.
|
env/Lib/site-packages/PyYAML-6.0.2.dist-info/METADATA
ADDED
@@ -0,0 +1,46 @@
1 |
+
Metadata-Version: 2.1
|
2 |
+
Name: PyYAML
|
3 |
+
Version: 6.0.2
|
4 |
+
Summary: YAML parser and emitter for Python
|
5 |
+
Home-page: https://pyyaml.org/
|
6 |
+
Download-URL: https://pypi.org/project/PyYAML/
|
7 |
+
Author: Kirill Simonov
|
8 |
+
Author-email: [email protected]
|
9 |
+
License: MIT
|
10 |
+
Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues
|
11 |
+
Project-URL: CI, https://github.com/yaml/pyyaml/actions
|
12 |
+
Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation
|
13 |
+
Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core
|
14 |
+
Project-URL: Source Code, https://github.com/yaml/pyyaml
|
15 |
+
Platform: Any
|
16 |
+
Classifier: Development Status :: 5 - Production/Stable
|
17 |
+
Classifier: Intended Audience :: Developers
|
18 |
+
Classifier: License :: OSI Approved :: MIT License
|
19 |
+
Classifier: Operating System :: OS Independent
|
20 |
+
Classifier: Programming Language :: Cython
|
21 |
+
Classifier: Programming Language :: Python
|
22 |
+
Classifier: Programming Language :: Python :: 3
|
23 |
+
Classifier: Programming Language :: Python :: 3.8
|
24 |
+
Classifier: Programming Language :: Python :: 3.9
|
25 |
+
Classifier: Programming Language :: Python :: 3.10
|
26 |
+
Classifier: Programming Language :: Python :: 3.11
|
27 |
+
Classifier: Programming Language :: Python :: 3.12
|
28 |
+
Classifier: Programming Language :: Python :: 3.13
|
29 |
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
30 |
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
31 |
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
32 |
+
Classifier: Topic :: Text Processing :: Markup
|
33 |
+
Requires-Python: >=3.8
|
34 |
+
License-File: LICENSE
|
35 |
+
|
36 |
+
YAML is a data serialization format designed for human readability
|
37 |
+
and interaction with scripting languages. PyYAML is a YAML parser
|
38 |
+
and emitter for Python.
|
39 |
+
|
40 |
+
PyYAML features a complete YAML 1.1 parser, Unicode support, pickle
|
41 |
+
support, capable extension API, and sensible error messages. PyYAML
|
42 |
+
supports standard YAML tags and provides Python-specific tags that
|
43 |
+
allow to represent an arbitrary Python object.
|
44 |
+
|
45 |
+
PyYAML is applicable for a broad range of tasks from complex
|
46 |
+
configuration files to object serialization and persistence.
|
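Aside: the summary above describes a full YAML 1.1 parser and emitter. A minimal sketch of that round trip (illustrative only, not part of the committed files, assuming PyYAML 6.x as vendored here):

```python
# Minimal PyYAML round trip: parse a YAML document, then emit it back.
# safe_load/safe_dump restrict construction to standard YAML tags only.
import yaml

doc = yaml.safe_load("name: demo\nversion: 6.0.2\ntags: [parser, emitter]\n")
print(doc["tags"])                        # ['parser', 'emitter']
print(yaml.safe_dump(doc, sort_keys=False), end="")
```
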
env/Lib/site-packages/PyYAML-6.0.2.dist-info/RECORD
ADDED
@@ -0,0 +1,43 @@
PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101
PyYAML-6.0.2.dist-info/METADATA,sha256=9lwXqTOrXPts-jI2Lo5UwuaAYo0hiRA0BZqjch0WjAk,2106
PyYAML-6.0.2.dist-info/RECORD,,
PyYAML-6.0.2.dist-info/WHEEL,sha256=c7SWG1_hRvc9HXHEkmWlTu1Jr4WpzRucfzqTP-_8q0s,102
PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11
_yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402
_yaml/__pycache__/__init__.cpython-312.pyc,,
yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311
yaml/__pycache__/__init__.cpython-312.pyc,,
yaml/__pycache__/composer.cpython-312.pyc,,
yaml/__pycache__/constructor.cpython-312.pyc,,
yaml/__pycache__/cyaml.cpython-312.pyc,,
yaml/__pycache__/dumper.cpython-312.pyc,,
yaml/__pycache__/emitter.cpython-312.pyc,,
yaml/__pycache__/error.cpython-312.pyc,,
yaml/__pycache__/events.cpython-312.pyc,,
yaml/__pycache__/loader.cpython-312.pyc,,
yaml/__pycache__/nodes.cpython-312.pyc,,
yaml/__pycache__/parser.cpython-312.pyc,,
yaml/__pycache__/reader.cpython-312.pyc,,
yaml/__pycache__/representer.cpython-312.pyc,,
yaml/__pycache__/resolver.cpython-312.pyc,,
yaml/__pycache__/scanner.cpython-312.pyc,,
yaml/__pycache__/serializer.cpython-312.pyc,,
yaml/__pycache__/tokens.cpython-312.pyc,,
yaml/_yaml.cp312-win_amd64.pyd,sha256=Bx7e_LEQx7cnd1_A9_nClp3X77g-_Lw1aoAAtYZbwWk,263680
yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883
yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639
yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851
yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837
yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006
yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533
yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445
yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061
yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440
yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495
yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794
yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190
yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004
yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279
yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165
yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573

env/Lib/site-packages/PyYAML-6.0.2.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.44.0)
Root-Is-Purelib: false
Tag: cp312-cp312-win_amd64

env/Lib/site-packages/PyYAML-6.0.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1,2 @@
_yaml
yaml

env/Lib/site-packages/_yaml/__init__.py
ADDED
@@ -0,0 +1,33 @@
# This is a stub package designed to roughly emulate the _yaml
# extension module, which previously existed as a standalone module
# and has been moved into the `yaml` package namespace.
# It does not perfectly mimic its old counterpart, but should get
# close enough for anyone who's relying on it even when they shouldn't.
import yaml

# in some circumstances, the yaml module we imoprted may be from a different version, so we need
# to tread carefully when poking at it here (it may not have the attributes we expect)
if not getattr(yaml, '__with_libyaml__', False):
    from sys import version_info

    exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
    raise exc("No module named '_yaml'")
else:
    from yaml._yaml import *
    import warnings
    warnings.warn(
        'The _yaml extension module is now located at yaml._yaml'
        ' and its location is subject to change. To use the'
        ' LibYAML-based parser and emitter, import from `yaml`:'
        ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
        DeprecationWarning
    )
    del warnings
    # Don't `del yaml` here because yaml is actually an existing
    # namespace member of _yaml.

__name__ = '_yaml'
# If the module is top-level (i.e. not a part of any specific package)
# then the attribute should be set to ''.
# https://docs.python.org/3.8/library/types.html
__package__ = ''

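Aside: the stub above only keeps old `import _yaml` code alive, and its DeprecationWarning points at the supported spelling. A small sketch of that recommended import, with the usual pure-Python fallback when the LibYAML extension is not built (the fallback choice mirrors the PyYAML documentation, not this file):

```python
# Prefer the LibYAML-backed classes exposed by `yaml`, as the warning above
# suggests, and fall back to the pure-Python implementations otherwise.
import yaml

try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

data = yaml.load("a: 1\nb: [2, 3]\n", Loader=Loader)
print(yaml.dump(data, Dumper=Dumper), end="")
```
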
env/Lib/site-packages/certifi-2025.1.31.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
pip

env/Lib/site-packages/certifi-2025.1.31.dist-info/LICENSE
ADDED
@@ -0,0 +1,20 @@
This package contains a modified version of ca-bundle.crt:

ca-bundle.crt -- Bundle of CA Root Certificates

This is a bundle of X.509 certificates of public Certificate Authorities
(CA). These were automatically extracted from Mozilla's root certificates
file (certdata.txt). This file can be found in the mozilla source tree:
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
It contains the certificates in PEM format and therefore
can be directly used with curl / libcurl / php_curl, or with
an Apache+mod_ssl webserver for SSL client authentication.
Just configure this file as the SSLCACertificateFile.#

***** BEGIN LICENSE BLOCK *****
This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.

***** END LICENSE BLOCK *****
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $

env/Lib/site-packages/certifi-2025.1.31.dist-info/METADATA
ADDED
@@ -0,0 +1,77 @@
Metadata-Version: 2.2
Name: certifi
Version: 2025.1.31
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: [email protected]
License: MPL-2.0
Project-URL: Source, https://github.com/certifi/python-certifi
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Requires-Python: >=3.6
License-File: LICENSE
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: home-page
Dynamic: license
Dynamic: project-url
Dynamic: requires-python
Dynamic: summary

Certifi: Python SSL Certificates
================================

Certifi provides Mozilla's carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.

Installation
------------

``certifi`` is available on PyPI. Simply install it with ``pip``::

    $ pip install certifi

Usage
-----

To reference the installed certificate authority (CA) bundle, you can use the
built-in function::

    >>> import certifi

    >>> certifi.where()
    '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'

Or from the command line::

    $ python -m certifi
    /usr/local/lib/python3.7/site-packages/certifi/cacert.pem

Enjoy!

.. _`Requests`: https://requests.readthedocs.io/en/master/

Addition/Removal of Certificates
--------------------------------

Certifi does not support any addition/removal or other modification of the
CA trust store content. This project is intended to provide a reliable and
highly portable root of trust to python deployments. Look to upstream projects
for methods to use alternate trust.

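Aside: the usage section above only shows how to locate the bundle. A common next step, sketched here for illustration (not taken from the diff), is to hand `certifi.where()` to the standard library's `ssl` module so HTTPS requests verify against Mozilla's roots:

```python
# Build a TLS context that trusts certifi's CA bundle, then make a request
# through the standard library using that context.
import ssl
import urllib.request

import certifi

ctx = ssl.create_default_context(cafile=certifi.where())
with urllib.request.urlopen("https://pypi.org", context=ctx) as resp:
    print(resp.status)
```
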
env/Lib/site-packages/certifi-2025.1.31.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
certifi-2025.1.31.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
certifi-2025.1.31.dist-info/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
certifi-2025.1.31.dist-info/METADATA,sha256=t5kcT5aGu0dQ6_psUNZYTqnC0uCRnponewm3uYjeHbg,2451
certifi-2025.1.31.dist-info/RECORD,,
certifi-2025.1.31.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
certifi-2025.1.31.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi/__init__.py,sha256=neIaAf7BM36ygmQCmy-ZsSyjnvjWghFeu13wwEAnjj0,94
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
certifi/__pycache__/__init__.cpython-312.pyc,,
certifi/__pycache__/__main__.cpython-312.pyc,,
certifi/__pycache__/core.cpython-312.pyc,,
certifi/cacert.pem,sha256=xVsh-Qf3-G1IrdCTVS-1ZRdJ_1-GBQjMu0I9bB-9gMc,297255
certifi/core.py,sha256=qRDDFyXVJwTB_EmoGppaXU_R9qCZvhl-EzxPMuV3nTA,4426
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

env/Lib/site-packages/certifi-2025.1.31.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: setuptools (75.8.0)
Root-Is-Purelib: true
Tag: py3-none-any

env/Lib/site-packages/certifi-2025.1.31.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
certifi

env/Lib/site-packages/certifi/__init__.py
ADDED
@@ -0,0 +1,4 @@
from .core import contents, where

__all__ = ["contents", "where"]
__version__ = "2025.01.31"

env/Lib/site-packages/certifi/__main__.py
ADDED
@@ -0,0 +1,12 @@
import argparse

from certifi import contents, where

parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()

if args.contents:
    print(contents())
else:
    print(where())

env/Lib/site-packages/certifi/cacert.pem
ADDED
The diff for this file is too large to render. See raw diff.

env/Lib/site-packages/certifi/core.py
ADDED
@@ -0,0 +1,114 @@
"""
certifi.py
~~~~~~~~~~

This module returns the installation location of cacert.pem or its contents.
"""
import sys
import atexit

def exit_cacert_ctx() -> None:
    _CACERT_CTX.__exit__(None, None, None)  # type: ignore[union-attr]


if sys.version_info >= (3, 11):

    from importlib.resources import as_file, files

    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        # This is slightly terrible, but we want to delay extracting the file
        # in cases where we're inside of a zipimport situation until someone
        # actually calls where(), but we don't want to re-extract the file
        # on every call of where(), so we'll do it once then store it in a
        # global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you to
            # manage the cleanup of this file, so it doesn't actually return a
            # path, it returns a context manager that will give you the path
            # when you enter it and will do any cleanup when you leave it. In
            # the common case of not needing a temporary file, it will just
            # return the file system location and the __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
            _CACERT_PATH = str(_CACERT_CTX.__enter__())
            atexit.register(exit_cacert_ctx)

        return _CACERT_PATH

    def contents() -> str:
        return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")

elif sys.version_info >= (3, 7):

    from importlib.resources import path as get_path, read_text

    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        # This is slightly terrible, but we want to delay extracting the
        # file in cases where we're inside of a zipimport situation until
        # someone actually calls where(), but we don't want to re-extract
        # the file on every call of where(), so we'll do it once then store
        # it in a global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you
            # to manage the cleanup of this file, so it doesn't actually
            # return a path, it returns a context manager that will give
            # you the path when you enter it and will do any cleanup when
            # you leave it. In the common case of not needing a temporary
            # file, it will just return the file system location and the
            # __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = get_path("certifi", "cacert.pem")
            _CACERT_PATH = str(_CACERT_CTX.__enter__())
            atexit.register(exit_cacert_ctx)

        return _CACERT_PATH

    def contents() -> str:
        return read_text("certifi", "cacert.pem", encoding="ascii")

else:
    import os
    import types
    from typing import Union

    Package = Union[types.ModuleType, str]
    Resource = Union[str, "os.PathLike"]

    # This fallback will work for Python versions prior to 3.7 that lack the
    # importlib.resources module but relies on the existing `where` function
    # so won't address issues with environments like PyOxidizer that don't set
    # __file__ on modules.
    def read_text(
        package: Package,
        resource: Resource,
        encoding: str = 'utf-8',
        errors: str = 'strict'
    ) -> str:
        with open(where(), encoding=encoding) as data:
            return data.read()

    # If we don't have importlib.resources, then we will just do the old logic
    # of assuming we're on the filesystem and munge the path directly.
    def where() -> str:
        f = os.path.dirname(__file__)

        return os.path.join(f, "cacert.pem")

    def contents() -> str:
        return read_text("certifi", "cacert.pem", encoding="ascii")

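Aside: core.py resolves the bundled `cacert.pem` through `importlib.resources` so the path stays valid even when certifi is imported from a zip archive. The same pattern works for any package data file; a minimal sketch on Python 3.9+ (the package and resource names here are just certifi's own, reused for illustration):

```python
# Resolve a resource that ships inside a package to a real filesystem path.
# as_file() extracts it to a temporary file only when needed (e.g. zipimport)
# and cleans up when the context manager exits.
from importlib.resources import as_file, files

with as_file(files("certifi").joinpath("cacert.pem")) as path:
    print(path)  # a usable filesystem path while the with-block is open
```
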
env/Lib/site-packages/certifi/py.typed
ADDED
File without changes

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
pip

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 TAHRI Ahmed R.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/METADATA
ADDED
@@ -0,0 +1,721 @@
1 |
+
Metadata-Version: 2.1
|
2 |
+
Name: charset-normalizer
|
3 |
+
Version: 3.4.1
|
4 |
+
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
|
5 |
+
Author-email: "Ahmed R. TAHRI" <[email protected]>
|
6 |
+
Maintainer-email: "Ahmed R. TAHRI" <[email protected]>
|
7 |
+
License: MIT
|
8 |
+
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
|
9 |
+
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
|
10 |
+
Project-URL: Code, https://github.com/jawah/charset_normalizer
|
11 |
+
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
|
12 |
+
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
|
13 |
+
Classifier: Development Status :: 5 - Production/Stable
|
14 |
+
Classifier: Intended Audience :: Developers
|
15 |
+
Classifier: License :: OSI Approved :: MIT License
|
16 |
+
Classifier: Operating System :: OS Independent
|
17 |
+
Classifier: Programming Language :: Python
|
18 |
+
Classifier: Programming Language :: Python :: 3
|
19 |
+
Classifier: Programming Language :: Python :: 3.7
|
20 |
+
Classifier: Programming Language :: Python :: 3.8
|
21 |
+
Classifier: Programming Language :: Python :: 3.9
|
22 |
+
Classifier: Programming Language :: Python :: 3.10
|
23 |
+
Classifier: Programming Language :: Python :: 3.11
|
24 |
+
Classifier: Programming Language :: Python :: 3.12
|
25 |
+
Classifier: Programming Language :: Python :: 3.13
|
26 |
+
Classifier: Programming Language :: Python :: 3 :: Only
|
27 |
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
28 |
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
29 |
+
Classifier: Topic :: Text Processing :: Linguistic
|
30 |
+
Classifier: Topic :: Utilities
|
31 |
+
Classifier: Typing :: Typed
|
32 |
+
Requires-Python: >=3.7
|
33 |
+
Description-Content-Type: text/markdown
|
34 |
+
License-File: LICENSE
|
35 |
+
Provides-Extra: unicode-backport
|
36 |
+
|
37 |
+
<h1 align="center">Charset Detection, for Everyone 👋</h1>
|
38 |
+
|
39 |
+
<p align="center">
|
40 |
+
<sup>The Real First Universal Charset Detector</sup><br>
|
41 |
+
<a href="https://pypi.org/project/charset-normalizer">
|
42 |
+
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
|
43 |
+
</a>
|
44 |
+
<a href="https://pepy.tech/project/charset-normalizer/">
|
45 |
+
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
|
46 |
+
</a>
|
47 |
+
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
|
48 |
+
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
|
49 |
+
</a>
|
50 |
+
</p>
|
51 |
+
<p align="center">
|
52 |
+
<sup><i>Featured Packages</i></sup><br>
|
53 |
+
<a href="https://github.com/jawah/niquests">
|
54 |
+
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Best_HTTP_Client-cyan">
|
55 |
+
</a>
|
56 |
+
<a href="https://github.com/jawah/wassima">
|
57 |
+
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Killer-cyan">
|
58 |
+
</a>
|
59 |
+
</p>
|
60 |
+
<p align="center">
|
61 |
+
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
|
62 |
+
<a href="https://github.com/nickspring/charset-normalizer-rs">
|
63 |
+
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
|
64 |
+
</a>
|
65 |
+
</p>
|
66 |
+
|
67 |
+
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
|
68 |
+
> I'm trying to resolve the issue by taking a new approach.
|
69 |
+
> All IANA character set names for which the Python core library provides codecs are supported.
|
70 |
+
|
71 |
+
<p align="center">
|
72 |
+
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
|
73 |
+
</p>
|
74 |
+
|
75 |
+
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
|
76 |
+
|
77 |
+
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|
78 |
+
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
|
79 |
+
| `Fast` | ❌ | ✅ | ✅ |
|
80 |
+
| `Universal**` | ❌ | ✅ | ❌ |
|
81 |
+
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
|
82 |
+
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
|
83 |
+
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
|
84 |
+
| `Native Python` | ✅ | ✅ | ❌ |
|
85 |
+
| `Detect spoken language` | ❌ | ✅ | N/A |
|
86 |
+
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
|
87 |
+
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
|
88 |
+
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
|
89 |
+
|
90 |
+
<p align="center">
|
91 |
+
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
|
92 |
+
</p>
|
93 |
+
|
94 |
+
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
|
95 |
+
|
96 |
+
## ⚡ Performance
|
97 |
+
|
98 |
+
This package offer better performance than its counterpart Chardet. Here are some numbers.
|
99 |
+
|
100 |
+
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|
101 |
+
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
|
102 |
+
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
|
103 |
+
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
|
104 |
+
|
105 |
+
| Package | 99th percentile | 95th percentile | 50th percentile |
|
106 |
+
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
|
107 |
+
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
|
108 |
+
| charset-normalizer | 100 ms | 50 ms | 5 ms |
|
109 |
+
|
110 |
+
_updated as of december 2024 using CPython 3.12_
|
111 |
+
|
112 |
+
Chardet's performance on larger file (1MB+) are very poor. Expect huge difference on large payload.
|
113 |
+
|
114 |
+
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
|
115 |
+
> And yes, these results might change at any time. The dataset can be updated to include more files.
|
116 |
+
> The actual delays heavily depends on your CPU capabilities. The factors should remain the same.
|
117 |
+
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
|
118 |
+
> (e.g. Supported Encoding) Challenge-them if you want.
|
119 |
+
|
120 |
+
## ✨ Installation
|
121 |
+
|
122 |
+
Using pip:
|
123 |
+
|
124 |
+
```sh
|
125 |
+
pip install charset-normalizer -U
|
126 |
+
```
|
127 |
+
|
128 |
+
## 🚀 Basic Usage
|
129 |
+
|
130 |
+
### CLI
|
131 |
+
This package comes with a CLI.
|
132 |
+
|
133 |
+
```
|
134 |
+
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
|
135 |
+
file [file ...]
|
136 |
+
|
137 |
+
The Real First Universal Charset Detector. Discover originating encoding used
|
138 |
+
on text file. Normalize text to unicode.
|
139 |
+
|
140 |
+
positional arguments:
|
141 |
+
files File(s) to be analysed
|
142 |
+
|
143 |
+
optional arguments:
|
144 |
+
-h, --help show this help message and exit
|
145 |
+
-v, --verbose Display complementary information about file if any.
|
146 |
+
Stdout will contain logs about the detection process.
|
147 |
+
-a, --with-alternative
|
148 |
+
Output complementary possibilities if any. Top-level
|
149 |
+
JSON WILL be a list.
|
150 |
+
-n, --normalize Permit to normalize input file. If not set, program
|
151 |
+
does not write anything.
|
152 |
+
-m, --minimal Only output the charset detected to STDOUT. Disabling
|
153 |
+
JSON output.
|
154 |
+
-r, --replace Replace file when trying to normalize it instead of
|
155 |
+
creating a new one.
|
156 |
+
-f, --force Replace file without asking if you are sure, use this
|
157 |
+
flag with caution.
|
158 |
+
-t THRESHOLD, --threshold THRESHOLD
|
159 |
+
Define a custom maximum amount of chaos allowed in
|
160 |
+
decoded content. 0. <= chaos <= 1.
|
161 |
+
--version Show version information and exit.
|
162 |
+
```
|
163 |
+
|
164 |
+
```bash
|
165 |
+
normalizer ./data/sample.1.fr.srt
|
166 |
+
```
|
167 |
+
|
168 |
+
or
|
169 |
+
|
170 |
+
```bash
|
171 |
+
python -m charset_normalizer ./data/sample.1.fr.srt
|
172 |
+
```
|
173 |
+
|
174 |
+
🎉 Since version 1.4.0 the CLI produce easily usable stdout result in JSON format.
|
175 |
+
|
176 |
+
```json
|
177 |
+
{
|
178 |
+
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
|
179 |
+
"encoding": "cp1252",
|
180 |
+
"encoding_aliases": [
|
181 |
+
"1252",
|
182 |
+
"windows_1252"
|
183 |
+
],
|
184 |
+
"alternative_encodings": [
|
185 |
+
"cp1254",
|
186 |
+
"cp1256",
|
187 |
+
"cp1258",
|
188 |
+
"iso8859_14",
|
189 |
+
"iso8859_15",
|
190 |
+
"iso8859_16",
|
191 |
+
"iso8859_3",
|
192 |
+
"iso8859_9",
|
193 |
+
"latin_1",
|
194 |
+
"mbcs"
|
195 |
+
],
|
196 |
+
"language": "French",
|
197 |
+
"alphabets": [
|
198 |
+
"Basic Latin",
|
199 |
+
"Latin-1 Supplement"
|
200 |
+
],
|
201 |
+
"has_sig_or_bom": false,
|
202 |
+
"chaos": 0.149,
|
203 |
+
"coherence": 97.152,
|
204 |
+
"unicode_path": null,
|
205 |
+
"is_preferred": true
|
206 |
+
}
|
207 |
+
```
|
208 |
+
|
209 |
+
### Python
|
210 |
+
*Just print out normalized text*
|
211 |
+
```python
|
212 |
+
from charset_normalizer import from_path
|
213 |
+
|
214 |
+
results = from_path('./my_subtitle.srt')
|
215 |
+
|
216 |
+
print(str(results.best()))
|
217 |
+
```
|
218 |
+
|
219 |
+
*Upgrade your code without effort*
|
220 |
+
```python
|
221 |
+
from charset_normalizer import detect
|
222 |
+
```
|
223 |
+
|
224 |
+
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
|
225 |
+
|
226 |
+
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
|
227 |
+
|
228 |
+
## 😇 Why
|
229 |
+
|
230 |
+
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
|
231 |
+
reliable alternative using a completely different method. Also! I never back down on a good challenge!
|
232 |
+
|
233 |
+
I **don't care** about the **originating charset** encoding, because **two different tables** can
|
234 |
+
produce **two identical rendered string.**
|
235 |
+
What I want is to get readable text, the best I can.
|
236 |
+
|
237 |
+
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
|
238 |
+
|
239 |
+
Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy goal is to repair Unicode string whereas charset-normalizer to convert raw file in unknown encoding to unicode.
|
240 |
+
|
241 |
+
## 🍰 How
|
242 |
+
|
243 |
+
- Discard all charset encoding table that could not fit the binary content.
|
244 |
+
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
|
245 |
+
- Extract matches with the lowest mess detected.
|
246 |
+
- Additionally, we measure coherence / probe for a language.
|
247 |
+
|
248 |
+
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
|
249 |
+
|
250 |
+
*Noise :* I opened hundred of text files, **written by humans**, with the wrong encoding table. **I observed**, then
|
251 |
+
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
|
252 |
+
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
|
253 |
+
improve or rewrite it.
|
254 |
+
|
255 |
+
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
|
256 |
+
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
|
257 |
+
|
258 |
+
## ⚡ Known limitations
|
259 |
+
|
260 |
+
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
|
261 |
+
- Every charset detector heavily depends on sufficient content. In common cases, do not bother run detection on very tiny content.
|
262 |
+
|
263 |
+
## ⚠️ About Python EOLs
|
264 |
+
|
265 |
+
**If you are running:**
|
266 |
+
|
267 |
+
- Python >=2.7,<3.5: Unsupported
|
268 |
+
- Python 3.5: charset-normalizer < 2.1
|
269 |
+
- Python 3.6: charset-normalizer < 3.1
|
270 |
+
- Python 3.7: charset-normalizer < 4.0
|
271 |
+
|
272 |
+
Upgrade your Python interpreter as soon as possible.
|
273 |
+
|
274 |
+
## 👤 Contributing
|
275 |
+
|
276 |
+
Contributions, issues and feature requests are very much welcome.<br />
|
277 |
+
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
|
278 |
+
|
279 |
+
## 📝 License
|
280 |
+
|
281 |
+
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
|
282 |
+
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
|
283 |
+
|
284 |
+
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
|
285 |
+
|
286 |
+
## 💼 For Enterprise
|
287 |
+
|
288 |
+
Professional support for charset-normalizer is available as part of the [Tidelift
|
289 |
+
Subscription][1]. Tidelift gives software development teams a single source for
|
290 |
+
purchasing and maintaining their software, with professional grade assurances
|
291 |
+
from the experts who know it best, while seamlessly integrating with existing
|
292 |
+
tools.
|
293 |
+
|
294 |
+
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
|
295 |
+
|
296 |
+
[](https://www.bestpractices.dev/projects/7297)
|
297 |
+
|
298 |
+
# Changelog
|
299 |
+
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
300 |
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
301 |
+
|
302 |
+
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
|
303 |
+
|
304 |
+
### Changed
|
305 |
+
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
|
306 |
+
- Enforce annotation delayed loading for a simpler and consistent types in the project.
|
307 |
+
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
|
308 |
+
|
309 |
+
### Added
|
310 |
+
- pre-commit configuration.
|
311 |
+
- noxfile.
|
312 |
+
|
313 |
+
### Removed
|
314 |
+
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
|
315 |
+
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
|
316 |
+
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
|
317 |
+
- Unused `utils.range_scan` function.
|
318 |
+
|
319 |
+
### Fixed
|
320 |
+
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
|
321 |
+
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
|
322 |
+
|
323 |
+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
|
324 |
+
|
325 |
+
### Added
|
326 |
+
- Argument `--no-preemptive` in the CLI to prevent the detector to search for hints.
|
327 |
+
- Support for Python 3.13 (#512)
|
328 |
+
|
329 |
+
### Fixed
|
330 |
+
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
|
331 |
+
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
|
332 |
+
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
|
333 |
+
|
334 |
+
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
|
335 |
+
|
336 |
+
### Fixed
|
337 |
+
- Unintentional memory usage regression when using large payload that match several encoding (#376)
|
338 |
+
- Regression on some detection case showcased in the documentation (#371)
|
339 |
+
|
340 |
+
### Added
|
341 |
+
- Noise (md) probe that identify malformed arabic representation due to the presence of letters in isolated form (credit to my wife)
|
342 |
+
|
343 |
+
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
|
344 |
+
|
345 |
+
### Changed
|
346 |
+
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
|
347 |
+
- Improved the general detection reliability based on reports from the community
|
348 |
+
|
349 |
+
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
|
350 |
+
|
351 |
+
### Added
|
352 |
+
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
|
353 |
+
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
|
354 |
+
|
355 |
+
### Removed
|
356 |
+
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
|
357 |
+
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
|
358 |
+
|
359 |
+
### Changed
|
360 |
+
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
|
361 |
+
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
|
362 |
+
|
363 |
+
### Fixed
|
364 |
+
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
|
365 |
+
|
366 |
+
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
|
367 |
+
|
368 |
+
### Changed
|
369 |
+
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
|
370 |
+
- Minor improvement over the global detection reliability
|
371 |
+
|
372 |
+
### Added
|
373 |
+
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
|
374 |
+
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
|
375 |
+
- Explicit support for Python 3.12
|
376 |
+
|
377 |
+
### Fixed
|
378 |
+
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
|
379 |
+
|
380 |
+
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
|
381 |
+
|
382 |
+
### Added
|
383 |
+
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
|
384 |
+
|
385 |
+
### Removed
|
386 |
+
- Support for Python 3.6 (PR #260)
|
387 |
+
|
388 |
+
### Changed
|
389 |
+
- Optional speedup provided by mypy/c 1.0.1
|
390 |
+
|
391 |
+
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
|
392 |
+
|
393 |
+
### Fixed
|
394 |
+
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
|
395 |
+
|
396 |
+
### Changed
|
397 |
+
- Speedup provided by mypy/c 0.990 on Python >= 3.7
|
398 |
+
|
399 |
+
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
|
400 |
+
|
401 |
+
### Added
|
402 |
+
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
403 |
+
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
404 |
+
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
405 |
+
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
|
406 |
+
|
407 |
+
### Changed
|
408 |
+
- Build with static metadata using 'build' frontend
|
409 |
+
- Make the language detection stricter
|
410 |
+
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
411 |
+
|
412 |
+
### Fixed
|
413 |
+
- CLI with opt --normalize fail when using full path for files
|
414 |
+
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
|
415 |
+
- Sphinx warnings when generating the documentation
|
416 |
+
|
417 |
+
### Removed
|
418 |
+
- Coherence detector no longer return 'Simple English' instead return 'English'
|
419 |
+
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
420 |
+
- Breaking: Method `first()` and `best()` from CharsetMatch
|
421 |
+
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
422 |
+
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
423 |
+
- Breaking: Top-level function `normalize`
|
424 |
+
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
425 |
+
- Support for the backport `unicodedata2`
|
426 |
+
|
427 |
+
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
|
428 |
+
|
429 |
+
### Added
|
430 |
+
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
|
431 |
+
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
|
432 |
+
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
|
433 |
+
|
434 |
+
### Changed
|
435 |
+
- Build with static metadata using 'build' frontend
|
436 |
+
- Make the language detection stricter
|
437 |
+
|
438 |
+
### Fixed
|
439 |
+
- CLI with opt --normalize fail when using full path for files
|
440 |
+
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
|
441 |
+
|
442 |
+
### Removed
|
443 |
+
- Coherence detector no longer return 'Simple English' instead return 'English'
|
444 |
+
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
|
445 |
+
|
446 |
+
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
|
447 |
+
|
448 |
+
### Added
|
449 |
+
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
|
450 |
+
|
451 |
+
### Removed
|
452 |
+
- Breaking: Method `first()` and `best()` from CharsetMatch
|
453 |
+
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
|
454 |
+
|
455 |
+
### Fixed
|
456 |
+
- Sphinx warnings when generating the documentation
|
457 |
+
|
458 |
+
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
|
459 |
+
|
460 |
+
### Changed
|
461 |
+
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
|
462 |
+
|
463 |
+
### Removed
|
464 |
+
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
|
465 |
+
- Breaking: Top-level function `normalize`
|
466 |
+
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
|
467 |
+
- Support for the backport `unicodedata2`
|
468 |
+
|
469 |
+
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
|
470 |
+
|
471 |
+
### Deprecated
|
472 |
+
- Function `normalize` scheduled for removal in 3.0
|
473 |
+
|
474 |
+
### Changed
|
475 |
+
- Removed useless call to decode in fn is_unprintable (#206)
|
476 |
+
|
477 |
+
### Fixed
|
478 |
+
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
|
479 |
+
|
480 |
+
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
|
481 |
+
|
482 |
+
### Added
|
483 |
+
- Output the Unicode table version when running the CLI with `--version` (PR #194)
|
484 |
+
|
485 |
+
### Changed
|
486 |
+
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
|
487 |
+
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
|
488 |
+
|
489 |
+
### Fixed
|
490 |
+
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
|
491 |
+
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
|
492 |
+
|
493 |
+
### Removed
|
494 |
+
- Support for Python 3.5 (PR #192)
|
495 |
+
|
496 |
+
### Deprecated
|
497 |
+
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
|
498 |
+
|
499 |
+
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
|
500 |
+
|
501 |
+
### Fixed
|
502 |
+
- ASCII miss-detection on rare cases (PR #170)
|
503 |
+
|
504 |
+
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
|
505 |
+
|
506 |
+
### Added
|
507 |
+
- Explicit support for Python 3.11 (PR #164)
|
508 |
+
|
509 |
+
### Changed
|
510 |
+
- The logging behavior have been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
|
511 |
+
|
512 |
+
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
|
513 |
+
|
514 |
+
### Fixed
|
515 |
+
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
|
516 |
+
|
517 |
+
### Changed
|
518 |
+
- Skipping the language-detection (CD) on ASCII (PR #155)
|
519 |
+
|
520 |
+
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
|
521 |
+
|
522 |
+
### Changed
|
523 |
+
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
|
524 |
+
|
525 |
+
### Fixed
|
526 |
+
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
|
527 |
+
|
528 |
+
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
|
529 |
+
### Changed
|
530 |
+
- Improvement over Vietnamese detection (PR #126)
|
531 |
+
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
|
532 |
+
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
|
533 |
+
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
|
534 |
+
- Code style as refactored by Sourcery-AI (PR #131)
|
535 |
+
- Minor adjustment on the MD around european words (PR #133)
|
536 |
+
- Remove and replace SRTs from assets / tests (PR #139)
|
537 |
+
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
538 |
+
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
|
539 |
+
|
540 |
+
### Fixed
|
541 |
+
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
|
542 |
+
- Avoid using too insignificant chunk (PR #137)
|
543 |
+
|
544 |
+
### Added
|
545 |
+
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
|
546 |
+
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
|
547 |
+
|
548 |
+
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
|
549 |
+
### Added
|
550 |
+
- Add support for Kazakh (Cyrillic) language detection (PR #109)
|
551 |
+
|
552 |
+
### Changed
|
553 |
+
- Further, improve inferring the language from a given single-byte code page (PR #112)
|
554 |
+
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
|
555 |
+
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
|
556 |
+
- Various detection improvement (MD+CD) (PR #117)
|
557 |
+
|
558 |
+
### Removed
|
559 |
+
- Remove redundant logging entry about detected language(s) (PR #115)
|
560 |
+
|
561 |
+
### Fixed
|
562 |
+
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
|
563 |
+
|
564 |
+
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
|
565 |
+
### Fixed
|
566 |
+
+- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
+- Fix CLI crash when using --minimal output in certain cases (PR #103)
+
+### Changed
+- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
+
+## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
+### Changed
+- The project now comply with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
+- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
+- The Unicode detection is slightly improved (PR #93)
+- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
+
+### Removed
+- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
+
+### Fixed
+- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
+- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
+- The MANIFEST.in was not exhaustive (PR #78)
+
+## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
+### Fixed
+- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
+- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
+- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
+- Submatch factoring could be wrong in rare edge cases (PR #72)
+- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
+- Fix line endings from CRLF to LF for certain project files (PR #67)
+
+### Changed
+- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
+- Allow fallback on specified encoding if any (PR #71)
+
+## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
+### Changed
+- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
+- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
+
+## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
+### Fixed
+- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
+
+### Changed
+- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
+
+## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
+### Fixed
+- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
+- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
+- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
+- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
+
+### Changed
+- Public function normalize default args values were not aligned with from_bytes (PR #53)
+
+### Added
+- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
+
+## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
+### Changed
+- 4x to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
+- Accent has been made on UTF-8 detection, should perform rather instantaneous.
+- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
+- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
+- The program has been rewritten to ease the readability and maintainability. (+Using static typing)+
+- utf_7 detection has been reinstated.
+
+### Removed
+- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
+- Removed support for these languages: Catalan, Esperanto, Kazakh, Baque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
+- The exception hook on UnicodeDecodeError has been removed.
+
+### Deprecated
+- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
+
+### Fixed
+- The CLI output used the relative path of the file(s). Should be absolute.
+
+## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
+### Fixed
+- Logger configuration/usage no longer conflict with others (PR #44)
+
+## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
+### Removed
+- Using standard logging instead of using the package loguru.
+- Dropping nose test framework in favor of the maintained pytest.
+- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
+- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
+- Stop support for UTF-7 that does not contain a SIG.
+- Dropping PrettyTable, replaced with pure JSON output in CLI.
+
+### Fixed
+- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
+- Not searching properly for the BOM when trying utf32/16 parent codec.
+
+### Changed
+- Improving the package final size by compressing frequencies.json.
+- Huge improvement over the larges payload.
+
+### Added
+- CLI now produces JSON consumable output.
+- Return ASCII if given sequences fit. Given reasonable confidence.
+
+## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
+
+### Fixed
+- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
+
+## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
+
+### Fixed
+- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
+
+## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
+
+### Fixed
+- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
+
+## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
+
+### Changed
+- Amend the previous release to allow prettytable 2.0 (PR #35)
+
+## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
+
+### Fixed
+- Fix error while using the package with a python pre-release interpreter (PR #33)
+
+### Changed
+- Dependencies refactoring, constraints revised.
+
+### Added
+- Add python 3.9 and 3.10 to the supported interpreters
+
+MIT License
+
+Copyright (c) 2025 TAHRI Ahmed R.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/RECORD
ADDED
@@ -0,0 +1,35 @@
1 |
+
../../Scripts/normalizer.exe,sha256=aGyf7WAVLi4gHrr8F-d9-4fQG9ifpfMEXEvLwyt8KjI,108411
|
2 |
+
charset_normalizer-3.4.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
3 |
+
charset_normalizer-3.4.1.dist-info/LICENSE,sha256=GFd0hdNwTxpHne2OVzwJds_tMV_S_ReYP6mI2kwvcNE,1092
|
4 |
+
charset_normalizer-3.4.1.dist-info/METADATA,sha256=0_fAC3DknimRZusm6kkP4ylPD0JVzBq5mKHWLNBJM6w,36034
|
5 |
+
charset_normalizer-3.4.1.dist-info/RECORD,,
|
6 |
+
charset_normalizer-3.4.1.dist-info/WHEEL,sha256=pWXrJbnZSH-J-PhYmKs2XNn4DHCPNBYq965vsBJBFvA,101
|
7 |
+
charset_normalizer-3.4.1.dist-info/entry_points.txt,sha256=8C-Y3iXIfyXQ83Tpir2B8t-XLJYpxF5xbb38d_js-h4,65
|
8 |
+
charset_normalizer-3.4.1.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
|
9 |
+
charset_normalizer/__init__.py,sha256=0NT8MHi7SKq3juMqYfOdrkzjisK0L73lneNHH4qaUAs,1638
|
10 |
+
charset_normalizer/__main__.py,sha256=2sj_BS6H0sU25C1bMqz9DVwa6kOK9lchSEbSU-_iu7M,115
|
11 |
+
charset_normalizer/__pycache__/__init__.cpython-312.pyc,,
|
12 |
+
charset_normalizer/__pycache__/__main__.cpython-312.pyc,,
|
13 |
+
charset_normalizer/__pycache__/api.cpython-312.pyc,,
|
14 |
+
charset_normalizer/__pycache__/cd.cpython-312.pyc,,
|
15 |
+
charset_normalizer/__pycache__/constant.cpython-312.pyc,,
|
16 |
+
charset_normalizer/__pycache__/legacy.cpython-312.pyc,,
|
17 |
+
charset_normalizer/__pycache__/md.cpython-312.pyc,,
|
18 |
+
charset_normalizer/__pycache__/models.cpython-312.pyc,,
|
19 |
+
charset_normalizer/__pycache__/utils.cpython-312.pyc,,
|
20 |
+
charset_normalizer/__pycache__/version.cpython-312.pyc,,
|
21 |
+
charset_normalizer/api.py,sha256=2a0p2Gnhbdo9O6C04CNxTSN23fIbgOF20nxb0pWPNFM,23285
|
22 |
+
charset_normalizer/cd.py,sha256=uq8nVxRpR6Guc16ACvOWtL8KO3w7vYaCh8hHisuOyTg,12917
|
23 |
+
charset_normalizer/cli/__init__.py,sha256=d9MUx-1V_qD3x9igIy4JT4oC5CU0yjulk7QyZWeRFhg,144
|
24 |
+
charset_normalizer/cli/__main__.py,sha256=lZ89qRWun7FRxX0qm1GhK-m0DH0i048yiMAX1mVIuRg,10731
|
25 |
+
charset_normalizer/cli/__pycache__/__init__.cpython-312.pyc,,
|
26 |
+
charset_normalizer/cli/__pycache__/__main__.cpython-312.pyc,,
|
27 |
+
charset_normalizer/constant.py,sha256=7OKYi28cJjZxIcX3lQCwfK9ijoOgaVEbERww7SqqNSY,42475
|
28 |
+
charset_normalizer/legacy.py,sha256=v8An1aAQHUu036UWOhyIaDGkirZ0t4hfNVlyje5KInU,2394
|
29 |
+
charset_normalizer/md.cp312-win_amd64.pyd,sha256=XBGy--IKda7c3iBfvw_dovocqb2RSucmVtxvtlG_3tA,10752
|
30 |
+
charset_normalizer/md.py,sha256=e452fhwIAguEUr3FJzG7QZvFgXI-dVLOh_M1ZUiFI6U,20666
|
31 |
+
charset_normalizer/md__mypyc.cp312-win_amd64.pyd,sha256=_-jWSji0BgBVvrIHbmabYQNMBF4-xTusdO5mu6P8JsA,125440
|
32 |
+
charset_normalizer/models.py,sha256=ZR2PE-fqf6dASZfqdE5Uhkmr0o1MciSdXOjuNqwkmvg,12754
|
33 |
+
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34 |
+
charset_normalizer/utils.py,sha256=oH9Q3WcAMwmsSB7uM8uDozz9DXnkYecbkTNbdnMbgzI,12410
|
35 |
+
charset_normalizer/version.py,sha256=7_thI7FzRQxEsbtUYwrJs3FCFWF666mw74H8mggPRR0,123
|
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (75.6.0)
+Root-Is-Purelib: false
+Tag: cp312-cp312-win_amd64
+
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt
ADDED
@@ -0,0 +1,2 @@
+[console_scripts]
+normalizer = charset_normalizer:cli.cli_detect
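
The entry point above wires the `normalizer` console script to `charset_normalizer.cli.cli_detect`. As a minimal sketch (not part of this upload), the same target can be called programmatically; `sample.txt` is a hypothetical placeholder path.

```python
# Hedged sketch: invoke the console-script target directly instead of the
# installed `normalizer` executable. "sample.txt" is a hypothetical path.
from charset_normalizer.cli import cli_detect

# Roughly equivalent to running `normalizer --minimal sample.txt` in a shell;
# cli_detect() returns 0 when everything went fine, non-zero otherwise.
exit_code = cli_detect(["--minimal", "sample.txt"])
print(exit_code)
```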
env/Lib/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+charset_normalizer
env/Lib/site-packages/charset_normalizer/__init__.py
ADDED
@@ -0,0 +1,48 @@
+"""
+Charset-Normalizer
+~~~~~~~~~~~~~~
+The Real First Universal Charset Detector.
+A library that helps you read text from an unknown charset encoding.
+Motivated by chardet, This package is trying to resolve the issue by taking a new approach.
+All IANA character set names for which the Python core library provides codecs are supported.
+
+Basic usage:
+>>> from charset_normalizer import from_bytes
+>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
+>>> best_guess = results.best()
+>>> str(best_guess)
+'Bсеки човек има право на образование. Oбразованието!'
+
+Others methods and usages are available - see the full documentation
+at <https://github.com/Ousret/charset_normalizer>.
+:copyright: (c) 2021 by Ahmed TAHRI
+:license: MIT, see LICENSE for more details.
+"""
+
+from __future__ import annotations
+
+import logging
+
+from .api import from_bytes, from_fp, from_path, is_binary
+from .legacy import detect
+from .models import CharsetMatch, CharsetMatches
+from .utils import set_logging_handler
+from .version import VERSION, __version__
+
+__all__ = (
+    "from_fp",
+    "from_path",
+    "from_bytes",
+    "is_binary",
+    "detect",
+    "CharsetMatch",
+    "CharsetMatches",
+    "__version__",
+    "VERSION",
+    "set_logging_handler",
+)
+
+# Attach a NullHandler to the top level logger by default
+# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
+
+logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())
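
For context, a minimal usage sketch of the public surface exported above (`from_path`, `detect`, and the `CharsetMatches` container). It is illustrative only; `legacy.txt` is a hypothetical file name.

```python
# Minimal sketch of the exports listed in __all__ above.
# "legacy.txt" is a hypothetical file name used only for illustration.
from charset_normalizer import detect, from_path

results = from_path("legacy.txt")      # CharsetMatches container
best_guess = results.best()            # best CharsetMatch, or None if nothing fit

if best_guess is not None:
    print(best_guess.encoding)         # e.g. "cp1252"
    text = str(best_guess)             # payload decoded to str

# chardet-compatible shortcut re-exported from .legacy
with open("legacy.txt", "rb") as fh:
    print(detect(fh.read()))           # {'encoding': ..., 'language': ..., 'confidence': ...}
```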
env/Lib/site-packages/charset_normalizer/__main__.py
ADDED
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from .cli import cli_detect
+
+if __name__ == "__main__":
+    cli_detect()
env/Lib/site-packages/charset_normalizer/api.py
ADDED
@@ -0,0 +1,668 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import logging
|
4 |
+
from os import PathLike
|
5 |
+
from typing import BinaryIO
|
6 |
+
|
7 |
+
from .cd import (
|
8 |
+
coherence_ratio,
|
9 |
+
encoding_languages,
|
10 |
+
mb_encoding_languages,
|
11 |
+
merge_coherence_ratios,
|
12 |
+
)
|
13 |
+
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
|
14 |
+
from .md import mess_ratio
|
15 |
+
from .models import CharsetMatch, CharsetMatches
|
16 |
+
from .utils import (
|
17 |
+
any_specified_encoding,
|
18 |
+
cut_sequence_chunks,
|
19 |
+
iana_name,
|
20 |
+
identify_sig_or_bom,
|
21 |
+
is_cp_similar,
|
22 |
+
is_multi_byte_encoding,
|
23 |
+
should_strip_sig_or_bom,
|
24 |
+
)
|
25 |
+
|
26 |
+
logger = logging.getLogger("charset_normalizer")
|
27 |
+
explain_handler = logging.StreamHandler()
|
28 |
+
explain_handler.setFormatter(
|
29 |
+
logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
|
30 |
+
)
|
31 |
+
|
32 |
+
|
33 |
+
def from_bytes(
|
34 |
+
sequences: bytes | bytearray,
|
35 |
+
steps: int = 5,
|
36 |
+
chunk_size: int = 512,
|
37 |
+
threshold: float = 0.2,
|
38 |
+
cp_isolation: list[str] | None = None,
|
39 |
+
cp_exclusion: list[str] | None = None,
|
40 |
+
preemptive_behaviour: bool = True,
|
41 |
+
explain: bool = False,
|
42 |
+
language_threshold: float = 0.1,
|
43 |
+
enable_fallback: bool = True,
|
44 |
+
) -> CharsetMatches:
|
45 |
+
"""
|
46 |
+
Given a raw bytes sequence, return the best possibles charset usable to render str objects.
|
47 |
+
If there is no results, it is a strong indicator that the source is binary/not text.
|
48 |
+
By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
|
49 |
+
And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.
|
50 |
+
|
51 |
+
The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
|
52 |
+
but never take it for granted. Can improve the performance.
|
53 |
+
|
54 |
+
You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
|
55 |
+
purpose.
|
56 |
+
|
57 |
+
This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
|
58 |
+
By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
|
59 |
+
toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
|
60 |
+
Custom logging format and handler can be set manually.
|
61 |
+
"""
|
62 |
+
|
63 |
+
if not isinstance(sequences, (bytearray, bytes)):
|
64 |
+
raise TypeError(
|
65 |
+
"Expected object of type bytes or bytearray, got: {}".format(
|
66 |
+
type(sequences)
|
67 |
+
)
|
68 |
+
)
|
69 |
+
|
70 |
+
if explain:
|
71 |
+
previous_logger_level: int = logger.level
|
72 |
+
logger.addHandler(explain_handler)
|
73 |
+
logger.setLevel(TRACE)
|
74 |
+
|
75 |
+
length: int = len(sequences)
|
76 |
+
|
77 |
+
if length == 0:
|
78 |
+
logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
|
79 |
+
if explain: # Defensive: ensure exit path clean handler
|
80 |
+
logger.removeHandler(explain_handler)
|
81 |
+
logger.setLevel(previous_logger_level or logging.WARNING)
|
82 |
+
return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])
|
83 |
+
|
84 |
+
if cp_isolation is not None:
|
85 |
+
logger.log(
|
86 |
+
TRACE,
|
87 |
+
"cp_isolation is set. use this flag for debugging purpose. "
|
88 |
+
"limited list of encoding allowed : %s.",
|
89 |
+
", ".join(cp_isolation),
|
90 |
+
)
|
91 |
+
cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
|
92 |
+
else:
|
93 |
+
cp_isolation = []
|
94 |
+
|
95 |
+
if cp_exclusion is not None:
|
96 |
+
logger.log(
|
97 |
+
TRACE,
|
98 |
+
"cp_exclusion is set. use this flag for debugging purpose. "
|
99 |
+
"limited list of encoding excluded : %s.",
|
100 |
+
", ".join(cp_exclusion),
|
101 |
+
)
|
102 |
+
cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
|
103 |
+
else:
|
104 |
+
cp_exclusion = []
|
105 |
+
|
106 |
+
if length <= (chunk_size * steps):
|
107 |
+
logger.log(
|
108 |
+
TRACE,
|
109 |
+
"override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
|
110 |
+
steps,
|
111 |
+
chunk_size,
|
112 |
+
length,
|
113 |
+
)
|
114 |
+
steps = 1
|
115 |
+
chunk_size = length
|
116 |
+
|
117 |
+
if steps > 1 and length / steps < chunk_size:
|
118 |
+
chunk_size = int(length / steps)
|
119 |
+
|
120 |
+
is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
|
121 |
+
is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE
|
122 |
+
|
123 |
+
if is_too_small_sequence:
|
124 |
+
logger.log(
|
125 |
+
TRACE,
|
126 |
+
"Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
|
127 |
+
length
|
128 |
+
),
|
129 |
+
)
|
130 |
+
elif is_too_large_sequence:
|
131 |
+
logger.log(
|
132 |
+
TRACE,
|
133 |
+
"Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
|
134 |
+
length
|
135 |
+
),
|
136 |
+
)
|
137 |
+
|
138 |
+
prioritized_encodings: list[str] = []
|
139 |
+
|
140 |
+
specified_encoding: str | None = (
|
141 |
+
any_specified_encoding(sequences) if preemptive_behaviour else None
|
142 |
+
)
|
143 |
+
|
144 |
+
if specified_encoding is not None:
|
145 |
+
prioritized_encodings.append(specified_encoding)
|
146 |
+
logger.log(
|
147 |
+
TRACE,
|
148 |
+
"Detected declarative mark in sequence. Priority +1 given for %s.",
|
149 |
+
specified_encoding,
|
150 |
+
)
|
151 |
+
|
152 |
+
tested: set[str] = set()
|
153 |
+
tested_but_hard_failure: list[str] = []
|
154 |
+
tested_but_soft_failure: list[str] = []
|
155 |
+
|
156 |
+
fallback_ascii: CharsetMatch | None = None
|
157 |
+
fallback_u8: CharsetMatch | None = None
|
158 |
+
fallback_specified: CharsetMatch | None = None
|
159 |
+
|
160 |
+
results: CharsetMatches = CharsetMatches()
|
161 |
+
|
162 |
+
early_stop_results: CharsetMatches = CharsetMatches()
|
163 |
+
|
164 |
+
sig_encoding, sig_payload = identify_sig_or_bom(sequences)
|
165 |
+
|
166 |
+
if sig_encoding is not None:
|
167 |
+
prioritized_encodings.append(sig_encoding)
|
168 |
+
logger.log(
|
169 |
+
TRACE,
|
170 |
+
"Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
|
171 |
+
len(sig_payload),
|
172 |
+
sig_encoding,
|
173 |
+
)
|
174 |
+
|
175 |
+
prioritized_encodings.append("ascii")
|
176 |
+
|
177 |
+
if "utf_8" not in prioritized_encodings:
|
178 |
+
prioritized_encodings.append("utf_8")
|
179 |
+
|
180 |
+
for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
|
181 |
+
if cp_isolation and encoding_iana not in cp_isolation:
|
182 |
+
continue
|
183 |
+
|
184 |
+
if cp_exclusion and encoding_iana in cp_exclusion:
|
185 |
+
continue
|
186 |
+
|
187 |
+
if encoding_iana in tested:
|
188 |
+
continue
|
189 |
+
|
190 |
+
tested.add(encoding_iana)
|
191 |
+
|
192 |
+
decoded_payload: str | None = None
|
193 |
+
bom_or_sig_available: bool = sig_encoding == encoding_iana
|
194 |
+
strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
|
195 |
+
encoding_iana
|
196 |
+
)
|
197 |
+
|
198 |
+
if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
|
199 |
+
logger.log(
|
200 |
+
TRACE,
|
201 |
+
"Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
|
202 |
+
encoding_iana,
|
203 |
+
)
|
204 |
+
continue
|
205 |
+
if encoding_iana in {"utf_7"} and not bom_or_sig_available:
|
206 |
+
logger.log(
|
207 |
+
TRACE,
|
208 |
+
"Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
|
209 |
+
encoding_iana,
|
210 |
+
)
|
211 |
+
continue
|
212 |
+
|
213 |
+
try:
|
214 |
+
is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
|
215 |
+
except (ModuleNotFoundError, ImportError):
|
216 |
+
logger.log(
|
217 |
+
TRACE,
|
218 |
+
"Encoding %s does not provide an IncrementalDecoder",
|
219 |
+
encoding_iana,
|
220 |
+
)
|
221 |
+
continue
|
222 |
+
|
223 |
+
try:
|
224 |
+
if is_too_large_sequence and is_multi_byte_decoder is False:
|
225 |
+
str(
|
226 |
+
(
|
227 |
+
sequences[: int(50e4)]
|
228 |
+
if strip_sig_or_bom is False
|
229 |
+
else sequences[len(sig_payload) : int(50e4)]
|
230 |
+
),
|
231 |
+
encoding=encoding_iana,
|
232 |
+
)
|
233 |
+
else:
|
234 |
+
decoded_payload = str(
|
235 |
+
(
|
236 |
+
sequences
|
237 |
+
if strip_sig_or_bom is False
|
238 |
+
else sequences[len(sig_payload) :]
|
239 |
+
),
|
240 |
+
encoding=encoding_iana,
|
241 |
+
)
|
242 |
+
except (UnicodeDecodeError, LookupError) as e:
|
243 |
+
if not isinstance(e, LookupError):
|
244 |
+
logger.log(
|
245 |
+
TRACE,
|
246 |
+
"Code page %s does not fit given bytes sequence at ALL. %s",
|
247 |
+
encoding_iana,
|
248 |
+
str(e),
|
249 |
+
)
|
250 |
+
tested_but_hard_failure.append(encoding_iana)
|
251 |
+
continue
|
252 |
+
|
253 |
+
similar_soft_failure_test: bool = False
|
254 |
+
|
255 |
+
for encoding_soft_failed in tested_but_soft_failure:
|
256 |
+
if is_cp_similar(encoding_iana, encoding_soft_failed):
|
257 |
+
similar_soft_failure_test = True
|
258 |
+
break
|
259 |
+
|
260 |
+
if similar_soft_failure_test:
|
261 |
+
logger.log(
|
262 |
+
TRACE,
|
263 |
+
"%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
|
264 |
+
encoding_iana,
|
265 |
+
encoding_soft_failed,
|
266 |
+
)
|
267 |
+
continue
|
268 |
+
|
269 |
+
r_ = range(
|
270 |
+
0 if not bom_or_sig_available else len(sig_payload),
|
271 |
+
length,
|
272 |
+
int(length / steps),
|
273 |
+
)
|
274 |
+
|
275 |
+
multi_byte_bonus: bool = (
|
276 |
+
is_multi_byte_decoder
|
277 |
+
and decoded_payload is not None
|
278 |
+
and len(decoded_payload) < length
|
279 |
+
)
|
280 |
+
|
281 |
+
if multi_byte_bonus:
|
282 |
+
logger.log(
|
283 |
+
TRACE,
|
284 |
+
"Code page %s is a multi byte encoding table and it appear that at least one character "
|
285 |
+
"was encoded using n-bytes.",
|
286 |
+
encoding_iana,
|
287 |
+
)
|
288 |
+
|
289 |
+
max_chunk_gave_up: int = int(len(r_) / 4)
|
290 |
+
|
291 |
+
max_chunk_gave_up = max(max_chunk_gave_up, 2)
|
292 |
+
early_stop_count: int = 0
|
293 |
+
lazy_str_hard_failure = False
|
294 |
+
|
295 |
+
md_chunks: list[str] = []
|
296 |
+
md_ratios = []
|
297 |
+
|
298 |
+
try:
|
299 |
+
for chunk in cut_sequence_chunks(
|
300 |
+
sequences,
|
301 |
+
encoding_iana,
|
302 |
+
r_,
|
303 |
+
chunk_size,
|
304 |
+
bom_or_sig_available,
|
305 |
+
strip_sig_or_bom,
|
306 |
+
sig_payload,
|
307 |
+
is_multi_byte_decoder,
|
308 |
+
decoded_payload,
|
309 |
+
):
|
310 |
+
md_chunks.append(chunk)
|
311 |
+
|
312 |
+
md_ratios.append(
|
313 |
+
mess_ratio(
|
314 |
+
chunk,
|
315 |
+
threshold,
|
316 |
+
explain is True and 1 <= len(cp_isolation) <= 2,
|
317 |
+
)
|
318 |
+
)
|
319 |
+
|
320 |
+
if md_ratios[-1] >= threshold:
|
321 |
+
early_stop_count += 1
|
322 |
+
|
323 |
+
if (early_stop_count >= max_chunk_gave_up) or (
|
324 |
+
bom_or_sig_available and strip_sig_or_bom is False
|
325 |
+
):
|
326 |
+
break
|
327 |
+
except (
|
328 |
+
UnicodeDecodeError
|
329 |
+
) as e: # Lazy str loading may have missed something there
|
330 |
+
logger.log(
|
331 |
+
TRACE,
|
332 |
+
"LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
|
333 |
+
encoding_iana,
|
334 |
+
str(e),
|
335 |
+
)
|
336 |
+
early_stop_count = max_chunk_gave_up
|
337 |
+
lazy_str_hard_failure = True
|
338 |
+
|
339 |
+
# We might want to check the sequence again with the whole content
|
340 |
+
# Only if initial MD tests passes
|
341 |
+
if (
|
342 |
+
not lazy_str_hard_failure
|
343 |
+
and is_too_large_sequence
|
344 |
+
and not is_multi_byte_decoder
|
345 |
+
):
|
346 |
+
try:
|
347 |
+
sequences[int(50e3) :].decode(encoding_iana, errors="strict")
|
348 |
+
except UnicodeDecodeError as e:
|
349 |
+
logger.log(
|
350 |
+
TRACE,
|
351 |
+
"LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
|
352 |
+
encoding_iana,
|
353 |
+
str(e),
|
354 |
+
)
|
355 |
+
tested_but_hard_failure.append(encoding_iana)
|
356 |
+
continue
|
357 |
+
|
358 |
+
mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
|
359 |
+
if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
|
360 |
+
tested_but_soft_failure.append(encoding_iana)
|
361 |
+
logger.log(
|
362 |
+
TRACE,
|
363 |
+
"%s was excluded because of initial chaos probing. Gave up %i time(s). "
|
364 |
+
"Computed mean chaos is %f %%.",
|
365 |
+
encoding_iana,
|
366 |
+
early_stop_count,
|
367 |
+
round(mean_mess_ratio * 100, ndigits=3),
|
368 |
+
)
|
369 |
+
# Preparing those fallbacks in case we got nothing.
|
370 |
+
if (
|
371 |
+
enable_fallback
|
372 |
+
and encoding_iana in ["ascii", "utf_8", specified_encoding]
|
373 |
+
and not lazy_str_hard_failure
|
374 |
+
):
|
375 |
+
fallback_entry = CharsetMatch(
|
376 |
+
sequences,
|
377 |
+
encoding_iana,
|
378 |
+
threshold,
|
379 |
+
False,
|
380 |
+
[],
|
381 |
+
decoded_payload,
|
382 |
+
preemptive_declaration=specified_encoding,
|
383 |
+
)
|
384 |
+
if encoding_iana == specified_encoding:
|
385 |
+
fallback_specified = fallback_entry
|
386 |
+
elif encoding_iana == "ascii":
|
387 |
+
fallback_ascii = fallback_entry
|
388 |
+
else:
|
389 |
+
fallback_u8 = fallback_entry
|
390 |
+
continue
|
391 |
+
|
392 |
+
logger.log(
|
393 |
+
TRACE,
|
394 |
+
"%s passed initial chaos probing. Mean measured chaos is %f %%",
|
395 |
+
encoding_iana,
|
396 |
+
round(mean_mess_ratio * 100, ndigits=3),
|
397 |
+
)
|
398 |
+
|
399 |
+
if not is_multi_byte_decoder:
|
400 |
+
target_languages: list[str] = encoding_languages(encoding_iana)
|
401 |
+
else:
|
402 |
+
target_languages = mb_encoding_languages(encoding_iana)
|
403 |
+
|
404 |
+
if target_languages:
|
405 |
+
logger.log(
|
406 |
+
TRACE,
|
407 |
+
"{} should target any language(s) of {}".format(
|
408 |
+
encoding_iana, str(target_languages)
|
409 |
+
),
|
410 |
+
)
|
411 |
+
|
412 |
+
cd_ratios = []
|
413 |
+
|
414 |
+
# We shall skip the CD when its about ASCII
|
415 |
+
# Most of the time its not relevant to run "language-detection" on it.
|
416 |
+
if encoding_iana != "ascii":
|
417 |
+
for chunk in md_chunks:
|
418 |
+
chunk_languages = coherence_ratio(
|
419 |
+
chunk,
|
420 |
+
language_threshold,
|
421 |
+
",".join(target_languages) if target_languages else None,
|
422 |
+
)
|
423 |
+
|
424 |
+
cd_ratios.append(chunk_languages)
|
425 |
+
|
426 |
+
cd_ratios_merged = merge_coherence_ratios(cd_ratios)
|
427 |
+
|
428 |
+
if cd_ratios_merged:
|
429 |
+
logger.log(
|
430 |
+
TRACE,
|
431 |
+
"We detected language {} using {}".format(
|
432 |
+
cd_ratios_merged, encoding_iana
|
433 |
+
),
|
434 |
+
)
|
435 |
+
|
436 |
+
current_match = CharsetMatch(
|
437 |
+
sequences,
|
438 |
+
encoding_iana,
|
439 |
+
mean_mess_ratio,
|
440 |
+
bom_or_sig_available,
|
441 |
+
cd_ratios_merged,
|
442 |
+
(
|
443 |
+
decoded_payload
|
444 |
+
if (
|
445 |
+
is_too_large_sequence is False
|
446 |
+
or encoding_iana in [specified_encoding, "ascii", "utf_8"]
|
447 |
+
)
|
448 |
+
else None
|
449 |
+
),
|
450 |
+
preemptive_declaration=specified_encoding,
|
451 |
+
)
|
452 |
+
|
453 |
+
results.append(current_match)
|
454 |
+
|
455 |
+
if (
|
456 |
+
encoding_iana in [specified_encoding, "ascii", "utf_8"]
|
457 |
+
and mean_mess_ratio < 0.1
|
458 |
+
):
|
459 |
+
# If md says nothing to worry about, then... stop immediately!
|
460 |
+
if mean_mess_ratio == 0.0:
|
461 |
+
logger.debug(
|
462 |
+
"Encoding detection: %s is most likely the one.",
|
463 |
+
current_match.encoding,
|
464 |
+
)
|
465 |
+
if explain: # Defensive: ensure exit path clean handler
|
466 |
+
logger.removeHandler(explain_handler)
|
467 |
+
logger.setLevel(previous_logger_level)
|
468 |
+
return CharsetMatches([current_match])
|
469 |
+
|
470 |
+
early_stop_results.append(current_match)
|
471 |
+
|
472 |
+
if (
|
473 |
+
len(early_stop_results)
|
474 |
+
and (specified_encoding is None or specified_encoding in tested)
|
475 |
+
and "ascii" in tested
|
476 |
+
and "utf_8" in tested
|
477 |
+
):
|
478 |
+
probable_result: CharsetMatch = early_stop_results.best() # type: ignore[assignment]
|
479 |
+
logger.debug(
|
480 |
+
"Encoding detection: %s is most likely the one.",
|
481 |
+
probable_result.encoding,
|
482 |
+
)
|
483 |
+
if explain: # Defensive: ensure exit path clean handler
|
484 |
+
logger.removeHandler(explain_handler)
|
485 |
+
logger.setLevel(previous_logger_level)
|
486 |
+
|
487 |
+
return CharsetMatches([probable_result])
|
488 |
+
|
489 |
+
if encoding_iana == sig_encoding:
|
490 |
+
logger.debug(
|
491 |
+
"Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
|
492 |
+
"the beginning of the sequence.",
|
493 |
+
encoding_iana,
|
494 |
+
)
|
495 |
+
if explain: # Defensive: ensure exit path clean handler
|
496 |
+
logger.removeHandler(explain_handler)
|
497 |
+
logger.setLevel(previous_logger_level)
|
498 |
+
return CharsetMatches([results[encoding_iana]])
|
499 |
+
|
500 |
+
if len(results) == 0:
|
501 |
+
if fallback_u8 or fallback_ascii or fallback_specified:
|
502 |
+
logger.log(
|
503 |
+
TRACE,
|
504 |
+
"Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
|
505 |
+
)
|
506 |
+
|
507 |
+
if fallback_specified:
|
508 |
+
logger.debug(
|
509 |
+
"Encoding detection: %s will be used as a fallback match",
|
510 |
+
fallback_specified.encoding,
|
511 |
+
)
|
512 |
+
results.append(fallback_specified)
|
513 |
+
elif (
|
514 |
+
(fallback_u8 and fallback_ascii is None)
|
515 |
+
or (
|
516 |
+
fallback_u8
|
517 |
+
and fallback_ascii
|
518 |
+
and fallback_u8.fingerprint != fallback_ascii.fingerprint
|
519 |
+
)
|
520 |
+
or (fallback_u8 is not None)
|
521 |
+
):
|
522 |
+
logger.debug("Encoding detection: utf_8 will be used as a fallback match")
|
523 |
+
results.append(fallback_u8)
|
524 |
+
elif fallback_ascii:
|
525 |
+
logger.debug("Encoding detection: ascii will be used as a fallback match")
|
526 |
+
results.append(fallback_ascii)
|
527 |
+
|
528 |
+
if results:
|
529 |
+
logger.debug(
|
530 |
+
"Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
|
531 |
+
results.best().encoding, # type: ignore
|
532 |
+
len(results) - 1,
|
533 |
+
)
|
534 |
+
else:
|
535 |
+
logger.debug("Encoding detection: Unable to determine any suitable charset.")
|
536 |
+
|
537 |
+
if explain:
|
538 |
+
logger.removeHandler(explain_handler)
|
539 |
+
logger.setLevel(previous_logger_level)
|
540 |
+
|
541 |
+
return results
|
542 |
+
|
543 |
+
|
544 |
+
def from_fp(
|
545 |
+
fp: BinaryIO,
|
546 |
+
steps: int = 5,
|
547 |
+
chunk_size: int = 512,
|
548 |
+
threshold: float = 0.20,
|
549 |
+
cp_isolation: list[str] | None = None,
|
550 |
+
cp_exclusion: list[str] | None = None,
|
551 |
+
preemptive_behaviour: bool = True,
|
552 |
+
explain: bool = False,
|
553 |
+
language_threshold: float = 0.1,
|
554 |
+
enable_fallback: bool = True,
|
555 |
+
) -> CharsetMatches:
|
556 |
+
"""
|
557 |
+
Same thing than the function from_bytes but using a file pointer that is already ready.
|
558 |
+
Will not close the file pointer.
|
559 |
+
"""
|
560 |
+
return from_bytes(
|
561 |
+
fp.read(),
|
562 |
+
steps,
|
563 |
+
chunk_size,
|
564 |
+
threshold,
|
565 |
+
cp_isolation,
|
566 |
+
cp_exclusion,
|
567 |
+
preemptive_behaviour,
|
568 |
+
explain,
|
569 |
+
language_threshold,
|
570 |
+
enable_fallback,
|
571 |
+
)
|
572 |
+
|
573 |
+
|
574 |
+
def from_path(
|
575 |
+
path: str | bytes | PathLike, # type: ignore[type-arg]
|
576 |
+
steps: int = 5,
|
577 |
+
chunk_size: int = 512,
|
578 |
+
threshold: float = 0.20,
|
579 |
+
cp_isolation: list[str] | None = None,
|
580 |
+
cp_exclusion: list[str] | None = None,
|
581 |
+
preemptive_behaviour: bool = True,
|
582 |
+
explain: bool = False,
|
583 |
+
language_threshold: float = 0.1,
|
584 |
+
enable_fallback: bool = True,
|
585 |
+
) -> CharsetMatches:
|
586 |
+
"""
|
587 |
+
Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
|
588 |
+
Can raise IOError.
|
589 |
+
"""
|
590 |
+
with open(path, "rb") as fp:
|
591 |
+
return from_fp(
|
592 |
+
fp,
|
593 |
+
steps,
|
594 |
+
chunk_size,
|
595 |
+
threshold,
|
596 |
+
cp_isolation,
|
597 |
+
cp_exclusion,
|
598 |
+
preemptive_behaviour,
|
599 |
+
explain,
|
600 |
+
language_threshold,
|
601 |
+
enable_fallback,
|
602 |
+
)
|
603 |
+
|
604 |
+
|
605 |
+
def is_binary(
|
606 |
+
fp_or_path_or_payload: PathLike | str | BinaryIO | bytes, # type: ignore[type-arg]
|
607 |
+
steps: int = 5,
|
608 |
+
chunk_size: int = 512,
|
609 |
+
threshold: float = 0.20,
|
610 |
+
cp_isolation: list[str] | None = None,
|
611 |
+
cp_exclusion: list[str] | None = None,
|
612 |
+
preemptive_behaviour: bool = True,
|
613 |
+
explain: bool = False,
|
614 |
+
language_threshold: float = 0.1,
|
615 |
+
enable_fallback: bool = False,
|
616 |
+
) -> bool:
|
617 |
+
"""
|
618 |
+
Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
|
619 |
+
Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
|
620 |
+
are disabled to be stricter around ASCII-compatible but unlikely to be a string.
|
621 |
+
"""
|
622 |
+
if isinstance(fp_or_path_or_payload, (str, PathLike)):
|
623 |
+
guesses = from_path(
|
624 |
+
fp_or_path_or_payload,
|
625 |
+
steps=steps,
|
626 |
+
chunk_size=chunk_size,
|
627 |
+
threshold=threshold,
|
628 |
+
cp_isolation=cp_isolation,
|
629 |
+
cp_exclusion=cp_exclusion,
|
630 |
+
preemptive_behaviour=preemptive_behaviour,
|
631 |
+
explain=explain,
|
632 |
+
language_threshold=language_threshold,
|
633 |
+
enable_fallback=enable_fallback,
|
634 |
+
)
|
635 |
+
elif isinstance(
|
636 |
+
fp_or_path_or_payload,
|
637 |
+
(
|
638 |
+
bytes,
|
639 |
+
bytearray,
|
640 |
+
),
|
641 |
+
):
|
642 |
+
guesses = from_bytes(
|
643 |
+
fp_or_path_or_payload,
|
644 |
+
steps=steps,
|
645 |
+
chunk_size=chunk_size,
|
646 |
+
threshold=threshold,
|
647 |
+
cp_isolation=cp_isolation,
|
648 |
+
cp_exclusion=cp_exclusion,
|
649 |
+
preemptive_behaviour=preemptive_behaviour,
|
650 |
+
explain=explain,
|
651 |
+
language_threshold=language_threshold,
|
652 |
+
enable_fallback=enable_fallback,
|
653 |
+
)
|
654 |
+
else:
|
655 |
+
guesses = from_fp(
|
656 |
+
fp_or_path_or_payload,
|
657 |
+
steps=steps,
|
658 |
+
chunk_size=chunk_size,
|
659 |
+
threshold=threshold,
|
660 |
+
cp_isolation=cp_isolation,
|
661 |
+
cp_exclusion=cp_exclusion,
|
662 |
+
preemptive_behaviour=preemptive_behaviour,
|
663 |
+
explain=explain,
|
664 |
+
language_threshold=language_threshold,
|
665 |
+
enable_fallback=enable_fallback,
|
666 |
+
)
|
667 |
+
|
668 |
+
return not guesses
|
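
As a quick illustration of the options documented in the `from_bytes` docstring above (`cp_isolation`, fallbacks) and of `is_binary`, a hedged sketch follows; the sample payloads are made up for the example.

```python
# Hedged sketch of from_bytes keyword arguments and is_binary; the payloads
# below are made-up examples, not part of the uploaded code.
from charset_normalizer import from_bytes, is_binary

payload = "Déjà vu, c'était l'été.".encode("cp1252")

# cp_isolation narrows the tested code pages, as described in the docstring.
matches = from_bytes(payload, cp_isolation=["cp1252", "utf_8"])
best = matches.best()
print(best.encoding if best else "undetermined")

# is_binary reuses the same heuristics with fallback matches disabled
# (enable_fallback=False), so undecodable content yields True.
print(is_binary(b"\x00\xff\x00\xfe\x00"))  # likely True: no charset fits
print(is_binary(payload))                  # likely False: decodable text
```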
env/Lib/site-packages/charset_normalizer/cd.py
ADDED
@@ -0,0 +1,395 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import importlib
|
4 |
+
from codecs import IncrementalDecoder
|
5 |
+
from collections import Counter
|
6 |
+
from functools import lru_cache
|
7 |
+
from typing import Counter as TypeCounter
|
8 |
+
|
9 |
+
from .constant import (
|
10 |
+
FREQUENCIES,
|
11 |
+
KO_NAMES,
|
12 |
+
LANGUAGE_SUPPORTED_COUNT,
|
13 |
+
TOO_SMALL_SEQUENCE,
|
14 |
+
ZH_NAMES,
|
15 |
+
)
|
16 |
+
from .md import is_suspiciously_successive_range
|
17 |
+
from .models import CoherenceMatches
|
18 |
+
from .utils import (
|
19 |
+
is_accentuated,
|
20 |
+
is_latin,
|
21 |
+
is_multi_byte_encoding,
|
22 |
+
is_unicode_range_secondary,
|
23 |
+
unicode_range,
|
24 |
+
)
|
25 |
+
|
26 |
+
|
27 |
+
def encoding_unicode_range(iana_name: str) -> list[str]:
|
28 |
+
"""
|
29 |
+
Return associated unicode ranges in a single byte code page.
|
30 |
+
"""
|
31 |
+
if is_multi_byte_encoding(iana_name):
|
32 |
+
raise OSError("Function not supported on multi-byte code page")
|
33 |
+
|
34 |
+
decoder = importlib.import_module(f"encodings.{iana_name}").IncrementalDecoder
|
35 |
+
|
36 |
+
p: IncrementalDecoder = decoder(errors="ignore")
|
37 |
+
seen_ranges: dict[str, int] = {}
|
38 |
+
character_count: int = 0
|
39 |
+
|
40 |
+
for i in range(0x40, 0xFF):
|
41 |
+
chunk: str = p.decode(bytes([i]))
|
42 |
+
|
43 |
+
if chunk:
|
44 |
+
character_range: str | None = unicode_range(chunk)
|
45 |
+
|
46 |
+
if character_range is None:
|
47 |
+
continue
|
48 |
+
|
49 |
+
if is_unicode_range_secondary(character_range) is False:
|
50 |
+
if character_range not in seen_ranges:
|
51 |
+
seen_ranges[character_range] = 0
|
52 |
+
seen_ranges[character_range] += 1
|
53 |
+
character_count += 1
|
54 |
+
|
55 |
+
return sorted(
|
56 |
+
[
|
57 |
+
character_range
|
58 |
+
for character_range in seen_ranges
|
59 |
+
if seen_ranges[character_range] / character_count >= 0.15
|
60 |
+
]
|
61 |
+
)
|
62 |
+
|
63 |
+
|
64 |
+
def unicode_range_languages(primary_range: str) -> list[str]:
|
65 |
+
"""
|
66 |
+
Return inferred languages used with a unicode range.
|
67 |
+
"""
|
68 |
+
languages: list[str] = []
|
69 |
+
|
70 |
+
for language, characters in FREQUENCIES.items():
|
71 |
+
for character in characters:
|
72 |
+
if unicode_range(character) == primary_range:
|
73 |
+
languages.append(language)
|
74 |
+
break
|
75 |
+
|
76 |
+
return languages
|
77 |
+
|
78 |
+
|
79 |
+
@lru_cache()
|
80 |
+
def encoding_languages(iana_name: str) -> list[str]:
|
81 |
+
"""
|
82 |
+
Single-byte encoding language association. Some code page are heavily linked to particular language(s).
|
83 |
+
This function does the correspondence.
|
84 |
+
"""
|
85 |
+
unicode_ranges: list[str] = encoding_unicode_range(iana_name)
|
86 |
+
primary_range: str | None = None
|
87 |
+
|
88 |
+
for specified_range in unicode_ranges:
|
89 |
+
if "Latin" not in specified_range:
|
90 |
+
primary_range = specified_range
|
91 |
+
break
|
92 |
+
|
93 |
+
if primary_range is None:
|
94 |
+
return ["Latin Based"]
|
95 |
+
|
96 |
+
return unicode_range_languages(primary_range)
|
97 |
+
|
98 |
+
|
99 |
+
@lru_cache()
|
100 |
+
def mb_encoding_languages(iana_name: str) -> list[str]:
|
101 |
+
"""
|
102 |
+
Multi-byte encoding language association. Some code page are heavily linked to particular language(s).
|
103 |
+
This function does the correspondence.
|
104 |
+
"""
|
105 |
+
if (
|
106 |
+
iana_name.startswith("shift_")
|
107 |
+
or iana_name.startswith("iso2022_jp")
|
108 |
+
or iana_name.startswith("euc_j")
|
109 |
+
or iana_name == "cp932"
|
110 |
+
):
|
111 |
+
return ["Japanese"]
|
112 |
+
if iana_name.startswith("gb") or iana_name in ZH_NAMES:
|
113 |
+
return ["Chinese"]
|
114 |
+
if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
|
115 |
+
return ["Korean"]
|
116 |
+
|
117 |
+
return []
|
118 |
+
|
119 |
+
|
120 |
+
@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
|
121 |
+
def get_target_features(language: str) -> tuple[bool, bool]:
|
122 |
+
"""
|
123 |
+
Determine main aspects from a supported language if it contains accents and if is pure Latin.
|
124 |
+
"""
|
125 |
+
target_have_accents: bool = False
|
126 |
+
target_pure_latin: bool = True
|
127 |
+
|
128 |
+
for character in FREQUENCIES[language]:
|
129 |
+
if not target_have_accents and is_accentuated(character):
|
130 |
+
target_have_accents = True
|
131 |
+
if target_pure_latin and is_latin(character) is False:
|
132 |
+
target_pure_latin = False
|
133 |
+
|
134 |
+
return target_have_accents, target_pure_latin
|
135 |
+
|
136 |
+
|
137 |
+
def alphabet_languages(
|
138 |
+
characters: list[str], ignore_non_latin: bool = False
|
139 |
+
) -> list[str]:
|
140 |
+
"""
|
141 |
+
Return associated languages associated to given characters.
|
142 |
+
"""
|
143 |
+
languages: list[tuple[str, float]] = []
|
144 |
+
|
145 |
+
source_have_accents = any(is_accentuated(character) for character in characters)
|
146 |
+
|
147 |
+
for language, language_characters in FREQUENCIES.items():
|
148 |
+
target_have_accents, target_pure_latin = get_target_features(language)
|
149 |
+
|
150 |
+
if ignore_non_latin and target_pure_latin is False:
|
151 |
+
continue
|
152 |
+
|
153 |
+
if target_have_accents is False and source_have_accents:
|
154 |
+
continue
|
155 |
+
|
156 |
+
character_count: int = len(language_characters)
|
157 |
+
|
158 |
+
character_match_count: int = len(
|
159 |
+
[c for c in language_characters if c in characters]
|
160 |
+
)
|
161 |
+
|
162 |
+
ratio: float = character_match_count / character_count
|
163 |
+
|
164 |
+
if ratio >= 0.2:
|
165 |
+
languages.append((language, ratio))
|
166 |
+
|
167 |
+
languages = sorted(languages, key=lambda x: x[1], reverse=True)
|
168 |
+
|
169 |
+
return [compatible_language[0] for compatible_language in languages]
|
170 |
+
|
171 |
+
|
172 |
+
def characters_popularity_compare(
|
173 |
+
language: str, ordered_characters: list[str]
|
174 |
+
) -> float:
|
175 |
+
"""
|
176 |
+
Determine if a ordered characters list (by occurrence from most appearance to rarest) match a particular language.
|
177 |
+
The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
|
178 |
+
Beware that is function is not strict on the match in order to ease the detection. (Meaning close match is 1.)
|
179 |
+
"""
|
180 |
+
if language not in FREQUENCIES:
|
181 |
+
raise ValueError(f"{language} not available")
|
182 |
+
|
183 |
+
character_approved_count: int = 0
|
184 |
+
FREQUENCIES_language_set = set(FREQUENCIES[language])
|
185 |
+
|
186 |
+
ordered_characters_count: int = len(ordered_characters)
|
187 |
+
target_language_characters_count: int = len(FREQUENCIES[language])
|
188 |
+
|
189 |
+
large_alphabet: bool = target_language_characters_count > 26
|
190 |
+
|
191 |
+
for character, character_rank in zip(
|
192 |
+
ordered_characters, range(0, ordered_characters_count)
|
193 |
+
):
|
194 |
+
if character not in FREQUENCIES_language_set:
|
195 |
+
continue
|
196 |
+
|
197 |
+
character_rank_in_language: int = FREQUENCIES[language].index(character)
|
198 |
+
expected_projection_ratio: float = (
|
199 |
+
target_language_characters_count / ordered_characters_count
|
200 |
+
)
|
201 |
+
character_rank_projection: int = int(character_rank * expected_projection_ratio)
|
202 |
+
|
203 |
+
if (
|
204 |
+
large_alphabet is False
|
205 |
+
and abs(character_rank_projection - character_rank_in_language) > 4
|
206 |
+
):
|
207 |
+
continue
|
208 |
+
|
209 |
+
if (
|
210 |
+
large_alphabet is True
|
211 |
+
and abs(character_rank_projection - character_rank_in_language)
|
212 |
+
< target_language_characters_count / 3
|
213 |
+
):
|
214 |
+
character_approved_count += 1
|
215 |
+
continue
|
216 |
+
|
217 |
+
characters_before_source: list[str] = FREQUENCIES[language][
|
218 |
+
0:character_rank_in_language
|
219 |
+
]
|
220 |
+
characters_after_source: list[str] = FREQUENCIES[language][
|
221 |
+
character_rank_in_language:
|
222 |
+
]
|
223 |
+
characters_before: list[str] = ordered_characters[0:character_rank]
|
224 |
+
characters_after: list[str] = ordered_characters[character_rank:]
|
225 |
+
|
226 |
+
before_match_count: int = len(
|
227 |
+
set(characters_before) & set(characters_before_source)
|
228 |
+
)
|
229 |
+
|
230 |
+
after_match_count: int = len(
|
231 |
+
set(characters_after) & set(characters_after_source)
|
232 |
+
)
|
233 |
+
|
234 |
+
if len(characters_before_source) == 0 and before_match_count <= 4:
|
235 |
+
character_approved_count += 1
|
236 |
+
continue
|
237 |
+
|
238 |
+
if len(characters_after_source) == 0 and after_match_count <= 4:
|
239 |
+
character_approved_count += 1
|
240 |
+
continue
|
241 |
+
|
242 |
+
if (
|
243 |
+
before_match_count / len(characters_before_source) >= 0.4
|
244 |
+
or after_match_count / len(characters_after_source) >= 0.4
|
245 |
+
):
|
246 |
+
character_approved_count += 1
|
247 |
+
continue
|
248 |
+
|
249 |
+
return character_approved_count / len(ordered_characters)
|
250 |
+
|
251 |
+
|
252 |
+
def alpha_unicode_split(decoded_sequence: str) -> list[str]:
|
253 |
+
"""
|
254 |
+
Given a decoded text sequence, return a list of str. Unicode range / alphabet separation.
|
255 |
+
Ex. a text containing English/Latin with a bit a Hebrew will return two items in the resulting list;
|
256 |
+
One containing the latin letters and the other hebrew.
|
257 |
+
"""
|
258 |
+
layers: dict[str, str] = {}
|
259 |
+
|
260 |
+
for character in decoded_sequence:
|
261 |
+
if character.isalpha() is False:
|
262 |
+
continue
|
263 |
+
|
264 |
+
character_range: str | None = unicode_range(character)
|
265 |
+
|
266 |
+
if character_range is None:
|
267 |
+
continue
|
268 |
+
|
269 |
+
layer_target_range: str | None = None
|
270 |
+
|
271 |
+
for discovered_range in layers:
|
272 |
+
if (
|
273 |
+
is_suspiciously_successive_range(discovered_range, character_range)
|
274 |
+
is False
|
275 |
+
):
|
276 |
+
layer_target_range = discovered_range
|
277 |
+
break
|
278 |
+
|
279 |
+
if layer_target_range is None:
|
280 |
+
layer_target_range = character_range
|
281 |
+
|
282 |
+
if layer_target_range not in layers:
|
283 |
+
layers[layer_target_range] = character.lower()
|
284 |
+
continue
|
285 |
+
|
286 |
+
layers[layer_target_range] += character.lower()
|
287 |
+
|
288 |
+
return list(layers.values())
|
289 |
+
|
290 |
+
|
291 |
+
def merge_coherence_ratios(results: list[CoherenceMatches]) -> CoherenceMatches:
|
292 |
+
"""
|
293 |
+
This function merge results previously given by the function coherence_ratio.
|
294 |
+
The return type is the same as coherence_ratio.
|
295 |
+
"""
|
296 |
+
per_language_ratios: dict[str, list[float]] = {}
|
297 |
+
for result in results:
|
298 |
+
for sub_result in result:
|
299 |
+
language, ratio = sub_result
|
300 |
+
if language not in per_language_ratios:
|
301 |
+
per_language_ratios[language] = [ratio]
|
302 |
+
continue
|
303 |
+
per_language_ratios[language].append(ratio)
|
304 |
+
|
305 |
+
merge = [
|
306 |
+
(
|
307 |
+
language,
|
308 |
+
round(
|
309 |
+
sum(per_language_ratios[language]) / len(per_language_ratios[language]),
|
310 |
+
4,
|
311 |
+
),
|
312 |
+
)
|
313 |
+
for language in per_language_ratios
|
314 |
+
]
|
315 |
+
|
316 |
+
return sorted(merge, key=lambda x: x[1], reverse=True)
|
317 |
+
|
318 |
+
|
319 |
+
def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
|
320 |
+
"""
|
321 |
+
We shall NOT return "English—" in CoherenceMatches because it is an alternative
|
322 |
+
of "English". This function only keeps the best match and remove the em-dash in it.
|
323 |
+
"""
|
324 |
+
index_results: dict[str, list[float]] = dict()
|
325 |
+
|
326 |
+
for result in results:
|
327 |
+
language, ratio = result
|
328 |
+
no_em_name: str = language.replace("—", "")
|
329 |
+
|
330 |
+
if no_em_name not in index_results:
|
331 |
+
index_results[no_em_name] = []
|
332 |
+
|
333 |
+
index_results[no_em_name].append(ratio)
|
334 |
+
|
335 |
+
if any(len(index_results[e]) > 1 for e in index_results):
|
336 |
+
filtered_results: CoherenceMatches = []
|
337 |
+
|
338 |
+
for language in index_results:
|
339 |
+
filtered_results.append((language, max(index_results[language])))
|
340 |
+
|
341 |
+
return filtered_results
|
342 |
+
|
343 |
+
return results
|
344 |
+
|
345 |
+
|
346 |
+
@lru_cache(maxsize=2048)
|
347 |
+
def coherence_ratio(
|
348 |
+
decoded_sequence: str, threshold: float = 0.1, lg_inclusion: str | None = None
|
349 |
+
) -> CoherenceMatches:
|
350 |
+
"""
|
351 |
+
Detect ANY language that can be identified in given sequence. The sequence will be analysed by layers.
|
352 |
+
A layer = Character extraction by alphabets/ranges.
|
353 |
+
"""
|
354 |
+
|
355 |
+
results: list[tuple[str, float]] = []
|
356 |
+
ignore_non_latin: bool = False
|
357 |
+
|
358 |
+
sufficient_match_count: int = 0
|
359 |
+
|
360 |
+
lg_inclusion_list = lg_inclusion.split(",") if lg_inclusion is not None else []
|
361 |
+
if "Latin Based" in lg_inclusion_list:
|
362 |
+
ignore_non_latin = True
|
363 |
+
lg_inclusion_list.remove("Latin Based")
|
364 |
+
|
365 |
+
for layer in alpha_unicode_split(decoded_sequence):
|
366 |
+
sequence_frequencies: TypeCounter[str] = Counter(layer)
|
367 |
+
most_common = sequence_frequencies.most_common()
|
368 |
+
|
369 |
+
character_count: int = sum(o for c, o in most_common)
|
370 |
+
|
371 |
+
if character_count <= TOO_SMALL_SEQUENCE:
|
372 |
+
continue
|
373 |
+
|
374 |
+
popular_character_ordered: list[str] = [c for c, o in most_common]
|
375 |
+
|
376 |
+
for language in lg_inclusion_list or alphabet_languages(
|
377 |
+
popular_character_ordered, ignore_non_latin
|
378 |
+
):
|
379 |
+
ratio: float = characters_popularity_compare(
|
380 |
+
language, popular_character_ordered
|
381 |
+
)
|
382 |
+
|
383 |
+
if ratio < threshold:
|
384 |
+
continue
|
385 |
+
elif ratio >= 0.8:
|
386 |
+
sufficient_match_count += 1
|
387 |
+
|
388 |
+
results.append((language, round(ratio, 4)))
|
389 |
+
|
390 |
+
if sufficient_match_count >= 3:
|
391 |
+
break
|
392 |
+
|
393 |
+
return sorted(
|
394 |
+
filter_alt_coherence_matches(results), key=lambda x: x[1], reverse=True
|
395 |
+
)
|
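
The coherence detector defined above can also be exercised on its own; a small sketch follows, with a made-up Russian sample sentence for illustration.

```python
# Hedged sketch: probe the language-coherence layer (cd.py) directly.
# The sample sentence is made up for illustration.
from charset_normalizer.cd import coherence_ratio

sample = "Это небольшой пример текста на русском языке для проверки."
# Returns CoherenceMatches, e.g. [('Russian', 0.xx), ...] sorted by ratio.
print(coherence_ratio(sample))
```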
env/Lib/site-packages/charset_normalizer/cli/__init__.py
ADDED
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+from .__main__ import cli_detect, query_yes_no
+
+__all__ = (
+    "cli_detect",
+    "query_yes_no",
+)
env/Lib/site-packages/charset_normalizer/cli/__main__.py
ADDED
@@ -0,0 +1,321 @@
from __future__ import annotations

import argparse
import sys
from json import dumps
from os.path import abspath, basename, dirname, join, realpath
from platform import python_version
from unicodedata import unidata_version

import charset_normalizer.md as md_module
from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult
from charset_normalizer.version import __version__


def query_yes_no(question: str, default: str = "yes") -> bool:
    """Ask a yes/no question via input() and return their answer.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
    It must be "yes" (the default), "no" or None (meaning
    an answer is required of the user).

    The "answer" return value is True for "yes" or False for "no".

    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
    """
    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        choice = input().lower()
        if default is not None and choice == "":
            return valid[default]
        elif choice in valid:
            return valid[choice]
        else:
            sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")


def cli_detect(argv: list[str] | None = None) -> int:
    """
    CLI assistant using ARGV and ArgumentParser
    :param argv:
    :return: 0 if everything is fine, anything else equal trouble
    """
    parser = argparse.ArgumentParser(
        description="The Real First Universal Charset Detector. "
        "Discover originating encoding used on text file. "
        "Normalize text to unicode."
    )

    parser.add_argument(
        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        dest="verbose",
        help="Display complementary information about file if any. "
        "Stdout will contain logs about the detection process.",
    )
    parser.add_argument(
        "-a",
        "--with-alternative",
        action="store_true",
        default=False,
        dest="alternatives",
        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
    )
    parser.add_argument(
        "-n",
        "--normalize",
        action="store_true",
        default=False,
        dest="normalize",
        help="Permit to normalize input file. If not set, program does not write anything.",
    )
    parser.add_argument(
        "-m",
        "--minimal",
        action="store_true",
        default=False,
        dest="minimal",
        help="Only output the charset detected to STDOUT. Disabling JSON output.",
    )
    parser.add_argument(
        "-r",
        "--replace",
        action="store_true",
        default=False,
        dest="replace",
        help="Replace file when trying to normalize it instead of creating a new one.",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        dest="force",
        help="Replace file without asking if you are sure, use this flag with caution.",
    )
    parser.add_argument(
        "-i",
        "--no-preemptive",
        action="store_true",
        default=False,
        dest="no_preemptive",
        help="Disable looking at a charset declaration to hint the detector.",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        action="store",
        default=0.2,
        type=float,
        dest="threshold",
        help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
            __version__,
            python_version(),
            unidata_version,
            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
        ),
        help="Show version information and exit.",
    )

    args = parser.parse_args(argv)

    if args.replace is True and args.normalize is False:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("Use --replace in addition of --normalize only.", file=sys.stderr)
        return 1

    if args.force is True and args.replace is False:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("Use --force in addition of --replace only.", file=sys.stderr)
        return 1

    if args.threshold < 0.0 or args.threshold > 1.0:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
        return 1

    x_ = []

    for my_file in args.files:
        matches = from_fp(
            my_file,
            threshold=args.threshold,
            explain=args.verbose,
            preemptive_behaviour=args.no_preemptive is False,
        )

        best_guess = matches.best()

        if best_guess is None:
            print(
                'Unable to identify originating encoding for "{}". {}'.format(
                    my_file.name,
                    (
                        "Maybe try increasing maximum amount of chaos."
                        if args.threshold < 1.0
                        else ""
                    ),
                ),
                file=sys.stderr,
            )
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    None,
                    [],
                    [],
                    "Unknown",
                    [],
                    False,
                    1.0,
                    0.0,
                    None,
                    True,
                )
            )
        else:
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    best_guess.encoding,
                    best_guess.encoding_aliases,
                    [
                        cp
                        for cp in best_guess.could_be_from_charset
                        if cp != best_guess.encoding
                    ],
                    best_guess.language,
                    best_guess.alphabets,
                    best_guess.bom,
                    best_guess.percent_chaos,
                    best_guess.percent_coherence,
                    None,
                    True,
                )
            )

            if len(matches) > 1 and args.alternatives:
                for el in matches:
                    if el != best_guess:
                        x_.append(
                            CliDetectionResult(
                                abspath(my_file.name),
                                el.encoding,
                                el.encoding_aliases,
                                [
                                    cp
                                    for cp in el.could_be_from_charset
                                    if cp != el.encoding
                                ],
                                el.language,
                                el.alphabets,
                                el.bom,
                                el.percent_chaos,
                                el.percent_coherence,
                                None,
                                False,
                            )
                        )

            if args.normalize is True:
                if best_guess.encoding.startswith("utf") is True:
                    print(
                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
                            my_file.name
                        ),
                        file=sys.stderr,
                    )
                    if my_file.closed is False:
                        my_file.close()
                    continue

                dir_path = dirname(realpath(my_file.name))
                file_name = basename(realpath(my_file.name))

                o_: list[str] = file_name.split(".")

                if args.replace is False:
                    o_.insert(-1, best_guess.encoding)
                    if my_file.closed is False:
                        my_file.close()
                elif (
                    args.force is False
                    and query_yes_no(
                        'Are you sure to normalize "{}" by replacing it ?'.format(
                            my_file.name
                        ),
                        "no",
                    )
                    is False
                ):
                    if my_file.closed is False:
                        my_file.close()
                    continue

                try:
                    x_[0].unicode_path = join(dir_path, ".".join(o_))

                    with open(x_[0].unicode_path, "wb") as fp:
                        fp.write(best_guess.output())
                except OSError as e:
                    print(str(e), file=sys.stderr)
                    if my_file.closed is False:
                        my_file.close()
                    return 2

        if my_file.closed is False:
            my_file.close()

    if args.minimal is False:
        print(
            dumps(
                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
                ensure_ascii=True,
                indent=4,
            )
        )
    else:
        for my_file in args.files:
            print(
                ", ".join(
                    [
                        el.encoding or "undefined"
                        for el in x_
                        if el.path == abspath(my_file.name)
                    ]
                )
            )

    return 0


if __name__ == "__main__":
    cli_detect()
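For reference, cli_detect accepts the same argument vector as the installed command, so it can be driven programmatically. A short sketch; the file name below is a placeholder, not a file shipped with this Space:

from charset_normalizer.cli import cli_detect

# Prints only the detected charset (the --minimal mode defined above) and returns 0 on success.
exit_code = cli_detect(["some_text_file.txt", "--minimal"])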
env/Lib/site-packages/charset_normalizer/constant.py
ADDED
@@ -0,0 +1,1998 @@
from __future__ import annotations

from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
from encodings.aliases import aliases
from re import IGNORECASE
from re import compile as re_compile

# Contain for each eligible encoding a list of/item bytes SIG/BOM
ENCODING_MARKS: dict[str, bytes | list[bytes]] = {
    "utf_8": BOM_UTF8,
    "utf_7": [
        b"\x2b\x2f\x76\x38",
        b"\x2b\x2f\x76\x39",
        b"\x2b\x2f\x76\x2b",
        b"\x2b\x2f\x76\x2f",
        b"\x2b\x2f\x76\x38\x2d",
    ],
    "gb18030": b"\x84\x31\x95\x33",
    "utf_32": [BOM_UTF32_BE, BOM_UTF32_LE],
    "utf_16": [BOM_UTF16_BE, BOM_UTF16_LE],
}

TOO_SMALL_SEQUENCE: int = 32
TOO_BIG_SEQUENCE: int = int(10e6)

UTF8_MAXIMAL_ALLOCATION: int = 1_112_064
28 |
+
# Up-to-date Unicode ucd/15.0.0
|
29 |
+
UNICODE_RANGES_COMBINED: dict[str, range] = {
|
30 |
+
"Control character": range(32),
|
31 |
+
"Basic Latin": range(32, 128),
|
32 |
+
"Latin-1 Supplement": range(128, 256),
|
33 |
+
"Latin Extended-A": range(256, 384),
|
34 |
+
"Latin Extended-B": range(384, 592),
|
35 |
+
"IPA Extensions": range(592, 688),
|
36 |
+
"Spacing Modifier Letters": range(688, 768),
|
37 |
+
"Combining Diacritical Marks": range(768, 880),
|
38 |
+
"Greek and Coptic": range(880, 1024),
|
39 |
+
"Cyrillic": range(1024, 1280),
|
40 |
+
"Cyrillic Supplement": range(1280, 1328),
|
41 |
+
"Armenian": range(1328, 1424),
|
42 |
+
"Hebrew": range(1424, 1536),
|
43 |
+
"Arabic": range(1536, 1792),
|
44 |
+
"Syriac": range(1792, 1872),
|
45 |
+
"Arabic Supplement": range(1872, 1920),
|
46 |
+
"Thaana": range(1920, 1984),
|
47 |
+
"NKo": range(1984, 2048),
|
48 |
+
"Samaritan": range(2048, 2112),
|
49 |
+
"Mandaic": range(2112, 2144),
|
50 |
+
"Syriac Supplement": range(2144, 2160),
|
51 |
+
"Arabic Extended-B": range(2160, 2208),
|
52 |
+
"Arabic Extended-A": range(2208, 2304),
|
53 |
+
"Devanagari": range(2304, 2432),
|
54 |
+
"Bengali": range(2432, 2560),
|
55 |
+
"Gurmukhi": range(2560, 2688),
|
56 |
+
"Gujarati": range(2688, 2816),
|
57 |
+
"Oriya": range(2816, 2944),
|
58 |
+
"Tamil": range(2944, 3072),
|
59 |
+
"Telugu": range(3072, 3200),
|
60 |
+
"Kannada": range(3200, 3328),
|
61 |
+
"Malayalam": range(3328, 3456),
|
62 |
+
"Sinhala": range(3456, 3584),
|
63 |
+
"Thai": range(3584, 3712),
|
64 |
+
"Lao": range(3712, 3840),
|
65 |
+
"Tibetan": range(3840, 4096),
|
66 |
+
"Myanmar": range(4096, 4256),
|
67 |
+
"Georgian": range(4256, 4352),
|
68 |
+
"Hangul Jamo": range(4352, 4608),
|
69 |
+
"Ethiopic": range(4608, 4992),
|
70 |
+
"Ethiopic Supplement": range(4992, 5024),
|
71 |
+
"Cherokee": range(5024, 5120),
|
72 |
+
"Unified Canadian Aboriginal Syllabics": range(5120, 5760),
|
73 |
+
"Ogham": range(5760, 5792),
|
74 |
+
"Runic": range(5792, 5888),
|
75 |
+
"Tagalog": range(5888, 5920),
|
76 |
+
"Hanunoo": range(5920, 5952),
|
77 |
+
"Buhid": range(5952, 5984),
|
78 |
+
"Tagbanwa": range(5984, 6016),
|
79 |
+
"Khmer": range(6016, 6144),
|
80 |
+
"Mongolian": range(6144, 6320),
|
81 |
+
"Unified Canadian Aboriginal Syllabics Extended": range(6320, 6400),
|
82 |
+
"Limbu": range(6400, 6480),
|
83 |
+
"Tai Le": range(6480, 6528),
|
84 |
+
"New Tai Lue": range(6528, 6624),
|
85 |
+
"Khmer Symbols": range(6624, 6656),
|
86 |
+
"Buginese": range(6656, 6688),
|
87 |
+
"Tai Tham": range(6688, 6832),
|
88 |
+
"Combining Diacritical Marks Extended": range(6832, 6912),
|
89 |
+
"Balinese": range(6912, 7040),
|
90 |
+
"Sundanese": range(7040, 7104),
|
91 |
+
"Batak": range(7104, 7168),
|
92 |
+
"Lepcha": range(7168, 7248),
|
93 |
+
"Ol Chiki": range(7248, 7296),
|
94 |
+
"Cyrillic Extended-C": range(7296, 7312),
|
95 |
+
"Georgian Extended": range(7312, 7360),
|
96 |
+
"Sundanese Supplement": range(7360, 7376),
|
97 |
+
"Vedic Extensions": range(7376, 7424),
|
98 |
+
"Phonetic Extensions": range(7424, 7552),
|
99 |
+
"Phonetic Extensions Supplement": range(7552, 7616),
|
100 |
+
"Combining Diacritical Marks Supplement": range(7616, 7680),
|
101 |
+
"Latin Extended Additional": range(7680, 7936),
|
102 |
+
"Greek Extended": range(7936, 8192),
|
103 |
+
"General Punctuation": range(8192, 8304),
|
104 |
+
"Superscripts and Subscripts": range(8304, 8352),
|
105 |
+
"Currency Symbols": range(8352, 8400),
|
106 |
+
"Combining Diacritical Marks for Symbols": range(8400, 8448),
|
107 |
+
"Letterlike Symbols": range(8448, 8528),
|
108 |
+
"Number Forms": range(8528, 8592),
|
109 |
+
"Arrows": range(8592, 8704),
|
110 |
+
"Mathematical Operators": range(8704, 8960),
|
111 |
+
"Miscellaneous Technical": range(8960, 9216),
|
112 |
+
"Control Pictures": range(9216, 9280),
|
113 |
+
"Optical Character Recognition": range(9280, 9312),
|
114 |
+
"Enclosed Alphanumerics": range(9312, 9472),
|
115 |
+
"Box Drawing": range(9472, 9600),
|
116 |
+
"Block Elements": range(9600, 9632),
|
117 |
+
"Geometric Shapes": range(9632, 9728),
|
118 |
+
"Miscellaneous Symbols": range(9728, 9984),
|
119 |
+
"Dingbats": range(9984, 10176),
|
120 |
+
"Miscellaneous Mathematical Symbols-A": range(10176, 10224),
|
121 |
+
"Supplemental Arrows-A": range(10224, 10240),
|
122 |
+
"Braille Patterns": range(10240, 10496),
|
123 |
+
"Supplemental Arrows-B": range(10496, 10624),
|
124 |
+
"Miscellaneous Mathematical Symbols-B": range(10624, 10752),
|
125 |
+
"Supplemental Mathematical Operators": range(10752, 11008),
|
126 |
+
"Miscellaneous Symbols and Arrows": range(11008, 11264),
|
127 |
+
"Glagolitic": range(11264, 11360),
|
128 |
+
"Latin Extended-C": range(11360, 11392),
|
129 |
+
"Coptic": range(11392, 11520),
|
130 |
+
"Georgian Supplement": range(11520, 11568),
|
131 |
+
"Tifinagh": range(11568, 11648),
|
132 |
+
"Ethiopic Extended": range(11648, 11744),
|
133 |
+
"Cyrillic Extended-A": range(11744, 11776),
|
134 |
+
"Supplemental Punctuation": range(11776, 11904),
|
135 |
+
"CJK Radicals Supplement": range(11904, 12032),
|
136 |
+
"Kangxi Radicals": range(12032, 12256),
|
137 |
+
"Ideographic Description Characters": range(12272, 12288),
|
138 |
+
"CJK Symbols and Punctuation": range(12288, 12352),
|
139 |
+
"Hiragana": range(12352, 12448),
|
140 |
+
"Katakana": range(12448, 12544),
|
141 |
+
"Bopomofo": range(12544, 12592),
|
142 |
+
"Hangul Compatibility Jamo": range(12592, 12688),
|
143 |
+
"Kanbun": range(12688, 12704),
|
144 |
+
"Bopomofo Extended": range(12704, 12736),
|
145 |
+
"CJK Strokes": range(12736, 12784),
|
146 |
+
"Katakana Phonetic Extensions": range(12784, 12800),
|
147 |
+
"Enclosed CJK Letters and Months": range(12800, 13056),
|
148 |
+
"CJK Compatibility": range(13056, 13312),
|
149 |
+
"CJK Unified Ideographs Extension A": range(13312, 19904),
|
150 |
+
"Yijing Hexagram Symbols": range(19904, 19968),
|
151 |
+
"CJK Unified Ideographs": range(19968, 40960),
|
152 |
+
"Yi Syllables": range(40960, 42128),
|
153 |
+
"Yi Radicals": range(42128, 42192),
|
154 |
+
"Lisu": range(42192, 42240),
|
155 |
+
"Vai": range(42240, 42560),
|
156 |
+
"Cyrillic Extended-B": range(42560, 42656),
|
157 |
+
"Bamum": range(42656, 42752),
|
158 |
+
"Modifier Tone Letters": range(42752, 42784),
|
159 |
+
"Latin Extended-D": range(42784, 43008),
|
160 |
+
"Syloti Nagri": range(43008, 43056),
|
161 |
+
"Common Indic Number Forms": range(43056, 43072),
|
162 |
+
"Phags-pa": range(43072, 43136),
|
163 |
+
"Saurashtra": range(43136, 43232),
|
164 |
+
"Devanagari Extended": range(43232, 43264),
|
165 |
+
"Kayah Li": range(43264, 43312),
|
166 |
+
"Rejang": range(43312, 43360),
|
167 |
+
"Hangul Jamo Extended-A": range(43360, 43392),
|
168 |
+
"Javanese": range(43392, 43488),
|
169 |
+
"Myanmar Extended-B": range(43488, 43520),
|
170 |
+
"Cham": range(43520, 43616),
|
171 |
+
"Myanmar Extended-A": range(43616, 43648),
|
172 |
+
"Tai Viet": range(43648, 43744),
|
173 |
+
"Meetei Mayek Extensions": range(43744, 43776),
|
174 |
+
"Ethiopic Extended-A": range(43776, 43824),
|
175 |
+
"Latin Extended-E": range(43824, 43888),
|
176 |
+
"Cherokee Supplement": range(43888, 43968),
|
177 |
+
"Meetei Mayek": range(43968, 44032),
|
178 |
+
"Hangul Syllables": range(44032, 55216),
|
179 |
+
"Hangul Jamo Extended-B": range(55216, 55296),
|
180 |
+
"High Surrogates": range(55296, 56192),
|
181 |
+
"High Private Use Surrogates": range(56192, 56320),
|
182 |
+
"Low Surrogates": range(56320, 57344),
|
183 |
+
"Private Use Area": range(57344, 63744),
|
184 |
+
"CJK Compatibility Ideographs": range(63744, 64256),
|
185 |
+
"Alphabetic Presentation Forms": range(64256, 64336),
|
186 |
+
"Arabic Presentation Forms-A": range(64336, 65024),
|
187 |
+
"Variation Selectors": range(65024, 65040),
|
188 |
+
"Vertical Forms": range(65040, 65056),
|
189 |
+
"Combining Half Marks": range(65056, 65072),
|
190 |
+
"CJK Compatibility Forms": range(65072, 65104),
|
191 |
+
"Small Form Variants": range(65104, 65136),
|
192 |
+
"Arabic Presentation Forms-B": range(65136, 65280),
|
193 |
+
"Halfwidth and Fullwidth Forms": range(65280, 65520),
|
194 |
+
"Specials": range(65520, 65536),
|
195 |
+
"Linear B Syllabary": range(65536, 65664),
|
196 |
+
"Linear B Ideograms": range(65664, 65792),
|
197 |
+
"Aegean Numbers": range(65792, 65856),
|
198 |
+
"Ancient Greek Numbers": range(65856, 65936),
|
199 |
+
"Ancient Symbols": range(65936, 66000),
|
200 |
+
"Phaistos Disc": range(66000, 66048),
|
201 |
+
"Lycian": range(66176, 66208),
|
202 |
+
"Carian": range(66208, 66272),
|
203 |
+
"Coptic Epact Numbers": range(66272, 66304),
|
204 |
+
"Old Italic": range(66304, 66352),
|
205 |
+
"Gothic": range(66352, 66384),
|
206 |
+
"Old Permic": range(66384, 66432),
|
207 |
+
"Ugaritic": range(66432, 66464),
|
208 |
+
"Old Persian": range(66464, 66528),
|
209 |
+
"Deseret": range(66560, 66640),
|
210 |
+
"Shavian": range(66640, 66688),
|
211 |
+
"Osmanya": range(66688, 66736),
|
212 |
+
"Osage": range(66736, 66816),
|
213 |
+
"Elbasan": range(66816, 66864),
|
214 |
+
"Caucasian Albanian": range(66864, 66928),
|
215 |
+
"Vithkuqi": range(66928, 67008),
|
216 |
+
"Linear A": range(67072, 67456),
|
217 |
+
"Latin Extended-F": range(67456, 67520),
|
218 |
+
"Cypriot Syllabary": range(67584, 67648),
|
219 |
+
"Imperial Aramaic": range(67648, 67680),
|
220 |
+
"Palmyrene": range(67680, 67712),
|
221 |
+
"Nabataean": range(67712, 67760),
|
222 |
+
"Hatran": range(67808, 67840),
|
223 |
+
"Phoenician": range(67840, 67872),
|
224 |
+
"Lydian": range(67872, 67904),
|
225 |
+
"Meroitic Hieroglyphs": range(67968, 68000),
|
226 |
+
"Meroitic Cursive": range(68000, 68096),
|
227 |
+
"Kharoshthi": range(68096, 68192),
|
228 |
+
"Old South Arabian": range(68192, 68224),
|
229 |
+
"Old North Arabian": range(68224, 68256),
|
230 |
+
"Manichaean": range(68288, 68352),
|
231 |
+
"Avestan": range(68352, 68416),
|
232 |
+
"Inscriptional Parthian": range(68416, 68448),
|
233 |
+
"Inscriptional Pahlavi": range(68448, 68480),
|
234 |
+
"Psalter Pahlavi": range(68480, 68528),
|
235 |
+
"Old Turkic": range(68608, 68688),
|
236 |
+
"Old Hungarian": range(68736, 68864),
|
237 |
+
"Hanifi Rohingya": range(68864, 68928),
|
238 |
+
"Rumi Numeral Symbols": range(69216, 69248),
|
239 |
+
"Yezidi": range(69248, 69312),
|
240 |
+
"Arabic Extended-C": range(69312, 69376),
|
241 |
+
"Old Sogdian": range(69376, 69424),
|
242 |
+
"Sogdian": range(69424, 69488),
|
243 |
+
"Old Uyghur": range(69488, 69552),
|
244 |
+
"Chorasmian": range(69552, 69600),
|
245 |
+
"Elymaic": range(69600, 69632),
|
246 |
+
"Brahmi": range(69632, 69760),
|
247 |
+
"Kaithi": range(69760, 69840),
|
248 |
+
"Sora Sompeng": range(69840, 69888),
|
249 |
+
"Chakma": range(69888, 69968),
|
250 |
+
"Mahajani": range(69968, 70016),
|
251 |
+
"Sharada": range(70016, 70112),
|
252 |
+
"Sinhala Archaic Numbers": range(70112, 70144),
|
253 |
+
"Khojki": range(70144, 70224),
|
254 |
+
"Multani": range(70272, 70320),
|
255 |
+
"Khudawadi": range(70320, 70400),
|
256 |
+
"Grantha": range(70400, 70528),
|
257 |
+
"Newa": range(70656, 70784),
|
258 |
+
"Tirhuta": range(70784, 70880),
|
259 |
+
"Siddham": range(71040, 71168),
|
260 |
+
"Modi": range(71168, 71264),
|
261 |
+
"Mongolian Supplement": range(71264, 71296),
|
262 |
+
"Takri": range(71296, 71376),
|
263 |
+
"Ahom": range(71424, 71504),
|
264 |
+
"Dogra": range(71680, 71760),
|
265 |
+
"Warang Citi": range(71840, 71936),
|
266 |
+
"Dives Akuru": range(71936, 72032),
|
267 |
+
"Nandinagari": range(72096, 72192),
|
268 |
+
"Zanabazar Square": range(72192, 72272),
|
269 |
+
"Soyombo": range(72272, 72368),
|
270 |
+
"Unified Canadian Aboriginal Syllabics Extended-A": range(72368, 72384),
|
271 |
+
"Pau Cin Hau": range(72384, 72448),
|
272 |
+
"Devanagari Extended-A": range(72448, 72544),
|
273 |
+
"Bhaiksuki": range(72704, 72816),
|
274 |
+
"Marchen": range(72816, 72896),
|
275 |
+
"Masaram Gondi": range(72960, 73056),
|
276 |
+
"Gunjala Gondi": range(73056, 73136),
|
277 |
+
"Makasar": range(73440, 73472),
|
278 |
+
"Kawi": range(73472, 73568),
|
279 |
+
"Lisu Supplement": range(73648, 73664),
|
280 |
+
"Tamil Supplement": range(73664, 73728),
|
281 |
+
"Cuneiform": range(73728, 74752),
|
282 |
+
"Cuneiform Numbers and Punctuation": range(74752, 74880),
|
283 |
+
"Early Dynastic Cuneiform": range(74880, 75088),
|
284 |
+
"Cypro-Minoan": range(77712, 77824),
|
285 |
+
"Egyptian Hieroglyphs": range(77824, 78896),
|
286 |
+
"Egyptian Hieroglyph Format Controls": range(78896, 78944),
|
287 |
+
"Anatolian Hieroglyphs": range(82944, 83584),
|
288 |
+
"Bamum Supplement": range(92160, 92736),
|
289 |
+
"Mro": range(92736, 92784),
|
290 |
+
"Tangsa": range(92784, 92880),
|
291 |
+
"Bassa Vah": range(92880, 92928),
|
292 |
+
"Pahawh Hmong": range(92928, 93072),
|
293 |
+
"Medefaidrin": range(93760, 93856),
|
294 |
+
"Miao": range(93952, 94112),
|
295 |
+
"Ideographic Symbols and Punctuation": range(94176, 94208),
|
296 |
+
"Tangut": range(94208, 100352),
|
297 |
+
"Tangut Components": range(100352, 101120),
|
298 |
+
"Khitan Small Script": range(101120, 101632),
|
299 |
+
"Tangut Supplement": range(101632, 101760),
|
300 |
+
"Kana Extended-B": range(110576, 110592),
|
301 |
+
"Kana Supplement": range(110592, 110848),
|
302 |
+
"Kana Extended-A": range(110848, 110896),
|
303 |
+
"Small Kana Extension": range(110896, 110960),
|
304 |
+
"Nushu": range(110960, 111360),
|
305 |
+
"Duployan": range(113664, 113824),
|
306 |
+
"Shorthand Format Controls": range(113824, 113840),
|
307 |
+
"Znamenny Musical Notation": range(118528, 118736),
|
308 |
+
"Byzantine Musical Symbols": range(118784, 119040),
|
309 |
+
"Musical Symbols": range(119040, 119296),
|
310 |
+
"Ancient Greek Musical Notation": range(119296, 119376),
|
311 |
+
"Kaktovik Numerals": range(119488, 119520),
|
312 |
+
"Mayan Numerals": range(119520, 119552),
|
313 |
+
"Tai Xuan Jing Symbols": range(119552, 119648),
|
314 |
+
"Counting Rod Numerals": range(119648, 119680),
|
315 |
+
"Mathematical Alphanumeric Symbols": range(119808, 120832),
|
316 |
+
"Sutton SignWriting": range(120832, 121520),
|
317 |
+
"Latin Extended-G": range(122624, 122880),
|
318 |
+
"Glagolitic Supplement": range(122880, 122928),
|
319 |
+
"Cyrillic Extended-D": range(122928, 123024),
|
320 |
+
"Nyiakeng Puachue Hmong": range(123136, 123216),
|
321 |
+
"Toto": range(123536, 123584),
|
322 |
+
"Wancho": range(123584, 123648),
|
323 |
+
"Nag Mundari": range(124112, 124160),
|
324 |
+
"Ethiopic Extended-B": range(124896, 124928),
|
325 |
+
"Mende Kikakui": range(124928, 125152),
|
326 |
+
"Adlam": range(125184, 125280),
|
327 |
+
"Indic Siyaq Numbers": range(126064, 126144),
|
328 |
+
"Ottoman Siyaq Numbers": range(126208, 126288),
|
329 |
+
"Arabic Mathematical Alphabetic Symbols": range(126464, 126720),
|
330 |
+
"Mahjong Tiles": range(126976, 127024),
|
331 |
+
"Domino Tiles": range(127024, 127136),
|
332 |
+
"Playing Cards": range(127136, 127232),
|
333 |
+
"Enclosed Alphanumeric Supplement": range(127232, 127488),
|
334 |
+
"Enclosed Ideographic Supplement": range(127488, 127744),
|
335 |
+
"Miscellaneous Symbols and Pictographs": range(127744, 128512),
|
336 |
+
"Emoticons range(Emoji)": range(128512, 128592),
|
337 |
+
"Ornamental Dingbats": range(128592, 128640),
|
338 |
+
"Transport and Map Symbols": range(128640, 128768),
|
339 |
+
"Alchemical Symbols": range(128768, 128896),
|
340 |
+
"Geometric Shapes Extended": range(128896, 129024),
|
341 |
+
"Supplemental Arrows-C": range(129024, 129280),
|
342 |
+
"Supplemental Symbols and Pictographs": range(129280, 129536),
|
343 |
+
"Chess Symbols": range(129536, 129648),
|
344 |
+
"Symbols and Pictographs Extended-A": range(129648, 129792),
|
345 |
+
"Symbols for Legacy Computing": range(129792, 130048),
|
346 |
+
"CJK Unified Ideographs Extension B": range(131072, 173792),
|
347 |
+
"CJK Unified Ideographs Extension C": range(173824, 177984),
|
348 |
+
"CJK Unified Ideographs Extension D": range(177984, 178208),
|
349 |
+
"CJK Unified Ideographs Extension E": range(178208, 183984),
|
350 |
+
"CJK Unified Ideographs Extension F": range(183984, 191472),
|
351 |
+
"CJK Compatibility Ideographs Supplement": range(194560, 195104),
|
352 |
+
"CJK Unified Ideographs Extension G": range(196608, 201552),
|
353 |
+
"CJK Unified Ideographs Extension H": range(201552, 205744),
|
354 |
+
"Tags": range(917504, 917632),
|
355 |
+
"Variation Selectors Supplement": range(917760, 918000),
|
356 |
+
"Supplementary Private Use Area-A": range(983040, 1048576),
|
357 |
+
"Supplementary Private Use Area-B": range(1048576, 1114112),
|
358 |
+
}
|
359 |
+
|
360 |
+
|
361 |
+
UNICODE_SECONDARY_RANGE_KEYWORD: list[str] = [
|
362 |
+
"Supplement",
|
363 |
+
"Extended",
|
364 |
+
"Extensions",
|
365 |
+
"Modifier",
|
366 |
+
"Marks",
|
367 |
+
"Punctuation",
|
368 |
+
"Symbols",
|
369 |
+
"Forms",
|
370 |
+
"Operators",
|
371 |
+
"Miscellaneous",
|
372 |
+
"Drawing",
|
373 |
+
"Block",
|
374 |
+
"Shapes",
|
375 |
+
"Supplemental",
|
376 |
+
"Tags",
|
377 |
+
]
|
378 |
+
|
379 |
+
RE_POSSIBLE_ENCODING_INDICATION = re_compile(
|
380 |
+
r"(?:(?:encoding)|(?:charset)|(?:coding))(?:[\:= ]{1,10})(?:[\"\']?)([a-zA-Z0-9\-_]+)(?:[\"\']?)",
|
381 |
+
IGNORECASE,
|
382 |
+
)
|
383 |
+
|
384 |
+
IANA_NO_ALIASES = [
|
385 |
+
"cp720",
|
386 |
+
"cp737",
|
387 |
+
"cp856",
|
388 |
+
"cp874",
|
389 |
+
"cp875",
|
390 |
+
"cp1006",
|
391 |
+
"koi8_r",
|
392 |
+
"koi8_t",
|
393 |
+
"koi8_u",
|
394 |
+
]
|
395 |
+
|
396 |
+
IANA_SUPPORTED: list[str] = sorted(
|
397 |
+
filter(
|
398 |
+
lambda x: x.endswith("_codec") is False
|
399 |
+
and x not in {"rot_13", "tactis", "mbcs"},
|
400 |
+
list(set(aliases.values())) + IANA_NO_ALIASES,
|
401 |
+
)
|
402 |
+
)
|
403 |
+
|
404 |
+
IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED)
|
405 |
+
|
406 |
+
# pre-computed code page that are similar using the function cp_similarity.
|
407 |
+
IANA_SUPPORTED_SIMILAR: dict[str, list[str]] = {
|
408 |
+
"cp037": ["cp1026", "cp1140", "cp273", "cp500"],
|
409 |
+
"cp1026": ["cp037", "cp1140", "cp273", "cp500"],
|
410 |
+
"cp1125": ["cp866"],
|
411 |
+
"cp1140": ["cp037", "cp1026", "cp273", "cp500"],
|
412 |
+
"cp1250": ["iso8859_2"],
|
413 |
+
"cp1251": ["kz1048", "ptcp154"],
|
414 |
+
"cp1252": ["iso8859_15", "iso8859_9", "latin_1"],
|
415 |
+
"cp1253": ["iso8859_7"],
|
416 |
+
"cp1254": ["iso8859_15", "iso8859_9", "latin_1"],
|
417 |
+
"cp1257": ["iso8859_13"],
|
418 |
+
"cp273": ["cp037", "cp1026", "cp1140", "cp500"],
|
419 |
+
"cp437": ["cp850", "cp858", "cp860", "cp861", "cp862", "cp863", "cp865"],
|
420 |
+
"cp500": ["cp037", "cp1026", "cp1140", "cp273"],
|
421 |
+
"cp850": ["cp437", "cp857", "cp858", "cp865"],
|
422 |
+
"cp857": ["cp850", "cp858", "cp865"],
|
423 |
+
"cp858": ["cp437", "cp850", "cp857", "cp865"],
|
424 |
+
"cp860": ["cp437", "cp861", "cp862", "cp863", "cp865"],
|
425 |
+
"cp861": ["cp437", "cp860", "cp862", "cp863", "cp865"],
|
426 |
+
"cp862": ["cp437", "cp860", "cp861", "cp863", "cp865"],
|
427 |
+
"cp863": ["cp437", "cp860", "cp861", "cp862", "cp865"],
|
428 |
+
"cp865": ["cp437", "cp850", "cp857", "cp858", "cp860", "cp861", "cp862", "cp863"],
|
429 |
+
"cp866": ["cp1125"],
|
430 |
+
"iso8859_10": ["iso8859_14", "iso8859_15", "iso8859_4", "iso8859_9", "latin_1"],
|
431 |
+
"iso8859_11": ["tis_620"],
|
432 |
+
"iso8859_13": ["cp1257"],
|
433 |
+
"iso8859_14": [
|
434 |
+
"iso8859_10",
|
435 |
+
"iso8859_15",
|
436 |
+
"iso8859_16",
|
437 |
+
"iso8859_3",
|
438 |
+
"iso8859_9",
|
439 |
+
"latin_1",
|
440 |
+
],
|
441 |
+
"iso8859_15": [
|
442 |
+
"cp1252",
|
443 |
+
"cp1254",
|
444 |
+
"iso8859_10",
|
445 |
+
"iso8859_14",
|
446 |
+
"iso8859_16",
|
447 |
+
"iso8859_3",
|
448 |
+
"iso8859_9",
|
449 |
+
"latin_1",
|
450 |
+
],
|
451 |
+
"iso8859_16": [
|
452 |
+
"iso8859_14",
|
453 |
+
"iso8859_15",
|
454 |
+
"iso8859_2",
|
455 |
+
"iso8859_3",
|
456 |
+
"iso8859_9",
|
457 |
+
"latin_1",
|
458 |
+
],
|
459 |
+
"iso8859_2": ["cp1250", "iso8859_16", "iso8859_4"],
|
460 |
+
"iso8859_3": ["iso8859_14", "iso8859_15", "iso8859_16", "iso8859_9", "latin_1"],
|
461 |
+
"iso8859_4": ["iso8859_10", "iso8859_2", "iso8859_9", "latin_1"],
|
462 |
+
"iso8859_7": ["cp1253"],
|
463 |
+
"iso8859_9": [
|
464 |
+
"cp1252",
|
465 |
+
"cp1254",
|
466 |
+
"cp1258",
|
467 |
+
"iso8859_10",
|
468 |
+
"iso8859_14",
|
469 |
+
"iso8859_15",
|
470 |
+
"iso8859_16",
|
471 |
+
"iso8859_3",
|
472 |
+
"iso8859_4",
|
473 |
+
"latin_1",
|
474 |
+
],
|
475 |
+
"kz1048": ["cp1251", "ptcp154"],
|
476 |
+
"latin_1": [
|
477 |
+
"cp1252",
|
478 |
+
"cp1254",
|
479 |
+
"cp1258",
|
480 |
+
"iso8859_10",
|
481 |
+
"iso8859_14",
|
482 |
+
"iso8859_15",
|
483 |
+
"iso8859_16",
|
484 |
+
"iso8859_3",
|
485 |
+
"iso8859_4",
|
486 |
+
"iso8859_9",
|
487 |
+
],
|
488 |
+
"mac_iceland": ["mac_roman", "mac_turkish"],
|
489 |
+
"mac_roman": ["mac_iceland", "mac_turkish"],
|
490 |
+
"mac_turkish": ["mac_iceland", "mac_roman"],
|
491 |
+
"ptcp154": ["cp1251", "kz1048"],
|
492 |
+
"tis_620": ["iso8859_11"],
|
493 |
+
}
|
494 |
+
|
495 |
+
|
496 |
+
CHARDET_CORRESPONDENCE: dict[str, str] = {
|
497 |
+
"iso2022_kr": "ISO-2022-KR",
|
498 |
+
"iso2022_jp": "ISO-2022-JP",
|
499 |
+
"euc_kr": "EUC-KR",
|
500 |
+
"tis_620": "TIS-620",
|
501 |
+
"utf_32": "UTF-32",
|
502 |
+
"euc_jp": "EUC-JP",
|
503 |
+
"koi8_r": "KOI8-R",
|
504 |
+
"iso8859_1": "ISO-8859-1",
|
505 |
+
"iso8859_2": "ISO-8859-2",
|
506 |
+
"iso8859_5": "ISO-8859-5",
|
507 |
+
"iso8859_6": "ISO-8859-6",
|
508 |
+
"iso8859_7": "ISO-8859-7",
|
509 |
+
"iso8859_8": "ISO-8859-8",
|
510 |
+
"utf_16": "UTF-16",
|
511 |
+
"cp855": "IBM855",
|
512 |
+
"mac_cyrillic": "MacCyrillic",
|
513 |
+
"gb2312": "GB2312",
|
514 |
+
"gb18030": "GB18030",
|
515 |
+
"cp932": "CP932",
|
516 |
+
"cp866": "IBM866",
|
517 |
+
"utf_8": "utf-8",
|
518 |
+
"utf_8_sig": "UTF-8-SIG",
|
519 |
+
"shift_jis": "SHIFT_JIS",
|
520 |
+
"big5": "Big5",
|
521 |
+
"cp1250": "windows-1250",
|
522 |
+
"cp1251": "windows-1251",
|
523 |
+
"cp1252": "Windows-1252",
|
524 |
+
"cp1253": "windows-1253",
|
525 |
+
"cp1255": "windows-1255",
|
526 |
+
"cp1256": "windows-1256",
|
527 |
+
"cp1254": "Windows-1254",
|
528 |
+
"cp949": "CP949",
|
529 |
+
}
|
530 |
+
|
531 |
+
|
532 |
+
COMMON_SAFE_ASCII_CHARACTERS: set[str] = {
|
533 |
+
"<",
|
534 |
+
">",
|
535 |
+
"=",
|
536 |
+
":",
|
537 |
+
"/",
|
538 |
+
"&",
|
539 |
+
";",
|
540 |
+
"{",
|
541 |
+
"}",
|
542 |
+
"[",
|
543 |
+
"]",
|
544 |
+
",",
|
545 |
+
"|",
|
546 |
+
'"',
|
547 |
+
"-",
|
548 |
+
"(",
|
549 |
+
")",
|
550 |
+
}
|
551 |
+
|
552 |
+
|
553 |
+
KO_NAMES: set[str] = {"johab", "cp949", "euc_kr"}
|
554 |
+
ZH_NAMES: set[str] = {"big5", "cp950", "big5hkscs", "hz"}
|
555 |
+
|
556 |
+
# Logging LEVEL below DEBUG
|
557 |
+
TRACE: int = 5
|
558 |
+
|
559 |
+
|
560 |
+
# Language label that contain the em dash "—"
|
561 |
+
# character are to be considered alternative seq to origin
|
562 |
+
FREQUENCIES: dict[str, list[str]] = {
|
563 |
+
"English": [
|
564 |
+
"e",
|
565 |
+
"a",
|
566 |
+
"t",
|
567 |
+
"i",
|
568 |
+
"o",
|
569 |
+
"n",
|
570 |
+
"s",
|
571 |
+
"r",
|
572 |
+
"h",
|
573 |
+
"l",
|
574 |
+
"d",
|
575 |
+
"c",
|
576 |
+
"u",
|
577 |
+
"m",
|
578 |
+
"f",
|
579 |
+
"p",
|
580 |
+
"g",
|
581 |
+
"w",
|
582 |
+
"y",
|
583 |
+
"b",
|
584 |
+
"v",
|
585 |
+
"k",
|
586 |
+
"x",
|
587 |
+
"j",
|
588 |
+
"z",
|
589 |
+
"q",
|
590 |
+
],
|
591 |
+
"English—": [
|
592 |
+
"e",
|
593 |
+
"a",
|
594 |
+
"t",
|
595 |
+
"i",
|
596 |
+
"o",
|
597 |
+
"n",
|
598 |
+
"s",
|
599 |
+
"r",
|
600 |
+
"h",
|
601 |
+
"l",
|
602 |
+
"d",
|
603 |
+
"c",
|
604 |
+
"m",
|
605 |
+
"u",
|
606 |
+
"f",
|
607 |
+
"p",
|
608 |
+
"g",
|
609 |
+
"w",
|
610 |
+
"b",
|
611 |
+
"y",
|
612 |
+
"v",
|
613 |
+
"k",
|
614 |
+
"j",
|
615 |
+
"x",
|
616 |
+
"z",
|
617 |
+
"q",
|
618 |
+
],
|
619 |
+
"German": [
|
620 |
+
"e",
|
621 |
+
"n",
|
622 |
+
"i",
|
623 |
+
"r",
|
624 |
+
"s",
|
625 |
+
"t",
|
626 |
+
"a",
|
627 |
+
"d",
|
628 |
+
"h",
|
629 |
+
"u",
|
630 |
+
"l",
|
631 |
+
"g",
|
632 |
+
"o",
|
633 |
+
"c",
|
634 |
+
"m",
|
635 |
+
"b",
|
636 |
+
"f",
|
637 |
+
"k",
|
638 |
+
"w",
|
639 |
+
"z",
|
640 |
+
"p",
|
641 |
+
"v",
|
642 |
+
"ü",
|
643 |
+
"ä",
|
644 |
+
"ö",
|
645 |
+
"j",
|
646 |
+
],
|
647 |
+
"French": [
|
648 |
+
"e",
|
649 |
+
"a",
|
650 |
+
"s",
|
651 |
+
"n",
|
652 |
+
"i",
|
653 |
+
"t",
|
654 |
+
"r",
|
655 |
+
"l",
|
656 |
+
"u",
|
657 |
+
"o",
|
658 |
+
"d",
|
659 |
+
"c",
|
660 |
+
"p",
|
661 |
+
"m",
|
662 |
+
"é",
|
663 |
+
"v",
|
664 |
+
"g",
|
665 |
+
"f",
|
666 |
+
"b",
|
667 |
+
"h",
|
668 |
+
"q",
|
669 |
+
"à",
|
670 |
+
"x",
|
671 |
+
"è",
|
672 |
+
"y",
|
673 |
+
"j",
|
674 |
+
],
|
675 |
+
"Dutch": [
|
676 |
+
"e",
|
677 |
+
"n",
|
678 |
+
"a",
|
679 |
+
"i",
|
680 |
+
"r",
|
681 |
+
"t",
|
682 |
+
"o",
|
683 |
+
"d",
|
684 |
+
"s",
|
685 |
+
"l",
|
686 |
+
"g",
|
687 |
+
"h",
|
688 |
+
"v",
|
689 |
+
"m",
|
690 |
+
"u",
|
691 |
+
"k",
|
692 |
+
"c",
|
693 |
+
"p",
|
694 |
+
"b",
|
695 |
+
"w",
|
696 |
+
"j",
|
697 |
+
"z",
|
698 |
+
"f",
|
699 |
+
"y",
|
700 |
+
"x",
|
701 |
+
"ë",
|
702 |
+
],
|
703 |
+
"Italian": [
|
704 |
+
"e",
|
705 |
+
"i",
|
706 |
+
"a",
|
707 |
+
"o",
|
708 |
+
"n",
|
709 |
+
"l",
|
710 |
+
"t",
|
711 |
+
"r",
|
712 |
+
"s",
|
713 |
+
"c",
|
714 |
+
"d",
|
715 |
+
"u",
|
716 |
+
"p",
|
717 |
+
"m",
|
718 |
+
"g",
|
719 |
+
"v",
|
720 |
+
"f",
|
721 |
+
"b",
|
722 |
+
"z",
|
723 |
+
"h",
|
724 |
+
"q",
|
725 |
+
"è",
|
726 |
+
"à",
|
727 |
+
"k",
|
728 |
+
"y",
|
729 |
+
"ò",
|
730 |
+
],
|
731 |
+
"Polish": [
|
732 |
+
"a",
|
733 |
+
"i",
|
734 |
+
"o",
|
735 |
+
"e",
|
736 |
+
"n",
|
737 |
+
"r",
|
738 |
+
"z",
|
739 |
+
"w",
|
740 |
+
"s",
|
741 |
+
"c",
|
742 |
+
"t",
|
743 |
+
"k",
|
744 |
+
"y",
|
745 |
+
"d",
|
746 |
+
"p",
|
747 |
+
"m",
|
748 |
+
"u",
|
749 |
+
"l",
|
750 |
+
"j",
|
751 |
+
"ł",
|
752 |
+
"g",
|
753 |
+
"b",
|
754 |
+
"h",
|
755 |
+
"ą",
|
756 |
+
"ę",
|
757 |
+
"ó",
|
758 |
+
],
|
759 |
+
"Spanish": [
|
760 |
+
"e",
|
761 |
+
"a",
|
762 |
+
"o",
|
763 |
+
"n",
|
764 |
+
"s",
|
765 |
+
"r",
|
766 |
+
"i",
|
767 |
+
"l",
|
768 |
+
"d",
|
769 |
+
"t",
|
770 |
+
"c",
|
771 |
+
"u",
|
772 |
+
"m",
|
773 |
+
"p",
|
774 |
+
"b",
|
775 |
+
"g",
|
776 |
+
"v",
|
777 |
+
"f",
|
778 |
+
"y",
|
779 |
+
"ó",
|
780 |
+
"h",
|
781 |
+
"q",
|
782 |
+
"í",
|
783 |
+
"j",
|
784 |
+
"z",
|
785 |
+
"á",
|
786 |
+
],
|
787 |
+
"Russian": [
|
788 |
+
"о",
|
789 |
+
"а",
|
790 |
+
"е",
|
791 |
+
"и",
|
792 |
+
"н",
|
793 |
+
"с",
|
794 |
+
"т",
|
795 |
+
"р",
|
796 |
+
"в",
|
797 |
+
"л",
|
798 |
+
"к",
|
799 |
+
"м",
|
800 |
+
"д",
|
801 |
+
"п",
|
802 |
+
"у",
|
803 |
+
"г",
|
804 |
+
"я",
|
805 |
+
"ы",
|
806 |
+
"з",
|
807 |
+
"б",
|
808 |
+
"й",
|
809 |
+
"ь",
|
810 |
+
"ч",
|
811 |
+
"х",
|
812 |
+
"ж",
|
813 |
+
"ц",
|
814 |
+
],
|
815 |
+
# Jap-Kanji
|
816 |
+
"Japanese": [
|
817 |
+
"人",
|
818 |
+
"一",
|
819 |
+
"大",
|
820 |
+
"亅",
|
821 |
+
"丁",
|
822 |
+
"丨",
|
823 |
+
"竹",
|
824 |
+
"笑",
|
825 |
+
"口",
|
826 |
+
"日",
|
827 |
+
"今",
|
828 |
+
"二",
|
829 |
+
"彳",
|
830 |
+
"行",
|
831 |
+
"十",
|
832 |
+
"土",
|
833 |
+
"丶",
|
834 |
+
"寸",
|
835 |
+
"寺",
|
836 |
+
"時",
|
837 |
+
"乙",
|
838 |
+
"丿",
|
839 |
+
"乂",
|
840 |
+
"气",
|
841 |
+
"気",
|
842 |
+
"冂",
|
843 |
+
"巾",
|
844 |
+
"亠",
|
845 |
+
"市",
|
846 |
+
"目",
|
847 |
+
"儿",
|
848 |
+
"見",
|
849 |
+
"八",
|
850 |
+
"小",
|
851 |
+
"凵",
|
852 |
+
"県",
|
853 |
+
"月",
|
854 |
+
"彐",
|
855 |
+
"門",
|
856 |
+
"間",
|
857 |
+
"木",
|
858 |
+
"東",
|
859 |
+
"山",
|
860 |
+
"出",
|
861 |
+
"本",
|
862 |
+
"中",
|
863 |
+
"刀",
|
864 |
+
"分",
|
865 |
+
"耳",
|
866 |
+
"又",
|
867 |
+
"取",
|
868 |
+
"最",
|
869 |
+
"言",
|
870 |
+
"田",
|
871 |
+
"心",
|
872 |
+
"思",
|
873 |
+
"刂",
|
874 |
+
"前",
|
875 |
+
"京",
|
876 |
+
"尹",
|
877 |
+
"事",
|
878 |
+
"生",
|
879 |
+
"厶",
|
880 |
+
"云",
|
881 |
+
"会",
|
882 |
+
"未",
|
883 |
+
"来",
|
884 |
+
"白",
|
885 |
+
"冫",
|
886 |
+
"楽",
|
887 |
+
"灬",
|
888 |
+
"馬",
|
889 |
+
"尸",
|
890 |
+
"尺",
|
891 |
+
"駅",
|
892 |
+
"明",
|
893 |
+
"耂",
|
894 |
+
"者",
|
895 |
+
"了",
|
896 |
+
"阝",
|
897 |
+
"都",
|
898 |
+
"高",
|
899 |
+
"卜",
|
900 |
+
"占",
|
901 |
+
"厂",
|
902 |
+
"广",
|
903 |
+
"店",
|
904 |
+
"子",
|
905 |
+
"申",
|
906 |
+
"奄",
|
907 |
+
"亻",
|
908 |
+
"俺",
|
909 |
+
"上",
|
910 |
+
"方",
|
911 |
+
"冖",
|
912 |
+
"学",
|
913 |
+
"衣",
|
914 |
+
"艮",
|
915 |
+
"食",
|
916 |
+
"自",
|
917 |
+
],
|
918 |
+
# Jap-Katakana
|
919 |
+
"Japanese—": [
|
920 |
+
"ー",
|
921 |
+
"ン",
|
922 |
+
"ス",
|
923 |
+
"・",
|
924 |
+
"ル",
|
925 |
+
"ト",
|
926 |
+
"リ",
|
927 |
+
"イ",
|
928 |
+
"ア",
|
929 |
+
"ラ",
|
930 |
+
"ッ",
|
931 |
+
"ク",
|
932 |
+
"ド",
|
933 |
+
"シ",
|
934 |
+
"レ",
|
935 |
+
"ジ",
|
936 |
+
"タ",
|
937 |
+
"フ",
|
938 |
+
"ロ",
|
939 |
+
"カ",
|
940 |
+
"テ",
|
941 |
+
"マ",
|
942 |
+
"ィ",
|
943 |
+
"グ",
|
944 |
+
"バ",
|
945 |
+
"ム",
|
946 |
+
"プ",
|
947 |
+
"オ",
|
948 |
+
"コ",
|
949 |
+
"デ",
|
950 |
+
"ニ",
|
951 |
+
"ウ",
|
952 |
+
"メ",
|
953 |
+
"サ",
|
954 |
+
"ビ",
|
955 |
+
"ナ",
|
956 |
+
"ブ",
|
957 |
+
"ャ",
|
958 |
+
"エ",
|
959 |
+
"ュ",
|
960 |
+
"チ",
|
961 |
+
"キ",
|
962 |
+
"ズ",
|
963 |
+
"ダ",
|
964 |
+
"パ",
|
965 |
+
"ミ",
|
966 |
+
"ェ",
|
967 |
+
"ョ",
|
968 |
+
"ハ",
|
969 |
+
"セ",
|
970 |
+
"ベ",
|
971 |
+
"ガ",
|
972 |
+
"モ",
|
973 |
+
"ツ",
|
974 |
+
"ネ",
|
975 |
+
"ボ",
|
976 |
+
"ソ",
|
977 |
+
"ノ",
|
978 |
+
"ァ",
|
979 |
+
"ヴ",
|
980 |
+
"ワ",
|
981 |
+
"ポ",
|
982 |
+
"ペ",
|
983 |
+
"ピ",
|
984 |
+
"ケ",
|
985 |
+
"ゴ",
|
986 |
+
"ギ",
|
987 |
+
"ザ",
|
988 |
+
"ホ",
|
989 |
+
"ゲ",
|
990 |
+
"ォ",
|
991 |
+
"ヤ",
|
992 |
+
"ヒ",
|
993 |
+
"ユ",
|
994 |
+
"ヨ",
|
995 |
+
"ヘ",
|
996 |
+
"ゼ",
|
997 |
+
"ヌ",
|
998 |
+
"ゥ",
|
999 |
+
"ゾ",
|
1000 |
+
"ヶ",
|
1001 |
+
"ヂ",
|
1002 |
+
"ヲ",
|
1003 |
+
"ヅ",
|
1004 |
+
"ヵ",
|
1005 |
+
"ヱ",
|
1006 |
+
"ヰ",
|
1007 |
+
"ヮ",
|
1008 |
+
"ヽ",
|
1009 |
+
"゠",
|
1010 |
+
"ヾ",
|
1011 |
+
"ヷ",
|
1012 |
+
"ヿ",
|
1013 |
+
"ヸ",
|
1014 |
+
"ヹ",
|
1015 |
+
"ヺ",
|
1016 |
+
],
|
1017 |
+
# Jap-Hiragana
|
1018 |
+
"Japanese——": [
|
1019 |
+
"の",
|
1020 |
+
"に",
|
1021 |
+
"る",
|
1022 |
+
"た",
|
1023 |
+
"と",
|
1024 |
+
"は",
|
1025 |
+
"し",
|
1026 |
+
"い",
|
1027 |
+
"を",
|
1028 |
+
"で",
|
1029 |
+
"て",
|
1030 |
+
"が",
|
1031 |
+
"な",
|
1032 |
+
"れ",
|
1033 |
+
"か",
|
1034 |
+
"ら",
|
1035 |
+
"さ",
|
1036 |
+
"っ",
|
1037 |
+
"り",
|
1038 |
+
"す",
|
1039 |
+
"あ",
|
1040 |
+
"も",
|
1041 |
+
"こ",
|
1042 |
+
"ま",
|
1043 |
+
"う",
|
1044 |
+
"く",
|
1045 |
+
"よ",
|
1046 |
+
"き",
|
1047 |
+
"ん",
|
1048 |
+
"め",
|
1049 |
+
"お",
|
1050 |
+
"け",
|
1051 |
+
"そ",
|
1052 |
+
"つ",
|
1053 |
+
"だ",
|
1054 |
+
"や",
|
1055 |
+
"え",
|
1056 |
+
"ど",
|
1057 |
+
"わ",
|
1058 |
+
"ち",
|
1059 |
+
"み",
|
1060 |
+
"せ",
|
1061 |
+
"じ",
|
1062 |
+
"ば",
|
1063 |
+
"へ",
|
1064 |
+
"び",
|
1065 |
+
"ず",
|
1066 |
+
"ろ",
|
1067 |
+
"ほ",
|
1068 |
+
"げ",
|
1069 |
+
"む",
|
1070 |
+
"べ",
|
1071 |
+
"ひ",
|
1072 |
+
"ょ",
|
1073 |
+
"ゆ",
|
1074 |
+
"ぶ",
|
1075 |
+
"ご",
|
1076 |
+
"ゃ",
|
1077 |
+
"ね",
|
1078 |
+
"ふ",
|
1079 |
+
"ぐ",
|
1080 |
+
"ぎ",
|
1081 |
+
"ぼ",
|
1082 |
+
"ゅ",
|
1083 |
+
"づ",
|
1084 |
+
"ざ",
|
1085 |
+
"ぞ",
|
1086 |
+
"ぬ",
|
1087 |
+
"ぜ",
|
1088 |
+
"ぱ",
|
1089 |
+
"ぽ",
|
1090 |
+
"ぷ",
|
1091 |
+
"ぴ",
|
1092 |
+
"ぃ",
|
1093 |
+
"ぁ",
|
1094 |
+
"ぇ",
|
1095 |
+
"ぺ",
|
1096 |
+
"ゞ",
|
1097 |
+
"ぢ",
|
1098 |
+
"ぉ",
|
1099 |
+
"ぅ",
|
1100 |
+
"ゐ",
|
1101 |
+
"ゝ",
|
1102 |
+
"ゑ",
|
1103 |
+
"゛",
|
1104 |
+
"゜",
|
1105 |
+
"ゎ",
|
1106 |
+
"ゔ",
|
1107 |
+
"゚",
|
1108 |
+
"ゟ",
|
1109 |
+
"゙",
|
1110 |
+
"ゕ",
|
1111 |
+
"ゖ",
|
1112 |
+
],
|
1113 |
+
"Portuguese": [
|
1114 |
+
"a",
|
1115 |
+
"e",
|
1116 |
+
"o",
|
1117 |
+
"s",
|
1118 |
+
"i",
|
1119 |
+
"r",
|
1120 |
+
"d",
|
1121 |
+
"n",
|
1122 |
+
"t",
|
1123 |
+
"m",
|
1124 |
+
"u",
|
1125 |
+
"c",
|
1126 |
+
"l",
|
1127 |
+
"p",
|
1128 |
+
"g",
|
1129 |
+
"v",
|
1130 |
+
"b",
|
1131 |
+
"f",
|
1132 |
+
"h",
|
1133 |
+
"ã",
|
1134 |
+
"q",
|
1135 |
+
"é",
|
1136 |
+
"ç",
|
1137 |
+
"á",
|
1138 |
+
"z",
|
1139 |
+
"í",
|
1140 |
+
],
|
1141 |
+
"Swedish": [
|
1142 |
+
"e",
|
1143 |
+
"a",
|
1144 |
+
"n",
|
1145 |
+
"r",
|
1146 |
+
"t",
|
1147 |
+
"s",
|
1148 |
+
"i",
|
1149 |
+
"l",
|
1150 |
+
"d",
|
1151 |
+
"o",
|
1152 |
+
"m",
|
1153 |
+
"k",
|
1154 |
+
"g",
|
1155 |
+
"v",
|
1156 |
+
"h",
|
1157 |
+
"f",
|
1158 |
+
"u",
|
1159 |
+
"p",
|
1160 |
+
"ä",
|
1161 |
+
"c",
|
1162 |
+
"b",
|
1163 |
+
"ö",
|
1164 |
+
"å",
|
1165 |
+
"y",
|
1166 |
+
"j",
|
1167 |
+
"x",
|
1168 |
+
],
|
1169 |
+
"Chinese": [
|
1170 |
+
"的",
|
1171 |
+
"一",
|
1172 |
+
"是",
|
1173 |
+
"不",
|
1174 |
+
"了",
|
1175 |
+
"在",
|
1176 |
+
"人",
|
1177 |
+
"有",
|
1178 |
+
"我",
|
1179 |
+
"他",
|
1180 |
+
"这",
|
1181 |
+
"个",
|
1182 |
+
"们",
|
1183 |
+
"中",
|
1184 |
+
"来",
|
1185 |
+
"上",
|
1186 |
+
"大",
|
1187 |
+
"为",
|
1188 |
+
"和",
|
1189 |
+
"国",
|
1190 |
+
"地",
|
1191 |
+
"到",
|
1192 |
+
"以",
|
1193 |
+
"说",
|
1194 |
+
"时",
|
1195 |
+
"要",
|
1196 |
+
"就",
|
1197 |
+
"出",
|
1198 |
+
"会",
|
1199 |
+
"可",
|
1200 |
+
"也",
|
1201 |
+
"你",
|
1202 |
+
"对",
|
1203 |
+
"生",
|
1204 |
+
"能",
|
1205 |
+
"而",
|
1206 |
+
"子",
|
1207 |
+
"那",
|
1208 |
+
"得",
|
1209 |
+
"于",
|
1210 |
+
"着",
|
1211 |
+
"下",
|
1212 |
+
"自",
|
1213 |
+
"之",
|
1214 |
+
"年",
|
1215 |
+
"过",
|
1216 |
+
"发",
|
1217 |
+
"后",
|
1218 |
+
"作",
|
1219 |
+
"里",
|
1220 |
+
"用",
|
1221 |
+
"道",
|
1222 |
+
"行",
|
1223 |
+
"所",
|
1224 |
+
"然",
|
1225 |
+
"家",
|
1226 |
+
"种",
|
1227 |
+
"事",
|
1228 |
+
"成",
|
1229 |
+
"方",
|
1230 |
+
"多",
|
1231 |
+
"经",
|
1232 |
+
"么",
|
1233 |
+
"去",
|
1234 |
+
"法",
|
1235 |
+
"学",
|
1236 |
+
"如",
|
1237 |
+
"都",
|
1238 |
+
"同",
|
1239 |
+
"现",
|
1240 |
+
"当",
|
1241 |
+
"没",
|
1242 |
+
"动",
|
1243 |
+
"面",
|
1244 |
+
"起",
|
1245 |
+
"看",
|
1246 |
+
"定",
|
1247 |
+
"天",
|
1248 |
+
"分",
|
1249 |
+
"还",
|
1250 |
+
"进",
|
1251 |
+
"好",
|
1252 |
+
"小",
|
1253 |
+
"部",
|
1254 |
+
"其",
|
1255 |
+
"些",
|
1256 |
+
"主",
|
1257 |
+
"样",
|
1258 |
+
"理",
|
1259 |
+
"心",
|
1260 |
+
"她",
|
1261 |
+
"本",
|
1262 |
+
"前",
|
1263 |
+
"开",
|
1264 |
+
"但",
|
1265 |
+
"因",
|
1266 |
+
"只",
|
1267 |
+
"从",
|
1268 |
+
"想",
|
1269 |
+
"实",
|
1270 |
+
],
|
1271 |
+
"Ukrainian": [
|
1272 |
+
"о",
|
1273 |
+
"а",
|
1274 |
+
"н",
|
1275 |
+
"і",
|
1276 |
+
"и",
|
1277 |
+
"р",
|
1278 |
+
"в",
|
1279 |
+
"т",
|
1280 |
+
"е",
|
1281 |
+
"с",
|
1282 |
+
"к",
|
1283 |
+
"л",
|
1284 |
+
"у",
|
1285 |
+
"д",
|
1286 |
+
"м",
|
1287 |
+
"п",
|
1288 |
+
"з",
|
1289 |
+
"я",
|
1290 |
+
"ь",
|
1291 |
+
"б",
|
1292 |
+
"г",
|
1293 |
+
"й",
|
1294 |
+
"ч",
|
1295 |
+
"х",
|
1296 |
+
"ц",
|
1297 |
+
"ї",
|
1298 |
+
],
|
1299 |
+
"Norwegian": [
|
1300 |
+
"e",
|
1301 |
+
"r",
|
1302 |
+
"n",
|
1303 |
+
"t",
|
1304 |
+
"a",
|
1305 |
+
"s",
|
1306 |
+
"i",
|
1307 |
+
"o",
|
1308 |
+
"l",
|
1309 |
+
"d",
|
1310 |
+
"g",
|
1311 |
+
"k",
|
1312 |
+
"m",
|
1313 |
+
"v",
|
1314 |
+
"f",
|
1315 |
+
"p",
|
1316 |
+
"u",
|
1317 |
+
"b",
|
1318 |
+
"h",
|
1319 |
+
"å",
|
1320 |
+
"y",
|
1321 |
+
"j",
|
1322 |
+
"ø",
|
1323 |
+
"c",
|
1324 |
+
"æ",
|
1325 |
+
"w",
|
1326 |
+
],
|
1327 |
+
"Finnish": [
|
1328 |
+
"a",
|
1329 |
+
"i",
|
1330 |
+
"n",
|
1331 |
+
"t",
|
1332 |
+
"e",
|
1333 |
+
"s",
|
1334 |
+
"l",
|
1335 |
+
"o",
|
1336 |
+
"u",
|
1337 |
+
"k",
|
1338 |
+
"ä",
|
1339 |
+
"m",
|
1340 |
+
"r",
|
1341 |
+
"v",
|
1342 |
+
"j",
|
1343 |
+
"h",
|
1344 |
+
"p",
|
1345 |
+
"y",
|
1346 |
+
"d",
|
1347 |
+
"ö",
|
1348 |
+
"g",
|
1349 |
+
"c",
|
1350 |
+
"b",
|
1351 |
+
"f",
|
1352 |
+
"w",
|
1353 |
+
"z",
|
1354 |
+
],
|
1355 |
+
"Vietnamese": [
|
1356 |
+
"n",
|
1357 |
+
"h",
|
1358 |
+
"t",
|
1359 |
+
"i",
|
1360 |
+
"c",
|
1361 |
+
"g",
|
1362 |
+
"a",
|
1363 |
+
"o",
|
1364 |
+
"u",
|
1365 |
+
"m",
|
1366 |
+
"l",
|
1367 |
+
"r",
|
1368 |
+
"à",
|
1369 |
+
"đ",
|
1370 |
+
"s",
|
1371 |
+
"e",
|
1372 |
+
"v",
|
1373 |
+
"p",
|
1374 |
+
"b",
|
1375 |
+
"y",
|
1376 |
+
"ư",
|
1377 |
+
"d",
|
1378 |
+
"á",
|
1379 |
+
"k",
|
1380 |
+
"ộ",
|
1381 |
+
"ế",
|
1382 |
+
],
|
1383 |
+
"Czech": [
|
1384 |
+
"o",
|
1385 |
+
"e",
|
1386 |
+
"a",
|
1387 |
+
"n",
|
1388 |
+
"t",
|
1389 |
+
"s",
|
1390 |
+
"i",
|
1391 |
+
"l",
|
1392 |
+
"v",
|
1393 |
+
"r",
|
1394 |
+
"k",
|
1395 |
+
"d",
|
1396 |
+
"u",
|
1397 |
+
"m",
|
1398 |
+
"p",
|
1399 |
+
"í",
|
1400 |
+
"c",
|
1401 |
+
"h",
|
1402 |
+
"z",
|
1403 |
+
"á",
|
        "y", "j", "b", "ě", "é", "ř",
    ],
    "Hungarian": [
        "e", "a", "t", "l", "s", "n", "k", "r", "i", "o", "z", "á", "é",
        "g", "m", "b", "y", "v", "d", "h", "u", "p", "j", "ö", "f", "c",
    ],
    "Korean": [
        "이", "다", "에", "의", "는", "로", "하", "을", "가", "고", "지", "서", "한",
        "은", "기", "으", "년", "대", "사", "시", "를", "리", "도", "인", "스", "일",
    ],
    "Indonesian": [
        "a", "n", "e", "i", "r", "t", "u", "s", "d", "k", "m", "l", "g",
        "p", "b", "o", "h", "y", "j", "c", "w", "f", "v", "z", "x", "q",
    ],
    "Turkish": [
        "a", "e", "i", "n", "r", "l", "ı", "k", "d", "t", "s", "m", "y",
        "u", "o", "b", "ü", "ş", "v", "g", "z", "h", "c", "p", "ç", "ğ",
    ],
    "Romanian": [
        "e", "i", "a", "r", "n", "t", "u", "l", "o", "c", "s", "d", "p",
        "m", "ă", "f", "v", "î", "g", "b", "ș", "ț", "z", "h", "â", "j",
    ],
    "Farsi": [
        "ا", "ی", "ر", "د", "ن", "ه", "و", "م", "ت", "ب", "س", "ل", "ک",
        "ش", "ز", "ف", "گ", "ع", "خ", "ق", "ج", "آ", "پ", "ح", "ط", "ص",
    ],
    "Arabic": [
        "ا", "ل", "ي", "م", "و", "ن", "ر", "ت", "ب", "ة", "ع", "د", "س",
        "ف", "ه", "ك", "ق", "أ", "ح", "ج", "ش", "ط", "ص", "ى", "خ", "إ",
    ],
    "Danish": [
        "e", "r", "n", "t", "a", "i", "s", "d", "l", "o", "g", "m", "k",
        "f", "v", "u", "b", "h", "p", "å", "y", "ø", "æ", "c", "j", "w",
    ],
    "Serbian": [
        "а", "и", "о", "е", "н", "р", "с", "у", "т", "к", "ј", "в", "д",
        "м", "п", "л", "г", "з", "б", "a", "i", "e", "o", "n", "ц", "ш",
    ],
    "Lithuanian": [
        "i", "a", "s", "o", "r", "e", "t", "n", "u", "k", "m", "l", "p",
        "v", "d", "j", "g", "ė", "b", "y", "ų", "š", "ž", "c", "ą", "į",
    ],
    "Slovene": [
        "e", "a", "i", "o", "n", "r", "s", "l", "t", "j", "v", "k", "d",
        "p", "m", "u", "z", "b", "g", "h", "č", "c", "š", "ž", "f", "y",
    ],
    "Slovak": [
        "o", "a", "e", "n", "i", "r", "v", "t", "s", "l", "k", "d", "m",
        "p", "u", "c", "h", "j", "b", "z", "á", "y", "ý", "í", "č", "é",
    ],
    "Hebrew": [
        "י", "ו", "ה", "ל", "ר", "ב", "ת", "מ", "א", "ש", "נ", "ע", "ם",
        "ד", "ק", "ח", "פ", "ס", "כ", "ג", "ט", "צ", "ן", "ז", "ך",
    ],
    "Bulgarian": [
        "а", "и", "о", "е", "н", "т", "р", "с", "в", "л", "к", "д", "п",
        "м", "з", "г", "я", "ъ", "у", "б", "ч", "ц", "й", "ж", "щ", "х",
    ],
    "Croatian": [
        "a", "i", "o", "e", "n", "r", "j", "s", "t", "u", "k", "l", "v",
        "d", "m", "p", "g", "z", "b", "c", "č", "h", "š", "ž", "ć", "f",
    ],
    "Hindi": [
        "क", "र", "स", "न", "त", "म", "ह", "प", "य", "ल", "व", "ज", "द",
        "ग", "ब", "श", "ट", "अ", "ए", "थ", "भ", "ड", "च", "ध", "ष", "इ",
    ],
    "Estonian": [
        "a", "i", "e", "s", "t", "l", "u", "n", "o", "k", "r", "d", "m",
        "v", "g", "p", "j", "h", "ä", "b", "õ", "ü", "f", "c", "ö", "y",
    ],
    "Thai": [
        "า", "น", "ร", "อ", "ก", "เ", "ง", "ม", "ย", "ล", "ว", "ด", "ท",
        "ส", "ต", "ะ", "ป", "บ", "ค", "ห", "แ", "จ", "พ", "ช", "ข", "ใ",
    ],
    "Greek": [
        "α", "τ", "ο", "ι", "ε", "ν", "ρ", "σ", "κ", "η", "π", "ς", "υ",
        "μ", "λ", "ί", "ό", "ά", "γ", "έ", "δ", "ή", "ω", "χ", "θ", "ύ",
    ],
    "Tamil": [
        "க", "த", "ப", "ட", "ர", "ம", "ல", "ன", "வ", "ற", "ய", "ள",
        "ச", "ந", "இ", "ண", "அ", "ஆ", "ழ", "ங", "எ", "உ", "ஒ", "ஸ",
    ],
    "Kazakh": [
        "а", "ы", "е", "н", "т", "р", "л", "і", "д", "с", "м", "қ", "к",
        "о", "б", "и", "у", "ғ", "ж", "ң", "з", "ш", "й", "п", "г", "ө",
    ],
}

LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)
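The FREQUENCIES table above is the data the package's coherence detection consults. As a rough illustration only (this is not code from the diff, and the library's real scoring in cd.py differs in detail), a table of this shape can be used to score how strongly a decoded string leans toward a given language:

# Illustrative sketch, not part of the upload above. FREQUENCIES_SAMPLE and
# language_affinity() are hypothetical names introduced for this example.
from collections import Counter

FREQUENCIES_SAMPLE = {
    "Turkish": ["a", "e", "i", "n", "r", "l", "ı", "k", "d", "t"],
    "Danish": ["e", "r", "n", "t", "a", "i", "s", "d", "l", "o"],
}

def language_affinity(text: str, popular_chars: list[str]) -> float:
    """Share of alphabetic characters that belong to the language's popular set."""
    letters = [c for c in text.lower() if c.isalpha()]
    if not letters:
        return 0.0
    counts = Counter(letters)
    hits = sum(n for ch, n in counts.items() if ch in popular_chars)
    return hits / len(letters)

if __name__ == "__main__":
    sample = "Åbningstiderne er de samme i dag"
    for lang, chars in FREQUENCIES_SAMPLE.items():
        print(lang, round(language_affinity(sample, chars), 3))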
env/Lib/site-packages/charset_normalizer/legacy.py
ADDED
@@ -0,0 +1,66 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any
from warnings import warn

from .api import from_bytes
from .constant import CHARDET_CORRESPONDENCE

# TODO: remove this check when dropping Python 3.7 support
if TYPE_CHECKING:
    from typing_extensions import TypedDict

    class ResultDict(TypedDict):
        encoding: str | None
        language: str
        confidence: float | None


def detect(
    byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
) -> ResultDict:
    """
    chardet legacy method
    Detect the encoding of the given byte string. It should be mostly backward-compatible.
    Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it)
    This function is deprecated and should be used to migrate your project easily, consult the documentation for
    further information. Not planned for removal.

    :param byte_str: The byte sequence to examine.
    :param should_rename_legacy: Should we rename legacy encodings
                                 to their more modern equivalents?
    """
    if len(kwargs):
        warn(
            f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()"
        )

    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            "Expected object of type bytes or bytearray, got: " "{}".format(
                type(byte_str)
            )
        )

    if isinstance(byte_str, bytearray):
        byte_str = bytes(byte_str)

    r = from_bytes(byte_str).best()

    encoding = r.encoding if r is not None else None
    language = r.language if r is not None and r.language != "Unknown" else ""
    confidence = 1.0 - r.chaos if r is not None else None

    # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
    # but chardet does return 'utf-8-sig' and it is a valid codec name.
    if r is not None and encoding == "utf_8" and r.bom:
        encoding += "_sig"

    if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
        encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": encoding,
        "language": language,
        "confidence": confidence,
    }
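For context, a minimal usage sketch of the detect() compatibility helper added above, assuming the vendored charset_normalizer package in this Space's env/ folder is importable:

# Usage sketch only; the input bytes and expected output are illustrative.
from charset_normalizer.legacy import detect

payload = "Comment ça va aujourd'hui ?".encode("cp1252")
result = detect(payload)

# result is a dict with the chardet-style keys shown in the diff:
# 'encoding', 'language' and 'confidence'.
print(result["encoding"], result["language"], result["confidence"])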
env/Lib/site-packages/charset_normalizer/md.py
ADDED
@@ -0,0 +1,630 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
from functools import lru_cache
|
4 |
+
from logging import getLogger
|
5 |
+
|
6 |
+
from .constant import (
|
7 |
+
COMMON_SAFE_ASCII_CHARACTERS,
|
8 |
+
TRACE,
|
9 |
+
UNICODE_SECONDARY_RANGE_KEYWORD,
|
10 |
+
)
|
11 |
+
from .utils import (
|
12 |
+
is_accentuated,
|
13 |
+
is_arabic,
|
14 |
+
is_arabic_isolated_form,
|
15 |
+
is_case_variable,
|
16 |
+
is_cjk,
|
17 |
+
is_emoticon,
|
18 |
+
is_hangul,
|
19 |
+
is_hiragana,
|
20 |
+
is_katakana,
|
21 |
+
is_latin,
|
22 |
+
is_punctuation,
|
23 |
+
is_separator,
|
24 |
+
is_symbol,
|
25 |
+
is_thai,
|
26 |
+
is_unprintable,
|
27 |
+
remove_accent,
|
28 |
+
unicode_range,
|
29 |
+
)
|
30 |
+
|
31 |
+
|
32 |
+
class MessDetectorPlugin:
|
33 |
+
"""
|
34 |
+
Base abstract class used for mess detection plugins.
|
35 |
+
All detectors MUST extend and implement given methods.
|
36 |
+
"""
|
37 |
+
|
38 |
+
def eligible(self, character: str) -> bool:
|
39 |
+
"""
|
40 |
+
Determine if given character should be fed in.
|
41 |
+
"""
|
42 |
+
raise NotImplementedError # pragma: nocover
|
43 |
+
|
44 |
+
def feed(self, character: str) -> None:
|
45 |
+
"""
|
46 |
+
The main routine to be executed upon character.
|
47 |
+
Insert the logic in witch the text would be considered chaotic.
|
48 |
+
"""
|
49 |
+
raise NotImplementedError # pragma: nocover
|
50 |
+
|
51 |
+
def reset(self) -> None: # pragma: no cover
|
52 |
+
"""
|
53 |
+
Permit to reset the plugin to the initial state.
|
54 |
+
"""
|
55 |
+
raise NotImplementedError
|
56 |
+
|
57 |
+
@property
|
58 |
+
def ratio(self) -> float:
|
59 |
+
"""
|
60 |
+
Compute the chaos ratio based on what your feed() has seen.
|
61 |
+
Must NOT be lower than 0.; No restriction gt 0.
|
62 |
+
"""
|
63 |
+
raise NotImplementedError # pragma: nocover
|
64 |
+
|
65 |
+
|
66 |
+
class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
|
67 |
+
def __init__(self) -> None:
|
68 |
+
self._punctuation_count: int = 0
|
69 |
+
self._symbol_count: int = 0
|
70 |
+
self._character_count: int = 0
|
71 |
+
|
72 |
+
self._last_printable_char: str | None = None
|
73 |
+
self._frenzy_symbol_in_word: bool = False
|
74 |
+
|
75 |
+
def eligible(self, character: str) -> bool:
|
76 |
+
return character.isprintable()
|
77 |
+
|
78 |
+
def feed(self, character: str) -> None:
|
79 |
+
self._character_count += 1
|
80 |
+
|
81 |
+
if (
|
82 |
+
character != self._last_printable_char
|
83 |
+
and character not in COMMON_SAFE_ASCII_CHARACTERS
|
84 |
+
):
|
85 |
+
if is_punctuation(character):
|
86 |
+
self._punctuation_count += 1
|
87 |
+
elif (
|
88 |
+
character.isdigit() is False
|
89 |
+
and is_symbol(character)
|
90 |
+
and is_emoticon(character) is False
|
91 |
+
):
|
92 |
+
self._symbol_count += 2
|
93 |
+
|
94 |
+
self._last_printable_char = character
|
95 |
+
|
96 |
+
def reset(self) -> None: # Abstract
|
97 |
+
self._punctuation_count = 0
|
98 |
+
self._character_count = 0
|
99 |
+
self._symbol_count = 0
|
100 |
+
|
101 |
+
@property
|
102 |
+
def ratio(self) -> float:
|
103 |
+
if self._character_count == 0:
|
104 |
+
return 0.0
|
105 |
+
|
106 |
+
ratio_of_punctuation: float = (
|
107 |
+
self._punctuation_count + self._symbol_count
|
108 |
+
) / self._character_count
|
109 |
+
|
110 |
+
return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0
|
111 |
+
|
112 |
+
|
113 |
+
class TooManyAccentuatedPlugin(MessDetectorPlugin):
|
114 |
+
def __init__(self) -> None:
|
115 |
+
self._character_count: int = 0
|
116 |
+
self._accentuated_count: int = 0
|
117 |
+
|
118 |
+
def eligible(self, character: str) -> bool:
|
119 |
+
return character.isalpha()
|
120 |
+
|
121 |
+
def feed(self, character: str) -> None:
|
122 |
+
self._character_count += 1
|
123 |
+
|
124 |
+
if is_accentuated(character):
|
125 |
+
self._accentuated_count += 1
|
126 |
+
|
127 |
+
def reset(self) -> None: # Abstract
|
128 |
+
self._character_count = 0
|
129 |
+
self._accentuated_count = 0
|
130 |
+
|
131 |
+
@property
|
132 |
+
def ratio(self) -> float:
|
133 |
+
if self._character_count < 8:
|
134 |
+
return 0.0
|
135 |
+
|
136 |
+
ratio_of_accentuation: float = self._accentuated_count / self._character_count
|
137 |
+
return ratio_of_accentuation if ratio_of_accentuation >= 0.35 else 0.0
|
138 |
+
|
139 |
+
|
140 |
+
class UnprintablePlugin(MessDetectorPlugin):
|
141 |
+
def __init__(self) -> None:
|
142 |
+
self._unprintable_count: int = 0
|
143 |
+
self._character_count: int = 0
|
144 |
+
|
145 |
+
def eligible(self, character: str) -> bool:
|
146 |
+
return True
|
147 |
+
|
148 |
+
def feed(self, character: str) -> None:
|
149 |
+
if is_unprintable(character):
|
150 |
+
self._unprintable_count += 1
|
151 |
+
self._character_count += 1
|
152 |
+
|
153 |
+
def reset(self) -> None: # Abstract
|
154 |
+
self._unprintable_count = 0
|
155 |
+
|
156 |
+
@property
|
157 |
+
def ratio(self) -> float:
|
158 |
+
if self._character_count == 0:
|
159 |
+
return 0.0
|
160 |
+
|
161 |
+
return (self._unprintable_count * 8) / self._character_count
|
162 |
+
|
163 |
+
|
164 |
+
class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
|
165 |
+
def __init__(self) -> None:
|
166 |
+
self._successive_count: int = 0
|
167 |
+
self._character_count: int = 0
|
168 |
+
|
169 |
+
self._last_latin_character: str | None = None
|
170 |
+
|
171 |
+
def eligible(self, character: str) -> bool:
|
172 |
+
return character.isalpha() and is_latin(character)
|
173 |
+
|
174 |
+
def feed(self, character: str) -> None:
|
175 |
+
self._character_count += 1
|
176 |
+
if (
|
177 |
+
self._last_latin_character is not None
|
178 |
+
and is_accentuated(character)
|
179 |
+
and is_accentuated(self._last_latin_character)
|
180 |
+
):
|
181 |
+
if character.isupper() and self._last_latin_character.isupper():
|
182 |
+
self._successive_count += 1
|
183 |
+
# Worse if its the same char duplicated with different accent.
|
184 |
+
if remove_accent(character) == remove_accent(self._last_latin_character):
|
185 |
+
self._successive_count += 1
|
186 |
+
self._last_latin_character = character
|
187 |
+
|
188 |
+
def reset(self) -> None: # Abstract
|
189 |
+
self._successive_count = 0
|
190 |
+
self._character_count = 0
|
191 |
+
self._last_latin_character = None
|
192 |
+
|
193 |
+
@property
|
194 |
+
def ratio(self) -> float:
|
195 |
+
if self._character_count == 0:
|
196 |
+
return 0.0
|
197 |
+
|
198 |
+
return (self._successive_count * 2) / self._character_count
|
199 |
+
|
200 |
+
|
201 |
+
class SuspiciousRange(MessDetectorPlugin):
|
202 |
+
def __init__(self) -> None:
|
203 |
+
self._suspicious_successive_range_count: int = 0
|
204 |
+
self._character_count: int = 0
|
205 |
+
self._last_printable_seen: str | None = None
|
206 |
+
|
207 |
+
def eligible(self, character: str) -> bool:
|
208 |
+
return character.isprintable()
|
209 |
+
|
210 |
+
def feed(self, character: str) -> None:
|
211 |
+
self._character_count += 1
|
212 |
+
|
213 |
+
if (
|
214 |
+
character.isspace()
|
215 |
+
or is_punctuation(character)
|
216 |
+
or character in COMMON_SAFE_ASCII_CHARACTERS
|
217 |
+
):
|
218 |
+
self._last_printable_seen = None
|
219 |
+
return
|
220 |
+
|
221 |
+
if self._last_printable_seen is None:
|
222 |
+
self._last_printable_seen = character
|
223 |
+
return
|
224 |
+
|
225 |
+
unicode_range_a: str | None = unicode_range(self._last_printable_seen)
|
226 |
+
unicode_range_b: str | None = unicode_range(character)
|
227 |
+
|
228 |
+
if is_suspiciously_successive_range(unicode_range_a, unicode_range_b):
|
229 |
+
self._suspicious_successive_range_count += 1
|
230 |
+
|
231 |
+
self._last_printable_seen = character
|
232 |
+
|
233 |
+
def reset(self) -> None: # Abstract
|
234 |
+
self._character_count = 0
|
235 |
+
self._suspicious_successive_range_count = 0
|
236 |
+
self._last_printable_seen = None
|
237 |
+
|
238 |
+
@property
|
239 |
+
def ratio(self) -> float:
|
240 |
+
if self._character_count <= 13:
|
241 |
+
return 0.0
|
242 |
+
|
243 |
+
ratio_of_suspicious_range_usage: float = (
|
244 |
+
self._suspicious_successive_range_count * 2
|
245 |
+
) / self._character_count
|
246 |
+
|
247 |
+
return ratio_of_suspicious_range_usage
|
248 |
+
|
249 |
+
|
250 |
+
class SuperWeirdWordPlugin(MessDetectorPlugin):
|
251 |
+
def __init__(self) -> None:
|
252 |
+
self._word_count: int = 0
|
253 |
+
self._bad_word_count: int = 0
|
254 |
+
self._foreign_long_count: int = 0
|
255 |
+
|
256 |
+
self._is_current_word_bad: bool = False
|
257 |
+
self._foreign_long_watch: bool = False
|
258 |
+
|
259 |
+
self._character_count: int = 0
|
260 |
+
self._bad_character_count: int = 0
|
261 |
+
|
262 |
+
self._buffer: str = ""
|
263 |
+
self._buffer_accent_count: int = 0
|
264 |
+
self._buffer_glyph_count: int = 0
|
265 |
+
|
266 |
+
def eligible(self, character: str) -> bool:
|
267 |
+
return True
|
268 |
+
|
269 |
+
def feed(self, character: str) -> None:
|
270 |
+
if character.isalpha():
|
271 |
+
self._buffer += character
|
272 |
+
if is_accentuated(character):
|
273 |
+
self._buffer_accent_count += 1
|
274 |
+
if (
|
275 |
+
self._foreign_long_watch is False
|
276 |
+
and (is_latin(character) is False or is_accentuated(character))
|
277 |
+
and is_cjk(character) is False
|
278 |
+
and is_hangul(character) is False
|
279 |
+
and is_katakana(character) is False
|
280 |
+
and is_hiragana(character) is False
|
281 |
+
and is_thai(character) is False
|
282 |
+
):
|
283 |
+
self._foreign_long_watch = True
|
284 |
+
if (
|
285 |
+
is_cjk(character)
|
286 |
+
or is_hangul(character)
|
287 |
+
or is_katakana(character)
|
288 |
+
or is_hiragana(character)
|
289 |
+
or is_thai(character)
|
290 |
+
):
|
291 |
+
self._buffer_glyph_count += 1
|
292 |
+
return
|
293 |
+
if not self._buffer:
|
294 |
+
return
|
295 |
+
if (
|
296 |
+
character.isspace() or is_punctuation(character) or is_separator(character)
|
297 |
+
) and self._buffer:
|
298 |
+
self._word_count += 1
|
299 |
+
buffer_length: int = len(self._buffer)
|
300 |
+
|
301 |
+
self._character_count += buffer_length
|
302 |
+
|
303 |
+
if buffer_length >= 4:
|
304 |
+
if self._buffer_accent_count / buffer_length >= 0.5:
|
305 |
+
self._is_current_word_bad = True
|
306 |
+
# Word/Buffer ending with an upper case accentuated letter are so rare,
|
307 |
+
# that we will consider them all as suspicious. Same weight as foreign_long suspicious.
|
308 |
+
elif (
|
309 |
+
is_accentuated(self._buffer[-1])
|
310 |
+
and self._buffer[-1].isupper()
|
311 |
+
and all(_.isupper() for _ in self._buffer) is False
|
312 |
+
):
|
313 |
+
self._foreign_long_count += 1
|
314 |
+
self._is_current_word_bad = True
|
315 |
+
elif self._buffer_glyph_count == 1:
|
316 |
+
self._is_current_word_bad = True
|
317 |
+
self._foreign_long_count += 1
|
318 |
+
if buffer_length >= 24 and self._foreign_long_watch:
|
319 |
+
camel_case_dst = [
|
320 |
+
i
|
321 |
+
for c, i in zip(self._buffer, range(0, buffer_length))
|
322 |
+
if c.isupper()
|
323 |
+
]
|
324 |
+
probable_camel_cased: bool = False
|
325 |
+
|
326 |
+
if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
|
327 |
+
probable_camel_cased = True
|
328 |
+
|
329 |
+
if not probable_camel_cased:
|
330 |
+
self._foreign_long_count += 1
|
331 |
+
self._is_current_word_bad = True
|
332 |
+
|
333 |
+
if self._is_current_word_bad:
|
334 |
+
self._bad_word_count += 1
|
335 |
+
self._bad_character_count += len(self._buffer)
|
336 |
+
self._is_current_word_bad = False
|
337 |
+
|
338 |
+
self._foreign_long_watch = False
|
339 |
+
self._buffer = ""
|
340 |
+
self._buffer_accent_count = 0
|
341 |
+
self._buffer_glyph_count = 0
|
342 |
+
elif (
|
343 |
+
character not in {"<", ">", "-", "=", "~", "|", "_"}
|
344 |
+
and character.isdigit() is False
|
345 |
+
and is_symbol(character)
|
346 |
+
):
|
347 |
+
self._is_current_word_bad = True
|
348 |
+
self._buffer += character
|
349 |
+
|
350 |
+
def reset(self) -> None: # Abstract
|
351 |
+
self._buffer = ""
|
352 |
+
self._is_current_word_bad = False
|
353 |
+
self._foreign_long_watch = False
|
354 |
+
self._bad_word_count = 0
|
355 |
+
self._word_count = 0
|
356 |
+
self._character_count = 0
|
357 |
+
self._bad_character_count = 0
|
358 |
+
self._foreign_long_count = 0
|
359 |
+
|
360 |
+
@property
|
361 |
+
def ratio(self) -> float:
|
362 |
+
if self._word_count <= 10 and self._foreign_long_count == 0:
|
363 |
+
return 0.0
|
364 |
+
|
365 |
+
return self._bad_character_count / self._character_count
|
366 |
+
|
367 |
+
|
368 |
+
class CjkInvalidStopPlugin(MessDetectorPlugin):
|
369 |
+
"""
|
370 |
+
GB(Chinese) based encoding often render the stop incorrectly when the content does not fit and
|
371 |
+
can be easily detected. Searching for the overuse of '丅' and '丄'.
|
372 |
+
"""
|
373 |
+
|
374 |
+
def __init__(self) -> None:
|
375 |
+
self._wrong_stop_count: int = 0
|
376 |
+
self._cjk_character_count: int = 0
|
377 |
+
|
378 |
+
def eligible(self, character: str) -> bool:
|
379 |
+
return True
|
380 |
+
|
381 |
+
def feed(self, character: str) -> None:
|
382 |
+
if character in {"丅", "丄"}:
|
383 |
+
self._wrong_stop_count += 1
|
384 |
+
return
|
385 |
+
if is_cjk(character):
|
386 |
+
self._cjk_character_count += 1
|
387 |
+
|
388 |
+
def reset(self) -> None: # Abstract
|
389 |
+
self._wrong_stop_count = 0
|
390 |
+
self._cjk_character_count = 0
|
391 |
+
|
392 |
+
@property
|
393 |
+
def ratio(self) -> float:
|
394 |
+
if self._cjk_character_count < 16:
|
395 |
+
return 0.0
|
396 |
+
return self._wrong_stop_count / self._cjk_character_count
|
397 |
+
|
398 |
+
|
399 |
+
class ArchaicUpperLowerPlugin(MessDetectorPlugin):
|
400 |
+
def __init__(self) -> None:
|
401 |
+
self._buf: bool = False
|
402 |
+
|
403 |
+
self._character_count_since_last_sep: int = 0
|
404 |
+
|
405 |
+
self._successive_upper_lower_count: int = 0
|
406 |
+
self._successive_upper_lower_count_final: int = 0
|
407 |
+
|
408 |
+
self._character_count: int = 0
|
409 |
+
|
410 |
+
self._last_alpha_seen: str | None = None
|
411 |
+
self._current_ascii_only: bool = True
|
412 |
+
|
413 |
+
def eligible(self, character: str) -> bool:
|
414 |
+
return True
|
415 |
+
|
416 |
+
def feed(self, character: str) -> None:
|
417 |
+
is_concerned = character.isalpha() and is_case_variable(character)
|
418 |
+
chunk_sep = is_concerned is False
|
419 |
+
|
420 |
+
if chunk_sep and self._character_count_since_last_sep > 0:
|
421 |
+
if (
|
422 |
+
self._character_count_since_last_sep <= 64
|
423 |
+
and character.isdigit() is False
|
424 |
+
and self._current_ascii_only is False
|
425 |
+
):
|
426 |
+
self._successive_upper_lower_count_final += (
|
427 |
+
self._successive_upper_lower_count
|
428 |
+
)
|
429 |
+
|
430 |
+
self._successive_upper_lower_count = 0
|
431 |
+
self._character_count_since_last_sep = 0
|
432 |
+
self._last_alpha_seen = None
|
433 |
+
self._buf = False
|
434 |
+
self._character_count += 1
|
435 |
+
self._current_ascii_only = True
|
436 |
+
|
437 |
+
return
|
438 |
+
|
439 |
+
if self._current_ascii_only is True and character.isascii() is False:
|
440 |
+
self._current_ascii_only = False
|
441 |
+
|
442 |
+
if self._last_alpha_seen is not None:
|
443 |
+
if (character.isupper() and self._last_alpha_seen.islower()) or (
|
444 |
+
character.islower() and self._last_alpha_seen.isupper()
|
445 |
+
):
|
446 |
+
if self._buf is True:
|
447 |
+
self._successive_upper_lower_count += 2
|
448 |
+
self._buf = False
|
449 |
+
else:
|
450 |
+
self._buf = True
|
451 |
+
else:
|
452 |
+
self._buf = False
|
453 |
+
|
454 |
+
self._character_count += 1
|
455 |
+
self._character_count_since_last_sep += 1
|
456 |
+
self._last_alpha_seen = character
|
457 |
+
|
458 |
+
def reset(self) -> None: # Abstract
|
459 |
+
self._character_count = 0
|
460 |
+
self._character_count_since_last_sep = 0
|
461 |
+
self._successive_upper_lower_count = 0
|
462 |
+
self._successive_upper_lower_count_final = 0
|
463 |
+
self._last_alpha_seen = None
|
464 |
+
self._buf = False
|
465 |
+
self._current_ascii_only = True
|
466 |
+
|
467 |
+
@property
|
468 |
+
def ratio(self) -> float:
|
469 |
+
if self._character_count == 0:
|
470 |
+
return 0.0
|
471 |
+
|
472 |
+
return self._successive_upper_lower_count_final / self._character_count
|
473 |
+
|
474 |
+
|
475 |
+
class ArabicIsolatedFormPlugin(MessDetectorPlugin):
|
476 |
+
def __init__(self) -> None:
|
477 |
+
self._character_count: int = 0
|
478 |
+
self._isolated_form_count: int = 0
|
479 |
+
|
480 |
+
def reset(self) -> None: # Abstract
|
481 |
+
self._character_count = 0
|
482 |
+
self._isolated_form_count = 0
|
483 |
+
|
484 |
+
def eligible(self, character: str) -> bool:
|
485 |
+
return is_arabic(character)
|
486 |
+
|
487 |
+
def feed(self, character: str) -> None:
|
488 |
+
self._character_count += 1
|
489 |
+
|
490 |
+
if is_arabic_isolated_form(character):
|
491 |
+
self._isolated_form_count += 1
|
492 |
+
|
493 |
+
@property
|
494 |
+
def ratio(self) -> float:
|
495 |
+
if self._character_count < 8:
|
496 |
+
return 0.0
|
497 |
+
|
498 |
+
isolated_form_usage: float = self._isolated_form_count / self._character_count
|
499 |
+
|
500 |
+
return isolated_form_usage
|
501 |
+
|
502 |
+
|
503 |
+
@lru_cache(maxsize=1024)
|
504 |
+
def is_suspiciously_successive_range(
|
505 |
+
unicode_range_a: str | None, unicode_range_b: str | None
|
506 |
+
) -> bool:
|
507 |
+
"""
|
508 |
+
Determine if two Unicode range seen next to each other can be considered as suspicious.
|
509 |
+
"""
|
510 |
+
if unicode_range_a is None or unicode_range_b is None:
|
511 |
+
return True
|
512 |
+
|
513 |
+
if unicode_range_a == unicode_range_b:
|
514 |
+
return False
|
515 |
+
|
516 |
+
if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
|
517 |
+
return False
|
518 |
+
|
519 |
+
if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
|
520 |
+
return False
|
521 |
+
|
522 |
+
# Latin characters can be accompanied with a combining diacritical mark
|
523 |
+
# eg. Vietnamese.
|
524 |
+
if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
|
525 |
+
"Combining" in unicode_range_a or "Combining" in unicode_range_b
|
526 |
+
):
|
527 |
+
return False
|
528 |
+
|
529 |
+
keywords_range_a, keywords_range_b = (
|
530 |
+
unicode_range_a.split(" "),
|
531 |
+
unicode_range_b.split(" "),
|
532 |
+
)
|
533 |
+
|
534 |
+
for el in keywords_range_a:
|
535 |
+
if el in UNICODE_SECONDARY_RANGE_KEYWORD:
|
536 |
+
continue
|
537 |
+
if el in keywords_range_b:
|
538 |
+
return False
|
539 |
+
|
540 |
+
# Japanese Exception
|
541 |
+
range_a_jp_chars, range_b_jp_chars = (
|
542 |
+
unicode_range_a
|
543 |
+
in (
|
544 |
+
"Hiragana",
|
545 |
+
"Katakana",
|
546 |
+
),
|
547 |
+
unicode_range_b in ("Hiragana", "Katakana"),
|
548 |
+
)
|
549 |
+
if (range_a_jp_chars or range_b_jp_chars) and (
|
550 |
+
"CJK" in unicode_range_a or "CJK" in unicode_range_b
|
551 |
+
):
|
552 |
+
return False
|
553 |
+
if range_a_jp_chars and range_b_jp_chars:
|
554 |
+
return False
|
555 |
+
|
556 |
+
if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
|
557 |
+
if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
|
558 |
+
return False
|
559 |
+
if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
|
560 |
+
return False
|
561 |
+
|
562 |
+
# Chinese/Japanese use dedicated range for punctuation and/or separators.
|
563 |
+
if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
|
564 |
+
unicode_range_a in ["Katakana", "Hiragana"]
|
565 |
+
and unicode_range_b in ["Katakana", "Hiragana"]
|
566 |
+
):
|
567 |
+
if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
|
568 |
+
return False
|
569 |
+
if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
|
570 |
+
return False
|
571 |
+
if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
|
572 |
+
return False
|
573 |
+
|
574 |
+
return True
|
575 |
+
|
576 |
+
|
577 |
+
@lru_cache(maxsize=2048)
|
578 |
+
def mess_ratio(
|
579 |
+
decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
|
580 |
+
) -> float:
|
581 |
+
"""
|
582 |
+
Compute a mess ratio given a decoded bytes sequence. The maximum threshold does stop the computation earlier.
|
583 |
+
"""
|
584 |
+
|
585 |
+
detectors: list[MessDetectorPlugin] = [
|
586 |
+
md_class() for md_class in MessDetectorPlugin.__subclasses__()
|
587 |
+
]
|
588 |
+
|
589 |
+
length: int = len(decoded_sequence) + 1
|
590 |
+
|
591 |
+
mean_mess_ratio: float = 0.0
|
592 |
+
|
593 |
+
if length < 512:
|
594 |
+
intermediary_mean_mess_ratio_calc: int = 32
|
595 |
+
elif length <= 1024:
|
596 |
+
intermediary_mean_mess_ratio_calc = 64
|
597 |
+
else:
|
598 |
+
intermediary_mean_mess_ratio_calc = 128
|
599 |
+
|
600 |
+
for character, index in zip(decoded_sequence + "\n", range(length)):
|
601 |
+
for detector in detectors:
|
602 |
+
if detector.eligible(character):
|
603 |
+
detector.feed(character)
|
604 |
+
|
605 |
+
if (
|
606 |
+
index > 0 and index % intermediary_mean_mess_ratio_calc == 0
|
607 |
+
) or index == length - 1:
|
608 |
+
mean_mess_ratio = sum(dt.ratio for dt in detectors)
|
609 |
+
|
610 |
+
if mean_mess_ratio >= maximum_threshold:
|
611 |
+
break
|
612 |
+
|
613 |
+
if debug:
|
614 |
+
logger = getLogger("charset_normalizer")
|
615 |
+
|
616 |
+
logger.log(
|
617 |
+
TRACE,
|
618 |
+
"Mess-detector extended-analysis start. "
|
619 |
+
f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
|
620 |
+
f"maximum_threshold={maximum_threshold}",
|
621 |
+
)
|
622 |
+
|
623 |
+
if len(decoded_sequence) > 16:
|
624 |
+
logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
|
625 |
+
logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")
|
626 |
+
|
627 |
+
for dt in detectors:
|
628 |
+
logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")
|
629 |
+
|
630 |
+
return round(mean_mess_ratio, 3)
|
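A short usage sketch for the mess detector defined above; mess_ratio() is the module's entry point, and the call below assumes the vendored package is importable:

# Usage sketch only; the sample strings are illustrative, not from the diff.
from charset_normalizer.md import mess_ratio

clean = "This is a perfectly ordinary English sentence."
noisy = "ÃÂ©ÃÂ¨ÃÂ§ â€™ â€œ ÃÂ±ÃÂ²"  # mojibake-looking input

print(mess_ratio(clean))                          # typically near 0.0 for clean text
print(mess_ratio(noisy, maximum_threshold=0.5))   # grows as plugins flag noise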
env/Lib/site-packages/charset_normalizer/models.py
ADDED
@@ -0,0 +1,360 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
from encodings.aliases import aliases
|
4 |
+
from hashlib import sha256
|
5 |
+
from json import dumps
|
6 |
+
from re import sub
|
7 |
+
from typing import Any, Iterator, List, Tuple
|
8 |
+
|
9 |
+
from .constant import RE_POSSIBLE_ENCODING_INDICATION, TOO_BIG_SEQUENCE
|
10 |
+
from .utils import iana_name, is_multi_byte_encoding, unicode_range
|
11 |
+
|
12 |
+
|
13 |
+
class CharsetMatch:
|
14 |
+
def __init__(
|
15 |
+
self,
|
16 |
+
payload: bytes,
|
17 |
+
guessed_encoding: str,
|
18 |
+
mean_mess_ratio: float,
|
19 |
+
has_sig_or_bom: bool,
|
20 |
+
languages: CoherenceMatches,
|
21 |
+
decoded_payload: str | None = None,
|
22 |
+
preemptive_declaration: str | None = None,
|
23 |
+
):
|
24 |
+
self._payload: bytes = payload
|
25 |
+
|
26 |
+
self._encoding: str = guessed_encoding
|
27 |
+
self._mean_mess_ratio: float = mean_mess_ratio
|
28 |
+
self._languages: CoherenceMatches = languages
|
29 |
+
self._has_sig_or_bom: bool = has_sig_or_bom
|
30 |
+
self._unicode_ranges: list[str] | None = None
|
31 |
+
|
32 |
+
self._leaves: list[CharsetMatch] = []
|
33 |
+
self._mean_coherence_ratio: float = 0.0
|
34 |
+
|
35 |
+
self._output_payload: bytes | None = None
|
36 |
+
self._output_encoding: str | None = None
|
37 |
+
|
38 |
+
self._string: str | None = decoded_payload
|
39 |
+
|
40 |
+
self._preemptive_declaration: str | None = preemptive_declaration
|
41 |
+
|
42 |
+
def __eq__(self, other: object) -> bool:
|
43 |
+
if not isinstance(other, CharsetMatch):
|
44 |
+
if isinstance(other, str):
|
45 |
+
return iana_name(other) == self.encoding
|
46 |
+
return False
|
47 |
+
return self.encoding == other.encoding and self.fingerprint == other.fingerprint
|
48 |
+
|
49 |
+
def __lt__(self, other: object) -> bool:
|
50 |
+
"""
|
51 |
+
Implemented to make sorted available upon CharsetMatches items.
|
52 |
+
"""
|
53 |
+
if not isinstance(other, CharsetMatch):
|
54 |
+
raise ValueError
|
55 |
+
|
56 |
+
chaos_difference: float = abs(self.chaos - other.chaos)
|
57 |
+
coherence_difference: float = abs(self.coherence - other.coherence)
|
58 |
+
|
59 |
+
# Below 1% difference --> Use Coherence
|
60 |
+
if chaos_difference < 0.01 and coherence_difference > 0.02:
|
61 |
+
return self.coherence > other.coherence
|
62 |
+
elif chaos_difference < 0.01 and coherence_difference <= 0.02:
|
63 |
+
# When having a difficult decision, use the result that decoded as many multi-byte as possible.
|
64 |
+
# preserve RAM usage!
|
65 |
+
if len(self._payload) >= TOO_BIG_SEQUENCE:
|
66 |
+
return self.chaos < other.chaos
|
67 |
+
return self.multi_byte_usage > other.multi_byte_usage
|
68 |
+
|
69 |
+
return self.chaos < other.chaos
|
70 |
+
|
71 |
+
@property
|
72 |
+
def multi_byte_usage(self) -> float:
|
73 |
+
return 1.0 - (len(str(self)) / len(self.raw))
|
74 |
+
|
75 |
+
def __str__(self) -> str:
|
76 |
+
# Lazy Str Loading
|
77 |
+
if self._string is None:
|
78 |
+
self._string = str(self._payload, self._encoding, "strict")
|
79 |
+
return self._string
|
80 |
+
|
81 |
+
def __repr__(self) -> str:
|
82 |
+
return f"<CharsetMatch '{self.encoding}' bytes({self.fingerprint})>"
|
83 |
+
|
84 |
+
def add_submatch(self, other: CharsetMatch) -> None:
|
85 |
+
if not isinstance(other, CharsetMatch) or other == self:
|
86 |
+
raise ValueError(
|
87 |
+
"Unable to add instance <{}> as a submatch of a CharsetMatch".format(
|
88 |
+
other.__class__
|
89 |
+
)
|
90 |
+
)
|
91 |
+
|
92 |
+
other._string = None # Unload RAM usage; dirty trick.
|
93 |
+
self._leaves.append(other)
|
94 |
+
|
95 |
+
@property
|
96 |
+
def encoding(self) -> str:
|
97 |
+
return self._encoding
|
98 |
+
|
99 |
+
@property
|
100 |
+
def encoding_aliases(self) -> list[str]:
|
101 |
+
"""
|
102 |
+
Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
|
103 |
+
"""
|
104 |
+
also_known_as: list[str] = []
|
105 |
+
for u, p in aliases.items():
|
106 |
+
if self.encoding == u:
|
107 |
+
also_known_as.append(p)
|
108 |
+
elif self.encoding == p:
|
109 |
+
also_known_as.append(u)
|
110 |
+
return also_known_as
|
111 |
+
|
112 |
+
@property
|
113 |
+
def bom(self) -> bool:
|
114 |
+
return self._has_sig_or_bom
|
115 |
+
|
116 |
+
@property
|
117 |
+
def byte_order_mark(self) -> bool:
|
118 |
+
return self._has_sig_or_bom
|
119 |
+
|
120 |
+
@property
|
121 |
+
def languages(self) -> list[str]:
|
122 |
+
"""
|
123 |
+
Return the complete list of possible languages found in decoded sequence.
|
124 |
+
Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
|
125 |
+
"""
|
126 |
+
return [e[0] for e in self._languages]
|
127 |
+
|
128 |
+
@property
|
129 |
+
def language(self) -> str:
|
130 |
+
"""
|
131 |
+
Most probable language found in decoded sequence. If none were detected or inferred, the property will return
|
132 |
+
"Unknown".
|
133 |
+
"""
|
134 |
+
if not self._languages:
|
135 |
+
# Trying to infer the language based on the given encoding
|
136 |
+
# Its either English or we should not pronounce ourselves in certain cases.
|
137 |
+
if "ascii" in self.could_be_from_charset:
|
138 |
+
return "English"
|
139 |
+
|
140 |
+
# doing it there to avoid circular import
|
141 |
+
from charset_normalizer.cd import encoding_languages, mb_encoding_languages
|
142 |
+
|
143 |
+
languages = (
|
144 |
+
mb_encoding_languages(self.encoding)
|
145 |
+
if is_multi_byte_encoding(self.encoding)
|
146 |
+
else encoding_languages(self.encoding)
|
147 |
+
)
|
148 |
+
|
149 |
+
if len(languages) == 0 or "Latin Based" in languages:
|
150 |
+
return "Unknown"
|
151 |
+
|
152 |
+
return languages[0]
|
153 |
+
|
154 |
+
return self._languages[0][0]
|
155 |
+
|
156 |
+
@property
|
157 |
+
def chaos(self) -> float:
|
158 |
+
return self._mean_mess_ratio
|
159 |
+
|
160 |
+
@property
|
161 |
+
def coherence(self) -> float:
|
162 |
+
if not self._languages:
|
163 |
+
return 0.0
|
164 |
+
return self._languages[0][1]
|
165 |
+
|
166 |
+
@property
|
167 |
+
def percent_chaos(self) -> float:
|
168 |
+
return round(self.chaos * 100, ndigits=3)
|
169 |
+
|
170 |
+
@property
|
171 |
+
def percent_coherence(self) -> float:
|
172 |
+
return round(self.coherence * 100, ndigits=3)
|
173 |
+
|
174 |
+
@property
|
175 |
+
def raw(self) -> bytes:
|
176 |
+
"""
|
177 |
+
Original untouched bytes.
|
178 |
+
"""
|
179 |
+
return self._payload
|
180 |
+
|
181 |
+
@property
|
182 |
+
def submatch(self) -> list[CharsetMatch]:
|
183 |
+
return self._leaves
|
184 |
+
|
185 |
+
@property
|
186 |
+
def has_submatch(self) -> bool:
|
187 |
+
return len(self._leaves) > 0
|
188 |
+
|
189 |
+
@property
|
190 |
+
def alphabets(self) -> list[str]:
|
191 |
+
if self._unicode_ranges is not None:
|
192 |
+
return self._unicode_ranges
|
193 |
+
# list detected ranges
|
194 |
+
detected_ranges: list[str | None] = [unicode_range(char) for char in str(self)]
|
195 |
+
# filter and sort
|
196 |
+
self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
|
197 |
+
return self._unicode_ranges
|
198 |
+
|
199 |
+
@property
|
200 |
+
def could_be_from_charset(self) -> list[str]:
|
201 |
+
"""
|
202 |
+
The complete list of encoding that output the exact SAME str result and therefore could be the originating
|
203 |
+
encoding.
|
204 |
+
This list does include the encoding available in property 'encoding'.
|
205 |
+
"""
|
206 |
+
return [self._encoding] + [m.encoding for m in self._leaves]
|
207 |
+
|
208 |
+
def output(self, encoding: str = "utf_8") -> bytes:
|
209 |
+
"""
|
210 |
+
Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
|
211 |
+
Any errors will be simply ignored by the encoder NOT replaced.
|
212 |
+
"""
|
213 |
+
if self._output_encoding is None or self._output_encoding != encoding:
|
214 |
+
self._output_encoding = encoding
|
215 |
+
decoded_string = str(self)
|
216 |
+
if (
|
217 |
+
self._preemptive_declaration is not None
|
218 |
+
and self._preemptive_declaration.lower()
|
219 |
+
not in ["utf-8", "utf8", "utf_8"]
|
220 |
+
):
|
221 |
+
patched_header = sub(
|
222 |
+
RE_POSSIBLE_ENCODING_INDICATION,
|
223 |
+
lambda m: m.string[m.span()[0] : m.span()[1]].replace(
|
224 |
+
m.groups()[0],
|
225 |
+
iana_name(self._output_encoding).replace("_", "-"), # type: ignore[arg-type]
|
226 |
+
),
|
227 |
+
decoded_string[:8192],
|
228 |
+
count=1,
|
229 |
+
)
|
230 |
+
|
231 |
+
decoded_string = patched_header + decoded_string[8192:]
|
232 |
+
|
233 |
+
self._output_payload = decoded_string.encode(encoding, "replace")
|
234 |
+
|
235 |
+
return self._output_payload # type: ignore
|
236 |
+
|
237 |
+
@property
|
238 |
+
def fingerprint(self) -> str:
|
239 |
+
"""
|
240 |
+
Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
|
241 |
+
"""
|
242 |
+
return sha256(self.output()).hexdigest()
|
243 |
+
|
244 |
+
|
245 |
+
class CharsetMatches:
|
246 |
+
"""
|
247 |
+
Container with every CharsetMatch items ordered by default from most probable to the less one.
|
248 |
+
Act like a list(iterable) but does not implements all related methods.
|
249 |
+
"""
|
250 |
+
|
251 |
+
def __init__(self, results: list[CharsetMatch] | None = None):
|
252 |
+
self._results: list[CharsetMatch] = sorted(results) if results else []
|
253 |
+
|
254 |
+
def __iter__(self) -> Iterator[CharsetMatch]:
|
255 |
+
yield from self._results
|
256 |
+
|
257 |
+
def __getitem__(self, item: int | str) -> CharsetMatch:
|
258 |
+
"""
|
259 |
+
Retrieve a single item either by its position or encoding name (alias may be used here).
|
260 |
+
Raise KeyError upon invalid index or encoding not present in results.
|
261 |
+
"""
|
262 |
+
if isinstance(item, int):
|
263 |
+
return self._results[item]
|
264 |
+
if isinstance(item, str):
|
265 |
+
item = iana_name(item, False)
|
266 |
+
for result in self._results:
|
267 |
+
if item in result.could_be_from_charset:
|
268 |
+
return result
|
269 |
+
raise KeyError
|
270 |
+
|
271 |
+
def __len__(self) -> int:
|
272 |
+
return len(self._results)
|
273 |
+
|
274 |
+
def __bool__(self) -> bool:
|
275 |
+
return len(self._results) > 0
|
276 |
+
|
277 |
+
def append(self, item: CharsetMatch) -> None:
|
278 |
+
"""
|
279 |
+
Insert a single match. Will be inserted accordingly to preserve sort.
|
280 |
+
Can be inserted as a submatch.
|
281 |
+
"""
|
282 |
+
if not isinstance(item, CharsetMatch):
|
283 |
+
raise ValueError(
|
284 |
+
"Cannot append instance '{}' to CharsetMatches".format(
|
285 |
+
str(item.__class__)
|
286 |
+
)
|
287 |
+
)
|
288 |
+
# We should disable the submatch factoring when the input file is too heavy (conserve RAM usage)
|
289 |
+
if len(item.raw) < TOO_BIG_SEQUENCE:
|
290 |
+
for match in self._results:
|
291 |
+
if match.fingerprint == item.fingerprint and match.chaos == item.chaos:
|
292 |
+
match.add_submatch(item)
|
293 |
+
return
|
294 |
+
self._results.append(item)
|
295 |
+
self._results = sorted(self._results)
|
296 |
+
|
297 |
+
def best(self) -> CharsetMatch | None:
|
298 |
+
"""
|
299 |
+
Simply return the first match. Strict equivalent to matches[0].
|
300 |
+
"""
|
301 |
+
if not self._results:
|
302 |
+
return None
|
303 |
+
return self._results[0]
|
304 |
+
|
305 |
+
def first(self) -> CharsetMatch | None:
|
306 |
+
"""
|
307 |
+
Redundant method, call the method best(). Kept for BC reasons.
|
308 |
+
"""
|
309 |
+
return self.best()
|
310 |
+
|
311 |
+
|
312 |
+
CoherenceMatch = Tuple[str, float]
|
313 |
+
CoherenceMatches = List[CoherenceMatch]
|
314 |
+
|
315 |
+
|
316 |
+
class CliDetectionResult:
|
317 |
+
def __init__(
|
318 |
+
self,
|
319 |
+
path: str,
|
320 |
+
encoding: str | None,
|
321 |
+
encoding_aliases: list[str],
|
322 |
+
alternative_encodings: list[str],
|
323 |
+
language: str,
|
324 |
+
alphabets: list[str],
|
325 |
+
has_sig_or_bom: bool,
|
326 |
+
chaos: float,
|
327 |
+
coherence: float,
|
328 |
+
unicode_path: str | None,
|
329 |
+
is_preferred: bool,
|
330 |
+
):
|
331 |
+
self.path: str = path
|
332 |
+
self.unicode_path: str | None = unicode_path
|
333 |
+
self.encoding: str | None = encoding
|
334 |
+
self.encoding_aliases: list[str] = encoding_aliases
|
335 |
+
self.alternative_encodings: list[str] = alternative_encodings
|
336 |
+
self.language: str = language
|
337 |
+
self.alphabets: list[str] = alphabets
|
338 |
+
self.has_sig_or_bom: bool = has_sig_or_bom
|
339 |
+
self.chaos: float = chaos
|
340 |
+
self.coherence: float = coherence
|
341 |
+
self.is_preferred: bool = is_preferred
|
342 |
+
|
343 |
+
@property
|
344 |
+
def __dict__(self) -> dict[str, Any]: # type: ignore
|
345 |
+
return {
|
346 |
+
"path": self.path,
|
347 |
+
"encoding": self.encoding,
|
348 |
+
"encoding_aliases": self.encoding_aliases,
|
349 |
+
"alternative_encodings": self.alternative_encodings,
|
350 |
+
"language": self.language,
|
351 |
+
"alphabets": self.alphabets,
|
352 |
+
"has_sig_or_bom": self.has_sig_or_bom,
|
353 |
+
"chaos": self.chaos,
|
354 |
+
"coherence": self.coherence,
|
355 |
+
"unicode_path": self.unicode_path,
|
356 |
+
"is_preferred": self.is_preferred,
|
357 |
+
}
|
358 |
+
|
359 |
+
def to_json(self) -> str:
|
360 |
+
return dumps(self.__dict__, ensure_ascii=True, indent=4)
|
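A brief sketch of how the CharsetMatch / CharsetMatches containers above are typically consumed through from_bytes(), the API imported earlier in this diff (again assuming the vendored package is importable):

# Usage sketch only; the sample text is illustrative.
from charset_normalizer import from_bytes

matches = from_bytes("Bonjour, ceci est un petit test de détection.".encode("utf_8"))

best = matches.best()          # CharsetMatches.best() -> CharsetMatch | None
if best is not None:
    print(best.encoding)       # e.g. a utf_8 guess
    print(best.language)       # most probable language, or "Unknown"
    print(best.percent_chaos)  # mess ratio as a percentage
    print(str(best)[:40])      # decoded payload via CharsetMatch.__str__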
env/Lib/site-packages/charset_normalizer/py.typed
ADDED
File without changes
|
env/Lib/site-packages/charset_normalizer/utils.py
ADDED
@@ -0,0 +1,408 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import importlib
|
4 |
+
import logging
|
5 |
+
import unicodedata
|
6 |
+
from codecs import IncrementalDecoder
|
7 |
+
from encodings.aliases import aliases
|
8 |
+
from functools import lru_cache
|
9 |
+
from re import findall
|
10 |
+
from typing import Generator
|
11 |
+
|
12 |
+
from _multibytecodec import ( # type: ignore[import-not-found,import]
|
13 |
+
MultibyteIncrementalDecoder,
|
14 |
+
)
|
15 |
+
|
16 |
+
from .constant import (
|
17 |
+
ENCODING_MARKS,
|
18 |
+
IANA_SUPPORTED_SIMILAR,
|
19 |
+
RE_POSSIBLE_ENCODING_INDICATION,
|
20 |
+
UNICODE_RANGES_COMBINED,
|
21 |
+
UNICODE_SECONDARY_RANGE_KEYWORD,
|
22 |
+
UTF8_MAXIMAL_ALLOCATION,
|
23 |
+
)
|
24 |
+
|
25 |
+
|
26 |
+
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
27 |
+
def is_accentuated(character: str) -> bool:
|
28 |
+
try:
|
29 |
+
description: str = unicodedata.name(character)
|
30 |
+
except ValueError: # Defensive: unicode database outdated?
|
31 |
+
return False
|
32 |
+
return (
|
33 |
+
"WITH GRAVE" in description
|
34 |
+
or "WITH ACUTE" in description
|
35 |
+
or "WITH CEDILLA" in description
|
36 |
+
or "WITH DIAERESIS" in description
|
37 |
+
or "WITH CIRCUMFLEX" in description
|
38 |
+
or "WITH TILDE" in description
|
39 |
+
or "WITH MACRON" in description
|
40 |
+
or "WITH RING ABOVE" in description
|
41 |
+
)
|
42 |
+
|
43 |
+
|
44 |
+
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
45 |
+
def remove_accent(character: str) -> str:
|
46 |
+
decomposed: str = unicodedata.decomposition(character)
|
47 |
+
if not decomposed:
|
48 |
+
return character
|
49 |
+
|
50 |
+
codes: list[str] = decomposed.split(" ")
|
51 |
+
|
52 |
+
return chr(int(codes[0], 16))
|
53 |
+
|
54 |
+
|
55 |
+
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
56 |
+
def unicode_range(character: str) -> str | None:
|
57 |
+
"""
|
58 |
+
Retrieve the Unicode range official name from a single character.
|
59 |
+
"""
|
60 |
+
character_ord: int = ord(character)
|
61 |
+
|
62 |
+
for range_name, ord_range in UNICODE_RANGES_COMBINED.items():
|
63 |
+
if character_ord in ord_range:
|
64 |
+
return range_name
|
65 |
+
|
66 |
+
return None
|
67 |
+
|
68 |
+
|
69 |
+
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
70 |
+
def is_latin(character: str) -> bool:
|
71 |
+
try:
|
72 |
+
description: str = unicodedata.name(character)
|
73 |
+
except ValueError: # Defensive: unicode database outdated?
|
74 |
+
return False
|
75 |
+
return "LATIN" in description
|
76 |
+
|
77 |
+
|
78 |
+
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
79 |
+
def is_punctuation(character: str) -> bool:
|
80 |
+
character_category: str = unicodedata.category(character)
|
81 |
+
|
82 |
+
if "P" in character_category:
|
83 |
+
return True
|
84 |
+
|
85 |
+
character_range: str | None = unicode_range(character)
|
86 |
+
|
87 |
+
if character_range is None:
|
88 |
+
return False
|
89 |
+
|
90 |
+
return "Punctuation" in character_range
|
91 |
+
|
92 |
+
|
93 |
+
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
|
94 |
+
def is_symbol(character: str) -> bool:
|
95 |
+
character_category: str = unicodedata.category(character)
|
96 |
+
|
97 |
+
if "S" in character_category or "N" in character_category:
|
98 |
+
return True
|
99 |
+
|
100 |
+
character_range: str | None = unicode_range(character)
|
101 |
+
|
102 |
+
    if character_range is None:
        return False

    return "Forms" in character_range and character_category != "Lo"


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_emoticon(character: str) -> bool:
    character_range: str | None = unicode_range(character)

    if character_range is None:
        return False

    return "Emoticons" in character_range or "Pictographs" in character_range


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_separator(character: str) -> bool:
    if character.isspace() or character in {"|", "+", "<", ">"}:
        return True

    character_category: str = unicodedata.category(character)

    return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_case_variable(character: str) -> bool:
    return character.islower() != character.isupper()


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_cjk(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False

    return "CJK" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hiragana(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False

    return "HIRAGANA" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_katakana(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False

    return "KATAKANA" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hangul(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False

    return "HANGUL" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_thai(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False

    return "THAI" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_arabic(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False

    return "ARABIC" in character_name


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_arabic_isolated_form(character: str) -> bool:
    try:
        character_name = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False

    return "ARABIC" in character_name and "ISOLATED FORM" in character_name


@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
def is_unicode_range_secondary(range_name: str) -> bool:
    return any(keyword in range_name for keyword in UNICODE_SECONDARY_RANGE_KEYWORD)


@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_unprintable(character: str) -> bool:
    return (
        character.isspace() is False  # includes \n \t \r \v
        and character.isprintable() is False
        and character != "\x1a"  # Why? It's the ASCII substitute character.
        and character != "\ufeff"  # bug discovered in Python,
        # Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space.
    )


def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> str | None:
    """
    Extract, using an ASCII-only decoder, any specified encoding in the first n bytes.
    """
    if not isinstance(sequence, bytes):
        raise TypeError

    seq_len: int = len(sequence)

    results: list[str] = findall(
        RE_POSSIBLE_ENCODING_INDICATION,
        sequence[: min(seq_len, search_zone)].decode("ascii", errors="ignore"),
    )

    if len(results) == 0:
        return None

    for specified_encoding in results:
        specified_encoding = specified_encoding.lower().replace("-", "_")

        encoding_alias: str
        encoding_iana: str

        for encoding_alias, encoding_iana in aliases.items():
            if encoding_alias == specified_encoding:
                return encoding_iana
            if encoding_iana == specified_encoding:
                return encoding_iana

    return None


@lru_cache(maxsize=128)
def is_multi_byte_encoding(name: str) -> bool:
    """
    Verify whether a specific encoding is a multi-byte one, based on its IANA name.
    """
    return name in {
        "utf_8",
        "utf_8_sig",
        "utf_16",
        "utf_16_be",
        "utf_16_le",
        "utf_32",
        "utf_32_le",
        "utf_32_be",
        "utf_7",
    } or issubclass(
        importlib.import_module(f"encodings.{name}").IncrementalDecoder,
        MultibyteIncrementalDecoder,
    )


def identify_sig_or_bom(sequence: bytes) -> tuple[str | None, bytes]:
    """
    Identify and extract SIG/BOM in given sequence.
    """

    for iana_encoding in ENCODING_MARKS:
        marks: bytes | list[bytes] = ENCODING_MARKS[iana_encoding]

        if isinstance(marks, bytes):
            marks = [marks]

        for mark in marks:
            if sequence.startswith(mark):
                return iana_encoding, mark

    return None, b""


def should_strip_sig_or_bom(iana_encoding: str) -> bool:
    return iana_encoding not in {"utf_16", "utf_32"}


def iana_name(cp_name: str, strict: bool = True) -> str:
    """Returns the Python normalized encoding name (not the IANA official name)."""
    cp_name = cp_name.lower().replace("-", "_")

    encoding_alias: str
    encoding_iana: str

    for encoding_alias, encoding_iana in aliases.items():
        if cp_name in [encoding_alias, encoding_iana]:
            return encoding_iana

    if strict:
        raise ValueError(f"Unable to retrieve IANA for '{cp_name}'")

    return cp_name


def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
    if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
        return 0.0

    decoder_a = importlib.import_module(f"encodings.{iana_name_a}").IncrementalDecoder
    decoder_b = importlib.import_module(f"encodings.{iana_name_b}").IncrementalDecoder

    id_a: IncrementalDecoder = decoder_a(errors="ignore")
    id_b: IncrementalDecoder = decoder_b(errors="ignore")

    character_match_count: int = 0

    for i in range(255):
        to_be_decoded: bytes = bytes([i])
        if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded):
            character_match_count += 1

    return character_match_count / 254


def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool:
    """
    Determine if two code pages are at least 80% similar. The IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    """
    return (
        iana_name_a in IANA_SUPPORTED_SIMILAR
        and iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a]
    )


def set_logging_handler(
    name: str = "charset_normalizer",
    level: int = logging.INFO,
    format_string: str = "%(asctime)s | %(levelname)s | %(message)s",
) -> None:
    logger = logging.getLogger(name)
    logger.setLevel(level)

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(format_string))
    logger.addHandler(handler)


def cut_sequence_chunks(
    sequences: bytes,
    encoding_iana: str,
    offsets: range,
    chunk_size: int,
    bom_or_sig_available: bool,
    strip_sig_or_bom: bool,
    sig_payload: bytes,
    is_multi_byte_decoder: bool,
    decoded_payload: str | None = None,
) -> Generator[str, None, None]:
    if decoded_payload and is_multi_byte_decoder is False:
        for i in offsets:
            chunk = decoded_payload[i : i + chunk_size]
            if not chunk:
                break
            yield chunk
    else:
        for i in offsets:
            chunk_end = i + chunk_size
            if chunk_end > len(sequences) + 8:
                continue

            cut_sequence = sequences[i : i + chunk_size]

            if bom_or_sig_available and strip_sig_or_bom is False:
                cut_sequence = sig_payload + cut_sequence

            chunk = cut_sequence.decode(
                encoding_iana,
                errors="ignore" if is_multi_byte_decoder else "strict",
            )

            # multi-byte bad cutting detector and adjustment
            # not the cleanest way to perform that fix but clever enough for now.
            if is_multi_byte_decoder and i > 0:
                chunk_partial_size_chk: int = min(chunk_size, 16)

                if (
                    decoded_payload
                    and chunk[:chunk_partial_size_chk] not in decoded_payload
                ):
                    for j in range(i, i - 4, -1):
                        cut_sequence = sequences[j:chunk_end]

                        if bom_or_sig_available and strip_sig_or_bom is False:
                            cut_sequence = sig_payload + cut_sequence

                        chunk = cut_sequence.decode(encoding_iana, errors="ignore")

                        if chunk[:chunk_partial_size_chk] in decoded_payload:
                            break

            yield chunk
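
A minimal usage sketch of the helpers defined above (not part of the uploaded file; it assumes the bundled charset_normalizer 3.4.1 in this env is importable):

# Quick sanity checks against charset_normalizer's utility helpers
# (assumption: the vendored package above is on sys.path as `charset_normalizer`).
from charset_normalizer.utils import (
    any_specified_encoding,
    iana_name,
    identify_sig_or_bom,
    is_cjk,
)

# BOM/signature detection: returns the codec name and the raw mark.
print(identify_sig_or_bom(b"\xef\xbb\xbfhello"))  # ('utf_8', b'\xef\xbb\xbf')

# Codec label normalization to Python's canonical (non-IANA) name.
print(iana_name("UTF-8"))  # 'utf_8'

# Declared-charset extraction from an ASCII-decodable prologue.
print(any_specified_encoding(b'<?xml version="1.0" encoding="ISO-8859-1"?>'))  # 'latin_1', via Python's codec aliases

# Character-class helper used by the detection heuristics.
print(is_cjk("\u4e2d"))  # True
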
env/Lib/site-packages/charset_normalizer/version.py
ADDED
@@ -0,0 +1,8 @@
"""
Expose version
"""

from __future__ import annotations

__version__ = "3.4.1"
VERSION = __version__.split(".")
env/Lib/site-packages/colorama-0.4.6.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
pip
env/Lib/site-packages/colorama-0.4.6.dist-info/METADATA
ADDED
@@ -0,0 +1,441 @@
Metadata-Version: 2.1
Name: colorama
Version: 0.4.6
Summary: Cross-platform colored terminal text.
Project-URL: Homepage, https://github.com/tartley/colorama
Author-email: Jonathan Hartley <[email protected]>
License-File: LICENSE.txt
Keywords: ansi,color,colour,crossplatform,terminal,text,windows,xplatform
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Terminals
Requires-Python: !=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7
Description-Content-Type: text/x-rst

.. image:: https://img.shields.io/pypi/v/colorama.svg
   :target: https://pypi.org/project/colorama/
   :alt: Latest Version

.. image:: https://img.shields.io/pypi/pyversions/colorama.svg
   :target: https://pypi.org/project/colorama/
   :alt: Supported Python versions

.. image:: https://github.com/tartley/colorama/actions/workflows/test.yml/badge.svg
   :target: https://github.com/tartley/colorama/actions/workflows/test.yml
   :alt: Build Status

Colorama
========

Makes ANSI escape character sequences (for producing colored terminal text and
cursor positioning) work under MS Windows.

.. |donate| image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_SM.gif
   :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=2MZ9D2GMLYCUJ&item_name=Colorama&currency_code=USD
   :alt: Donate with Paypal

`PyPI for releases <https://pypi.org/project/colorama/>`_ |
`Github for source <https://github.com/tartley/colorama>`_ |
`Colorama for enterprise on Tidelift <https://github.com/tartley/colorama/blob/master/ENTERPRISE.md>`_

If you find Colorama useful, please |donate| to the authors. Thank you!

Installation
------------

Tested on CPython 2.7, 3.7, 3.8, 3.9 and 3.10 and Pypy 2.7 and 3.8.

No requirements other than the standard library.

.. code-block:: bash

    pip install colorama
    # or
    conda install -c anaconda colorama

Description
-----------

ANSI escape character sequences have long been used to produce colored terminal
text and cursor positioning on Unix and Macs. Colorama makes this work on
Windows, too, by wrapping ``stdout``, stripping ANSI sequences it finds (which
would appear as gobbledygook in the output), and converting them into the
appropriate win32 calls to modify the state of the terminal. On other platforms,
Colorama does nothing.

This has the upshot of providing a simple cross-platform API for printing
colored terminal text from Python, and has the happy side-effect that existing
applications or libraries which use ANSI sequences to produce colored output on
Linux or Macs can now also work on Windows, simply by calling
``colorama.just_fix_windows_console()`` (since v0.4.6) or ``colorama.init()``
(all versions, but may have other side-effects – see below).

An alternative approach is to install ``ansi.sys`` on Windows machines, which
provides the same behaviour for all applications running in terminals. Colorama
is intended for situations where that isn't easy (e.g., maybe your app doesn't
have an installer.)

Demo scripts in the source code repository print some colored text using
ANSI sequences. Compare their output under Gnome-terminal's built in ANSI
handling, versus on Windows Command-Prompt using Colorama:

.. image:: https://github.com/tartley/colorama/raw/master/screenshots/ubuntu-demo.png
   :width: 661
   :height: 357
   :alt: ANSI sequences on Ubuntu under gnome-terminal.

.. image:: https://github.com/tartley/colorama/raw/master/screenshots/windows-demo.png
   :width: 668
   :height: 325
   :alt: Same ANSI sequences on Windows, using Colorama.

These screenshots show that, on Windows, Colorama does not support ANSI 'dim
text'; it looks the same as 'normal text'.

Usage
-----

Initialisation
..............

If the only thing you want from Colorama is to get ANSI escapes to work on
Windows, then run:

.. code-block:: python

    from colorama import just_fix_windows_console
    just_fix_windows_console()

If you're on a recent version of Windows 10 or better, and your stdout/stderr
are pointing to a Windows console, then this will flip the magic configuration
switch to enable Windows' built-in ANSI support.

If you're on an older version of Windows, and your stdout/stderr are pointing to
a Windows console, then this will wrap ``sys.stdout`` and/or ``sys.stderr`` in a
magic file object that intercepts ANSI escape sequences and issues the
appropriate Win32 calls to emulate them.

In all other circumstances, it does nothing whatsoever. Basically the idea is
that this makes Windows act like Unix with respect to ANSI escape handling.

It's safe to call this function multiple times. It's safe to call this function
on non-Windows platforms, but it won't do anything. It's safe to call this
function when one or both of your stdout/stderr are redirected to a file – it
won't do anything to those streams.

Alternatively, you can use the older interface with more features (but also more
potential footguns):

.. code-block:: python

    from colorama import init
    init()

This does the same thing as ``just_fix_windows_console``, except for the
following differences:

- It's not safe to call ``init`` multiple times; you can end up with multiple
  layers of wrapping and broken ANSI support.

- Colorama will apply a heuristic to guess whether stdout/stderr support ANSI,
  and if it thinks they don't, then it will wrap ``sys.stdout`` and
  ``sys.stderr`` in a magic file object that strips out ANSI escape sequences
  before printing them. This happens on all platforms, and can be convenient if
  you want to write your code to emit ANSI escape sequences unconditionally, and
  let Colorama decide whether they should actually be output. But note that
  Colorama's heuristic is not particularly clever.

- ``init`` also accepts explicit keyword args to enable/disable various
  functionality – see below.

To stop using Colorama before your program exits, simply call ``deinit()``.
This will restore ``stdout`` and ``stderr`` to their original values, so that
Colorama is disabled. To resume using Colorama again, call ``reinit()``; it is
cheaper than calling ``init()`` again (but does the same thing).

Most users should depend on ``colorama >= 0.4.6``, and use
``just_fix_windows_console``. The old ``init`` interface will be supported
indefinitely for backwards compatibility, but we don't plan to fix any issues
with it, also for backwards compatibility.

Colored Output
..............

Cross-platform printing of colored text can then be done using Colorama's
constant shorthand for ANSI escape sequences. These are deliberately
rudimentary, see below.

.. code-block:: python

    from colorama import Fore, Back, Style
    print(Fore.RED + 'some red text')
    print(Back.GREEN + 'and with a green background')
    print(Style.DIM + 'and in dim text')
    print(Style.RESET_ALL)
    print('back to normal now')

...or simply by manually printing ANSI sequences from your own code:

.. code-block:: python

    print('\033[31m' + 'some red text')
    print('\033[39m') # and reset to default color

...or, Colorama can be used in conjunction with existing ANSI libraries
such as the venerable `Termcolor <https://pypi.org/project/termcolor/>`_
the fabulous `Blessings <https://pypi.org/project/blessings/>`_,
or the incredible `_Rich <https://pypi.org/project/rich/>`_.

If you wish Colorama's Fore, Back and Style constants were more capable,
then consider using one of the above highly capable libraries to generate
colors, etc, and use Colorama just for its primary purpose: to convert
those ANSI sequences to also work on Windows:

SIMILARLY, do not send PRs adding the generation of new ANSI types to Colorama.
We are only interested in converting ANSI codes to win32 API calls, not
shortcuts like the above to generate ANSI characters.

.. code-block:: python

    from colorama import just_fix_windows_console
    from termcolor import colored

    # use Colorama to make Termcolor work on Windows too
    just_fix_windows_console()

    # then use Termcolor for all colored text output
    print(colored('Hello, World!', 'green', 'on_red'))

Available formatting constants are::

    Fore: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET.
    Back: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET.
    Style: DIM, NORMAL, BRIGHT, RESET_ALL

``Style.RESET_ALL`` resets foreground, background, and brightness. Colorama will
perform this reset automatically on program exit.

These are fairly well supported, but not part of the standard::

    Fore: LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX, LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX
    Back: LIGHTBLACK_EX, LIGHTRED_EX, LIGHTGREEN_EX, LIGHTYELLOW_EX, LIGHTBLUE_EX, LIGHTMAGENTA_EX, LIGHTCYAN_EX, LIGHTWHITE_EX

Cursor Positioning
..................

ANSI codes to reposition the cursor are supported. See ``demos/demo06.py`` for
an example of how to generate them.

Init Keyword Args
.................

``init()`` accepts some ``**kwargs`` to override default behaviour.

init(autoreset=False):
    If you find yourself repeatedly sending reset sequences to turn off color
    changes at the end of every print, then ``init(autoreset=True)`` will
    automate that:

    .. code-block:: python

        from colorama import init
        init(autoreset=True)
        print(Fore.RED + 'some red text')
        print('automatically back to default color again')

init(strip=None):
    Pass ``True`` or ``False`` to override whether ANSI codes should be
    stripped from the output. The default behaviour is to strip if on Windows
    or if output is redirected (not a tty).

init(convert=None):
    Pass ``True`` or ``False`` to override whether to convert ANSI codes in the
    output into win32 calls. The default behaviour is to convert if on Windows
    and output is to a tty (terminal).

init(wrap=True):
    On Windows, Colorama works by replacing ``sys.stdout`` and ``sys.stderr``
    with proxy objects, which override the ``.write()`` method to do their work.
    If this wrapping causes you problems, then this can be disabled by passing
    ``init(wrap=False)``. The default behaviour is to wrap if ``autoreset`` or
    ``strip`` or ``convert`` are True.

    When wrapping is disabled, colored printing on non-Windows platforms will
    continue to work as normal. To do cross-platform colored output, you can
    use Colorama's ``AnsiToWin32`` proxy directly:

    .. code-block:: python

        import sys
        from colorama import init, AnsiToWin32
        init(wrap=False)
        stream = AnsiToWin32(sys.stderr).stream

        # Python 2
        print >>stream, Fore.BLUE + 'blue text on stderr'

        # Python 3
        print(Fore.BLUE + 'blue text on stderr', file=stream)

Recognised ANSI Sequences
.........................

ANSI sequences generally take the form::

    ESC [ <param> ; <param> ... <command>

Where ``<param>`` is an integer, and ``<command>`` is a single letter. Zero or
more params are passed to a ``<command>``. If no params are passed, it is
generally synonymous with passing a single zero. No spaces exist in the
sequence; they have been inserted here simply to read more easily.

The only ANSI sequences that Colorama converts into win32 calls are::

    ESC [ 0 m       # reset all (colors and brightness)
    ESC [ 1 m       # bright
    ESC [ 2 m       # dim (looks same as normal brightness)
    ESC [ 22 m      # normal brightness

    # FOREGROUND:
    ESC [ 30 m      # black
    ESC [ 31 m      # red
    ESC [ 32 m      # green
    ESC [ 33 m      # yellow
    ESC [ 34 m      # blue
    ESC [ 35 m      # magenta
    ESC [ 36 m      # cyan
    ESC [ 37 m      # white
    ESC [ 39 m      # reset

    # BACKGROUND
    ESC [ 40 m      # black
    ESC [ 41 m      # red
    ESC [ 42 m      # green
    ESC [ 43 m      # yellow
    ESC [ 44 m      # blue
    ESC [ 45 m      # magenta
    ESC [ 46 m      # cyan
    ESC [ 47 m      # white
    ESC [ 49 m      # reset

    # cursor positioning
    ESC [ y;x H     # position cursor at x across, y down
    ESC [ y;x f     # position cursor at x across, y down
    ESC [ n A       # move cursor n lines up
    ESC [ n B       # move cursor n lines down
    ESC [ n C       # move cursor n characters forward
    ESC [ n D       # move cursor n characters backward

    # clear the screen
    ESC [ mode J    # clear the screen

    # clear the line
    ESC [ mode K    # clear the line

Multiple numeric params to the ``'m'`` command can be combined into a single
sequence::

    ESC [ 36 ; 45 ; 1 m     # bright cyan text on magenta background

All other ANSI sequences of the form ``ESC [ <param> ; <param> ... <command>``
are silently stripped from the output on Windows.

Any other form of ANSI sequence, such as single-character codes or alternative
initial characters, are not recognised or stripped. It would be cool to add
them though. Let me know if it would be useful for you, via the Issues on
GitHub.

Status & Known Problems
-----------------------

I've personally only tested it on Windows XP (CMD, Console2), Ubuntu
(gnome-terminal, xterm), and OS X.

Some valid ANSI sequences aren't recognised.

If you're hacking on the code, see `README-hacking.md`_. ESPECIALLY, see the
explanation there of why we do not want PRs that allow Colorama to generate new
types of ANSI codes.

See outstanding issues and wish-list:
https://github.com/tartley/colorama/issues

If anything doesn't work for you, or doesn't do what you expected or hoped for,
I'd love to hear about it on that issues list, would be delighted by patches,
and would be happy to grant commit access to anyone who submits a working patch
or two.

.. _README-hacking.md: README-hacking.md

License
-------

Copyright Jonathan Hartley & Arnon Yaari, 2013-2020. BSD 3-Clause license; see
LICENSE file.

Professional support
--------------------

.. |tideliftlogo| image:: https://cdn2.hubspot.net/hubfs/4008838/website/logos/logos_for_download/Tidelift_primary-shorthand-logo.png
   :alt: Tidelift
   :target: https://tidelift.com/subscription/pkg/pypi-colorama?utm_source=pypi-colorama&utm_medium=referral&utm_campaign=readme

.. list-table::
   :widths: 10 100

   * - |tideliftlogo|
     - Professional support for colorama is available as part of the
       `Tidelift Subscription`_.
       Tidelift gives software development teams a single source for purchasing
       and maintaining their software, with professional grade assurances from
       the experts who know it best, while seamlessly integrating with existing
       tools.

.. _Tidelift Subscription: https://tidelift.com/subscription/pkg/pypi-colorama?utm_source=pypi-colorama&utm_medium=referral&utm_campaign=readme

Thanks
------

See the CHANGELOG for more thanks!

* Marc Schlaich (schlamar) for a ``setup.py`` fix for Python2.5.
* Marc Abramowitz, reported & fixed a crash on exit with closed ``stdout``,
  providing a solution to issue #7's setuptools/distutils debate,
  and other fixes.
* User 'eryksun', for guidance on correctly instantiating ``ctypes.windll``.
* Matthew McCormick for politely pointing out a longstanding crash on non-Win.
* Ben Hoyt, for a magnificent fix under 64-bit Windows.
* Jesse at Empty Square for submitting a fix for examples in the README.
* User 'jamessp', an observant documentation fix for cursor positioning.
* User 'vaal1239', Dave Mckee & Lackner Kristof for a tiny but much-needed Win7
  fix.
* Julien Stuyck, for wisely suggesting Python3 compatible updates to README.
* Daniel Griffith for multiple fabulous patches.
* Oscar Lesta for a valuable fix to stop ANSI chars being sent to non-tty
  output.
* Roger Binns, for many suggestions, valuable feedback, & bug reports.
* Tim Golden for thought and much appreciated feedback on the initial idea.
* User 'Zearin' for updates to the README file.
* John Szakmeister for adding support for light colors
* Charles Merriam for adding documentation to demos
* Jurko for a fix on 64-bit Windows CPython2.5 w/o ctypes
* Florian Bruhin for a fix when stdout or stderr are None
* Thomas Weininger for fixing ValueError on Windows
* Remi Rampin for better Github integration and fixes to the README file
* Simeon Visser for closing a file handle using 'with' and updating classifiers
  to include Python 3.3 and 3.4
* Andy Neff for fixing RESET of LIGHT_EX colors.
* Jonathan Hartley for the initial idea and implementation.
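
A compact sketch of the API this METADATA documents, combining the two documented entry points (it assumes the bundled colorama 0.4.6 is importable; it is not part of the wheel contents):

# Enable ANSI handling on Windows consoles (a no-op elsewhere), then print
# with the constant shorthand described above.
from colorama import Back, Fore, Style, just_fix_windows_console

just_fix_windows_console()

print(Fore.CYAN + Back.MAGENTA + Style.BRIGHT + "bright cyan on a magenta background")
print(Style.RESET_ALL + "back to the terminal defaults")
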
env/Lib/site-packages/colorama-0.4.6.dist-info/RECORD
ADDED
@@ -0,0 +1,31 @@
colorama-0.4.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
colorama-0.4.6.dist-info/METADATA,sha256=e67SnrUMOym9sz_4TjF3vxvAV4T3aF7NyqRHHH3YEMw,17158
colorama-0.4.6.dist-info/RECORD,,
colorama-0.4.6.dist-info/WHEEL,sha256=cdcF4Fbd0FPtw2EMIOwH-3rSOTUdTCeOSXRMD1iLUb8,105
colorama-0.4.6.dist-info/licenses/LICENSE.txt,sha256=ysNcAmhuXQSlpxQL-zs25zrtSWZW6JEQLkKIhteTAxg,1491
colorama/__init__.py,sha256=wePQA4U20tKgYARySLEC047ucNX-g8pRLpYBuiHlLb8,266
colorama/__pycache__/__init__.cpython-312.pyc,,
colorama/__pycache__/ansi.cpython-312.pyc,,
colorama/__pycache__/ansitowin32.cpython-312.pyc,,
colorama/__pycache__/initialise.cpython-312.pyc,,
colorama/__pycache__/win32.cpython-312.pyc,,
colorama/__pycache__/winterm.cpython-312.pyc,,
colorama/ansi.py,sha256=Top4EeEuaQdBWdteKMEcGOTeKeF19Q-Wo_6_Cj5kOzQ,2522
colorama/ansitowin32.py,sha256=vPNYa3OZbxjbuFyaVo0Tmhmy1FZ1lKMWCnT7odXpItk,11128
colorama/initialise.py,sha256=-hIny86ClXo39ixh5iSCfUIa2f_h_bgKRDW7gqs-KLU,3325
colorama/tests/__init__.py,sha256=MkgPAEzGQd-Rq0w0PZXSX2LadRWhUECcisJY8lSrm4Q,75
colorama/tests/__pycache__/__init__.cpython-312.pyc,,
colorama/tests/__pycache__/ansi_test.cpython-312.pyc,,
colorama/tests/__pycache__/ansitowin32_test.cpython-312.pyc,,
colorama/tests/__pycache__/initialise_test.cpython-312.pyc,,
colorama/tests/__pycache__/isatty_test.cpython-312.pyc,,
colorama/tests/__pycache__/utils.cpython-312.pyc,,
colorama/tests/__pycache__/winterm_test.cpython-312.pyc,,
colorama/tests/ansi_test.py,sha256=FeViDrUINIZcr505PAxvU4AjXz1asEiALs9GXMhwRaE,2839
colorama/tests/ansitowin32_test.py,sha256=RN7AIhMJ5EqDsYaCjVo-o4u8JzDD4ukJbmevWKS70rY,10678
colorama/tests/initialise_test.py,sha256=BbPy-XfyHwJ6zKozuQOvNvQZzsx9vdb_0bYXn7hsBTc,6741
colorama/tests/isatty_test.py,sha256=Pg26LRpv0yQDB5Ac-sxgVXG7hsA1NYvapFgApZfYzZg,1866
colorama/tests/utils.py,sha256=1IIRylG39z5-dzq09R_ngufxyPZxgldNbrxKxUGwGKE,1079
colorama/tests/winterm_test.py,sha256=qoWFPEjym5gm2RuMwpf3pOis3a5r_PJZFCzK254JL8A,3709
colorama/win32.py,sha256=YQOKwMTwtGBbsY4dL5HYTvwTeP9wIQra5MvPNddpxZs,6181
colorama/winterm.py,sha256=XCQFDHjPi6AHYNdZwy0tA02H-Jh48Jp-HvCjeLeLp3U,7134
env/Lib/site-packages/colorama-0.4.6.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: hatchling 1.11.1
Root-Is-Purelib: true
Tag: py2-none-any
Tag: py3-none-any
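
These dist-info files (METADATA, RECORD, WHEEL, INSTALLER) are what the standard library reads at runtime; a hedged sketch of inspecting them, assuming this bundled site-packages directory is on sys.path:

# Inspect the installed colorama distribution through importlib.metadata,
# which parses the METADATA and RECORD files shown above.
from importlib.metadata import distribution

dist = distribution("colorama")
print(dist.version)              # "0.4.6", taken from METADATA
print(dist.metadata["Summary"])  # "Cross-platform colored terminal text."
print(len(dist.files or []))     # number of entries recorded in RECORD
print(dist.read_text("WHEEL"))   # the WHEEL contents shown above
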