Kuberwastaken committed
Commit 9b00313 · 1 parent: e5f090b

Bug Fixes + Better UI

Files changed (2):
  1. gradio_app.py (+5 -15)
  2. model/analyzer.py (+150 -52)
gradio_app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from model.analyzer import analyze_content, analyze_while_loading
+from model.analyzer import analyze_content
 import time  # For simulating the loading bar
 
 # Define the analysis function with a simulated loading bar
@@ -11,16 +11,7 @@ def analyze_with_loading(script):
     result = analyze_content(script)
     yield gr.update(value=f"Analysis Complete! Triggers Detected: {result['detected_triggers']}")
 
-# Define a function to process based on user's choice
-def process_script(script, analyze_during_upload):
-    if analyze_during_upload:
-        # Use the new analyze_while_loading function
-        result = analyze_while_loading(script)
-        return {"detected_triggers": result['detected_triggers']}
-    else:
-        # Use the existing analyze_with_loading function
-        return gr.update(value="Processing with detailed analysis..."), analyze_with_loading(script)
-
+# Create the Gradio interface
 # Create the Gradio interface
 with gr.Blocks(css=".center-text {text-align: center;} .gradient-bg {background: linear-gradient(135deg, #ff9a9e, #fad0c4);}") as iface:
     # Header with centered text
@@ -36,7 +27,6 @@ with gr.Blocks(css=".center-text {text-align: center;} .gradient-bg {background:
 
     # Input Section
     script_input = gr.Textbox(lines=8, label="Input Text", placeholder="Paste your script here...")
-    analyze_during_upload = gr.Checkbox(label="Analyze while loading?", value=False)
     analyze_button = gr.Button("Analyze Content")
 
     # Loading Bar and Results
@@ -45,11 +35,11 @@ with gr.Blocks(css=".center-text {text-align: center;} .gradient-bg {background:
 
     # Connect the button to the function
     analyze_button.click(
-        fn=process_script,
-        inputs=[script_input, analyze_during_upload],
+        fn=analyze_with_loading,
+        inputs=script_input,
         outputs=[loading_bar, results_output],
     )
 
 # Launch the app
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
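
Note on the new wiring: analyze_button.click now takes the generator analyze_with_loading directly as fn, so each yield pushes an update to the UI while the listed output components wait for values. A minimal, self-contained sketch of that generator pattern is below; it uses hypothetical component names and a fake progress loop rather than this app's exact layout, and assumes Gradio's standard support for generator event handlers with one yielded value per output.

import time
import gradio as gr

def analyze_with_progress(script):
    # Each yield supplies one value per output component: (status text, results text)
    for pct in (25, 50, 75):
        time.sleep(0.3)  # stand-in for real work
        yield f"Analyzing... {pct}%", ""
    yield "Analysis Complete!", f"Input length: {len(script)} characters"

with gr.Blocks() as demo:
    script_input = gr.Textbox(lines=4, label="Input Text")
    analyze_button = gr.Button("Analyze")
    status = gr.Textbox(label="Status")
    results = gr.Textbox(label="Results")
    analyze_button.click(fn=analyze_with_progress, inputs=script_input, outputs=[status, results])

if __name__ == "__main__":
    demo.launch()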
model/analyzer.py CHANGED
@@ -10,51 +10,122 @@ hf_token = os.getenv("HF_TOKEN")
 if not hf_token:
     raise ValueError("HF_TOKEN environment variable is not set!")
 
-# Define trigger categories
-trigger_categories = {
-    "Violence": {
-        "mapped_name": "Violence",
-        "description": (
-            "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
-            "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., very graphical threats or descriptions of injuries), "
-            "or large-scale events like wars, riots, or violent protests."
-        )
-    },
-    "Death": {
-        "mapped_name": "Death References",
-        "description": (
-            "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, including mentions of deceased individuals, "
-            "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
-            "grieving, or any dialogue that centers around death, do not take metaphors into context that don't actually lead to death."
-        )
-    },
-    # Add other trigger categories here
-}
-
 def analyze_script(script):
+    # Starting the script analysis
     print("\n=== Starting Analysis ===")
-    print(f"Time: {datetime.now()}")
+    print(f"Time: {datetime.now()}")  # Outputting the current timestamp
+    print("Loading model and tokenizer...")
 
     try:
-        print("Loading model and tokenizer...")
+        # Load the tokenizer and model, selecting the appropriate device (CPU or CUDA)
         tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B", use_fast=True)
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = "cuda" if torch.cuda.is_available() else "cpu"  # Use CUDA if available, else use CPU
         print(f"Using device: {device}")
 
+        # Load model with token authentication
         model = AutoModelForCausalLM.from_pretrained(
            "meta-llama/Llama-3.2-1B",
-            token=hf_token,
-            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-            device_map="auto"
+            token=hf_token,  # Pass the token to authenticate
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,  # Use 16-bit precision for CUDA, 32-bit for CPU
+            device_map="auto"  # Automatically map model to available device
         )
         print("Model loaded successfully")
+
     except Exception as e:
-        print(f"An error occurred while loading model: {e}")
+        print(f"An error occurred: {e}")
         return []
 
-    print("\nProcessing text...")
-    chunk_size = 256
-    overlap = 15
+    # Define trigger categories with their descriptions
+    trigger_categories = {
+        "Violence": {
+            "mapped_name": "Violence",
+            "description": (
+                "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
+                "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., very graphical threats or descriptions of injuries), "
+                "or large-scale events like wars, riots, or violent protests."
+            )
+        },
+        "Death": {
+            "mapped_name": "Death References",
+            "description": (
+                "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, including mentions of deceased individuals, "
+                "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
+                "grieving, or any dialogue that centers around death, do not take metaphors into context that don't actually lead to death."
+            )
+        },
+        "Substance Use": {
+            "mapped_name": "Substance Use",
+            "description": (
+                "Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
+                "Includes scenes of drinking, smoking, or drug use, whether recreational or addictive. May also cover references to withdrawal symptoms, "
+                "rehabilitation, or substance-related paraphernalia (e.g., needles, bottles, pipes)."
+            )
+        },
+        "Gore": {
+            "mapped_name": "Gore",
+            "description": (
+                "Extremely detailed and graphic depictions of highly severe physical injuries, mutilation, or extreme bodily harm, often accompanied by descriptions of heavy blood, exposed organs, "
+                "or dismemberment. This includes war scenes with severe casualties, horror scenarios involving grotesque creatures, or medical procedures depicted with excessive detail."
+            )
+        },
+        "Vomit": {
+            "mapped_name": "Vomit",
+            "description": (
+                "Any reference to the act of vomiting, whether directly described, implied, or depicted in detail. This includes sounds or visual descriptions of the act, "
+                "mentions of nausea leading to vomiting, or its aftermath (e.g., the presence of vomit, cleaning it up, or characters reacting to it)."
+            )
+        },
+        "Sexual Content": {
+            "mapped_name": "Sexual Content",
+            "description": (
+                "Any depiction or mention of sexual activity, intimacy, or sexual behavior, ranging from implied scenes to explicit descriptions. "
+                "This includes romantic encounters, physical descriptions of characters in a sexual context, sexual dialogue, or references to sexual themes (e.g., harassment, innuendos)."
+            )
+        },
+        "Sexual Abuse": {
+            "mapped_name": "Sexual Abuse",
+            "description": (
+                "Any form of non-consensual sexual act, behavior, or interaction, involving coercion, manipulation, or physical force. "
+                "This includes incidents of sexual assault, molestation, exploitation, harassment, and any acts where an individual is subjected to sexual acts against their will or without their consent. "
+                "It also covers discussions or depictions of the aftermath of such abuse, such as trauma, emotional distress, legal proceedings, or therapy. "
+                "References to inappropriate sexual advances, groping, or any other form of sexual misconduct are also included, as well as the psychological and emotional impact on survivors. "
+                "Scenes where individuals are placed in sexually compromising situations, even if not directly acted upon, may also fall under this category."
+            )
+        },
+        "Self-Harm": {
+            "mapped_name": "Self-Harm",
+            "description": (
+                "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves. This includes cutting, burning, or other forms of physical injury, "
+                "as well as suicidal ideation, suicide attempts, or discussions of self-destructive thoughts and actions. References to scars, bruises, or other lasting signs of self-harm are also included."
+            )
+        },
+        "Gun Use": {
+            "mapped_name": "Gun Use",
+            "description": (
+                "Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner. This includes scenes of gun violence, references to shootings, "
+                "gun-related accidents, or the presence of firearms in a tense or dangerous context (e.g., holstered weapons during an argument)."
+            )
+        },
+        "Animal Cruelty": {
+            "mapped_name": "Animal Cruelty",
+            "description": (
+                "Any act of harm, abuse, or neglect toward animals, whether intentional or accidental. This includes physical abuse (e.g., hitting, injuring, or killing animals), "
+                "mental or emotional mistreatment (e.g., starvation, isolation), and scenes where animals are subjected to pain or suffering for human entertainment or experimentation."
+            )
+        },
+        "Mental Health Issues": {
+            "mapped_name": "Mental Health Issues",
+            "description": (
+                "Any reference to mental health struggles, disorders, or psychological distress. This includes mentions of depression, anxiety, PTSD, bipolar disorder, schizophrenia, "
+                "or other conditions. Scenes depicting therapy sessions, psychiatric treatment, or coping mechanisms (e.g., medication, journaling) are also included. May cover subtle hints "
+                "like a character expressing feelings of worthlessness, hopelessness, or detachment from reality."
+            )
+        }
+    }
+
+    print("\nProcessing text...")  # Output indicating the text is being processed
+    chunk_size = 256  # Set the chunk size for text processing
+    overlap = 15  # Overlap between chunks for context preservation
     script_chunks = [script[i:i + chunk_size] for i in range(0, len(script), chunk_size - overlap)]
 
     identified_triggers = {}
@@ -65,6 +136,7 @@ def analyze_script(script):
         mapped_name = info["mapped_name"]
         description = info["description"]
 
+        print(f"\nAnalyzing for {mapped_name}...")
         prompt = f"""
         Check this text for any indication of {mapped_name} ({description}).
         Be sensitive to subtle references or implications, make sure the text is not metaphorical.
@@ -73,53 +145,79 @@ def analyze_script(script):
         Answer:
         """
 
-        print(f"Analyzing for {mapped_name}...")
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-        inputs = {k: v.to(device) for k, v in inputs.items()}
+        print("Sending prompt to model...")  # Indicate that prompt is being sent to the model
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)  # Tokenize the prompt
+        inputs = {k: v.to(device) for k, v in inputs.items()}  # Send inputs to the chosen device
 
-        with torch.no_grad():
+        with torch.no_grad():  # Disable gradient calculation for inference
+            print("Generating response...")  # Indicate that the model is generating a response
            outputs = model.generate(
                **inputs,
-                max_new_tokens=3,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.8,
-                pad_token_id=tokenizer.eos_token_id
+                max_new_tokens=3,  # Limit response length
+                do_sample=True,  # Enable sampling for more diverse output
+                temperature=0.7,  # Control randomness of the output
+                top_p=0.8,  # Use nucleus sampling
+                pad_token_id=tokenizer.eos_token_id  # Pad token ID
            )
-
-        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()
-        first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO"
+
+        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()  # Decode and format the response
+        first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO"  # Get the first word of the response
         print(f"Model response for {mapped_name}: {first_word}")
 
+        # Update identified triggers based on model response
         if first_word == "YES":
+            print(f"Detected {mapped_name} in this chunk!")  # Trigger detected
             identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 1
         elif first_word == "MAYBE":
+            print(f"Possible {mapped_name} detected, marking for further review.")  # Possible trigger detected
             identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 0.5
+        else:
+            print(f"No {mapped_name} detected in this chunk.")  # No trigger detected
+
+    print("\n=== Analysis Complete ===")  # Indicate that analysis is complete
+    final_triggers = []  # List to store final triggers
+
+    # Filter and output the final trigger results
+    for mapped_name, count in identified_triggers.items():
+        if count > 0.5:
+            final_triggers.append(mapped_name)
+            print(f"- {mapped_name}: found in {count} chunks")
 
-    final_triggers = [k for k, v in identified_triggers.items() if v > 0.5]
     if not final_triggers:
         final_triggers = ["None"]
 
     return final_triggers
 
 def analyze_content(script):
+    # Simulate trigger analysis (you can replace this with your actual logic)
     triggers = analyze_script(script)
-    result = {
-        "detected_triggers": triggers,
-        "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
-        "model": "Llama-3.2-1B",
-        "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    }
+
+    # Define the result based on triggers found
+    if isinstance(triggers, list) and triggers != ["None"]:
+        result = {
+            "detected_triggers": triggers,
+            "confidence": "High - Content detected",
+            "model": "Llama-3.2-1B",
+            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+    else:
+        result = {
+            "detected_triggers": ["None"],
+            "confidence": "High - No concerning content detected",
+            "model": "Llama-3.2-1B",
+            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+
     print("\nFinal Result Dictionary:", result)
     return result
 
-# Gradio interface
+# Create and launch the Gradio interface
 iface = gr.Interface(
     fn=analyze_content,
     inputs=gr.Textbox(lines=8, label="Input Text"),
     outputs=gr.JSON(),
     title="Content Analysis",
-    description="Analyze text content for triggers like violence, death, and more."
+    description="Analyze text content for sensitive topics"
 )
 
 if __name__ == "__main__":
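
The scoring logic this commit adds can be checked in isolation: each chunk contributes 1 for a YES and 0.5 for a MAYBE, and a category is reported only when its total exceeds 0.5 (one YES, or at least two MAYBEs). A rough standalone sketch of the chunking and thresholding follows; fake_classify is a hypothetical stand-in for the Llama call, not part of the repository.

def chunk_text(script, chunk_size=256, overlap=15):
    # Same slicing as analyze_script: fixed-size windows stepping by chunk_size - overlap
    return [script[i:i + chunk_size] for i in range(0, len(script), chunk_size - overlap)]

def score_triggers(chunks, categories, classify):
    # classify(chunk, category) -> "YES" | "MAYBE" | "NO", mirroring the model's first word
    identified = {}
    for chunk in chunks:
        for name in categories:
            answer = classify(chunk, name)
            if answer == "YES":
                identified[name] = identified.get(name, 0) + 1
            elif answer == "MAYBE":
                identified[name] = identified.get(name, 0) + 0.5
    # Report a category only if its score exceeds 0.5 (one YES, or two MAYBEs)
    final = [name for name, count in identified.items() if count > 0.5]
    return final or ["None"]

# Hypothetical stand-in for the model call, for illustration only
def fake_classify(chunk, category):
    return "YES" if category == "Violence" and "fight" in chunk.lower() else "NO"

chunks = chunk_text("The two characters fight in the alley. " * 20)
print(score_triggers(chunks, ["Violence", "Death References"], fake_classify))
# -> ['Violence']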