Kuberwastaken committed
Commit 9b00313 · 1 parent: e5f090b

Bug Fixes + Better UI

Files changed (2):
  1. gradio_app.py (+5 -15)
  2. model/analyzer.py (+150 -52)
gradio_app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from model.analyzer import analyze_content, analyze_while_loading
+from model.analyzer import analyze_content
 import time  # For simulating the loading bar
 
 # Define the analysis function with a simulated loading bar
@@ -11,16 +11,7 @@ def analyze_with_loading(script):
     result = analyze_content(script)
     yield gr.update(value=f"Analysis Complete! Triggers Detected: {result['detected_triggers']}")
 
-# Define a function to process based on user's choice
-def process_script(script, analyze_during_upload):
-    if analyze_during_upload:
-        # Use the new analyze_while_loading function
-        result = analyze_while_loading(script)
-        return {"detected_triggers": result['detected_triggers']}
-    else:
-        # Use the existing analyze_with_loading function
-        return gr.update(value="Processing with detailed analysis..."), analyze_with_loading(script)
-
+# Create the Gradio interface
 # Create the Gradio interface
 with gr.Blocks(css=".center-text {text-align: center;} .gradient-bg {background: linear-gradient(135deg, #ff9a9e, #fad0c4);}") as iface:
     # Header with centered text
@@ -36,7 +27,6 @@ with gr.Blocks(css=".center-text {text-align: center;} .gradient-bg {background:
 
     # Input Section
     script_input = gr.Textbox(lines=8, label="Input Text", placeholder="Paste your script here...")
-    analyze_during_upload = gr.Checkbox(label="Analyze while loading?", value=False)
     analyze_button = gr.Button("Analyze Content")
 
     # Loading Bar and Results
@@ -45,11 +35,11 @@ with gr.Blocks(css=".center-text {text-align: center;} .gradient-bg {background:
 
     # Connect the button to the function
     analyze_button.click(
-        fn=process_script,
-        inputs=[script_input, analyze_during_upload],
+        fn=analyze_with_loading,
+        inputs=script_input,
         outputs=[loading_bar, results_output],
     )
 
 # Launch the app
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
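
Note on the new wiring: analyze_button.click now takes the generator analyze_with_loading directly as fn, so each yield pushes an update to the UI while the listed output components wait for values. A minimal, self-contained sketch of that generator pattern is below; it uses hypothetical component names and a fake progress loop rather than this app's exact layout, and assumes Gradio's standard support for generator event handlers with one yielded value per output.

import time
import gradio as gr

def analyze_with_progress(script):
    # Each yield supplies one value per output component: (status text, results text)
    for pct in (25, 50, 75):
        time.sleep(0.3)  # stand-in for real work
        yield f"Analyzing... {pct}%", ""
    yield "Analysis Complete!", f"Input length: {len(script)} characters"

with gr.Blocks() as demo:
    script_input = gr.Textbox(lines=4, label="Input Text")
    analyze_button = gr.Button("Analyze")
    status = gr.Textbox(label="Status")
    results = gr.Textbox(label="Results")
    analyze_button.click(fn=analyze_with_progress, inputs=script_input, outputs=[status, results])

if __name__ == "__main__":
    demo.launch()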
model/analyzer.py CHANGED
@@ -10,51 +10,122 @@ hf_token = os.getenv("HF_TOKEN")
 if not hf_token:
     raise ValueError("HF_TOKEN environment variable is not set!")
 
-# Define trigger categories
-trigger_categories = {
-    "Violence": {
-        "mapped_name": "Violence",
-        "description": (
-            "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
-            "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., very graphical threats or descriptions of injuries), "
-            "or large-scale events like wars, riots, or violent protests."
-        )
-    },
-    "Death": {
-        "mapped_name": "Death References",
-        "description": (
-            "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, including mentions of deceased individuals, "
-            "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
-            "grieving, or any dialogue that centers around death, do not take metaphors into context that don't actually lead to death."
-        )
-    },
-    # Add other trigger categories here
-}
-
 def analyze_script(script):
+    # Starting the script analysis
     print("\n=== Starting Analysis ===")
-    print(f"Time: {datetime.now()}")
+    print(f"Time: {datetime.now()}")  # Outputting the current timestamp
+    print("Loading model and tokenizer...")
 
     try:
-        print("Loading model and tokenizer...")
+        # Load the tokenizer and model, selecting the appropriate device (CPU or CUDA)
         tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B", use_fast=True)
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        device = "cuda" if torch.cuda.is_available() else "cpu"  # Use CUDA if available, else use CPU
         print(f"Using device: {device}")
 
+        # Load model with token authentication
         model = AutoModelForCausalLM.from_pretrained(
            "meta-llama/Llama-3.2-1B",
-            token=hf_token,
-            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-            device_map="auto"
+            token=hf_token,  # Pass the token to authenticate
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,  # Use 16-bit precision for CUDA, 32-bit for CPU
+            device_map="auto"  # Automatically map model to available device
         )
         print("Model loaded successfully")
+
     except Exception as e:
-        print(f"An error occurred while loading model: {e}")
+        print(f"An error occurred: {e}")
         return []
 
-    print("\nProcessing text...")
-    chunk_size = 256
-    overlap = 15
+    # Define trigger categories with their descriptions
+    trigger_categories = {
+        "Violence": {
+            "mapped_name": "Violence",
+            "description": (
+                "Any act involving physical force or aggression intended to cause harm, injury, or death to a person, animal, or object. "
+                "Includes direct physical confrontations (e.g., fights, beatings, or assaults), implied violence (e.g., very graphical threats or descriptions of injuries), "
+                "or large-scale events like wars, riots, or violent protests."
+            )
+        },
+        "Death": {
+            "mapped_name": "Death References",
+            "description": (
+                "Any mention, implication, or depiction of the loss of life, including direct deaths of characters, including mentions of deceased individuals, "
+                "or abstract references to mortality (e.g., 'facing the end' or 'gone forever'). This also covers depictions of funerals, mourning, "
+                "grieving, or any dialogue that centers around death, do not take metaphors into context that don't actually lead to death."
+            )
+        },
+        "Substance Use": {
+            "mapped_name": "Substance Use",
+            "description": (
+                "Any explicit or implied reference to the consumption, misuse, or abuse of drugs, alcohol, or other intoxicating substances. "
+                "Includes scenes of drinking, smoking, or drug use, whether recreational or addictive. May also cover references to withdrawal symptoms, "
+                "rehabilitation, or substance-related paraphernalia (e.g., needles, bottles, pipes)."
+            )
+        },
+        "Gore": {
+            "mapped_name": "Gore",
+            "description": (
+                "Extremely detailed and graphic depictions of highly severe physical injuries, mutilation, or extreme bodily harm, often accompanied by descriptions of heavy blood, exposed organs, "
+                "or dismemberment. This includes war scenes with severe casualties, horror scenarios involving grotesque creatures, or medical procedures depicted with excessive detail."
+            )
+        },
+        "Vomit": {
+            "mapped_name": "Vomit",
+            "description": (
+                "Any reference to the act of vomiting, whether directly described, implied, or depicted in detail. This includes sounds or visual descriptions of the act, "
+                "mentions of nausea leading to vomiting, or its aftermath (e.g., the presence of vomit, cleaning it up, or characters reacting to it)."
+            )
+        },
+        "Sexual Content": {
+            "mapped_name": "Sexual Content",
+            "description": (
+                "Any depiction or mention of sexual activity, intimacy, or sexual behavior, ranging from implied scenes to explicit descriptions. "
+                "This includes romantic encounters, physical descriptions of characters in a sexual context, sexual dialogue, or references to sexual themes (e.g., harassment, innuendos)."
+            )
+        },
+        "Sexual Abuse": {
+            "mapped_name": "Sexual Abuse",
+            "description": (
+                "Any form of non-consensual sexual act, behavior, or interaction, involving coercion, manipulation, or physical force. "
+                "This includes incidents of sexual assault, molestation, exploitation, harassment, and any acts where an individual is subjected to sexual acts against their will or without their consent. "
+                "It also covers discussions or depictions of the aftermath of such abuse, such as trauma, emotional distress, legal proceedings, or therapy. "
+                "References to inappropriate sexual advances, groping, or any other form of sexual misconduct are also included, as well as the psychological and emotional impact on survivors. "
+                "Scenes where individuals are placed in sexually compromising situations, even if not directly acted upon, may also fall under this category."
+            )
+        },
+        "Self-Harm": {
+            "mapped_name": "Self-Harm",
+            "description": (
+                "Any mention or depiction of behaviors where an individual intentionally causes harm to themselves. This includes cutting, burning, or other forms of physical injury, "
+                "as well as suicidal ideation, suicide attempts, or discussions of self-destructive thoughts and actions. References to scars, bruises, or other lasting signs of self-harm are also included."
+            )
+        },
+        "Gun Use": {
+            "mapped_name": "Gun Use",
+            "description": (
+                "Any explicit or implied mention of firearms being handled, fired, or used in a threatening manner. This includes scenes of gun violence, references to shootings, "
+                "gun-related accidents, or the presence of firearms in a tense or dangerous context (e.g., holstered weapons during an argument)."
+            )
+        },
+        "Animal Cruelty": {
+            "mapped_name": "Animal Cruelty",
+            "description": (
+                "Any act of harm, abuse, or neglect toward animals, whether intentional or accidental. This includes physical abuse (e.g., hitting, injuring, or killing animals), "
+                "mental or emotional mistreatment (e.g., starvation, isolation), and scenes where animals are subjected to pain or suffering for human entertainment or experimentation."
+            )
+        },
+        "Mental Health Issues": {
+            "mapped_name": "Mental Health Issues",
+            "description": (
+                "Any reference to mental health struggles, disorders, or psychological distress. This includes mentions of depression, anxiety, PTSD, bipolar disorder, schizophrenia, "
+                "or other conditions. Scenes depicting therapy sessions, psychiatric treatment, or coping mechanisms (e.g., medication, journaling) are also included. May cover subtle hints "
+                "like a character expressing feelings of worthlessness, hopelessness, or detachment from reality."
+            )
+        }
+    }
+
+    print("\nProcessing text...")  # Output indicating the text is being processed
+    chunk_size = 256  # Set the chunk size for text processing
+    overlap = 15  # Overlap between chunks for context preservation
     script_chunks = [script[i:i + chunk_size] for i in range(0, len(script), chunk_size - overlap)]
 
     identified_triggers = {}
@@ -65,6 +136,7 @@ def analyze_script(script):
         mapped_name = info["mapped_name"]
         description = info["description"]
 
+        print(f"\nAnalyzing for {mapped_name}...")
         prompt = f"""
         Check this text for any indication of {mapped_name} ({description}).
         Be sensitive to subtle references or implications, make sure the text is not metaphorical.
@@ -73,53 +145,79 @@ def analyze_script(script):
         Answer:
         """
 
-        print(f"Analyzing for {mapped_name}...")
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-        inputs = {k: v.to(device) for k, v in inputs.items()}
+        print("Sending prompt to model...")  # Indicate that prompt is being sent to the model
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)  # Tokenize the prompt
+        inputs = {k: v.to(device) for k, v in inputs.items()}  # Send inputs to the chosen device
 
-        with torch.no_grad():
+        with torch.no_grad():  # Disable gradient calculation for inference
+            print("Generating response...")  # Indicate that the model is generating a response
            outputs = model.generate(
                **inputs,
-                max_new_tokens=3,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.8,
-                pad_token_id=tokenizer.eos_token_id
+                max_new_tokens=3,  # Limit response length
+                do_sample=True,  # Enable sampling for more diverse output
+                temperature=0.7,  # Control randomness of the output
+                top_p=0.8,  # Use nucleus sampling
+                pad_token_id=tokenizer.eos_token_id  # Pad token ID
            )
-
-        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()
-        first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO"
+
+        response_text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().upper()  # Decode and format the response
+        first_word = response_text.split("\n")[-1].split()[0] if response_text else "NO"  # Get the first word of the response
         print(f"Model response for {mapped_name}: {first_word}")
 
+        # Update identified triggers based on model response
         if first_word == "YES":
+            print(f"Detected {mapped_name} in this chunk!")  # Trigger detected
             identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 1
         elif first_word == "MAYBE":
+            print(f"Possible {mapped_name} detected, marking for further review.")  # Possible trigger detected
             identified_triggers[mapped_name] = identified_triggers.get(mapped_name, 0) + 0.5
+        else:
+            print(f"No {mapped_name} detected in this chunk.")  # No trigger detected
+
+    print("\n=== Analysis Complete ===")  # Indicate that analysis is complete
+    final_triggers = []  # List to store final triggers
+
+    # Filter and output the final trigger results
+    for mapped_name, count in identified_triggers.items():
+        if count > 0.5:
+            final_triggers.append(mapped_name)
+            print(f"- {mapped_name}: found in {count} chunks")
 
-    final_triggers = [k for k, v in identified_triggers.items() if v > 0.5]
     if not final_triggers:
         final_triggers = ["None"]
 
     return final_triggers
 
 def analyze_content(script):
+    # Simulate trigger analysis (you can replace this with your actual logic)
     triggers = analyze_script(script)
-    result = {
-        "detected_triggers": triggers,
-        "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
-        "model": "Llama-3.2-1B",
-        "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    }
+
+    # Define the result based on triggers found
+    if isinstance(triggers, list) and triggers != ["None"]:
+        result = {
+            "detected_triggers": triggers,
+            "confidence": "High - Content detected",
+            "model": "Llama-3.2-1B",
+            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+    else:
+        result = {
+            "detected_triggers": ["None"],
+            "confidence": "High - No concerning content detected",
+            "model": "Llama-3.2-1B",
+            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+
     print("\nFinal Result Dictionary:", result)
     return result
 
-# Gradio interface
+# Create and launch the Gradio interface
 iface = gr.Interface(
     fn=analyze_content,
     inputs=gr.Textbox(lines=8, label="Input Text"),
     outputs=gr.JSON(),
     title="Content Analysis",
-    description="Analyze text content for triggers like violence, death, and more."
+    description="Analyze text content for sensitive topics"
 )
 
 if __name__ == "__main__":
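
The scoring logic this commit adds can be checked in isolation: each chunk contributes 1 for a YES and 0.5 for a MAYBE, and a category is reported only when its total exceeds 0.5 (one YES, or at least two MAYBEs). A rough standalone sketch of the chunking and thresholding follows; fake_classify is a hypothetical stand-in for the Llama call, not part of the repository.

def chunk_text(script, chunk_size=256, overlap=15):
    # Same slicing as analyze_script: fixed-size windows stepping by chunk_size - overlap
    return [script[i:i + chunk_size] for i in range(0, len(script), chunk_size - overlap)]

def score_triggers(chunks, categories, classify):
    # classify(chunk, category) -> "YES" | "MAYBE" | "NO", mirroring the model's first word
    identified = {}
    for chunk in chunks:
        for name in categories:
            answer = classify(chunk, name)
            if answer == "YES":
                identified[name] = identified.get(name, 0) + 1
            elif answer == "MAYBE":
                identified[name] = identified.get(name, 0) + 0.5
    # Report a category only if its score exceeds 0.5 (one YES, or two MAYBEs)
    final = [name for name, count in identified.items() if count > 0.5]
    return final or ["None"]

# Hypothetical stand-in for the model call, for illustration only
def fake_classify(chunk, category):
    return "YES" if category == "Violence" and "fight" in chunk.lower() else "NO"

chunks = chunk_text("The two characters fight in the alley. " * 20)
print(score_triggers(chunks, ["Violence", "Death References"], fake_classify))
# -> ['Violence']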