ashjo317 committed
Commit e934123 · verified · 1 Parent(s): 9bd5503

Upload 4 files

Files changed (5)
  1. .gitattributes +1 -0
  2. app.py +124 -0
  3. cleaned_toxic_tweets.csv +3 -0
  4. requirements.txt +8 -0
  5. webserver.py +89 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ cleaned_toxic_tweets.csv filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,124 @@
+ import requests
+ import gradio as gr
+ import json
+
+ # Endpoints served locally by webserver.py
+ get_random_tweet_url = "http://127.0.0.1:5050/random_tweet"
+ openapi_url = "http://127.0.0.1:5050/test_tweet_openapi"
+ huggingface_url = "http://127.0.0.1:5050/test_tweet_huggingface"
+
+ def _get_random_tweet():
+     response = requests.get(get_random_tweet_url)
+     return response.json()['random_tweet']
+
+ def _test_tweet_openapi(tweet):
+     response = requests.post(openapi_url, json={"tweet": tweet})
+     return response.json()
+
+ def _test_tweet_huggingface(tweet):
+     response = requests.post(huggingface_url, json={"tweet": tweet})
+     return response.json()
+
+ def _shutdown_server():
+     return requests.get("http://127.0.0.1:5050/shutdown")
+
+
+ mksdown = """# 😃 Welcome to Friendly Text Moderation for Twitter (X) Posts
+ ### 🔍 Identify 13 Categories of Text Toxicity
+
+ > 🚀 This **NLP-powered AI** detects **profanity, vulgarity, hate speech, violence, sexism, and offensive language** in tweets.
+ > 🛡️ **Not an act of censorship** – the UI lets readers (excluding young audiences) click a label to reveal toxic messages.
+ > 🎯 **Goal**: Foster a safer, more respectful online space for **you, your colleagues, and your family**.
+
+ ---
+
+ ## 🛠️ How to Use This App
+ 1️⃣ **Enter your tweet** (or click "Populate Random Tweet" to load a harmful tweet from a Kaggle dataset).
+ 2️⃣ **Click "Measure Toxicity OpenAPI"** to score the tweet across 13 categories, visualized as a **horizontal bar graph**.
+ 3️⃣ **Click "Measure Toxicity HF"** to get a **JSON-based** safe/unsafe verdict with toxicity percentages from **Hugging Face**.
+
+ ---
+
+ ## 📌 AI Models Used
+ - 🧠 **OpenAI's 'omni-moderation-latest' model** for multi-category toxicity detection.
+ - 🤖 **Hugging Face's 'unitary/toxic-bert' model** for binary (Safe/Unsafe) classification.
+ - 🔬 **Understands context, nuance, and intent** – beyond just swear words!
+
+ ---
+
+ ## 📊 Toxicity Categories (13)
+ 1️⃣ **Sexual**
+ 2️⃣ **Harassment**
+ 3️⃣ **Violence**
+ 4️⃣ **Hate**
+ 5️⃣ **Illicit**
+ 6️⃣ **Harassment/Threatening**
+ 7️⃣ **Self-Harm**
+ 8️⃣ **Sexual/Minors**
+ 9️⃣ **Self-Harm/Intent**
+ 🔟 **Self-Harm/Instructions**
+ 1️⃣1️⃣ **Illicit/Violent**
+ 1️⃣2️⃣ **Hate/Threatening**
+ 1️⃣3️⃣ **Violence/Graphic**
+
+ ---
+
+ ## 📝 Example Hugging Face Output
+ ```json
+ {
+     "toxicity": "unsafe",
+     "%Toxic": 65.95,
+     "%Safe": 34.05
+ }
+ ```
+
+ * OpenAI scores the tweet against all 13 categories and displays each as a percentage.
+ * The real-world dataset is the "Toxic Tweets Dataset" (https://www.kaggle.com/datasets/ashwiniyer176/toxic-tweets-dataset/data).
+
+ ---
+ # 🌟 "AI Solution Architect" Course by ELVTR
+ """
+
+ # Get per-category toxicity scores from the OpenAI moderation endpoint
+ def get_toxicity_openai(tweet):
+     open_api_answer = _test_tweet_openapi(tweet)
+     # Surface the overall verdict alongside the 13 category scores
+     # (cast the boolean 'flagged' to a number so gr.Label can render it)
+     open_api_answer['category_scores']['IS THIS TWEET TOXIC'] = float(open_api_answer['flagged'])
+     return open_api_answer['category_scores']
+
+ # Get a safe/unsafe verdict from the Hugging Face model
+ def get_toxicity_hf(tweet):
+     hugging_face_answer = _test_tweet_huggingface(tweet)
+     score = hugging_face_answer[0]['score'] * 100
+     # Tweets scoring at or below 60% toxic are treated as safe
+     verdict = "safe" if score <= 60 else "unsafe"
+     return json.dumps({"toxicity": verdict, "%Toxic": score, "%Safe": 100 - score}, indent=4)
+
+ # Random tweet generator
+ def get_random_tweet():
+     return _get_random_tweet()
+
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown(mksdown)
+     user_input = gr.Textbox(label="Paste your paragraph (2-10 lines)", lines=5)
+
+     with gr.Row():
+         analyze_btn = gr.Button("Measure Toxicity OpenAPI")
+         analyze_btn_hf = gr.Button("Measure Toxicity HF")
+         random_tweet_btn = gr.Button("Populate Random Tweet")
+
+     toxicity_output_json = gr.Code(label="Formatted Toxicity JSON", language="json")
+     toxicity_output = gr.Label("Toxicity Results")
+
+     analyze_btn_hf.click(get_toxicity_hf, inputs=[user_input], outputs=[toxicity_output_json])
+     analyze_btn.click(get_toxicity_openai, inputs=[user_input], outputs=[toxicity_output])
+     random_tweet_btn.click(get_random_tweet, outputs=user_input)
+
+ if __name__ == "__main__":
+     from webserver import start_web_server
+     start_web_server()
+     demo.launch(debug=True)
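
For context, here is a minimal smoke-test sketch (an editorial addition, not part of this commit) that exercises the three local endpoints directly with `requests`. It assumes webserver.py is already serving on 127.0.0.1:5050 and that OPENAI_API_KEY is set; the response keys (`random_tweet`, `flagged`, `category_scores`) are the same ones app.py reads above.

```python
# smoke_test.py -- illustrative sketch, not part of the commit.
import requests

BASE = "http://127.0.0.1:5050"

# 1. Pull a random tweet from the Kaggle dataset.
tweet = requests.get(f"{BASE}/random_tweet").json()["random_tweet"]
print("Tweet:", tweet)

# 2. Score it across the 13 OpenAI moderation categories.
result = requests.post(f"{BASE}/test_tweet_openapi", json={"tweet": tweet}).json()
print("Flagged:", result["flagged"])
for category, score in result["category_scores"].items():
    print(f"  {category}: {score:.4f}")

# 3. Get the binary toxic-bert verdict, e.g. [{"label": "toxic", "score": 0.98}].
print("toxic-bert:", requests.post(f"{BASE}/test_tweet_huggingface", json={"tweet": tweet}).json())
```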
cleaned_toxic_tweets.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3dd96e948dfc0a25392e286f09d5eb548e83b7a2ee03886c5f27c7c4e19c045c
+ size 10497842
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ pandas
+ nest_asyncio
+ fastapi
+ uvicorn
+ openai
+ requests
+ gradio
+ pydantic
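
One gap worth flagging: webserver.py (below) imports `transformers` and needs a torch backend for its pipeline, yet neither package is listed above. A small preflight sketch (an editorial addition, not part of this commit) that verifies every package the two scripts import is actually installed:

```python
# check_deps.py -- illustrative sketch, not part of the commit.
import importlib.util

# Packages from requirements.txt, plus the ones webserver.py imports lazily.
required = ["pandas", "nest_asyncio", "fastapi", "uvicorn", "openai",
            "requests", "gradio", "pydantic", "transformers", "torch"]

missing = [name for name in required if importlib.util.find_spec(name) is None]
if missing:
    raise SystemExit("Missing packages: " + ", ".join(missing))
print("All dependencies are installed.")
```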
webserver.py ADDED
@@ -0,0 +1,89 @@
+ import logging
+ import os
+ import random
+ import threading
+
+ import nest_asyncio
+ import openai
+ import pandas as pd
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from uvicorn import Config, Server
+
+ # Allow Uvicorn to run inside a notebook event loop
+ nest_asyncio.apply()
+ logging.basicConfig(level=logging.INFO)
+
+ # Initialize the FastAPI app
+ app = FastAPI()
+
+ # Load the tweet dataset into a DataFrame
+ df = pd.read_csv('cleaned_toxic_tweets.csv')  # Replace with the actual path to your CSV file
+
+ # Ensure the 'cleaned_tweet' column exists in the DataFrame
+ if 'cleaned_tweet' not in df.columns:
+     raise ValueError("The CSV file must contain a 'cleaned_tweet' column")
+
+ # Request body model
+ class TweetRequest(BaseModel):
+     tweet: str
+
+ # Endpoint: return a random tweet from the dataset
+ @app.get("/random_tweet")
+ def get_random_tweet():
+     random_tweet = random.choice(df['cleaned_tweet'].tolist())
+     return {"random_tweet": random_tweet}
+
+ # Endpoint: score a tweet with OpenAI's moderation model
+ @app.post("/test_tweet_openapi")
+ def test_tweet_openapi(request: TweetRequest):
+     toxic_comment = request.tweet
+     logging.info(f"Input: {toxic_comment}")
+     ai_client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+     response = ai_client.moderations.create(input=toxic_comment, model='omni-moderation-latest')
+     print("OpenAI omni-moderation-latest response:")
+     # Return the first (and only) moderation result
+     return response.results[0]
+
+ # Endpoint: classify a tweet with Hugging Face's unitary/toxic-bert
+ @app.post("/test_tweet_huggingface")
+ def test_tweet_huggingface(request: TweetRequest):
+     # Requires the 'transformers' package (plus a torch backend),
+     # which requirements.txt does not list.
+     from transformers import pipeline
+
+     # Through the text-classification pipeline, unitary/toxic-bert yields a
+     # single label/score pair (toxic vs. non-toxic) rather than per-category
+     # scores. Note: building the pipeline here reloads the model on every
+     # request; caching it at module scope would avoid that cost.
+     toxic_comment = request.tweet
+     pipe = pipeline("text-classification", model="unitary/toxic-bert")
+     helper = pipe(toxic_comment)
+     print("HF model unitary/toxic-bert response:")
+     return helper
+
+ # Endpoint: shut down the server gracefully
+ @app.get("/shutdown")
+ def shutdown_server():
+     global server
+     if server is not None:
+         server.should_exit = True  # Signal Uvicorn to shut down
+         return "Server shutting down..."
+     return "Server not running"
+
+
+ # Global variable holding the Uvicorn server instance
+ server = None
+
+ # Run FastAPI in a separate thread
+ def run_fastapi():
+     global server
+     config = Config(app, host="127.0.0.1", port=5050, log_level="info")
+     server = Server(config)
+     server.run()
+
+ def start_web_server():
+     # Run the FastAPI server on a background daemon thread
+     fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
+     fastapi_thread.start()
+     print("✅ FastAPI server is running in the background. You can proceed with other cells.")
+