Upload 4 files
- .gitattributes +1 -0
- app.py +124 -0
- cleaned_toxic_tweets.csv +3 -0
- requirements.txt +8 -0
- webserver.py +89 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cleaned_toxic_tweets.csv filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,124 @@
import requests
import gradio as gr
import json

# Define the API endpoints
get_random_tweet_url = "http://127.0.0.1:5050/random_tweet"
openapi_url = "http://127.0.0.1:5050/test_tweet_openapi"
huggingface_url = "http://127.0.0.1:5050/test_tweet_huggingface"

def _get_random_tweet():
    response = requests.get(get_random_tweet_url)
    return response.json()['random_tweet']

def _test_tweet_openapi(tweet):
    response = requests.post(openapi_url, json={"tweet": tweet})
    return response.json()

def _test_tweet_huggingface(tweet):
    response = requests.post(huggingface_url, json={"tweet": tweet})
    return response.json()

def _shutdown_server():
    return requests.get("http://127.0.0.1:5050/shutdown")

mksdown = """# 😃 Welcome To The Friendly Text Moderation for Twitter (X) Posts
### 🔍 Identify 13 Categories of Text Toxicity

> 🚀 This **NLP-powered AI** aims to detect and prevent **profanity, vulgarity, hate speech, violence, sexism, and offensive language** in tweets.
> 🛡️ **Not an act of censorship** – the UI allows readers (excluding young audiences) to click on a label to reveal toxic messages.
> 🎯 **Goal**: Foster a safer, more respectful online space for **you, your colleagues, and your family**.

---

## 🛠️ How to Use This App?
1️⃣ **Enter your tweet** (or use "Populate Random Tweet" to load a harmful tweet from a Kaggle dataset).
2️⃣ **Click "Measure Toxicity OpenAPI"** to analyze toxicity across 13 categories, visualized as a **horizontal bar graph**.
3️⃣ **Click "Measure Toxicity HF"** to get a **JSON-based** safe/unsafe result with toxicity percentages using **Hugging Face**.

---

## 📌 AI Models Used
- 🧠 **OpenAI’s 'omni-moderation-latest' model** for multi-category toxicity detection.
- 🤖 **Hugging Face’s 'unitary/toxic-bert' model** for binary (Safe/Unsafe) classification.
- 🔬 **Understands context, nuance, and intent** – beyond just swear words!

---

## 📊 Toxicity Categories (13)
1️⃣ **Sexual**
2️⃣ **Harassment**
3️⃣ **Violence**
4️⃣ **Hate**
5️⃣ **Illicit**
6️⃣ **Harassment/Threatening**
7️⃣ **Self-Harm**
8️⃣ **Sexual/Minors**
9️⃣ **Self-Harm/Intent**
🔟 **Self-Harm/Instructions**
1️⃣1️⃣ **Illicit/Violent**
1️⃣2️⃣ **Hate/Threatening**
1️⃣3️⃣ **Violence/Graphic**

---

## 📝 Example Hugging Face Output
```json
{
    "toxicity": "unsafe",
    "%Toxic": 65.95,
    "%Safe": 34.05
}
```

* The OpenAI path scores the tweet against all 13 categories and displays each score as a percentage.
* The real-world dataset is the "Toxic Tweets Dataset" from Kaggle (https://www.kaggle.com/datasets/ashwiniyer176/toxic-tweets-dataset/data).

---
# 🌟 "AI Solution Architect" Course by ELVTR
"""
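# (Note) The three buttons described in the welcome text above map onto the local FastAPI
# routes defined in webserver.py (host/port hard-coded at the top of this file); a rough mapping:
#   "Populate Random Tweet"    -> GET  /random_tweet
#   "Measure Toxicity OpenAPI" -> POST /test_tweet_openapi     with body {"tweet": "..."}
#   "Measure Toxicity HF"      -> POST /test_tweet_huggingface with body {"tweet": "..."}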

# Function to get toxicity scores from OpenAI
def get_toxicity_openai(tweet):
    open_api_answer = _test_tweet_openapi(tweet)
    open_api_answer['category_scores']['IS THIS TWEET TOXIC'] = open_api_answer['flagged']
    return open_api_answer['category_scores']
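# For reference, 'category_scores' is expected to be a flat dict of category name -> score,
# which gr.Label renders as a bar graph. Illustrative shape only (values made up):
#   {"harassment": 0.82, "hate": 0.10, "violence": 0.05, ..., "IS THIS TWEET TOXIC": True}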

# Function to get toxicity scores from Hugging Face
def get_toxicity_hf(tweet):
    hugging_face_answer = _test_tweet_huggingface(tweet)
    print(hugging_face_answer)
    score = hugging_face_answer[0]['score'] * 100
    if score <= 60:
        return json.dumps({"toxicity": "safe", "%Toxic": score, "%Safe": (100 - score)}, indent=4)
    else:
        return json.dumps({"toxicity": "unsafe", "%Toxic": score, "%Safe": (100 - score)}, indent=4)

# Random Tweet Generator
def get_random_tweet():
    return _get_random_tweet()

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown(mksdown)
    user_input = gr.Textbox(label="Paste your paragraph (2-10 lines)", lines=5)

    with gr.Row():
        analyze_btn = gr.Button("Measure Toxicity OpenAPI")
        analyze_btn_hf = gr.Button("Measure Toxicity HF")
        random_tweet_btn = gr.Button("Populate Random Tweet")

    toxicity_output_json = gr.Code(label="Formatted Toxicity JSON", language="json")
    toxicity_output = gr.Label("Toxicity Results")

    analyze_btn_hf.click(get_toxicity_hf, inputs=[user_input], outputs=[toxicity_output_json])
    analyze_btn.click(get_toxicity_openai, inputs=[user_input], outputs=[toxicity_output])
    random_tweet_btn.click(get_random_tweet, outputs=user_input)

if __name__ == "__main__":
    from webserver import start_web_server
    start_web_server()
    demo.launch(debug=True)
cleaned_toxic_tweets.csv
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3dd96e948dfc0a25392e286f09d5eb548e83b7a2ee03886c5f27c7c4e19c045c
size 10497842
requirements.txt
ADDED
@@ -0,0 +1,8 @@
pandas
nest_asyncio
fastapi
uvicorn
openai
requests
gradio
pydantic
webserver.py
ADDED
@@ -0,0 +1,89 @@
import pandas as pd
import random
import nest_asyncio
from fastapi import FastAPI
from uvicorn import Config, Server
import openai
import threading
from pydantic import BaseModel
import os

# Allow FastAPI to run in the notebook event loop
nest_asyncio.apply()
# Initialize FastAPI app
app = FastAPI()

# Load the CSV file into a DataFrame
df = pd.read_csv('cleaned_toxic_tweets.csv')  # Replace with the actual path to your CSV file

# Ensure the 'cleaned_tweet' column exists in the DataFrame
if 'cleaned_tweet' not in df.columns:
    raise ValueError("The CSV file must contain a 'cleaned_tweet' column")

# Define request body model
class TweetRequest(BaseModel):
    tweet: str

# Endpoint to get a random tweet
@app.get("/random_tweet")
def get_random_tweet():
    # Get a random value from the 'cleaned_tweet' column
    random_tweet = random.choice(df['cleaned_tweet'].tolist())
    return {"random_tweet": random_tweet}

@app.post("/test_tweet_openapi")
def test_tweet_openapi(request: TweetRequest):
    # Open AI Access
    toxic_comment = request.tweet
    import logging
    logging.basicConfig(level=logging.INFO)
    logging.info(f"Input: {toxic_comment}")
    ai_client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = ai_client.moderations.create(input=toxic_comment, model='omni-moderation-latest')
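    # (Note) The moderation result is expected to expose 'flagged', 'categories', and
    # 'category_scores'; FastAPI serializes the returned object to JSON, and app.py reads
    # the 'flagged' and 'category_scores' fields from that JSON.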
    # Extract the first result
    print("Open API omni-moderation-latest Response:")
    return response.results[0]


@app.post("/test_tweet_huggingface")
def test_tweet_huggingface(request: TweetRequest):
    # NOTE: transformers (and a torch backend) must be installed for this endpoint;
    # neither is listed in requirements.txt.
    from transformers import pipeline

    # This model only tells toxic / not toxic when using the pipeline way of getting results.
    # When using the Hugging Face pipeline for text classification
    # with the unitary/toxic-bert model, it typically provides a simple
    # classification output, such as "toxic" or "non-toxic."
    # This is because the pipeline is designed to give a straightforward result based on the model's predictions.
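    # Illustrative output shape from the pipeline for a single input (values are made up):
    #   [{'label': 'toxic', 'score': 0.97}]
    # app.py reads the first entry's 'score' and converts it into a toxic/safe percentage.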
    toxic_comment = request.tweet
    pipe = pipeline("text-classification", model="unitary/toxic-bert")
    helper = pipe(toxic_comment)
    print("HF model unitary/toxic-bert Response:")
    return helper

# API to shut down the server gracefully
@app.get("/shutdown")
def shutdown_server():
    global server
    if server is not None:
        server.should_exit = True  # Signal Uvicorn to shut down
        return "Server shutting down..."
    return "Server not running"

# Global variable to store Uvicorn server instance
server = None

# Function to run FastAPI in a separate thread
def run_fastapi():
    global server
    config = Config(app, host="127.0.0.1", port=5050, log_level="info")
    server = Server(config)
    server.run()

def start_web_server():
    # Run the FastAPI server in the background thread
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()
    print("✅ FastAPI server is running in the background. You can proceed with other cells.")
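# Quick local smoke test (hypothetical usage, not part of the app flow): after calling
# start_web_server(), the endpoints should answer on the hard-coded port, e.g.
#   import requests, time
#   time.sleep(2)  # give uvicorn a moment to start
#   print(requests.get("http://127.0.0.1:5050/random_tweet").json())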