Upload 4 files
- .gitattributes +1 -0
- app.py +124 -0
- cleaned_toxic_tweets.csv +3 -0
- requirements.txt +8 -0
- webserver.py +89 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cleaned_toxic_tweets.csv filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,124 @@
import requests
import gradio as gr
import json

# Define the API endpoints
get_random_tweet_url = "http://127.0.0.1:5050/random_tweet"
openapi_url = "http://127.0.0.1:5050/test_tweet_openapi"
huggingface_url = "http://127.0.0.1:5050/test_tweet_huggingface"

def _get_random_tweet():
    response = requests.get(get_random_tweet_url)
    return response.json()['random_tweet']

def _test_tweet_openapi(tweet):
    response = requests.post(openapi_url, json={"tweet": tweet})
    return response.json()

def _test_tweet_huggingface(tweet):
    response = requests.post(huggingface_url, json={"tweet": tweet})
    return response.json()

def _shutdown_server():
    return requests.get("http://127.0.0.1:5050/shutdown")

mksdown = """# 😃 Welcome To The Friendly Text Moderation for Twitter (X) Posts
### 🔍 Identify 13 Categories of Text Toxicity

> 🚀 This **NLP-powered AI** aims to detect and prevent **profanity, vulgarity, hate speech, violence, sexism, and offensive language** in tweets.
> 🛡️ **Not an act of censorship** – the UI allows readers (excluding young audiences) to click on a label to reveal toxic messages.
> 🎯 **Goal**: Foster a safer, more respectful online space for **you, your colleagues, and your family**.

---

## 🛠️ How to Use This App?
1️⃣ **Enter your tweet** (or use "Populate Random Tweet" to load a harmful tweet from a Kaggle dataset).
2️⃣ **Click "Measure Toxicity OpenAPI"** to analyze toxicity across 13 categories, visualized as a **horizontal bar graph**.
3️⃣ **Click "Measure Toxicity HF"** to get a **JSON-based** safe/unsafe result with toxicity percentages using **Hugging Face**.

---

## 📌 AI Models Used
- 🧠 **OpenAI’s 'omni-moderation-latest' model** for multi-category toxicity detection.
- 🤖 **Hugging Face’s 'unitary/toxic-bert' model** for binary (Safe/Unsafe) classification.
- 🔬 **Understands context, nuance, and intent** – beyond just swear words!

---

## 📊 Toxicity Categories (13)
1️⃣ **Sexual**
2️⃣ **Harassment**
3️⃣ **Violence**
4️⃣ **Hate**
5️⃣ **Illicit**
6️⃣ **Harassment/Threatening**
7️⃣ **Self-Harm**
8️⃣ **Sexual/Minors**
9️⃣ **Self-Harm/Intent**
🔟 **Self-Harm/Instructions**
1️⃣1️⃣ **Illicit/Violent**
1️⃣2️⃣ **Hate/Threatening**
1️⃣3️⃣ **Violence/Graphic**

---

## 📝 Example Hugging Face Output
```json
{
    "toxicity": "unsafe",
    "%Toxic": 65.95,
    "%Safe": 34.05
}
```

* The OpenAI path scores the tweet against all 13 categories and displays each score as a percentage.
* The real-world dataset is the "Toxic Tweets Dataset" from Kaggle (https://www.kaggle.com/datasets/ashwiniyer176/toxic-tweets-dataset/data).

---
# 🌟 "AI Solution Architect" Course by ELVTR
"""
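# (Note) The three buttons described in the welcome text above map onto the local FastAPI
# routes defined in webserver.py (host/port hard-coded at the top of this file); a rough mapping:
#   "Populate Random Tweet"    -> GET  /random_tweet
#   "Measure Toxicity OpenAPI" -> POST /test_tweet_openapi     with body {"tweet": "..."}
#   "Measure Toxicity HF"      -> POST /test_tweet_huggingface with body {"tweet": "..."}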

# Function to get toxicity scores from OpenAI
def get_toxicity_openai(tweet):
    open_api_answer = _test_tweet_openapi(tweet)
    open_api_answer['category_scores']['IS THIS TWEET TOXIC'] = open_api_answer['flagged']
    return open_api_answer['category_scores']
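# For reference, 'category_scores' is expected to be a flat dict of category name -> score,
# which gr.Label renders as a bar graph. Illustrative shape only (values made up):
#   {"harassment": 0.82, "hate": 0.10, "violence": 0.05, ..., "IS THIS TWEET TOXIC": True}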

# Function to get toxicity scores from Hugging Face
def get_toxicity_hf(tweet):
    hugging_face_answer = _test_tweet_huggingface(tweet)
    print(hugging_face_answer)
    score = hugging_face_answer[0]['score'] * 100
    if score <= 60:
        return json.dumps({"toxicity": "safe", "%Toxic": score, "%Safe": (100 - score)}, indent=4)
    else:
        return json.dumps({"toxicity": "unsafe", "%Toxic": score, "%Safe": (100 - score)}, indent=4)

# Random Tweet Generator
def get_random_tweet():
    return _get_random_tweet()

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown(mksdown)
    user_input = gr.Textbox(label="Paste your paragraph (2-10 lines)", lines=5)

    with gr.Row():
        analyze_btn = gr.Button("Measure Toxicity OpenAPI")
        analyze_btn_hf = gr.Button("Measure Toxicity HF")
        random_tweet_btn = gr.Button("Populate Random Tweet")

    toxicity_output_json = gr.Code(label="Formatted Toxicity JSON", language="json")
    toxicity_output = gr.Label("Toxicity Results")

    analyze_btn_hf.click(get_toxicity_hf, inputs=[user_input], outputs=[toxicity_output_json])
    analyze_btn.click(get_toxicity_openai, inputs=[user_input], outputs=[toxicity_output])
    random_tweet_btn.click(get_random_tweet, outputs=user_input)

if __name__ == "__main__":
    from webserver import start_web_server
    start_web_server()
    demo.launch(debug=True)
cleaned_toxic_tweets.csv
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3dd96e948dfc0a25392e286f09d5eb548e83b7a2ee03886c5f27c7c4e19c045c
size 10497842
requirements.txt
ADDED
@@ -0,0 +1,8 @@
pandas
nest_asyncio
fastapi
uvicorn
openai
requests
gradio
pydantic
webserver.py
ADDED
@@ -0,0 +1,89 @@
import pandas as pd
import random
import nest_asyncio
from fastapi import FastAPI
from uvicorn import Config, Server
import openai
import threading
from pydantic import BaseModel
import os

# Allow FastAPI to run in the notebook event loop
nest_asyncio.apply()
# Initialize FastAPI app
app = FastAPI()

# Load the CSV file into a DataFrame
df = pd.read_csv('cleaned_toxic_tweets.csv')  # Replace with the actual path to your CSV file

# Ensure the 'cleaned_tweet' column exists in the DataFrame
if 'cleaned_tweet' not in df.columns:
    raise ValueError("The CSV file must contain a 'cleaned_tweet' column")

# Define request body model
class TweetRequest(BaseModel):
    tweet: str

# Endpoint to get a random tweet
@app.get("/random_tweet")
def get_random_tweet():
    # Get a random value from the 'cleaned_tweet' column
    random_tweet = random.choice(df['cleaned_tweet'].tolist())
    return {"random_tweet": random_tweet}

@app.post("/test_tweet_openapi")
def test_tweet_openapi(request: TweetRequest):
    # Open AI Access
    toxic_comment = request.tweet
    import logging
    logging.basicConfig(level=logging.INFO)
    logging.info(f"Input: {toxic_comment}")
    ai_client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = ai_client.moderations.create(input=toxic_comment, model='omni-moderation-latest')
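    # (Note) The moderation result is expected to expose 'flagged', 'categories', and
    # 'category_scores'; FastAPI serializes the returned object to JSON, and app.py reads
    # the 'flagged' and 'category_scores' fields from that JSON.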
    # Extract the first result
    print("Open API omni-moderation-latest Response:")
    return response.results[0]


@app.post("/test_tweet_huggingface")
def test_tweet_huggingface(request: TweetRequest):
    # NOTE: transformers (and a torch backend) must be installed for this endpoint;
    # neither is listed in requirements.txt.
    from transformers import pipeline

    # This model only tells toxic / not toxic when using the pipeline way of getting results.
    # When using the Hugging Face pipeline for text classification
    # with the unitary/toxic-bert model, it typically provides a simple
    # classification output, such as "toxic" or "non-toxic."
    # This is because the pipeline is designed to give a straightforward result based on the model's predictions.
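    # Illustrative output shape from the pipeline for a single input (values are made up):
    #   [{'label': 'toxic', 'score': 0.97}]
    # app.py reads the first entry's 'score' and converts it into a toxic/safe percentage.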
    toxic_comment = request.tweet
    pipe = pipeline("text-classification", model="unitary/toxic-bert")
    helper = pipe(toxic_comment)
    print("HF model unitary/toxic-bert Response:")
    return helper

# API to shut down the server gracefully
@app.get("/shutdown")
def shutdown_server():
    global server
    if server is not None:
        server.should_exit = True  # Signal Uvicorn to shut down
        return "Server shutting down..."
    return "Server not running"

# Global variable to store Uvicorn server instance
server = None

# Function to run FastAPI in a separate thread
def run_fastapi():
    global server
    config = Config(app, host="127.0.0.1", port=5050, log_level="info")
    server = Server(config)
    server.run()

def start_web_server():
    # Run the FastAPI server in the background thread
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()
    print("✅ FastAPI server is running in the background. You can proceed with other cells.")
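# Quick local smoke test (hypothetical usage, not part of the app flow): after calling
# start_web_server(), the endpoints should answer on the hard-coded port, e.g.
#   import requests, time
#   time.sleep(2)  # give uvicorn a moment to start
#   print(requests.get("http://127.0.0.1:5050/random_tweet").json())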