Hasitha16 committed on
Commit
7ea2e4f
·
verified ·
1 Parent(s): 89bb67c

Upload 7 files

Browse files
Files changed (8) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +26 -0
  3. README.md +9 -11
  4. frontend.py +162 -0
  5. logo.png +3 -0
  6. main.py +249 -0
  7. model.py +121 -0
  8. requirements.txt +10 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ logo.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ---- BASE PYTHON IMAGE ----
FROM python:3.10-slim

# ---- ENV & WORKDIR ----
# Do not write .pyc files, and flush stdout/stderr immediately so logs stream.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
WORKDIR /code

# ---- SYSTEM DEPENDENCIES ----
# --no-install-recommends keeps the image small; the apt lists are removed in
# the same layer so they never end up in the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libsndfile1 ffmpeg git \
    && rm -rf /var/lib/apt/lists/*

# ---- INSTALL DEPENDENCIES ----
# Copy only requirements.txt first so this (slow) layer is rebuilt only when
# the dependency list changes, not on every source edit.
COPY requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# ---- COPY PROJECT FILES ----
COPY . /code

# ---- EXPOSE PORTS ----
EXPOSE 7860
EXPOSE 8000

# ---- LAUNCH BOTH BACKEND & FRONTEND ----
# NOTE(review): the uvicorn target `app.main:app` assumes main.py lives in an
# `app/` package under /code -- confirm this matches the repository layout.
CMD ["bash", "-c", "uvicorn app.main:app --host 0.0.0.0 --port 8000 & streamlit run frontend.py --server.port 7860 --server.address 0.0.0.0"]
README.md CHANGED
@@ -1,11 +1,9 @@
1
- ---
2
- title: Neuro Pulse Ai
3
- emoji: 📊
4
- colorFrom: green
5
- colorTo: yellow
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # NeuroPulse AI
2
+
3
+ Multimodal Feedback Analyzer with Streamlit + FastAPI.
4
+ Summarization · Sentiment · Emotion · Aspects · Smart Clustering.
5
+
6
+ ## Running locally
7
+ ```bash
8
+ streamlit run app/frontend/frontend.py
9
+ python -m uvicorn app.main:app --reload
+ ```
 
 
frontend.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import pandas as pd
4
+ from gtts import gTTS
5
+ import base64
6
+ from io import BytesIO
7
+ from PIL import Image
8
+ import os
9
+
10
+ st.set_page_config(page_title="NeuroPulse AI", page_icon="🧠", layout="wide")
11
+
12
+ logo_path = os.path.join("app", "static", "logo.png")
13
+ if os.path.exists(logo_path):
14
+ st.image(logo_path, width=160)
15
+
16
+ # Session state
17
+ if "history" not in st.session_state:
18
+ st.session_state.history = []
19
+ if "dark_mode" not in st.session_state:
20
+ st.session_state.dark_mode = False
21
+
22
+ # Sidebar
23
+ with st.sidebar:
24
+ st.header("βš™οΈ Settings")
25
+ st.session_state.dark_mode = st.toggle("πŸŒ™ Dark Mode", value=st.session_state.dark_mode)
26
+
27
+ sentiment_model = st.selectbox("πŸ“Š Sentiment Model", [
28
+ "distilbert-base-uncased-finetuned-sst-2-english",
29
+ "nlptown/bert-base-multilingual-uncased-sentiment"
30
+ ])
31
+
32
+ industry = st.selectbox("🏭 Industry Context", [
33
+ "Generic", "E-commerce", "Healthcare", "Education", "Travel", "Banking", "Insurance"
34
+ ])
35
+
36
+ product_category = st.selectbox("🧩 Product Category", [
37
+ "General", "Mobile Devices", "Laptops", "Healthcare Devices", "Banking App",
38
+ "Travel Service", "Educational Tool", "Insurance Portal"
39
+ ])
40
+
41
+ device_type = st.selectbox("πŸ’» Device Type", [
42
+ "Web", "Android", "iOS", "Desktop", "Smartwatch", "Kiosk"
43
+ ])
44
+
45
+ use_aspects = st.checkbox("πŸ“ˆ Enable Aspect-Based Analysis")
46
+ use_smart_summary = st.checkbox("🧠 Use Smart Summary (clustered key points)")
47
+ use_smart_summary_bulk = st.checkbox("🧠 Smart Summary for Bulk CSV")
48
+
49
+ follow_up = st.text_input("πŸ” Follow-up Question")
50
+ voice_lang = st.selectbox("πŸ”ˆ Voice Language", ["en", "fr", "es", "de", "hi", "zh"])
51
+ backend_url = st.text_input("πŸ–₯️ Backend URL", value="http://127.0.0.1:8000")
52
+ api_token = st.text_input("πŸ” API Token", type="password")
53
+
54
+ # Tabs
55
+ tab1, tab2 = st.tabs(["🧠 Single Review", "πŸ“š Bulk CSV"])
56
+
57
def speak(text, lang='en'):
    """Synthesize *text* to MP3, render an inline audio player, and return the audio.

    Args:
        text: Text handed to gTTS for speech synthesis.
        lang: Language code forwarded to gTTS.

    Returns:
        A ``BytesIO`` holding the MP3 data, rewound to position 0 so the
        caller can ``read()`` it directly.
    """
    buffer = BytesIO()
    gTTS(text, lang=lang).write_to_fp(buffer)

    # Embed the clip as a base64 data URI so no temporary file is needed.
    encoded = base64.b64encode(buffer.getvalue()).decode()
    player_html = f'<audio controls><source src="data:audio/mp3;base64,{encoded}" type="audio/mp3"></audio>'
    st.markdown(player_html, unsafe_allow_html=True)

    buffer.seek(0)
    return buffer
65
+
66
+ # Tab: Single Review
67
+ with tab1:
68
+ st.title("🧠 NeuroPulse AI – Multimodal Review Analyzer")
69
+
70
+ review = st.session_state.get("review", "")
71
+ review = st.text_area("πŸ“ Enter a Review", value=review, height=160)
72
+
73
+ col1, col2, col3 = st.columns(3)
74
+ with col1:
75
+ analyze = st.button("πŸ” Analyze")
76
+ with col2:
77
+ if st.button("🎲 Example"):
78
+ st.session_state["review"] = "App was smooth, but the transaction failed twice on Android."
79
+ st.rerun()
80
+ with col3:
81
+ if st.button("🧹 Clear"):
82
+ st.session_state["review"] = ""
83
+ st.rerun()
84
+
85
+ if analyze and review:
86
+ with st.spinner("Analyzing..."):
87
+ try:
88
+ payload = {
89
+ "text": review,
90
+ "model": sentiment_model,
91
+ "industry": industry,
92
+ "aspects": use_aspects,
93
+ "follow_up": follow_up,
94
+ "product_category": product_category,
95
+ "device": device_type
96
+ }
97
+ headers = {"X-API-Key": api_token} if api_token else {}
98
+ params = {"smart": "1"} if use_smart_summary else {}
99
+ res = requests.post(f"{backend_url}/analyze/", json=payload, headers=headers, params=params)
100
+ if res.status_code == 200:
101
+ data = res.json()
102
+ st.success("βœ… Analysis Complete")
103
+ st.subheader("πŸ“Œ Summary")
104
+ st.info(data["summary"])
105
+ st.caption(f"🧠 Summary Type: {'Smart Summary' if use_smart_summary else 'Standard Model'}")
106
+ st.subheader("πŸ”Š Audio")
107
+ audio = speak(data["summary"], lang=voice_lang)
108
+ st.download_button("⬇️ Download Summary Audio", audio.read(), "summary.mp3", mime="audio/mp3")
109
+ st.metric("πŸ“Š Sentiment", data["sentiment"]["label"], delta=f"{data['sentiment']['score']:.2%}")
110
+ st.info(f"πŸ’’ Emotion: {data['emotion']}")
111
+ if data.get("aspects"):
112
+ st.subheader("πŸ”¬ Aspects")
113
+ for a in data["aspects"]:
114
+ st.write(f"πŸ”Ή {a['aspect']}: {a['sentiment']} ({a['score']:.2%})")
115
+ if data.get("follow_up"):
116
+ st.subheader("πŸ€– Follow-Up Response")
117
+ st.warning(data["follow_up"])
118
+ else:
119
+ st.error(f"❌ API Error: {res.status_code}")
120
+ except Exception as e:
121
+ st.error(f"🚫 {e}")
122
+
123
+ # Tab: Bulk CSV
124
+ with tab2:
125
+ st.title("πŸ“š Bulk CSV Upload")
126
+ uploaded_file = st.file_uploader("Upload CSV with `review` column", type="csv")
127
+ if uploaded_file:
128
+ try:
129
+ df = pd.read_csv(uploaded_file)
130
+ if "review" in df.columns:
131
+ st.success(f"βœ… Loaded {len(df)} reviews")
132
+
133
+ for col in ["industry", "product_category", "device"]:
134
+ if col not in df.columns:
135
+ df[col] = [""] * len(df)
136
+ df[col] = df[col].fillna("").astype(str)
137
+
138
+ if st.button("πŸ“Š Analyze Bulk Reviews"):
139
+ with st.spinner("Processing..."):
140
+ payload = {
141
+ "reviews": df["review"].tolist(),
142
+ "model": sentiment_model,
143
+ "aspects": use_aspects,
144
+ "industry": df["industry"].tolist(),
145
+ "product_category": df["product_category"].tolist(),
146
+ "device": df["device"].tolist()
147
+ }
148
+ headers = {"X-API-Key": api_token} if api_token else {}
149
+ params = {"smart": "1"} if use_smart_summary_bulk else {}
150
+
151
+ res = requests.post(f"{backend_url}/bulk/", json=payload, headers=headers, params=params)
152
+ if res.status_code == 200:
153
+ results = pd.DataFrame(res.json()["results"])
154
+ results["summary_type"] = "Smart" if use_smart_summary_bulk else "Standard"
155
+ st.dataframe(results)
156
+ st.download_button("⬇️ Download Results CSV", results.to_csv(index=False), "bulk_results.csv", mime="text/csv")
157
+ else:
158
+ st.error(f"❌ Bulk Analysis Failed: {res.status_code}")
159
+ else:
160
+ st.error("CSV must contain a column named `review`.")
161
+ except Exception as e:
162
+ st.error(f"❌ File Error: {e}")
logo.png ADDED

Git LFS Details

  • SHA256: ca98ab53a6295b751283b275deaa2a8cd3713d7a4bfd45ff87e26ebcaa5bb9d5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.06 MB
main.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request, Header, HTTPException
2
+ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
3
+ from fastapi.openapi.utils import get_openapi
4
+ from fastapi.openapi.docs import get_swagger_ui_html
5
+ from pydantic import BaseModel
6
+ from transformers import pipeline
7
+ from io import StringIO
8
+ import os, csv, logging
9
+ from openai import OpenAI
10
+ from app.model import summarize_review, smart_summarize # import both
11
+ from typing import Optional
12
+
13
+ app = FastAPI(
14
+ title="🧠 NeuroPulse AI",
15
+ description="Multilingual GenAI for smarter feedback β€” summarization, sentiment, emotion, aspects, Q&A and tags.",
16
+ version="2025.1.0",
17
+ openapi_url="/openapi.json",
18
+ docs_url=None,
19
+ redoc_url="/redoc"
20
+ )
21
+
22
@app.get("/docs", include_in_schema=False)
def custom_swagger_ui():
    """Serve the Swagger UI page with a custom title and favicon.

    The Swagger JS/CSS asset URLs are intentionally not overridden: the
    previously configured CDN URLs were not valid package URLs, so the page
    falls back to the assets that ``get_swagger_ui_html`` uses by default.

    Returns:
        The HTML response produced by ``get_swagger_ui_html``.
    """
    return get_swagger_ui_html(
        openapi_url=app.openapi_url,
        title="🧠 Swagger UI - NeuroPulse AI",
        swagger_favicon_url="https://cdn-icons-png.flaticon.com/512/3794/3794616.png",
    )
31
+
32
+ @app.get("/", response_class=HTMLResponse)
33
+ def root():
34
+ return """
35
+ <html>
36
+ <head>
37
+ <title>NeuroPulse AI</title>
38
+ <style>
39
+ body {
40
+ font-family: 'Segoe UI', sans-serif;
41
+ background: linear-gradient(135deg, #f0f4ff, #fef3c7);
42
+ margin: 0;
43
+ padding: 60px;
44
+ text-align: center;
45
+ color: #1f2937;
46
+ }
47
+ .container {
48
+ background: white;
49
+ padding: 40px;
50
+ border-radius: 16px;
51
+ max-width: 800px;
52
+ margin: auto;
53
+ box-shadow: 0 10px 30px rgba(0,0,0,0.08);
54
+ animation: fadeIn 1s ease-in-out;
55
+ }
56
+ @keyframes fadeIn {
57
+ from {opacity: 0; transform: translateY(20px);}
58
+ to {opacity: 1; transform: translateY(0);}
59
+ }
60
+ h1 {
61
+ font-size: 36px;
62
+ margin-bottom: 12px;
63
+ color: #4f46e5;
64
+ }
65
+ p {
66
+ font-size: 18px;
67
+ margin-bottom: 32px;
68
+ }
69
+ .btn {
70
+ display: inline-block;
71
+ margin: 8px;
72
+ padding: 14px 24px;
73
+ border-radius: 8px;
74
+ font-weight: 600;
75
+ color: white;
76
+ text-decoration: none;
77
+ background: linear-gradient(90deg, #4f46e5, #6366f1);
78
+ transition: all 0.3s ease;
79
+ }
80
+ .btn:hover {
81
+ transform: translateY(-2px);
82
+ box-shadow: 0 4px 12px rgba(0,0,0,0.1);
83
+ }
84
+ .btn.red {
85
+ background: linear-gradient(90deg, #dc2626, #ef4444);
86
+ }
87
+ </style>
88
+ </head>
89
+ <body>
90
+ <div class="container">
91
+ <h1>🧠 Welcome to <strong>NeuroPulse AI</strong></h1>
92
+ <p>Smarter AI feedback analysis β€” Summarization, Sentiment, Emotion, Aspects, LLM Q&A, and Metadata Tags.</p>
93
+ <a class="btn" href="/docs">πŸ“˜ Swagger UI</a>
94
+ <a class="btn red" href="/redoc">πŸ“• ReDoc</a>
95
+ </div>
96
+ </body>
97
+ </html>
98
+ """
99
+
100
+ # --- Models ---
101
class ReviewInput(BaseModel):
    """Request body for the single-review ``/analyze/`` endpoint.

    The free-form optional fields are typed ``Optional[str]`` so that a client
    may omit them or send an explicit JSON ``null``.
    """

    # Review text to analyze.
    text: str
    # Key used to look up the sentiment pipeline.
    model: str = "distilbert-base-uncased-finetuned-sst-2-english"
    # Metadata tag echoed back in the response.
    industry: str = "Generic"
    # When true, keyword-based aspect sentiment is computed.
    aspects: bool = False
    # Optional follow-up question answered by the chat LLM.
    follow_up: Optional[str] = None
    # Metadata tags echoed back in the response.
    product_category: Optional[str] = None
    device: Optional[str] = None
109
+
110
+ class BulkReviewInput(BaseModel):
111
+ reviews: list[str]
112
+ model: str = "distilbert-base-uncased-finetuned-sst-2-english"
113
+ industry: Optional[list[str]] = None
114
+ aspects: bool = False
115
+ product_category: Optional[list[str]] = None
116
+ device: Optional[list[str]] = None
117
+
118
+ class ChatInput(BaseModel):
119
+ question: str
120
+ context: str
121
+
122
+ class TranslationInput(BaseModel):
123
+ text: str
124
+ target_lang: str = "fr"
125
+
126
# --- Auth & Logging ---
# The API key is read from the NEUROPULSE_API_KEY environment variable so a
# deployment does not have to ship its secret in source control. The previous
# literal is kept only as a backward-compatible fallback and should be
# overridden in any real deployment.
VALID_API_KEY = os.getenv("NEUROPULSE_API_KEY", "my-secret-key")
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
129
+
130
+ # --- Load Models Once ---
131
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
132
+ emotion_model = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
133
+ sentiment_pipelines = {
134
+ "distilbert-base-uncased-finetuned-sst-2-english": pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english"),
135
+ "nlptown/bert-base-multilingual-uncased-sentiment": pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
136
+ }
137
+
138
+ # --- Analyze (Bulk) ---
139
@app.post("/bulk/")
async def bulk(data: BulkReviewInput, x_api_key: str = Header(None)):
    """Summarize and classify a batch of reviews.

    Args:
        data: Batch payload; ``industry``, ``product_category`` and ``device``
            are optional per-review lists aligned by index with ``reviews``.
        x_api_key: Value of the ``X-API-Key`` request header.

    Returns:
        ``{"results": [...]}`` with one dict per review, in input order.

    Raises:
        HTTPException: 401 when the API key does not match, 400 when
            ``data.model`` is not one of the loaded sentiment pipelines.
    """
    if x_api_key != VALID_API_KEY:
        raise HTTPException(status_code=401, detail="Invalid or missing API key")

    # Report an unknown model as a client error instead of a KeyError (HTTP 500).
    sentiment_pipeline = sentiment_pipelines.get(data.model)
    if sentiment_pipeline is None:
        raise HTTPException(status_code=400, detail=f"Unsupported sentiment model: {data.model}")

    reviews = data.reviews
    if not reviews:
        # Nothing to process; avoid calling the pipelines with an empty batch.
        return {"results": []}

    summaries = summarizer(reviews, max_length=80, min_length=20, truncation=True)
    sentiments = sentiment_pipeline(reviews)
    emotions = emotion_model(reviews)

    def _value_at(values, index):
        # Metadata lists are optional and may be shorter than `reviews`.
        if values and index < len(values):
            return values[index]
        return None

    results = []
    for i, review in enumerate(reviews):
        label = sentiments[i]["label"]
        if "star" in label:
            # Star-rating labels start with the digit; map it to three classes.
            stars = int(label[0])
            label = "NEGATIVE" if stars <= 2 else "NEUTRAL" if stars == 3 else "POSITIVE"

        results.append({
            "review": review,
            "summary": summaries[i]["summary_text"],
            "sentiment": label,
            "emotion": emotions[i][0]["label"],
            "aspects": [],
            "product_category": _value_at(data.product_category, i),
            "device": _value_at(data.device, i),
            "industry": _value_at(data.industry, i),
        })

    return {"results": results}
169
+
170
@app.post("/analyze/")
async def analyze(request: Request, data: ReviewInput, x_api_key: str = Header(None), download: str = None):
    """Analyze a single review: summary, sentiment, emotion, aspects, follow-up.

    Args:
        request: Incoming request; the ``smart`` query parameter (``"1"``)
            selects ``smart_summarize`` instead of ``summarize_review``.
        data: Review payload.
        x_api_key: Value of the ``X-API-Key`` request header.
        download: Accepted for backward compatibility; not used here.

    Returns:
        A dict with the summary, sentiment label and score, emotion, aspect
        results, the optional follow-up answer, and the echoed metadata tags.

    Raises:
        HTTPException: 401 when the API key does not match, 400 when
            ``data.model`` is not one of the loaded sentiment pipelines.
    """
    if x_api_key != VALID_API_KEY:
        raise HTTPException(status_code=401, detail="Invalid or missing API key")

    # Validate the model before doing any expensive work; an unknown name used
    # to surface later as "'NoneType' object is not callable" (HTTP 500).
    sentiment_pipeline = sentiment_pipelines.get(data.model)
    if sentiment_pipeline is None:
        raise HTTPException(status_code=400, detail=f"Unsupported sentiment model: {data.model}")

    use_smart = request.query_params.get("smart") == "1"
    summary = smart_summarize(data.text) if use_smart else summarize_review(data.text)

    sentiment = sentiment_pipeline(data.text)[0]
    label = sentiment["label"]
    if "star" in label:
        # Star-rating labels start with the digit; map it to three classes.
        stars = int(label[0])
        label = "NEGATIVE" if stars <= 2 else "NEUTRAL" if stars == 3 else "POSITIVE"

    emotion = emotion_model(data.text)[0][0]["label"]

    aspects_list = []
    if data.aspects:
        text_lower = data.text.lower()  # hoisted: invariant across aspects
        for asp in ["battery", "price", "camera"]:
            if asp in text_lower:
                asp_result = sentiment_pipeline(asp + " " + data.text)[0]
                aspects_list.append({
                    "aspect": asp,
                    "sentiment": asp_result["label"],
                    "score": asp_result["score"]
                })

    follow_up_response = chat_llm(data.follow_up, data.text) if data.follow_up else None

    return {
        "summary": summary,
        "sentiment": {"label": label, "score": sentiment["score"]},
        "emotion": emotion,
        "aspects": aspects_list,
        "follow_up": follow_up_response,
        "product_category": data.product_category,
        "device": data.device,
        "industry": data.industry
    }
208
+ # --- Translate ---
209
# Small cache of loaded translation pipelines keyed by target language, so a
# model is not reloaded on every request. Bounded because each pipeline holds
# a full model in memory.
_TRANSLATOR_CACHE_SIZE = 4
_translators = {}


@app.post("/translate/")
async def translate(data: TranslationInput):
    """Translate English text using a ``Helsinki-NLP/opus-mt-en-<lang>`` model.

    Args:
        data: Payload with the text and the target language code.

    Returns:
        ``{"translated_text": ...}``.

    Raises:
        HTTPException: 400 when ``target_lang`` is malformed or the
            corresponding model cannot be loaded.
    """
    lang = data.target_lang.strip()
    # The code is interpolated into a model identifier, so restrict it to a
    # conservative character set before using it.
    if not lang or not lang.isascii() or not all(ch.isalnum() or ch in "-_" for ch in lang):
        raise HTTPException(status_code=400, detail="Invalid target language code")

    translator = _translators.get(lang)
    if translator is None:
        try:
            translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{lang}")
        except OSError as err:
            # Raised when the model identifier cannot be resolved or loaded.
            raise HTTPException(status_code=400, detail=f"Unsupported target language: {lang}") from err
        if len(_translators) >= _TRANSLATOR_CACHE_SIZE:
            # Evict the oldest entry (dicts preserve insertion order).
            _translators.pop(next(iter(_translators)))
        _translators[lang] = translator

    return {"translated_text": translator(data.text)[0]["translation_text"]}
213
+
214
+ # --- LLM Agent Chat ---
215
+ @app.post("/chat/")
216
+ async def chat(input: ChatInput, x_api_key: str = Header(None)):
217
+ if x_api_key != VALID_API_KEY:
218
+ raise HTTPException(status_code=401, detail="Invalid or missing API key")
219
+ return {"response": chat_llm(input.question, input.context)}
220
+
221
def chat_llm(question, context):
    """Ask the chat model a question about a review and return its answer.

    Args:
        question: The user's question.
        context: Review text supplied to the model as context.

    Returns:
        The stripped text of the first completion choice, or an empty string
        when the model returns no text content.
    """
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    res = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful AI review analyst."},
            {"role": "user", "content": f"Context: {context}\nQuestion: {question}"}
        ]
    )
    # `content` may be None (e.g. a refusal or tool-call-only message); guard
    # so this does not raise AttributeError on `.strip()`.
    content = res.choices[0].message.content
    return content.strip() if content else ""
231
+
232
+ # --- Custom OpenAPI ---
233
+ def custom_openapi():
234
+ if app.openapi_schema:
235
+ return app.openapi_schema
236
+ openapi_schema = get_openapi(
237
+ title=app.title,
238
+ version=app.version,
239
+ description="""
240
+ <b><span style='color:#4f46e5'>NeuroPulse AI</span></b> Β· Smart GenAI Feedback Engine<br>
241
+ Summarize reviews, detect sentiment/emotion, extract aspects, tag metadata, and ask GPT follow-ups.
242
+ """,
243
+ routes=app.routes
244
+ )
245
+ openapi_schema["openapi"] = "3.0.0"
246
+ app.openapi_schema = openapi_schema
247
+ return app.openapi_schema
248
+
249
+ app.openapi = custom_openapi
model.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+ from pydantic import BaseModel
3
+ from transformers import pipeline
4
+ import nltk.data
5
+
6
+ # βœ… Extra: Smart Summarization Imports
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.cluster import KMeans
9
+ from nltk.tokenize import sent_tokenize
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+ import numpy as np
12
+
13
+ # πŸ“„ Load HuggingFace Pipelines
14
+ summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
15
+ sentiment_analyzer = pipeline("sentiment-analysis")
16
+
17
+ # 🧠 Basic Summarization (Abstractive)
18
+ def summarize_review(text):
19
+ return summarizer(text, max_length=60, min_length=10, do_sample=False, no_repeat_ngram_size=3)[0]["summary_text"]
20
+
21
+ # 🧠 Smart Summarization (Clustered Key Sentences)
22
def smart_summarize(text, n_clusters=1):
    """Extractive summary: one representative sentence per TF-IDF cluster.

    Sentences are vectorized with TF-IDF, grouped with KMeans, and for each
    cluster the sentence most similar (cosine) to the cluster mean is kept.
    The chosen sentences are returned in their original order.

    Args:
        text: Text to summarize.
        n_clusters: Number of clusters, i.e. the maximum number of sentences
            in the summary.

    Returns:
        The summary string. ``text`` is returned unchanged when it has at
        most one sentence or when no TF-IDF vocabulary can be built; all
        sentences are returned when there are no more than ``n_clusters``.
    """
    # A PunktSentenceTokenizer built without training text uses its default
    # parameters.
    tokenizer = nltk.tokenize.PunktSentenceTokenizer()
    sentences = tokenizer.tokenize(text)

    if len(sentences) <= 1:
        return text

    # Cheap guard first: no need to vectorize when nothing would be dropped.
    if len(sentences) <= n_clusters:
        return " ".join(sentences)

    try:
        tfidf_matrix = TfidfVectorizer(stop_words="english").fit_transform(sentences)
    except ValueError:
        # Empty vocabulary (e.g. only stop words): nothing to cluster on.
        return text

    # n_init is pinned so the result does not depend on the library default.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    kmeans.fit(tfidf_matrix)

    chosen = set()
    for cluster in range(n_clusters):
        idx = np.where(kmeans.labels_ == cluster)[0]
        if len(idx) == 0:
            continue
        # np.asarray copes with both np.matrix and ndarray results of mean().
        centroid = np.asarray(tfidf_matrix[idx].mean(axis=0)).reshape(1, -1)
        sim = cosine_similarity(centroid, tfidf_matrix[idx])
        chosen.add(int(idx[np.argmax(sim)]))

    # Order by sentence position; tracking indices (not sentence text) keeps
    # duplicate sentences from being mis-ordered.
    return " ".join(sentences[i] for i in sorted(chosen))
50
+
51
+ # πŸ“Š Sentiment Detection
52
def analyze_sentiment(text):
    """Classify *text* with the module-level sentiment pipeline.

    Labels containing "star" are collapsed using their leading digit:
    1-2 -> NEGATIVE, 3 -> NEUTRAL, otherwise POSITIVE. Other labels are
    passed through unchanged.

    Returns:
        A dict with the (possibly remapped) ``label`` and the pipeline ``score``.
    """
    top = sentiment_analyzer(text)[0]
    label, score = top["label"], top["score"]

    if "star" in label:
        stars = int(label[0])
        label = "NEGATIVE" if stars <= 2 else ("NEUTRAL" if stars == 3 else "POSITIVE")

    return {"label": label, "score": score}
70
+
71
+ # πŸ”₯ Emotion Detection (heuristic-based)
72
def detect_emotion(text):
    """Return a coarse emotion label using case-insensitive keyword matching.

    Keyword groups are checked in priority order and the first group with a
    substring hit wins; ``"neutral"`` is returned when nothing matches.
    """
    lowered = text.lower()
    # Order matters: earlier entries take precedence over later ones.
    keyword_table = (
        ("anger", ("angry", "hate")),
        ("joy", ("happy", "love")),
        ("sadness", ("sad", "disappointed")),
        ("confusion", ("confused", "unclear")),
    )
    for emotion, keywords in keyword_table:
        if any(word in lowered for word in keywords):
            return emotion
    return "neutral"
84
+
85
+ # 🧩 Aspect-Based Sentiment (mock)
86
def extract_aspect_sentiment(text, aspects: list):
    """Assign a mock sentiment label to each aspect (keyword heuristic).

    An aspect is "positive" when it occurs in *text* (case-insensitively) and
    the text does not contain the substring "not"; otherwise it is "neutral".
    The confidence is a fixed placeholder value.

    Args:
        text: Review text to search.
        aspects: Aspect names to look for.

    Returns:
        A dict mapping each aspect, as given, to
        ``{"label": ..., "confidence": 0.85}``.
    """
    text_lower = text.lower()
    # Loop-invariant: computed once. Still a plain substring check.
    negated = "not" in text_lower

    results = {}
    for asp in aspects:
        # Lower-case the aspect too: the text is lower-cased, so an aspect
        # such as "Battery" could otherwise never match.
        mentioned = asp.lower() in text_lower
        results[asp] = {
            "label": "positive" if mentioned and not negated else "neutral",
            "confidence": 0.85
        }
    return results
96
+
97
+ # βœ… Pydantic Schemas for FastAPI
98
+ class ReviewInput(BaseModel):
99
+ text: str
100
+ model: str = "distilbert-base-uncased-finetuned-sst-2-english"
101
+ industry: str = "Generic"
102
+ aspects: bool = False
103
+ follow_up: Optional[str] = None
104
+ product_category: Optional[str] = None
105
+ device: Optional[str] = None
106
+
107
+ class BulkReviewInput(BaseModel):
108
+ reviews: List[str]
109
+ model: str = "distilbert-base-uncased-finetuned-sst-2-english"
110
+ industry: str = "Generic"
111
+ aspects: bool = False
112
+ product_category: Optional[str] = None
113
+ device: Optional[str] = None
114
+
115
+ class TranslationInput(BaseModel):
116
+ text: str
117
+ target_lang: str = "fr"
118
+
119
+ class ChatInput(BaseModel):
120
+ question: str
121
+ context: str
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ pandas
5
+ scikit-learn
6
+ nltk
7
+ streamlit
8
+ gtts
9
+ requests
10
+ openai