Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,11 +5,10 @@ from sklearn.cluster import KMeans
|
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
import requests
|
7 |
import os
|
8 |
-
|
9 |
-
HF_API_TOKEN = os.getenv("HF_API_TOKEN") # ✅ GOOD: Read from environment
|
10 |
|
11 |
# === CONFIGURATION ===
|
12 |
-
|
13 |
FALCON_MODEL = "tiiuae/falcon-7b-instruct"
|
14 |
|
15 |
# === STEP 1: CLUSTERING MODEL ===
|
@@ -26,6 +25,9 @@ def cluster_texts(texts, n_clusters=10):
|
|
26 |
|
27 |
# === STEP 2: FALCON-BASED LABELING ===
|
28 |
def query_falcon(prompt):
|
|
|
|
|
|
|
29 |
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
30 |
API_URL = f"https://api-inference.huggingface.co/models/{FALCON_MODEL}"
|
31 |
|
@@ -38,12 +40,11 @@ def query_falcon(prompt):
|
|
38 |
}
|
39 |
}
|
40 |
|
41 |
-
response = requests.post(API_URL, headers=headers, json=payload)
|
42 |
try:
|
|
|
43 |
return response.json()[0]['generated_text'].strip()
|
44 |
except Exception as e:
|
45 |
-
|
46 |
-
return ""
|
47 |
|
48 |
def generate_topic_labels(texts, clusters, n_clusters=10):
|
49 |
cluster_samples = {}
|
@@ -76,9 +77,17 @@ session = {
|
|
76 |
"topic_labels": {}
|
77 |
}
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
def run_initial_analysis(csv_file, context_input, n_clusters=10):
|
80 |
try:
|
81 |
-
df =
|
82 |
except Exception as e:
|
83 |
return f"Error reading CSV: {str(e)}", "", ""
|
84 |
|
@@ -96,9 +105,7 @@ def run_initial_analysis(csv_file, context_input, n_clusters=10):
|
|
96 |
df['label'] = df['cluster'].map(topic_labels)
|
97 |
|
98 |
session['current_df'] = df
|
99 |
-
session['topic_labels'] = topic_labels
|
100 |
|
101 |
-
# Save CSV
|
102 |
output = io.StringIO()
|
103 |
df.to_csv(output, index=False)
|
104 |
csv_str = output.getvalue()
|
|
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
import requests
|
7 |
import os
|
8 |
+
import io
|
|
|
9 |
|
10 |
# === CONFIGURATION ===
|
11 |
+
HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Set in Hugging Face Secrets
|
12 |
FALCON_MODEL = "tiiuae/falcon-7b-instruct"
|
13 |
|
14 |
# === STEP 1: CLUSTERING MODEL ===
|
|
|
25 |
|
26 |
# === STEP 2: FALCON-BASED LABELING ===
|
27 |
def query_falcon(prompt):
|
28 |
+
if not HF_API_TOKEN:
|
29 |
+
return "API Token missing"
|
30 |
+
|
31 |
headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
|
32 |
API_URL = f"https://api-inference.huggingface.co/models/{FALCON_MODEL}"
|
33 |
|
|
|
40 |
}
|
41 |
}
|
42 |
|
|
|
43 |
try:
|
44 |
+
response = requests.post(API_URL, headers=headers, json=payload)
|
45 |
return response.json()[0]['generated_text'].strip()
|
46 |
except Exception as e:
|
47 |
+
return f"Error calling Falcon: {str(e)}"
|
|
|
48 |
|
49 |
def generate_topic_labels(texts, clusters, n_clusters=10):
|
50 |
cluster_samples = {}
|
|
|
77 |
"topic_labels": {}
|
78 |
}
|
79 |
|
80 |
+
def read_csv_file(file_obj):
    """Read a CSV into a DataFrame from a path or an open file-like object.

    Args:
        file_obj: Either a filesystem path (``str`` or ``os.PathLike``) or an
            object exposing ``read()`` that returns the CSV content as
            ``bytes`` or ``str``.

    Returns:
        The ``pandas.DataFrame`` parsed from the CSV content.

    Raises:
        UnicodeDecodeError: If binary content is not valid UTF-8.
        Exception: Anything ``pandas.read_csv`` raises for unreadable or
            malformed input is propagated unchanged; the caller is expected
            to handle it.
    """
    # Path-like inputs are handed straight to pandas, which opens the file.
    if isinstance(file_obj, (str, os.PathLike)):
        return pd.read_csv(file_obj)

    content = file_obj.read()
    # Binary handles return bytes and must be decoded; text-mode handles
    # already return str, and calling .decode() on that would raise
    # AttributeError.
    if isinstance(content, (bytes, bytearray)):
        content = bytes(content).decode("utf-8")
    return pd.read_csv(io.StringIO(content))
|
87 |
+
|
88 |
def run_initial_analysis(csv_file, context_input, n_clusters=10):
|
89 |
try:
|
90 |
+
df = read_csv_file(csv_file)
|
91 |
except Exception as e:
|
92 |
return f"Error reading CSV: {str(e)}", "", ""
|
93 |
|
|
|
105 |
df['label'] = df['cluster'].map(topic_labels)
|
106 |
|
107 |
session['current_df'] = df
|
|
|
108 |
|
|
|
109 |
output = io.StringIO()
|
110 |
df.to_csv(output, index=False)
|
111 |
csv_str = output.getvalue()
|