Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -108,7 +108,7 @@ def compute_similarity(embedder: QwenEmbedder, text1: str, text2: str, model_cho
|
|
108 |
similarity = torch.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
|
109 |
return round(similarity, 3)
|
110 |
|
111 |
-
def rerank_documents(embedder: QwenEmbedder, query: str, documents: str) -> List[Tuple[str, float]]:
|
112 |
docs_list = [doc.strip() for doc in documents.split('\n') if doc.strip()]
|
113 |
|
114 |
# Add instruction to query
|
@@ -126,7 +126,7 @@ def rerank_documents(embedder: QwenEmbedder, query: str, documents: str) -> List
|
|
126 |
|
127 |
return [(doc, round(score, 3)) for doc, score in results]
|
128 |
|
129 |
-
def process_batch_embeddings(embedder: QwenEmbedder, texts: str) -> pd.DataFrame:
|
130 |
text_list = [text.strip() for text in texts.split('\n') if text.strip()]
|
131 |
if len(text_list) < 1:
|
132 |
return pd.DataFrame()
|
@@ -143,7 +143,7 @@ def process_batch_embeddings(embedder: QwenEmbedder, texts: str) -> pd.DataFrame
|
|
143 |
|
144 |
return df_similarities.round(3)
|
145 |
|
146 |
-
def process_retrieval(embedder: QwenEmbedder, task_prompt: str, queries: str, documents: str) -> pd.DataFrame:
|
147 |
# Process queries and documents
|
148 |
query_list = [q.strip() for q in queries.split('\n') if q.strip()]
|
149 |
doc_list = [d.strip() for d in documents.split('\n') if d.strip()]
|
@@ -165,13 +165,13 @@ def process_retrieval(embedder: QwenEmbedder, task_prompt: str, queries: str, do
|
|
165 |
df = pd.DataFrame(scores, index=query_list, columns=doc_list)
|
166 |
return df.round(3)
|
167 |
|
168 |
-
def process_cross_lingual(embedder: QwenEmbedder, arabic_text: str, english_text: str) -> dict:
|
169 |
texts = [arabic_text, english_text]
|
170 |
embeddings = embedder.get_embeddings(texts)
|
171 |
similarity = torch.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
|
172 |
return {"similarity": round(similarity, 3)}
|
173 |
|
174 |
-
def classify_text(embedder: QwenEmbedder, text: str, categories: str) -> List[Tuple[str, float]]:
|
175 |
cat_list = [c.strip() for c in categories.split('\n') if c.strip()]
|
176 |
text_embedding = embedder.get_embeddings([text])
|
177 |
cat_embeddings = embedder.get_embeddings(cat_list)
|
@@ -180,7 +180,7 @@ def classify_text(embedder: QwenEmbedder, text: str, categories: str) -> List[Tu
|
|
180 |
results.sort(key=lambda x: x[1], reverse=True)
|
181 |
return [(cat, round(score, 3)) for cat, score in results]
|
182 |
|
183 |
-
def cluster_documents(embedder: QwenEmbedder, documents: str, num_clusters: int) -> pd.DataFrame:
|
184 |
from sklearn.cluster import KMeans
|
185 |
doc_list = [doc.strip() for doc in documents.split('\n') if doc.strip()]
|
186 |
if len(doc_list) < num_clusters:
|
@@ -212,7 +212,7 @@ def cluster_documents(embedder: QwenEmbedder, documents: str, num_clusters: int)
|
|
212 |
})
|
213 |
return df.sort_values('Cluster')
|
214 |
|
215 |
-
def analyze_sentiment(embedder: QwenEmbedder, text: str) -> Tuple[str, dict]:
|
216 |
# Define sentiment anchors
|
217 |
anchors = {
|
218 |
"very_positive": "هذا رائع جداً ومدهش! أنا سعيد للغاية",
|
@@ -237,7 +237,7 @@ def analyze_sentiment(embedder: QwenEmbedder, text: str) -> Tuple[str, dict]:
|
|
237 |
{k: round(float(v), 3) for k, v in results}
|
238 |
)
|
239 |
|
240 |
-
def extract_concepts(embedder: QwenEmbedder, text: str, concept_type: str) -> List[Tuple[str, float]]:
|
241 |
# Define concept anchors based on type
|
242 |
concept_anchors = {
|
243 |
"emotions": [
|
@@ -693,7 +693,7 @@ def create_demo():
|
|
693 |
similarity_score = gr.Number(label="Similarity Score")
|
694 |
|
695 |
similarity_btn.click(
|
696 |
-
fn=lambda t1, t2, m, d: process_with_embedder('compute_similarity', t1, t2),
|
697 |
inputs=[text1, text2, model_choice, embedding_dim],
|
698 |
outputs=similarity_score
|
699 |
)
|
|
|
108 |
similarity = torch.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
|
109 |
return round(similarity, 3)
|
110 |
|
111 |
+
def rerank_documents(embedder: QwenEmbedder, query: str, documents: str, model_choice: str = None, embedding_dim: int = None) -> List[Tuple[str, float]]:
|
112 |
docs_list = [doc.strip() for doc in documents.split('\n') if doc.strip()]
|
113 |
|
114 |
# Add instruction to query
|
|
|
126 |
|
127 |
return [(doc, round(score, 3)) for doc, score in results]
|
128 |
|
129 |
+
def process_batch_embeddings(embedder: QwenEmbedder, texts: str, model_choice: str = None, embedding_dim: int = None) -> pd.DataFrame:
|
130 |
text_list = [text.strip() for text in texts.split('\n') if text.strip()]
|
131 |
if len(text_list) < 1:
|
132 |
return pd.DataFrame()
|
|
|
143 |
|
144 |
return df_similarities.round(3)
|
145 |
|
146 |
+
def process_retrieval(embedder: QwenEmbedder, task_prompt: str, queries: str, documents: str, model_choice: str = None, embedding_dim: int = None) -> pd.DataFrame:
|
147 |
# Process queries and documents
|
148 |
query_list = [q.strip() for q in queries.split('\n') if q.strip()]
|
149 |
doc_list = [d.strip() for d in documents.split('\n') if d.strip()]
|
|
|
165 |
df = pd.DataFrame(scores, index=query_list, columns=doc_list)
|
166 |
return df.round(3)
|
167 |
|
168 |
+
def process_cross_lingual(embedder: QwenEmbedder, arabic_text: str, english_text: str, model_choice: str = None, embedding_dim: int = None) -> dict:
|
169 |
texts = [arabic_text, english_text]
|
170 |
embeddings = embedder.get_embeddings(texts)
|
171 |
similarity = torch.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
|
172 |
return {"similarity": round(similarity, 3)}
|
173 |
|
174 |
+
def classify_text(embedder: QwenEmbedder, text: str, categories: str, model_choice: str = None, embedding_dim: int = None) -> List[Tuple[str, float]]:
|
175 |
cat_list = [c.strip() for c in categories.split('\n') if c.strip()]
|
176 |
text_embedding = embedder.get_embeddings([text])
|
177 |
cat_embeddings = embedder.get_embeddings(cat_list)
|
|
|
180 |
results.sort(key=lambda x: x[1], reverse=True)
|
181 |
return [(cat, round(score, 3)) for cat, score in results]
|
182 |
|
183 |
+
def cluster_documents(embedder: QwenEmbedder, documents: str, num_clusters: int, model_choice: str = None, embedding_dim: int = None) -> pd.DataFrame:
|
184 |
from sklearn.cluster import KMeans
|
185 |
doc_list = [doc.strip() for doc in documents.split('\n') if doc.strip()]
|
186 |
if len(doc_list) < num_clusters:
|
|
|
212 |
})
|
213 |
return df.sort_values('Cluster')
|
214 |
|
215 |
+
def analyze_sentiment(embedder: QwenEmbedder, text: str, model_choice: str = None, embedding_dim: int = None) -> Tuple[str, dict]:
|
216 |
# Define sentiment anchors
|
217 |
anchors = {
|
218 |
"very_positive": "هذا رائع جداً ومدهش! أنا سعيد للغاية",
|
|
|
237 |
{k: round(float(v), 3) for k, v in results}
|
238 |
)
|
239 |
|
240 |
+
def extract_concepts(embedder: QwenEmbedder, text: str, concept_type: str, model_choice: str = None, embedding_dim: int = None) -> List[Tuple[str, float]]:
|
241 |
# Define concept anchors based on type
|
242 |
concept_anchors = {
|
243 |
"emotions": [
|
|
|
693 |
similarity_score = gr.Number(label="Similarity Score")
|
694 |
|
695 |
similarity_btn.click(
|
696 |
+
fn=lambda t1, t2, m, d: process_with_embedder('compute_similarity', t1, t2, m, d),
|
697 |
inputs=[text1, text2, model_choice, embedding_dim],
|
698 |
outputs=similarity_score
|
699 |
)
|