Omartificial-Intelligence-Space committed on
Commit
bd3c0c4
·
verified ·
1 Parent(s): 9435960

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -108,7 +108,7 @@ def compute_similarity(embedder: QwenEmbedder, text1: str, text2: str, model_cho
108
  similarity = torch.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
109
  return round(similarity, 3)
110
 
111
- def rerank_documents(embedder: QwenEmbedder, query: str, documents: str) -> List[Tuple[str, float]]:
112
  docs_list = [doc.strip() for doc in documents.split('\n') if doc.strip()]
113
 
114
  # Add instruction to query
@@ -126,7 +126,7 @@ def rerank_documents(embedder: QwenEmbedder, query: str, documents: str) -> List
126
 
127
  return [(doc, round(score, 3)) for doc, score in results]
128
 
129
- def process_batch_embeddings(embedder: QwenEmbedder, texts: str) -> pd.DataFrame:
130
  text_list = [text.strip() for text in texts.split('\n') if text.strip()]
131
  if len(text_list) < 1:
132
  return pd.DataFrame()
@@ -143,7 +143,7 @@ def process_batch_embeddings(embedder: QwenEmbedder, texts: str) -> pd.DataFrame
143
 
144
  return df_similarities.round(3)
145
 
146
- def process_retrieval(embedder: QwenEmbedder, task_prompt: str, queries: str, documents: str) -> pd.DataFrame:
147
  # Process queries and documents
148
  query_list = [q.strip() for q in queries.split('\n') if q.strip()]
149
  doc_list = [d.strip() for d in documents.split('\n') if d.strip()]
@@ -165,13 +165,13 @@ def process_retrieval(embedder: QwenEmbedder, task_prompt: str, queries: str, do
165
  df = pd.DataFrame(scores, index=query_list, columns=doc_list)
166
  return df.round(3)
167
 
168
- def process_cross_lingual(embedder: QwenEmbedder, arabic_text: str, english_text: str) -> dict:
169
  texts = [arabic_text, english_text]
170
  embeddings = embedder.get_embeddings(texts)
171
  similarity = torch.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
172
  return {"similarity": round(similarity, 3)}
173
 
174
- def classify_text(embedder: QwenEmbedder, text: str, categories: str) -> List[Tuple[str, float]]:
175
  cat_list = [c.strip() for c in categories.split('\n') if c.strip()]
176
  text_embedding = embedder.get_embeddings([text])
177
  cat_embeddings = embedder.get_embeddings(cat_list)
@@ -180,7 +180,7 @@ def classify_text(embedder: QwenEmbedder, text: str, categories: str) -> List[Tu
180
  results.sort(key=lambda x: x[1], reverse=True)
181
  return [(cat, round(score, 3)) for cat, score in results]
182
 
183
- def cluster_documents(embedder: QwenEmbedder, documents: str, num_clusters: int) -> pd.DataFrame:
184
  from sklearn.cluster import KMeans
185
  doc_list = [doc.strip() for doc in documents.split('\n') if doc.strip()]
186
  if len(doc_list) < num_clusters:
@@ -212,7 +212,7 @@ def cluster_documents(embedder: QwenEmbedder, documents: str, num_clusters: int)
212
  })
213
  return df.sort_values('Cluster')
214
 
215
- def analyze_sentiment(embedder: QwenEmbedder, text: str) -> Tuple[str, dict]:
216
  # Define sentiment anchors
217
  anchors = {
218
  "very_positive": "هذا رائع جداً ومدهش! أنا سعيد للغاية",
@@ -237,7 +237,7 @@ def analyze_sentiment(embedder: QwenEmbedder, text: str) -> Tuple[str, dict]:
237
  {k: round(float(v), 3) for k, v in results}
238
  )
239
 
240
- def extract_concepts(embedder: QwenEmbedder, text: str, concept_type: str) -> List[Tuple[str, float]]:
241
  # Define concept anchors based on type
242
  concept_anchors = {
243
  "emotions": [
@@ -693,7 +693,7 @@ def create_demo():
693
  similarity_score = gr.Number(label="Similarity Score")
694
 
695
  similarity_btn.click(
696
- fn=lambda t1, t2, m, d: process_with_embedder('compute_similarity', t1, t2),
697
  inputs=[text1, text2, model_choice, embedding_dim],
698
  outputs=similarity_score
699
  )
 
108
  similarity = torch.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
109
  return round(similarity, 3)
110
 
111
+ def rerank_documents(embedder: QwenEmbedder, query: str, documents: str, model_choice: str = None, embedding_dim: int = None) -> List[Tuple[str, float]]:
112
  docs_list = [doc.strip() for doc in documents.split('\n') if doc.strip()]
113
 
114
  # Add instruction to query
 
126
 
127
  return [(doc, round(score, 3)) for doc, score in results]
128
 
129
+ def process_batch_embeddings(embedder: QwenEmbedder, texts: str, model_choice: str = None, embedding_dim: int = None) -> pd.DataFrame:
130
  text_list = [text.strip() for text in texts.split('\n') if text.strip()]
131
  if len(text_list) < 1:
132
  return pd.DataFrame()
 
143
 
144
  return df_similarities.round(3)
145
 
146
+ def process_retrieval(embedder: QwenEmbedder, task_prompt: str, queries: str, documents: str, model_choice: str = None, embedding_dim: int = None) -> pd.DataFrame:
147
  # Process queries and documents
148
  query_list = [q.strip() for q in queries.split('\n') if q.strip()]
149
  doc_list = [d.strip() for d in documents.split('\n') if d.strip()]
 
165
  df = pd.DataFrame(scores, index=query_list, columns=doc_list)
166
  return df.round(3)
167
 
168
+ def process_cross_lingual(embedder: QwenEmbedder, arabic_text: str, english_text: str, model_choice: str = None, embedding_dim: int = None) -> dict:
169
  texts = [arabic_text, english_text]
170
  embeddings = embedder.get_embeddings(texts)
171
  similarity = torch.cosine_similarity(embeddings[0:1], embeddings[1:2]).item()
172
  return {"similarity": round(similarity, 3)}
173
 
174
+ def classify_text(embedder: QwenEmbedder, text: str, categories: str, model_choice: str = None, embedding_dim: int = None) -> List[Tuple[str, float]]:
175
  cat_list = [c.strip() for c in categories.split('\n') if c.strip()]
176
  text_embedding = embedder.get_embeddings([text])
177
  cat_embeddings = embedder.get_embeddings(cat_list)
 
180
  results.sort(key=lambda x: x[1], reverse=True)
181
  return [(cat, round(score, 3)) for cat, score in results]
182
 
183
+ def cluster_documents(embedder: QwenEmbedder, documents: str, num_clusters: int, model_choice: str = None, embedding_dim: int = None) -> pd.DataFrame:
184
  from sklearn.cluster import KMeans
185
  doc_list = [doc.strip() for doc in documents.split('\n') if doc.strip()]
186
  if len(doc_list) < num_clusters:
 
212
  })
213
  return df.sort_values('Cluster')
214
 
215
+ def analyze_sentiment(embedder: QwenEmbedder, text: str, model_choice: str = None, embedding_dim: int = None) -> Tuple[str, dict]:
216
  # Define sentiment anchors
217
  anchors = {
218
  "very_positive": "هذا رائع جداً ومدهش! أنا سعيد للغاية",
 
237
  {k: round(float(v), 3) for k, v in results}
238
  )
239
 
240
+ def extract_concepts(embedder: QwenEmbedder, text: str, concept_type: str, model_choice: str = None, embedding_dim: int = None) -> List[Tuple[str, float]]:
241
  # Define concept anchors based on type
242
  concept_anchors = {
243
  "emotions": [
 
693
  similarity_score = gr.Number(label="Similarity Score")
694
 
695
  similarity_btn.click(
696
+ fn=lambda t1, t2, m, d: process_with_embedder('compute_similarity', t1, t2, m, d),
697
  inputs=[text1, text2, model_choice, embedding_dim],
698
  outputs=similarity_score
699
  )