Commit a3db2dc · calculate avg_similarity
Parent: 6d03bc9
app.py CHANGED
@@ -106,31 +106,9 @@ def retrieve_relevant_text(input_text, similarity_threshold=0.1): # Lower thres
 def evaluate_efficiency(dataset_split, similarity_threshold=0.1):
     y_true = []
     y_pred = []
-
-
-
-    # for note in row['notes']:
-    #     text = note.get('text', '')
-    #     annotations_list = [annotation['code'] for annotation in note.get('annotations', []) if 'code' in annotation]
-
-    #     if text and annotations_list:
-    #         # Store the original text for each entry in y_true for debugging
-    #         texts.append(text)
-    #         y_true.extend(annotations_list)
-
-    #         # Retrieve predictions for the current text
-    #         retrieved_results = retrieve_relevant_text(text, similarity_threshold=similarity_threshold)
-    #         retrieved_codes = [result['code'] for result in retrieved_results]
-
-    #         # Limit predictions to the length of true annotations to ensure consistent lengths
-    #         y_pred.extend(retrieved_codes[:len(annotations_list)])
-
-    # # Debugging output
-    # for idx, (text, true_codes, pred_codes) in enumerate(zip(texts, y_true, y_pred)):
-    #     print(f"\nExample {idx + 1}")
-    #     print(f"Text: {text}")
-    #     print(f"Ground Truth Codes (y_true): {true_codes}")
-    #     print(f"Predicted Codes (y_pred): {pred_codes}")
+    total_similarity = 0
+    total_items = 0
+
     for i, row in enumerate(dataset_split):
         for note in row['notes']:
             text = note.get('text', '')
@@ -140,15 +118,28 @@ def evaluate_efficiency(dataset_split, similarity_threshold=0.1):
             retrieved_results = retrieve_relevant_text(text, similarity_threshold=similarity_threshold)
             retrieved_codes = [result['code'] for result in retrieved_results]

+            # Sum up similarity scores for average calculation
+            for result in retrieved_results:
+                total_similarity += result['similarity_score']
+                total_items += 1
+
             # Ground truth
             y_true.extend(annotations_list)
             # Predictions (limit to length of true annotations to avoid mismatch)
             y_pred.extend(retrieved_codes[:len(annotations_list)])

+            for result in retrieved_results:
+                print(f" Code: {result['code']}, Similarity Score: {result['similarity_score']:.2f}")
+
     # Debugging output to check for mismatches and understand results
     print("Sample y_true:", y_true[:10])
     print("Sample y_pred:", y_pred[:10])

+    if total_items > 0:
+        avg_similarity = total_similarity / total_items
+    else:
+        avg_similarity = 0
+
     if len(y_true) != len(y_pred):
         min_length = min(len(y_true), len(y_pred))
         y_true = y_true[:min_length]
@@ -159,10 +150,10 @@ def evaluate_efficiency(dataset_split, similarity_threshold=0.1):
     recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
     f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

-    return precision, recall, f1
+    return precision, recall, f1, avg_similarity

 # Calculate retrieval efficiency metrics
-precision, recall, f1 = evaluate_efficiency(dataset['validation'], similarity_threshold=0.1)
+precision, recall, f1, avg_similarity = evaluate_efficiency(dataset['validation'], similarity_threshold=0.1)

 # Gradio interface
 def gradio_interface(input_text):
@@ -179,7 +170,8 @@ def gradio_interface(input_text):
     return "\n".join(formatted_results)

 # Display retrieval efficiency metrics
-metrics = f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}"
+# metrics = f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}"
+metrics = f"Accuracy: {avg_similarity:.2f}"

 with gr.Blocks() as interface:
     gr.Markdown("# Text Retrieval with Efficiency Metrics")
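
Read as a whole, the commit deletes the commented-out earlier draft of evaluate_efficiency, accumulates the similarity score of every retrieved result, returns the mean as a fourth value, and displays that mean in place of the Precision/Recall/F1 string. Below is a minimal sketch of the function as it stands after this commit, assembled only from the visible hunks: the annotations_list construction, the exact indentation, and the precision_score call are not shown in the diff and are assumptions, and retrieve_relevant_text and dataset are defined elsewhere in app.py.

```python
# Sketch only, assembled from the diff hunks above. retrieve_relevant_text(text, similarity_threshold)
# is assumed to return a list of dicts with 'code' and 'similarity_score' keys, as the diff implies.
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate_efficiency(dataset_split, similarity_threshold=0.1):
    y_true = []
    y_pred = []
    total_similarity = 0
    total_items = 0

    for i, row in enumerate(dataset_split):
        for note in row['notes']:
            text = note.get('text', '')
            # Assumed, mirroring the deleted commented-out draft:
            annotations_list = [a['code'] for a in note.get('annotations', []) if 'code' in a]
            if not text or not annotations_list:
                continue

            retrieved_results = retrieve_relevant_text(text, similarity_threshold=similarity_threshold)
            retrieved_codes = [result['code'] for result in retrieved_results]

            # Sum up similarity scores for the average
            for result in retrieved_results:
                total_similarity += result['similarity_score']
                total_items += 1

            # Ground truth and predictions, truncated to matching lengths
            y_true.extend(annotations_list)
            y_pred.extend(retrieved_codes[:len(annotations_list)])

    # Mean similarity over every retrieved result (0 if nothing was retrieved)
    avg_similarity = total_similarity / total_items if total_items > 0 else 0

    # Trim to a common length before scoring
    min_length = min(len(y_true), len(y_pred))
    y_true, y_pred = y_true[:min_length], y_pred[:min_length]

    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
    return precision, recall, f1, avg_similarity
```

One thing worth flagging: the new f-string labels the displayed value "Accuracy", but what the function actually returns is the mean similarity score of all retrieved results on the validation split, not an accuracy in the classification sense.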
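
On the interface side, metrics is built once at start-up; the hunks do not show where it is rendered inside gr.Blocks(), so the wiring below is only a plausible sketch. The second gr.Markdown call, the Textbox labels, and the submit handler are assumptions, not code from this commit.

```python
import gradio as gr

with gr.Blocks() as interface:
    gr.Markdown("# Text Retrieval with Efficiency Metrics")
    gr.Markdown(metrics)  # e.g. "Accuracy: 0.42" -- in practice the mean similarity score
    input_box = gr.Textbox(label="Input text")        # assumed label
    output_box = gr.Textbox(label="Retrieved codes")  # assumed label
    # Run retrieval when the user presses Enter in the input box
    input_box.submit(gradio_interface, inputs=input_box, outputs=output_box)

interface.launch()
```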