Spaces:

liang-huggingface
/

PubmedSearch

Runtime error

App Files Files Community

liang-huggingface committed on Nov 7, 2023

Commit

c881d34

1 Parent(s): d410e1c

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -27

app.py CHANGED Viewed

@@ -6,6 +6,10 @@ import requests
 import os
 HF_API = os.getenv('HF_API')
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
@@ -63,33 +67,81 @@ def search_pubmed(query, retmax):
         article_list.append(article_dict)
     return pd.DataFrame(article_list)
-# Function to summarize articles using Hugging Face's API
-def summarize_with_huggingface(model, selected_articles, USE_LOCAL=False):
-    API_URL = f"https://api-inference.huggingface.co/models/{model}"
-    # Your Hugging Face API key
-    API_KEY = HF_API
-    headers = {"Authorization": f"Bearer {API_KEY}"}
-    # Prepare the text to summarize: concatenate all abstracts
-    print(type(selected_articles))
-    print(selected_articles.to_dict(orient='records'))
-    text_to_summarize = " ".join(
-        [f"PMID: {article['PMID']}. Authors: {article['Authors']}. Title: {article['Title']}. Abstract: {article['Abstract']}."
-         for article in selected_articles.to_dict(orient='records')]
-    )
-    # Define the payload
-    payload = {
-        "inputs": text_to_summarize,
-        "parameters": {"max_length": 300}  # Adjust as needed
     }
-    if USE_LOCAL:
-        response = generate_response(text_to_summarize)
     else:
-        # Make the POST request to the Hugging Face API
-        response = requests.post(API_URL, headers=headers, json=payload)
-        response.raise_for_status()  # Raise an HTTPError if the HTTP request returned an unsuccessful status code
-    # The API returns a list of dictionaries. We extract the summary from the first one.
-    return response.json()[0]['generated_text']
@@ -119,7 +171,6 @@ def summarize_articles(indices, articles_for_display):
     summary = summarize_with_huggingface(selected_articles)
     return summary
-PASSWORD = "pass"
 def check_password(username, password):
     if username == USERNAME and password == PASSWORD:
@@ -133,7 +184,9 @@ with gr.Blocks() as demo:
     gr.Markdown("### PubMed Article Summarizer")
-    model_input = gr.Textbox(label="Enter the model to use", value="h2oai/h2ogpt-4096-llama2-7b-chat")
     query_input = gr.Textbox(label="Query Keywords")
     retmax_input = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of articles")
     search_button = gr.Button("Search")
@@ -147,7 +200,7 @@ with gr.Blocks() as demo:
 #        output_table.update(value=df)
         return df
     search_button.click(update_output_table, inputs=[query_input, retmax_input], outputs=output_table)
-    summarize_button.click(fn=summarize_with_huggingface, inputs=[model_input, output_table], outputs=summary_output)
 demo.launch(debug=True)

 import os
 HF_API = os.getenv('HF_API')
+openai_api_key = os.getenv('OPENAI_API')
+PASSWORD = "pass"
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
         article_list.append(article_dict)
     return pd.DataFrame(article_list)
# Function to format search results for OpenAI summarization
def format_results_for_openai(table_data):
    """Render each article row as a Title/Authors/Abstract text block.

    Args:
        table_data: pandas DataFrame with ``Title``, ``Authors`` and
            ``Abstract`` columns, one row per article.

    Returns:
        A single string holding one three-line block per row. Blocks are
        joined with a newline, so consecutive articles are separated by a
        blank line. A table without rows yields ``""``.

    Raises:
        KeyError: If one of the three columns is missing.
    """

    def _text(value):
        # Missing cells (None / NaN / NA) would otherwise reach the prompt as
        # the literal words "None" or "nan". Non-scalar cells (e.g. a list of
        # authors) are passed through untouched.
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return ""
        return value

    return "\n".join(
        f"Title: {_text(row['Title'])}\nAuthors:{_text(row['Authors'])}\nAbstract: {_text(row['Abstract'])}\n"
        for _, row in table_data.iterrows()
    )
def get_summary_from_openai(text_to_summarize, openai_api_key, timeout=60):
    """Ask the OpenAI chat-completions endpoint to summarize PubMed results.

    Args:
        text_to_summarize: Text sent as the user message.
        openai_api_key: Key placed in the ``Authorization: Bearer`` header.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The stripped content of the first returned choice (``""`` when the
        response carries no choices or no content), or ``None`` when the
        request fails or the API answers with a non-200 status.
    """
    # The prompt text, including the indentation embedded in its continuation
    # lines, is sent to the model verbatim.
    system_prompt = '''Please summarize the following PubMed search results,
        including the authors who conducted the research, the main research subject, and the major findings.
        Please compare the difference among these articles.
        Please return your results in a single paragraph in the regular scientific paper fashion for each article:'''
    headers = {
        'Authorization': f'Bearer {openai_api_key}',
        'Content-Type': 'application/json'
    }
    data = {
        "model": "gpt-3.5-turbo",  # Specify the GPT-3.5-turbo model
        "messages": [{"role": "system", "content": system_prompt},
                     {"role": "user", "content": text_to_summarize}],
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(
            'https://api.openai.com/v1/chat/completions',
            headers=headers,
            json=data,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report transport failures the same way as HTTP errors: log, return None.
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        # Print the error message if the API call was unsuccessful
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

    # An empty "choices" list or a null "content" must not raise.
    choices = response.json().get('choices') or [{}]
    message = choices[0].get('message') or {}
    return (message.get('content') or '').strip()
# Function that combines PubMed search with OpenAI summarization
def summarize_pubmed_search(search_results):
    """Format the search-result table and return its OpenAI summary.

    The table is rendered with ``format_results_for_openai`` and sent through
    ``get_summary_from_openai`` using the module-level ``openai_api_key``.
    The return value is whatever ``get_summary_from_openai`` gives back.
    """
    return get_summary_from_openai(
        format_results_for_openai(search_results),
        openai_api_key,
    )
# Function to summarize articles with OpenAI (valid password) or Hugging Face's API
def summarize_with_huggingface(model, selected_articles, password):
    """Summarize the selected articles.

    When ``password`` equals the module-level ``PASSWORD`` the articles are
    summarized through ``summarize_pubmed_search`` (OpenAI). Otherwise they
    are sent to the Hugging Face Inference API for ``model``.

    Args:
        model: Hugging Face model id used to build the inference URL.
        selected_articles: pandas DataFrame of articles. The Hugging Face
            path reads the ``PMID``, ``Authors``, ``Title`` and ``Abstract``
            columns.
        password: Value compared against ``PASSWORD`` to choose the backend.

    Returns:
        On the OpenAI path, the result of ``summarize_pubmed_search``. On the
        Hugging Face path, the ``generated_text`` of the first item in the
        JSON response.

    Raises:
        requests.HTTPError: If the Hugging Face API answers with an
            unsuccessful status code.
    """
    if password == PASSWORD:
        # summarize_pubmed_search takes the table alone: it formats the rows
        # and supplies the API key itself.
        return summarize_pubmed_search(selected_articles)

    api_url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {HF_API}"}

    # Prepare the text to summarize: concatenate all article records
    text_to_summarize = " ".join(
        f"PMID: {article['PMID']}. Authors: {article['Authors']}. Title: {article['Title']}. Abstract: {article['Abstract']}."
        for article in selected_articles.to_dict(orient='records')
    )
    payload = {
        "inputs": text_to_summarize,
        "parameters": {"max_length": 300}  # Adjust as needed
    }

    # Bound the wait so a stalled request cannot hang the UI callback.
    response = requests.post(api_url, headers=headers, json=payload, timeout=120)
    response.raise_for_status()  # Raise an HTTPError if the HTTP request returned an unsuccessful status code
    # The API returns a list of dictionaries. We extract the summary from the first one.
    return response.json()[0]['generated_text']
     summary = summarize_with_huggingface(selected_articles)
     return summary
 def check_password(username, password):
     if username == USERNAME and password == PASSWORD:
     gr.Markdown("### PubMed Article Summarizer")
+    with gr.Row():
+        password_input = gr.Textbox(label="Enter the password")
+        model_input = gr.Textbox(label="Enter the model to use", value="h2oai/h2ogpt-4096-llama2-7b-chat")
     query_input = gr.Textbox(label="Query Keywords")
     retmax_input = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of articles")
     search_button = gr.Button("Search")
 #        output_table.update(value=df)
         return df
     search_button.click(update_output_table, inputs=[query_input, retmax_input], outputs=output_table)
+    summarize_button.click(fn=summarize_with_huggingface, inputs=[model_input, output_table, password_input], outputs=summary_output)
 demo.launch(debug=True)