liang-huggingface committed on
Commit
7c7d36b
·
1 Parent(s): 82d26dd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from Bio import Entrez
4
+ import requests
5
+
6
+ import os
7
+
8
+ HF_API = os.getenv('HF_API')
9
+ # Function to search PubMed for articles
10
def search_pubmed(query, retmax):
    """Search PubMed and return the matching articles as a DataFrame.

    Args:
        query: PubMed search term. A blank or ``None`` query returns an
            empty table without contacting NCBI.
        retmax: Maximum number of articles to fetch. Coerced to ``int``
            because UI sliders may deliver floats.

    Returns:
        A ``pandas.DataFrame`` with the columns ``PMID``, ``Authors``,
        ``Title`` and ``Abstract`` (one row per article, possibly zero rows).
    """
    columns = ['PMID', 'Authors', 'Title', 'Abstract']
    if query is None or not str(query).strip():
        return pd.DataFrame(columns=columns)

    Entrez.email = '[email protected]'

    # Step 1: resolve the query to a list of PubMed ids.
    handle = Entrez.esearch(db="pubmed", term=query, retmax=int(retmax))
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()

    idlist = record['IdList']
    if not idlist:
        # Nothing matched; do not call efetch with an empty id list.
        return pd.DataFrame(columns=columns)

    # Step 2: download the full records for those ids.
    handle = Entrez.efetch(db="pubmed", id=idlist, retmode="xml")
    try:
        articles = Entrez.read(handle)['PubmedArticle']
    finally:
        handle.close()

    rows = [_article_to_row(article) for article in articles]
    return pd.DataFrame(rows, columns=columns)


def _article_to_row(article):
    """Flatten one parsed PubMed article into a PMID/Authors/Title/Abstract dict."""
    citation = article['MedlineCitation']
    details = citation['Article']

    names = []
    for author in details.get('AuthorList', []):
        if 'LastName' in author:
            initials = author.get('Initials', '')
            names.append(f"{author['LastName']} {initials}" if initials else str(author['LastName']))
        elif 'CollectiveName' in author:
            # Group authors (consortia, committees) carry no LastName.
            names.append(str(author['CollectiveName']))

    # Structured abstracts arrive as several sections; keep all of them.
    sections = details.get('Abstract', {}).get('AbstractText', [])
    abstract = ' '.join(str(section) for section in sections) if sections else None

    return {
        'PMID': str(citation['PMID']),
        'Authors': ' '.join(names),
        'Title': details['ArticleTitle'],
        'Abstract': abstract,
    }
30
+
31
+ # Function to summarize articles using Hugging Face's API
32
def summarize_with_huggingface(model, selected_articles):
    """Summarize a table of articles with the Hugging Face Inference API.

    Args:
        model: Hub model id appended to the inference endpoint URL.
        selected_articles: ``pandas.DataFrame`` with the columns ``PMID``,
            ``Authors``, ``Title`` and ``Abstract``.

    Returns:
        The text returned by the model, or a short notice when the table
        holds no usable rows.

    Raises:
        RuntimeError: If the ``HF_API`` token is not configured.
        requests.HTTPError: If the API answers with an error status.
        ValueError: If the response body has an unexpected shape.
    """
    no_articles_message = "No articles to summarize. Run a search first."
    if selected_articles is None or len(selected_articles) == 0:
        return no_articles_message

    # Build one prompt from all rows, skipping rows that are entirely blank
    # (an untouched UI table can contain such placeholder rows).
    segments = []
    for article in selected_articles.to_dict(orient='records'):
        fields = {key: _clean_field(article.get(key))
                  for key in ('PMID', 'Authors', 'Title', 'Abstract')}
        if not any(fields.values()):
            continue
        # Avoid feeding the literal string "None" to the model.
        abstract = fields['Abstract'] or 'not available'
        segments.append(
            f"PMID: {fields['PMID']}. Authors: {fields['Authors']}. "
            f"Title: {fields['Title']}. Abstract: {abstract}."
        )
    if not segments:
        return no_articles_message

    API_KEY = HF_API
    if not API_KEY:
        # Otherwise the request would carry the header "Bearer None".
        raise RuntimeError("The HF_API environment variable is not set.")

    API_URL = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {API_KEY}"}
    payload = {
        "inputs": " ".join(segments),
        "parameters": {"max_length": 300},  # Adjust as needed
    }

    # A timeout keeps the UI from hanging forever on a stalled connection.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    response.raise_for_status()

    # The API returns a list of dicts; text-generation models use
    # 'generated_text', summarization models use 'summary_text'.
    result = response.json()
    if isinstance(result, list) and result and isinstance(result[0], dict):
        for key in ('generated_text', 'summary_text'):
            if key in result[0]:
                return result[0][key]
    raise ValueError(f"Unexpected response from the Hugging Face API: {result!r}")


def _clean_field(value):
    """Return *value* as stripped text, mapping None/NaN to an empty string."""
    if value is None:
        return ""
    if isinstance(value, float) and value != value:  # NaN is the only float != itself
        return ""
    return str(value).strip()
54
+
55
+
56
+
57
+
58
+ import gradio as gr
59
+ from Bio import Entrez
60
+
61
# Always tell NCBI who you are.
# search_pubmed() assigns Entrez.email again on every call, so this
# module-level assignment only provides the initial value.
# NOTE(review): the string below is not a usable e-mail address (it looks
# like a redacted placeholder) — confirm and substitute a real contact.
Entrez.email = "[email protected]"
63
+
64
+
65
def process_query(keywords, top_k):
    """Search PubMed and return the hits as rows for a table widget.

    Args:
        keywords: Search term forwarded to ``search_pubmed``.
        top_k: Maximum number of articles to retrieve.

    Returns:
        A list of ``[PMID, Authors, Title, Abstract]`` lists, one per article.
    """
    articles = search_pubmed(keywords, top_k)
    # search_pubmed returns a DataFrame; iterating it directly would yield
    # column names, so walk its row records instead. The keys match the
    # capitalised column names that search_pubmed produces.
    column_order = ('PMID', 'Authors', 'Title', 'Abstract')
    return [
        [record.get(column) for column in column_order]
        for record in articles.to_dict(orient='records')
    ]
70
+
71
+
72
def summarize_articles(indices, articles_for_display, model="h2oai/h2ogpt-4096-llama2-7b-chat"):
    """Summarize the table rows picked by a comma-separated index string.

    Args:
        indices: Text such as ``"0, 2, 3"``; tokens that are not plain
            non-negative integers, or that fall outside the table, are ignored.
        articles_for_display: ``pandas.DataFrame`` holding the search results.
        model: Hub model id handed to ``summarize_with_huggingface``. Defaults
            to the same model the main UI pre-fills.

    Returns:
        The summary text, or a short notice when no valid index was given.
    """
    row_count = 0 if articles_for_display is None else len(articles_for_display)
    tokens = (token.strip() for token in (indices or "").split(','))
    selected_indices = [
        int(token) for token in tokens
        if token.isdecimal() and int(token) < row_count
    ]
    if not selected_indices:
        return "No valid article indices were provided."

    # summarize_with_huggingface expects a DataFrame (it calls .to_dict on
    # its input), so select rows positionally instead of building a list.
    selected_articles = articles_for_display.iloc[selected_indices].reset_index(drop=True)
    return summarize_with_huggingface(model, selected_articles)
82
+
83
+ # Gradio interface
84
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown("### PubMed Article Summarizer")

    # Widgets, created in the order they appear on the page.
    model_input = gr.Textbox(
        value="h2oai/h2ogpt-4096-llama2-7b-chat",
        label="Enter the model to use",
    )
    query_input = gr.Textbox(label="Query Keywords")
    retmax_input = gr.Slider(
        label="Number of articles",
        minimum=1,
        maximum=20,
        step=1,
        value=5,
    )
    search_button = gr.Button("Search")
    output_table = gr.Dataframe(headers=["PMID", "Authors", "Title", "Abstract"])
    summarize_button = gr.Button("Summarize")
    summary_output = gr.Textbox()

    def update_output_table(query, retmax):
        """Run the PubMed search and hand the resulting table to the UI."""
        return search_pubmed(query, retmax)

    # "Search" fills the table; "Summarize" sends the table to the model.
    search_button.click(
        fn=update_output_table,
        inputs=[query_input, retmax_input],
        outputs=output_table,
    )
    summarize_button.click(
        fn=summarize_with_huggingface,
        inputs=[model_input, output_table],
        outputs=summary_output,
    )

demo.launch(debug=True)
102
+
103
# Alternative UI that lets the user pick articles by index. It is guarded by
# `if False:` and therefore never runs; it is kept only as a reference.
if False:
    with gr.Blocks() as demo:
        gr.Markdown("### PubMed Article Summarizer")
        # NOTE(review): the source indentation was lost; which widgets sit
        # inside the row is a best guess — confirm before enabling.
        with gr.Row():
            query_input = gr.Textbox(label="Query Keywords")
            top_k_input = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Top K Results")
            search_button = gr.Button("Search")
        # Header order must match the rows process_query builds
        # (PMID, Authors, Title, Abstract); the table is filled by position.
        output_table = gr.Dataframe(headers=["PMID", "Authors", "Title", "Abstract"])
        indices_input = gr.Textbox(label="Enter indices of articles to summarize (comma-separated)")
        summarize_button = gr.Button("Summarize Selected Articles")
        summary_output = gr.Textbox(label="Summary")

        search_button.click(
            fn=process_query,
            inputs=[query_input, top_k_input],
            outputs=output_table
        )

        summarize_button.click(
            fn=summarize_articles,
            inputs=[indices_input, output_table],
            outputs=summary_output
        )

    demo.launch(debug=True)