liang-huggingface committed on
Commit
e8629da
·
1 Parent(s): e73fe73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -4
app.py CHANGED
@@ -44,9 +44,49 @@ def generate_response(prompt):
44
  # Decode the response
45
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
46
  return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # Function to search PubMed for articles
49
- def search_pubmed(query, retmax):
50
  Entrez.email = '[email protected]'
51
  handle = Entrez.esearch(db="pubmed", term=query, retmax=retmax, sort = 'relevance')
52
  record = Entrez.read(handle)
@@ -192,6 +232,10 @@ with gr.Blocks() as demo:
192
  with gr.Row():
193
  password_input = gr.Textbox(label="Enter the password")
194
  model_input = gr.Textbox(label="Enter the model to use", value="h2oai/h2ogpt-4096-llama2-7b-chat")
 
 
 
 
195
  query_input = gr.Textbox(label="Query Keywords")
196
  retmax_input = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of articles")
197
  search_button = gr.Button("Search")
@@ -200,11 +244,11 @@ with gr.Blocks() as demo:
200
  summary_output = gr.Textbox()
201
 
202
 
203
- def update_output_table(query, retmax):
204
- df = search_pubmed(query, retmax)
205
  # output_table.update(value=df)
206
  return df
207
- search_button.click(update_output_table, inputs=[query_input, retmax_input], outputs=output_table)
208
  summarize_button.click(fn=summarize_with_huggingface, inputs=[model_input, output_table, password_input], outputs=summary_output)
209
 
210
  demo.launch(debug=True)
 
44
  # Decode the response
45
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
46
  return response
47
+
48
+ def search_pubmed(query, retmax=5, mindate=None, maxdate=None, datetype="pdat"):
49
+ Entrez.email = '[email protected]' # Always set the Entrez.email to tell NCBI who you are
50
+ search_kwargs = {
51
+ "db": "pubmed",
52
+ "term": query,
53
+ "retmax": retmax,
54
+ "sort": 'relevance',
55
+ "datetype": datetype
56
+ }
57
+
58
+ # If dates are provided, add them to the search arguments
59
+ if mindate:
60
+ search_kwargs["mindate"] = mindate
61
+ if maxdate:
62
+ search_kwargs["maxdate"] = maxdate
63
+
64
+ handle = Entrez.esearch(**search_kwargs)
65
+ record = Entrez.read(handle)
66
+ handle.close()
67
+ idlist = record['IdList']
68
+
69
+ handle = Entrez.efetch(db="pubmed", id=idlist, retmode="xml")
70
+ articles = Entrez.read(handle)['PubmedArticle']
71
+ handle.close()
72
+
73
+ # ... (the rest of your existing code to extract article information)
74
+ abstracts = []
75
+ for article in articles:
76
+ article_id = article['MedlineCitation']['PMID']
77
+ article_title = article['MedlineCitation']['Article']['ArticleTitle']
78
+ print(len(article['MedlineCitation']['Article'].get('Abstract', {}).get('AbstractText', [None])))
79
+ abstract_text = article['MedlineCitation']['Article'].get('Abstract', {}).get('AbstractText', [None])
80
+
81
+ if isinstance(abstract_text, list):
82
+ # Join the list elements if abstract is a list
83
+ abstract_text = " ".join(abstract_text)
84
+ abstracts.append((article_id, article_title, abstract_text))
85
+
86
+ return pd.DataFrame(abstracts)
87
 
88
  # Function to search PubMed for articles
89
+ def search_pubmed_old(query, retmax):
90
  Entrez.email = '[email protected]'
91
  handle = Entrez.esearch(db="pubmed", term=query, retmax=retmax, sort = 'relevance')
92
  record = Entrez.read(handle)
 
232
  with gr.Row():
233
  password_input = gr.Textbox(label="Enter the password")
234
  model_input = gr.Textbox(label="Enter the model to use", value="h2oai/h2ogpt-4096-llama2-7b-chat")
235
+ with gr.Row():
236
+ startdate = gr.Textbox(label="Starting year")
237
+ enddate = gr.Textbox(label="End year")
238
+
239
  query_input = gr.Textbox(label="Query Keywords")
240
  retmax_input = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of articles")
241
  search_button = gr.Button("Search")
 
244
  summary_output = gr.Textbox()
245
 
246
 
247
+ def update_output_table(query, retmax, startdate, enddate):
248
+ df = search_pubmed(query, retmax, startdate, enddate)
249
  # output_table.update(value=df)
250
  return df
251
+ search_button.click(update_output_table, inputs=[query_input, retmax_input, startdate, enddate], outputs=output_table)
252
  summarize_button.click(fn=summarize_with_huggingface, inputs=[model_input, output_table, password_input], outputs=summary_output)
253
 
254
  demo.launch(debug=True)