First_agent_template

Build error

App Files Files Community

ngockhoinguyenpy committed on Mar 9

Commit

25fe8bb

verified ·

1 Parent(s): cbf4732

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -48

app.py CHANGED Viewed

@@ -1,74 +1,222 @@
-from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
-import datetime
-import requests
-import pytz
 import yaml
-from tools.final_answer import FinalAnswerTool
-from Gradio_UI import GradioUI
-# Below is an example of a tool that does nothing. Amaze us with your creativity !
-@tool
-def fetch_zen_quote() -> str:
-    """Fetches a random zen quote and returns the 'h' field from the JSON response.
-    Returns:
-        A string containing the formatted quote with the author.
-    """
-    response = requests.get("https://zenquotes.io/api/random")
-    json_data = response.json()
-    # Extract the 'h' field from the JSON response
-    quote_html = json_data[0]["h"]
-    return quote_html
-@tool
-def get_current_time_in_timezone(timezone: str) -> str:
-    """A tool that fetches the current local time in a specified timezone.
     Args:
-        timezone: A string representing a valid timezone (e.g., 'America/New_York').
     """
     try:
-        # Create timezone object
-        tz = pytz.timezone(timezone)
-        # Get current time in that timezone
-        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
-        return f"The current local time in {timezone} is: {local_time}"
-    except Exception as e:
-        return f"Error fetching time for timezone '{timezone}': {str(e)}"
-final_answer = FinalAnswerTool()
-# If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
-# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
-model = HfApiModel(
-max_tokens=2096,
-temperature=0.5,
-model_id='Qwen/Qwen2.5-Coder-32B-Instruct',# it is possible that this model may be overloaded
-custom_role_conversions=None,
-)
-# Import tool from Hub
-image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
 agent = CodeAgent(
     model=model,
-    tools=[final_answer, image_generation_tool, fetch_zen_quote], ## add your tools here (don't remove final answer)
     max_steps=6,
     verbosity_level=1,
     grammar=None,
     planning_interval=None,
-    name=None,
-    description=None,
     prompt_templates=prompt_templates
 )
-GradioUI(agent).launch()

+import feedparser
+import urllib.parse
 import yaml
+import gradio as gr
+from smolagents import CodeAgent, HfApiModel, tool
+# @tool
+# def fetch_latest_arxiv_papers(keywords: list, num_results: int = 3) -> list:
+#     """Fetches the latest research papers from arXiv based on provided keywords.
+#     Args:
+#         keywords: A list of keywords to search for relevant papers.
+#         num_results: The number of papers to fetch (default is 3).
+#     Returns:
+#         A list of dictionaries containing:
+#             - "title": The title of the research paper.
+#             - "authors": The authors of the paper.
+#             - "year": The publication year.
+#             - "abstract": A summary of the research paper.
+#             - "link": A direct link to the paper on arXiv.
+#     """
+#     try:
+#         print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")  # Debug input
+#         #Properly format query with +AND+ for multiple keywords
+#         query = "+AND+".join([f"all:{kw}" for kw in keywords])
+#         query_encoded = urllib.parse.quote(query)  # Encode spaces and special characters
+#         url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results={num_results}&sortBy=submittedDate&sortOrder=descending"
+#         print(f"DEBUG: Query URL - {url}")  # Debug URL
+#         feed = feedparser.parse(url)
+#         papers = []
+#         for entry in feed.entries:
+#             papers.append({
+#                 "title": entry.title,
+#                 "authors": ", ".join(author.name for author in entry.authors),
+#                 "year": entry.published[:4],  # Extract year
+#                 "abstract": entry.summary,
+#                 "link": entry.link
+#             })
+#         return papers
+#     except Exception as e:
+#         print(f"ERROR: {str(e)}")  # Debug errors
+#         return [f"Error fetching research papers: {str(e)}"]
+from rank_bm25 import BM25Okapi
+import nltk
+import os
+import shutil
+nltk_data_path = os.path.join(nltk.data.path[0], "tokenizers", "punkt")
+if os.path.exists(nltk_data_path):
+    shutil.rmtree(nltk_data_path)  # Remove corrupted version
+print("✅ Removed old NLTK 'punkt' data. Reinstalling...")
+# ✅ Step 2: Download the correct 'punkt' tokenizer
+nltk.download("punkt_tab")
+print("✅ Successfully installed 'punkt'!")
+@tool  # Register the function properly as a SmolAgents tool
+def fetch_latest_arxiv_papers(keywords: list, num_results: int = 5) -> list:
+    """Fetches and ranks arXiv papers using BM25 keyword relevance.
     Args:
+        keywords: List of keywords for search.
+        num_results: Number of results to return.
+    Returns:
+        List of the most relevant papers based on BM25 ranking.
     """
     try:
+        print(f"DEBUG: Searching arXiv papers with keywords: {keywords}")
+        # Use a general keyword search (without `ti:` and `abs:`)
+        query = "+AND+".join([f"all:{kw}" for kw in keywords])
+        query_encoded = urllib.parse.quote(query)
+        url = f"http://export.arxiv.org/api/query?search_query={query_encoded}&start=0&max_results=50&sortBy=submittedDate&sortOrder=descending"
+        print(f"DEBUG: Query URL - {url}")
+        feed = feedparser.parse(url)
+        papers = []
+        # Extract papers from arXiv
+        for entry in feed.entries:
+            papers.append({
+                "title": entry.title,
+                "authors": ", ".join(author.name for author in entry.authors),
+                "year": entry.published[:4],
+                "abstract": entry.summary,
+                "link": entry.link
+            })
+        if not papers:
+            return [{"error": "No results found. Try different keywords."}]
+        # Apply BM25 ranking
+        tokenized_corpus = [nltk.word_tokenize(paper["title"].lower() + " " + paper["abstract"].lower()) for paper in papers]
+        bm25 = BM25Okapi(tokenized_corpus)
+        tokenized_query = nltk.word_tokenize(" ".join(keywords).lower())
+        scores = bm25.get_scores(tokenized_query)
+        # Sort papers based on BM25 score
+        ranked_papers = sorted(zip(papers, scores), key=lambda x: x[1], reverse=True)
+        # Return the most relevant ones
+        return [paper[0] for paper in ranked_papers[:num_results]]
+    except Exception as e:
+        print(f"ERROR: {str(e)}")
+        return [{"error": f"Error fetching research papers: {str(e)}"}]
+# AI Model
+model = HfApiModel(
+    max_tokens=2096,
+    temperature=0.5,
+    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
+    custom_role_conversions=None,
+)
+# Load prompt templates
 with open("prompts.yaml", 'r') as stream:
     prompt_templates = yaml.safe_load(stream)
+# Create the AI Agent
 agent = CodeAgent(
     model=model,
+    tools=[fetch_latest_arxiv_papers],  # Properly registered tool
     max_steps=6,
     verbosity_level=1,
     grammar=None,
     planning_interval=None,
+    name="ScholarAgent",
+    description="An AI agent that fetches the latest research papers from arXiv based on user-defined keywords and filters.",
     prompt_templates=prompt_templates
 )
+# # Define Gradio Search Function
+# def search_papers(user_input):
+#     keywords = [kw.strip() for kw in user_input.split(",") if kw.strip()]  # Ensure valid keywords
+#     print(f"DEBUG: Received input keywords - {keywords}")  # Debug user input
+#     if not keywords:
+#         print("DEBUG: No valid keywords provided.")
+#         return "Error: Please enter at least one valid keyword."
+#     results = fetch_latest_arxiv_papers(keywords, num_results=3)  # Fetch 3 results
+#     print(f"DEBUG: Results received - {results}")  # Debug function output
+#     if isinstance(results, list) and results and isinstance(results[0], dict):
+#         #Format output with better readability and clarity
+#         formatted_results = "\n\n".join([
+#             f"---\n\n"
+#             f"📌 **Title:**\n{paper['title']}\n\n"
+#             f"👨‍🔬 **Authors:**\n{paper['authors']}\n\n"
+#             f"📅 **Year:** {paper['year']}\n\n"
+#             f"📖 **Abstract:**\n{paper['abstract'][:500]}... *(truncated for readability)*\n\n"
+#             f"[🔗 Read Full Paper]({paper['link']})\n\n"
+#             for paper in results
+#         ])
+#         return formatted_results
+#     print("DEBUG: No results found.")
+#     return "No results found. Try different keywords."
+#Search Papers
+def search_papers(user_input):
+    keywords = [kw.strip() for kw in user_input.split(",") if kw.strip()]  # Ensure valid keywords
+    print(f"DEBUG: Received input keywords - {keywords}")  # Debug user input
+    if not keywords:
+        print("DEBUG: No valid keywords provided.")
+        return "Error: Please enter at least one valid keyword."
+    results = fetch_latest_arxiv_papers(keywords, num_results=3)  # Fetch 3 results
+    print(f"DEBUG: Results received - {results}")  # Debug function output
+    # ✅ Check if the API returned an error
+    if isinstance(results, list) and len(results) > 0 and "error" in results[0]:
+        return results[0]["error"]  # Return the error message directly
+    # ✅ Format results only if valid papers exist
+    if isinstance(results, list) and results and isinstance(results[0], dict):
+        formatted_results = "\n\n".join([
+            f"---\n\n"
+            f"📌 **Title:** {paper['title']}\n\n"
+            f"👨‍🔬 **Authors:** {paper['authors']}\n\n"
+            f"📅 **Year:** {paper['year']}\n\n"
+            f"📖 **Abstract:** {paper['abstract'][:500]}... *(truncated for readability)*\n\n"
+            f"[🔗 Read Full Paper]({paper['link']})\n\n"
+            for paper in results
+        ])
+        return formatted_results
+    print("DEBUG: No results found.")
+    return "No results found. Try different keywords."
+# Create Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("# ScholarAgent")
+    keyword_input = gr.Textbox(label="Enter keywords (comma-separated)", placeholder="e.g., deep learning, reinforcement learning")
+    output_display = gr.Markdown()
+    search_button = gr.Button("Search")
+    search_button.click(search_papers, inputs=[keyword_input], outputs=[output_display])
+    print("DEBUG: Gradio UI is running. Waiting for user input...")
+# Launch Gradio App
+demo.launch()