# mdicio's picture
# deprecation
# 0c43797
import os
from dotenv import load_dotenv
# Import models from smolagents
from smolagents import CodeAgent, LiteLLMModel, OpenAIServerModel
# Import smolagents tools
from smolagents.default_tools import FinalAnswerTool, PythonInterpreterTool
# Import custom tools
from tools import (
AddDocumentToVectorStoreTool,
ArxivSearchTool,
DownloadFileFromLinkTool,
DuckDuckGoSearchTool,
QueryVectorStoreTool,
ReadFileContentTool,
TranscibeVideoFileTool,
TranscribeAudioTool,
VisitWebpageTool,
WikipediaSearchTool,
image_question_answering
)
# Import utility functions
from utils import extract_final_answer, replace_tool_mentions
class BoomBot:
    """Tool-using research agent built on a smolagents ``CodeAgent``.

    Construction loads environment variables, builds the language model for
    the chosen provider and wires up the agent with its tools. ``run`` then
    assembles a task prompt, executes the agent and extracts the final answer.
    """

    def __init__(self, provider: str = "meta"):
        """
        Initialize the BoomBot with the specified provider.

        Args:
            provider (str): The model provider to use. Supported values are
                "qwen", "gemma", "anthropic", "deepinfra", "meta" and "groq".

        Raises:
            ValueError: If ``provider`` is not one of the supported values.
        """
        # Load variables from a .env file before any model client is built.
        load_dotenv()
        self.provider = provider
        self.model = self._initialize_model()
        self.agent = self._create_agent()

    def _initialize_model(self):
        """
        Initialize the appropriate model based on ``self.provider``.

        Returns:
            The initialized model object (``LiteLLMModel`` or
            ``OpenAIServerModel`` depending on the provider).

        Raises:
            ValueError: If ``self.provider`` is not supported.
        """
        provider = self.provider

        if provider == "qwen":
            # NOTE(review): `device` is forwarded as an extra keyword to
            # LiteLLMModel — confirm the backend actually honours it.
            return LiteLLMModel(
                model_id="ollama_chat/qwen3:8b",
                device="cuda",
                num_ctx=32768,
                temperature=0.6,
                top_p=0.95,
            )

        if provider == "gemma":
            return LiteLLMModel(
                model_id="ollama_chat/gemma3:12b-it-qat",
                num_ctx=65536,
                temperature=1.0,
                device="cuda",
                top_k=64,
                top_p=0.95,
                min_p=0.0,
            )

        if provider == "anthropic":
            return LiteLLMModel(
                model_id="anthropic/claude-3-5-sonnet-latest",
                temperature=0.6,
                max_tokens=8192,
            )

        if provider == "deepinfra":
            return OpenAIServerModel(
                model_id="Qwen/Qwen3-235B-A22B",
                api_base="https://api.deepinfra.com/v1/openai",
                # The key is deliberately not passed here.
                # NOTE(review): presumably the client picks it up from the
                # environment — confirm which variable it reads.
                # api_key=os.environ["DEEPINFRA_API_KEY"],
                flatten_messages_as_text=True,
                max_tokens=8192,
                temperature=0.1,
            )

        if provider == "meta":
            return OpenAIServerModel(
                model_id="meta-llama/Llama-3.3-70B-Instruct-Turbo",
                api_base="https://api.deepinfra.com/v1/openai",
                # See the note on the "deepinfra" branch about the API key.
                # api_key=os.environ["DEEPINFRA_API_KEY"],
                flatten_messages_as_text=True,
                max_tokens=8192,
                temperature=0.7,
            )

        if provider == "groq":
            # NOTE(review): this id names an Anthropic model and carries no
            # "groq/" prefix, so it is unlikely to be routed to Groq —
            # confirm the intended model id.
            return LiteLLMModel(
                model_id="claude-3-opus-20240229",
                temperature=0.7,
                max_tokens=8192,
            )

        raise ValueError(f"Unsupported provider: {self.provider}")

    def _create_agent(self):
        """
        Create and configure the agent with all necessary tools.

        Returns:
            The configured CodeAgent.
        """
        # Custom tools first, then the smolagents defaults; the order matches
        # the original tool list.
        agent_tools = [
            DuckDuckGoSearchTool(),
            DownloadFileFromLinkTool(),
            ReadFileContentTool(),
            VisitWebpageTool(),
            TranscibeVideoFileTool(),
            TranscribeAudioTool(),
            WikipediaSearchTool(),
            ArxivSearchTool(),
            AddDocumentToVectorStoreTool(),
            QueryVectorStoreTool(),
            image_question_answering,
            PythonInterpreterTool(),
            FinalAnswerTool(),
        ]

        # Modules the agent's generated code is allowed to import.
        additional_imports = [
            "json",
            "os",
            "glob",
            "pathlib",
            "pandas",
            "numpy",
            "matplotlib",
            "seaborn",
            "sklearn",
            "tqdm",
            "argparse",
            "pickle",
            "io",
            "re",
            "datetime",
            "collections",
            "math",
            "random",
            "csv",
            "zipfile",
            "itertools",
            "functools",
        ]

        agent = CodeAgent(
            tools=agent_tools,
            max_steps=12,
            model=self.model,
            add_base_tools=False,
            stream_outputs=True,
            additional_authorized_imports=additional_imports,
        )

        # Rewrite tool mentions in the default system prompt.
        # NOTE(review): newer smolagents releases may expose `system_prompt`
        # as read-only and expect `prompt_templates["system_prompt"]` to be
        # edited instead — confirm against the installed version.
        agent.system_prompt = replace_tool_mentions(agent.system_prompt)
        return agent

    def _get_system_prompt(self) -> str:
        """
        Return the behaviour and workflow guidelines prepended to every task.

        Returns:
            str: The guideline text.
        """
        return """
YOUR BEHAVIOR GUIDELINES:
• Do NOT make unfounded assumptions—always ground answers in reliable sources or search results.
• For math or puzzles: break the problem into code/math, then solve programmatically.
RESEARCH WORKFLOW (in rough priority order):
1. SEARCH
- Try web_search, wikipedia_search, or arxiv_search first.
- Refine your query rather than repeating the exact same terms.
- If one search tool yields insufficient info, switch to another before downloading.
2. VISIT
- Use visit_webpage to extract and read page content when a promising link appears after one of the SEARCH tools.
- For each visited link, also download the file and add to the vector store, you might need to query this later, especially if you have a lot of search results.
3. EVALUATE
- ✅ If the page or search snippet fully answers the question, respond immediately.
- ❌ If not, move on to deeper investigation.
4. DOWNLOAD
- Use download_file_from_link tool on relevant links found (yes you can download webpages as html).
- For arXiv papers, target the /pdf/ or DOI link (e.g https://arxiv.org/pdf/2011.10672).
-
5. INDEX & QUERY
- Add downloaded documents to the vector store with add_document_to_vector_store.
- Use query_downloaded_documents for detailed answers.
6. READ
- You have access to a read_file_content tool to read most types of files. You can also directly interact with downloaded files in your python code (do this for csv files and excel files)
FALLBACK & ADAPTATION:
• If a tool fails, reformulate your query or try a different search method before dropping to download.
• If a tool fails multiple times, try a different tool.
• For arXiv: you might discover a paper link via web_search tool and then directly use download_file_from_link tool
COMMON TOOL CHAINS (conceptual outlines):
These are just guidelines, each task might require a unique workflow.
A tool can provide useful information for the task, it will not always contain the answer. You need to work to get to a final_answer that makes sense.
• FACTUAL Qs:
web_search → final_answer
• CURRENT EVENTS:
To have some summary information use web_search, that might output a promising website to visit and read content from using (visit_webpage or download_file_from_link and read_file_content)
web_search → visit_webpage → final_answer
• DOCUMENT-BASED Qs:
web_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents → final_answer
• ARXIV PAPERS:
The arxiv search tool provides a list of results with summary content, to inspect the whole paper you need to download it with download_file_from_link tool.
arxiv_search → download_file_from_link → read_file_content
If that fails
arxiv_search → download_file_from_link → add_document_to_vector_store → query_downloaded_documents
• MEDIA ANALYSIS:
download_file_from_link → transcribe_video/transcribe_audio/describe_image → final_answer
FINAL ANSWER FORMAT:
- Begin with "FINAL ANSWER: "
- Number → digits only (e.g., 42)
- String → exact text (e.g., Pope Francis)
- List → comma-separated, one space (e.g., 2, 3, 4)
- Conclude with: FINAL ANSWER: <your_answer>
"""

    def run(self, question: str, task_id: str, to_download: bool) -> str:
        """
        Run the agent on a task and return the extracted final answer.

        The prompt sent to the agent is the guideline text, followed by the
        task, followed (when ``to_download`` is truthy) by instructions to
        fetch the task's attached file.

        Args:
            question (str): The question or task for the agent to process.
            task_id (str): A unique identifier for the task; used to build
                the file download link.
            to_download (bool): Whether the task has a file that must be
                downloaded before solving.

        Returns:
            str: The value produced by ``extract_final_answer`` for the
            agent's output.
        """
        sections = [
            self._get_system_prompt(),
            "\nHere is the Task you need to solve:\n\n",
            f"Task: {question}\n\n",
        ]

        if to_download:
            link = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
            sections.append(
                "IMPORTANT: Before solving the task, you must download a required file.\n"
                f"Use the `download_file_from_link` tool with this link: {link}\n"
                "After downloading, use the appropriate tool to read or process the file "
                "before attempting to solve the task.\n\n"
            )

        prompt = "".join(sections)

        # Hand the full prompt to the agent: the guidelines and the download
        # link only reach the model if they are part of the task text.
        result = self.agent.run(prompt)

        return extract_final_answer(result)
# Example of how to use this code (commented out)
# if __name__ == "__main__":
#     agent = BoomBot()
#     response = agent.run("What is the current population of Tokyo?", "population_query", True)
#     print(f"Response: {response}")