Duibonduil committed on
Commit
9c31777
·
verified ·
1 Parent(s): 1e0a254

Upload 21 files

Browse files
tests/__init__.py ADDED
File without changes
tests/conftest.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from unittest.mock import patch
2
+
3
+ import pytest
4
+
5
+ from smolagents.agents import MultiStepAgent
6
+ from smolagents.monitoring import LogLevel
7
+
8
+
9
+ # Import fixture modules as plugins
10
+ pytest_plugins = ["tests.fixtures.agents", "tests.fixtures.tools"]
11
+
12
+ original_multi_step_agent_init = MultiStepAgent.__init__
13
+
14
+
15
+ @pytest.fixture(autouse=True)
16
+ def patch_multi_step_agent_with_suppressed_logging():
17
+ with patch.object(MultiStepAgent, "__init__", autospec=True) as mock_init:
18
+
19
+ def init_with_suppressed_logging(self, *args, verbosity_level=LogLevel.OFF, **kwargs):
20
+ original_multi_step_agent_init(self, *args, verbosity_level=verbosity_level, **kwargs)
21
+
22
+ mock_init.side_effect = init_with_suppressed_logging
23
+ yield
tests/test_agents.py ADDED
@@ -0,0 +1,2089 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import io
16
+ import os
17
+ import re
18
+ import tempfile
19
+ import uuid
20
+ import warnings
21
+ from collections.abc import Generator
22
+ from contextlib import nullcontext as does_not_raise
23
+ from dataclasses import dataclass
24
+ from pathlib import Path
25
+ from textwrap import dedent
26
+ from typing import Optional
27
+ from unittest.mock import MagicMock, patch
28
+
29
+ import pytest
30
+ from huggingface_hub import (
31
+ ChatCompletionOutputFunctionDefinition,
32
+ ChatCompletionOutputMessage,
33
+ ChatCompletionOutputToolCall,
34
+ )
35
+ from rich.console import Console
36
+
37
+ from smolagents import EMPTY_PROMPT_TEMPLATES
38
+ from smolagents.agent_types import AgentImage, AgentText
39
+ from smolagents.agents import (
40
+ AgentError,
41
+ AgentMaxStepsError,
42
+ AgentToolCallError,
43
+ CodeAgent,
44
+ MultiStepAgent,
45
+ ToolCall,
46
+ ToolCallingAgent,
47
+ ToolOutput,
48
+ populate_template,
49
+ )
50
+ from smolagents.default_tools import DuckDuckGoSearchTool, FinalAnswerTool, PythonInterpreterTool, VisitWebpageTool
51
+ from smolagents.memory import (
52
+ ActionStep,
53
+ PlanningStep,
54
+ TaskStep,
55
+ )
56
+ from smolagents.models import (
57
+ ChatMessage,
58
+ ChatMessageToolCall,
59
+ ChatMessageToolCallFunction,
60
+ InferenceClientModel,
61
+ MessageRole,
62
+ Model,
63
+ TransformersModel,
64
+ )
65
+ from smolagents.monitoring import AgentLogger, LogLevel, TokenUsage
66
+ from smolagents.tools import Tool, tool
67
+ from smolagents.utils import (
68
+ BASE_BUILTIN_MODULES,
69
+ AgentExecutionError,
70
+ AgentGenerationError,
71
+ AgentToolExecutionError,
72
+ )
73
+
74
+
@dataclass
class ChoiceDeltaToolCallFunction:
    # Lightweight stand-in for a streamed tool-call "function" delta payload.
    arguments: Optional[str] = None  # incremental fragment of the arguments string, if any
    name: Optional[str] = None  # tool/function name fragment, if any
79
+
80
+
@dataclass
class ChoiceDeltaToolCall:
    # Lightweight stand-in for one streamed tool-call delta entry.
    index: Optional[int] = None  # position of this tool call in the calls list
    id: Optional[str] = None  # tool-call identifier
    function: Optional[ChoiceDeltaToolCallFunction] = None  # nested function payload
    type: Optional[str] = None  # e.g. "function"
87
+
88
+
@dataclass
class ChoiceDelta:
    # Lightweight stand-in for a streamed chat-completion choice delta.
    content: Optional[str] = None  # incremental text content
    function_call: Optional[str] = None  # legacy function-call payload
    refusal: Optional[str] = None  # refusal message, if any
    role: Optional[str] = None  # message role fragment
    tool_calls: Optional[list] = None  # list of ChoiceDeltaToolCall entries
96
+
97
+
def get_new_path(suffix="") -> str:
    """Build a unique, non-existing file path inside a freshly created temp directory.

    Args:
        suffix: Optional suffix (e.g. ``".png"``) appended to the random file name.

    Returns:
        Absolute path string; the parent directory exists, the file itself does not.
    """
    parent = tempfile.mkdtemp()
    unique_name = f"{uuid.uuid4()}{suffix}"
    return os.path.join(parent, unique_name)
101
+
102
+
@pytest.fixture
def agent_logger():
    # DEBUG-level logger writing to an in-memory recording console, so tests can
    # inspect everything logged via console.export_text() without touching the
    # real terminal (no color, not a TTY).
    return AgentLogger(
        LogLevel.DEBUG, console=Console(record=True, no_color=True, force_terminal=False, file=io.StringIO())
    )
108
+
109
+
class FakeToolCallModel(Model):
    """Scripted tool-calling model: first requests python_interpreter(2*3.6452), then final_answer('7.2904')."""

    def generate(self, messages, tools_to_call_from=None, stop_sequences=None):
        # A short history (fewer than 3 messages) means this is the first step.
        if len(messages) < 3:
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="",
                tool_calls=[
                    ChatMessageToolCall(
                        id="call_0",
                        type="function",
                        function=ChatMessageToolCallFunction(
                            name="python_interpreter", arguments={"code": "2*3.6452"}
                        ),
                    )
                ],
            )
        else:
            # Second step: hand back the hard-coded final answer.
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="",
                tool_calls=[
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(name="final_answer", arguments={"answer": "7.2904"}),
                    )
                ],
            )
138
+
139
+
class FakeToolCallModelImage(Model):
    """Scripted model: first calls fake_image_generation_tool, then final_answer with the state key 'image.png'."""

    def generate(self, messages, tools_to_call_from=None, stop_sequences=None):
        # A short history (fewer than 3 messages) means this is the first step.
        if len(messages) < 3:
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="",
                tool_calls=[
                    ChatMessageToolCall(
                        id="call_0",
                        type="function",
                        function=ChatMessageToolCallFunction(
                            name="fake_image_generation_tool",
                            arguments={"prompt": "An image of a cat"},
                        ),
                    )
                ],
            )
        else:
            # Second step: final answer refers to the stored image by its state key.
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="",
                tool_calls=[
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(name="final_answer", arguments="image.png"),
                    )
                ],
            )
169
+
170
+
class FakeToolCallModelVL(Model):
    """Scripted vision model: first calls fake_image_understanding_tool, then answers 'The image is a cat.'."""

    def generate(self, messages, tools_to_call_from=None, stop_sequences=None):
        # A short history (fewer than 3 messages) means this is the first step.
        if len(messages) < 3:
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="",
                tool_calls=[
                    ChatMessageToolCall(
                        id="call_0",
                        type="function",
                        function=ChatMessageToolCallFunction(
                            name="fake_image_understanding_tool",
                            arguments={
                                "prompt": "What is in this image?",
                                "image": "image.png",
                            },
                        ),
                    )
                ],
            )
        else:
            # Second step: return the canned caption as the final answer.
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="",
                tool_calls=[
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(name="final_answer", arguments="The image is a cat."),
                    )
                ],
            )
203
+
204
+
class FakeCodeModel(Model):
    """Scripted code model: step 1 emits a code snippet, step 2 emits final_answer(7.2904).

    Uses the 'special_marker' token in its own step-1 output to detect, from the
    serialized message history, whether step 1 already happened.

    NOTE(review): the step-1 snippet computes ``2**3.6452`` while the thought says
    "multiply"; tests rely only on the hard-coded final answer 7.2904 — confirm
    this mismatch is intentional.
    """

    def generate(self, messages, stop_sequences=None):
        prompt = str(messages)
        if "special_marker" not in prompt:
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: I should multiply 2 by 3.6452. special_marker
<code>
result = 2**3.6452
</code>
""",
            )
        else:  # We're at step 2
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: I can now answer the initial question
<code>
final_answer(7.2904)
</code>
""",
            )
228
+
229
+
class FakeCodeModelPlanning(Model):
    """Scripted model for planning tests: plan -> action -> plan again, driven by marker tokens.

    Each response carries a fixed TokenUsage so token accounting can be asserted.
    """

    def generate(self, messages, stop_sequences=None):
        prompt = str(messages)
        # No plan emitted yet: produce the initial plan (tagged with planning_marker).
        if "planning_marker" not in prompt:
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="llm plan update planning_marker",
                token_usage=TokenUsage(input_tokens=10, output_tokens=10),
            )
        # Plan exists but no action yet: produce the action step (tagged with action_marker).
        elif "action_marker" not in prompt:
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: I should multiply 2 by 3.6452. action_marker
<code>
result = 2**3.6452
</code>
""",
                token_usage=TokenUsage(input_tokens=10, output_tokens=10),
            )
        # Both markers seen: emit an updated plan.
        else:
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="llm plan again",
                token_usage=TokenUsage(input_tokens=10, output_tokens=10),
            )
256
+
257
+
class FakeCodeModelError(Model):
    """Scripted model whose step-1 code raises ValueError at runtime; step 2 acknowledges the error."""

    def generate(self, messages, stop_sequences=None):
        prompt = str(messages)
        if "special_marker" not in prompt:
            # Step 1: snippet prints a flag then raises inside error_function().
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: I should multiply 2 by 3.6452. special_marker
<code>
print("Flag!")
def error_function():
    raise ValueError("error")

error_function()
</code>
""",
            )
        else:  # We're at step 2
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: I faced an error in the previous step.
<code>
final_answer("got an error")
</code>
""",
            )
285
+
286
+
class FakeCodeModelSyntaxError(Model):
    """Scripted model whose step-1 code contains an IndentationError; step 2 acknowledges it.

    Fix: the stray leading indent on the ``print("Failing due to unexpected
    indent")`` line was missing, so the emitted snippet was valid Python and
    would never produce the syntax error this fake (and its own message text)
    promises. The unexpected indent is restored.
    """

    def generate(self, messages, stop_sequences=None):
        prompt = str(messages)
        if "special_marker" not in prompt:
            # Step 1: deliberately mis-indented snippet to trigger an IndentationError.
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: I should multiply 2 by 3.6452. special_marker
<code>
a = 2
b = a * 2
    print("Failing due to unexpected indent")
print("Ok, calculation done!")
</code>
""",
            )
        else:  # We're at step 2
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: I can now answer the initial question
<code>
final_answer("got an error")
</code>
""",
            )
313
+
314
+
class FakeCodeModelImport(Model):
    """Scripted single-response model whose code imports numpy then calls final_answer."""

    def generate(self, messages, stop_sequences=None):
        # Always the same response: used to test authorized-import handling.
        return ChatMessage(
            role=MessageRole.ASSISTANT,
            content="""
Thought: I can answer the question
<code>
import numpy as np
final_answer("got an error")
</code>
""",
        )
327
+
328
+
class FakeCodeModelFunctionDef(Model):
    """Scripted model: step 1 defines moving_average(), step 2 calls it and returns the result.

    Exercises persistence of function definitions across executor steps.
    """

    def generate(self, messages, stop_sequences=None):
        prompt = str(messages)
        if "special_marker" not in prompt:
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: Let's define the function. special_marker
<code>
import numpy as np

def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w
</code>
""",
            )
        else:  # We're at step 2
            return ChatMessage(
                role=MessageRole.ASSISTANT,
                content="""
Thought: I can now answer the initial question
<code>
x, w = [0, 1, 2, 3, 4, 5], 2
res = moving_average(x, w)
final_answer(res)
</code>
""",
            )
357
+
358
+
class FakeCodeModelSingleStep(Model):
    """Scripted single-response model: computes via python_interpreter and calls final_answer in one step.

    NOTE(review): the snippet opens with ``<code>`` but closes with a ``` fence —
    confirm the code extractor is meant to tolerate this mismatch.
    """

    def generate(self, messages, stop_sequences=None):
        return ChatMessage(
            role=MessageRole.ASSISTANT,
            content="""
Thought: I should multiply 2 by 3.6452. special_marker
<code>
result = python_interpreter(code="2*3.6452")
final_answer(result)
```
""",
        )
371
+
372
+
class FakeCodeModelNoReturn(Model):
    """Scripted model that never calls final_answer, so runs only end via max_steps.

    NOTE(review): the snippet opens with ``<code>`` but closes with a ``` fence —
    confirm the code extractor is meant to tolerate this mismatch.
    """

    def generate(self, messages, stop_sequences=None):
        # Same response every step: prints a result but never produces a final answer.
        return ChatMessage(
            role=MessageRole.ASSISTANT,
            content="""
Thought: I should multiply 2 by 3.6452. special_marker
<code>
result = python_interpreter(code="2*3.6452")
print(result)
```
""",
        )
385
+
386
+
class TestAgent:
    """End-to-end behavior tests for CodeAgent and ToolCallingAgent using the scripted fake models above."""

    def test_fake_toolcalling_agent(self):
        agent = ToolCallingAgent(tools=[PythonInterpreterTool()], model=FakeToolCallModel())
        output = agent.run("What is 2 multiplied by 3.6452?")
        assert isinstance(output, str)
        assert "7.2904" in output
        assert agent.memory.steps[0].task == "What is 2 multiplied by 3.6452?"
        assert "7.2904" in agent.memory.steps[1].observations
        assert (
            agent.memory.steps[2].model_output
            == "Tool call call_1: calling 'final_answer' with arguments: {'answer': '7.2904'}"
        )

    def test_toolcalling_agent_handles_image_tool_outputs(self, shared_datadir):
        import PIL.Image

        @tool
        def fake_image_generation_tool(prompt: str) -> PIL.Image.Image:
            """Tool that generates an image.

            Args:
                prompt: The prompt
            """

            import PIL.Image

            return PIL.Image.open(shared_datadir / "000000039769.png")

        agent = ToolCallingAgent(
            tools=[fake_image_generation_tool], model=FakeToolCallModelImage(), verbosity_level=10
        )
        output = agent.run("Make me an image.")
        # Image outputs are wrapped as AgentImage and stored in agent state under their key.
        assert isinstance(output, AgentImage)
        assert isinstance(agent.state["image.png"], PIL.Image.Image)

    def test_toolcalling_agent_handles_image_inputs(self, shared_datadir):
        import PIL.Image

        image = PIL.Image.open(shared_datadir / "000000039769.png")  # dummy input

        @tool
        def fake_image_understanding_tool(prompt: str, image: PIL.Image.Image) -> str:
            """Tool that creates a caption for an image.

            Args:
                prompt: The prompt
                image: The image
            """
            return "The image is a cat."

        agent = ToolCallingAgent(tools=[fake_image_understanding_tool], model=FakeToolCallModelVL())
        output = agent.run("Caption this image.", images=[image])
        assert output == "The image is a cat."

    def test_fake_code_agent(self):
        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel(), verbosity_level=10)
        output = agent.run("What is 2 multiplied by 3.6452?")
        assert isinstance(output, float)
        assert output == 7.2904
        assert agent.memory.steps[0].task == "What is 2 multiplied by 3.6452?"
        assert agent.memory.steps[2].tool_calls == [
            ToolCall(name="python_interpreter", arguments="final_answer(7.2904)", id="call_2")
        ]

    def test_additional_args_added_to_task(self):
        agent = CodeAgent(tools=[], model=FakeCodeModel())
        agent.run(
            "What is 2 multiplied by 3.6452?",
            additional_args={"instruction": "Remember this."},
        )
        # additional_args content must be surfaced in the task prompt.
        assert "Remember this" in agent.task

    def test_reset_conversations(self):
        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel())
        output = agent.run("What is 2 multiplied by 3.6452?", reset=True)
        assert output == 7.2904
        assert len(agent.memory.steps) == 3

        # reset=False keeps prior memory, so steps accumulate.
        output = agent.run("What is 2 multiplied by 3.6452?", reset=False)
        assert output == 7.2904
        assert len(agent.memory.steps) == 5

        output = agent.run("What is 2 multiplied by 3.6452?", reset=True)
        assert output == 7.2904
        assert len(agent.memory.steps) == 3

    def test_setup_agent_with_empty_toolbox(self):
        # Must not raise: an empty toolbox is valid.
        ToolCallingAgent(model=FakeToolCallModel(), tools=[])

    def test_fails_max_steps(self):
        agent = CodeAgent(
            tools=[PythonInterpreterTool()],
            model=FakeCodeModelNoReturn(),  # use this callable because it never ends
            max_steps=5,
        )
        answer = agent.run("What is 2 multiplied by 3.6452?")
        assert len(agent.memory.steps) == 7  # Task step + 5 action steps + Final answer
        assert type(agent.memory.steps[-1].error) is AgentMaxStepsError
        assert isinstance(answer, str)

        agent = CodeAgent(
            tools=[PythonInterpreterTool()],
            model=FakeCodeModelNoReturn(),  # use this callable because it never ends
            max_steps=5,
        )
        # A per-run max_steps overrides the constructor value.
        answer = agent.run("What is 2 multiplied by 3.6452?", max_steps=3)
        assert len(agent.memory.steps) == 5  # Task step + 3 action steps + Final answer
        assert type(agent.memory.steps[-1].error) is AgentMaxStepsError
        assert isinstance(answer, str)

    def test_tool_descriptions_get_baked_in_system_prompt(self):
        tool = PythonInterpreterTool()
        tool.name = "fake_tool_name"
        tool.description = "fake_tool_description"
        agent = CodeAgent(tools=[tool], model=FakeCodeModel())
        agent.run("Empty task")
        assert agent.system_prompt is not None
        assert f"def {tool.name}(" in agent.system_prompt
        assert f'"""{tool.description}' in agent.system_prompt

    def test_module_imports_get_baked_in_system_prompt(self):
        agent = CodeAgent(tools=[], model=FakeCodeModel())
        agent.run("Empty task")
        for module in BASE_BUILTIN_MODULES:
            assert module in agent.system_prompt

    def test_init_agent_with_different_toolsets(self):
        toolset_1 = []
        agent = CodeAgent(tools=toolset_1, model=FakeCodeModel())
        assert len(agent.tools) == 1  # when no tools are provided, only the final_answer tool is added by default

        toolset_2 = [PythonInterpreterTool(), PythonInterpreterTool()]
        with pytest.raises(ValueError) as e:
            agent = CodeAgent(tools=toolset_2, model=FakeCodeModel())
        assert "Each tool or managed_agent should have a unique name!" in str(e)

        with pytest.raises(ValueError) as e:
            agent.name = "python_interpreter"
            agent.description = "empty"
            CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModel(), managed_agents=[agent])
        assert "Each tool or managed_agent should have a unique name!" in str(e)

        # check that python_interpreter base tool does not get added to CodeAgent
        agent = CodeAgent(tools=[], model=FakeCodeModel(), add_base_tools=True)
        assert len(agent.tools) == 3  # added final_answer tool + search + visit_webpage

        # check that python_interpreter base tool gets added to ToolCallingAgent
        agent = ToolCallingAgent(tools=[], model=FakeCodeModel(), add_base_tools=True)
        assert len(agent.tools) == 4  # added final_answer tool + search + visit_webpage + python_interpreter

    def test_function_persistence_across_steps(self):
        agent = CodeAgent(
            tools=[],
            model=FakeCodeModelFunctionDef(),
            max_steps=2,
            additional_authorized_imports=["numpy"],
            verbosity_level=100,
        )
        res = agent.run("ok")
        # moving_average([0..5], 2)[0] == 0.5 only if step-1's definition survived into step 2.
        assert res[0] == 0.5

    def test_init_managed_agent(self):
        agent = CodeAgent(tools=[], model=FakeCodeModelFunctionDef(), name="managed_agent", description="Empty")
        assert agent.name == "managed_agent"
        assert agent.description == "Empty"

    def test_agent_description_gets_correctly_inserted_in_system_prompt(self):
        managed_agent = CodeAgent(
            tools=[], model=FakeCodeModelFunctionDef(), name="managed_agent", description="Empty"
        )
        manager_agent = CodeAgent(
            tools=[],
            model=FakeCodeModelFunctionDef(),
            managed_agents=[managed_agent],
        )
        # Only the manager's prompt mentions team members; templates must be fully rendered.
        assert "You can also give tasks to team members." not in managed_agent.system_prompt
        assert "{{managed_agents_descriptions}}" not in managed_agent.system_prompt
        assert "You can also give tasks to team members." in manager_agent.system_prompt

    def test_replay_shows_logs(self, agent_logger):
        agent = CodeAgent(
            tools=[],
            model=FakeCodeModelImport(),
            verbosity_level=0,
            additional_authorized_imports=["numpy"],
            logger=agent_logger,
        )
        agent.run("Count to 3")

        str_output = agent_logger.console.export_text()

        assert "New run" in str_output
        assert 'final_answer("got' in str_output
        assert "</code>" in str_output

        agent = ToolCallingAgent(tools=[PythonInterpreterTool()], model=FakeToolCallModel(), verbosity_level=0)
        agent.logger = agent_logger

        agent.run("What is 2 multiplied by 3.6452?")
        agent.replay()

        str_output = agent_logger.console.export_text()
        assert "Tool call" in str_output
        assert "arguments" in str_output

    def test_code_nontrivial_final_answer_works(self):
        class FakeCodeModelFinalAnswer(Model):
            def generate(self, messages, stop_sequences=None):
                return ChatMessage(
                    role=MessageRole.ASSISTANT,
                    content="""<code>
def nested_answer():
    final_answer("Correct!")

nested_answer()
</code>""",
                )

        agent = CodeAgent(tools=[], model=FakeCodeModelFinalAnswer())

        # final_answer called from inside a nested function must still end the run.
        output = agent.run("Count to 3")
        assert output == "Correct!"

    def test_transformers_toolcalling_agent(self):
        @tool
        def weather_api(location: str, celsius: str = "") -> str:
            """
            Gets the weather in the next days at given location.
            Secretly this tool does not care about the location, it hates the weather everywhere.

            Args:
                location: the location
                celsius: the temperature type
            """
            return "The weather is UNGODLY with torrential rains and temperatures below -10°C"

        model = TransformersModel(
            model_id="HuggingFaceTB/SmolLM2-360M-Instruct",
            max_new_tokens=100,
            device_map="auto",
            do_sample=False,
        )
        agent = ToolCallingAgent(model=model, tools=[weather_api], max_steps=1, verbosity_level=10)
        task = "What is the weather in Paris? "
        agent.run(task)
        assert agent.memory.steps[0].task == task
        assert agent.memory.steps[1].tool_calls[0].name == "weather_api"
        step_memory_dict = agent.memory.get_succinct_steps()[1]
        assert step_memory_dict["model_output_message"]["tool_calls"][0]["function"]["name"] == "weather_api"
        assert step_memory_dict["model_output_message"]["raw"]["completion_kwargs"]["max_new_tokens"] == 100
        assert "model_input_messages" in agent.memory.get_full_steps()[1]
        assert step_memory_dict["token_usage"]["total_tokens"] > 100
        assert step_memory_dict["timing"]["duration"] > 0.1

    def test_final_answer_checks(self):
        error_string = "failed with error"

        def check_always_fails(final_answer, agent_memory):
            assert False, "Error raised in check"

        agent = CodeAgent(model=FakeCodeModel(), tools=[], final_answer_checks=[check_always_fails])
        agent.run("Dummy task.")
        # A failing check must surface its error in the agent's memory messages.
        assert error_string in str(agent.write_memory_to_messages())
        assert "Error raised in check" in str(agent.write_memory_to_messages())

        agent = CodeAgent(
            model=FakeCodeModel(),
            tools=[],
            final_answer_checks=[lambda x, y: x == 7.2904],
            verbosity_level=1000,
        )
        output = agent.run("Dummy task.")
        assert output == 7.2904  # Check that output is correct
        assert len([step for step in agent.memory.steps if isinstance(step, ActionStep)]) == 2
        assert error_string not in str(agent.write_memory_to_messages())

    def test_generation_errors_are_raised(self):
        class FakeCodeModel(Model):
            def generate(self, messages, stop_sequences=None):
                assert False, "Generation failed"

        agent = CodeAgent(model=FakeCodeModel(), tools=[])
        with pytest.raises(AgentGenerationError) as e:
            agent.run("Dummy task.")
        assert len(agent.memory.steps) == 2
        assert "Generation failed" in str(e)

    def test_planning_step_with_injected_memory(self):
        """Test that agent properly uses update plan prompts when memory is injected before a run.

        This test verifies:
        1. Planning steps are created with the correct frequency
        2. Injected memory is included in planning context
        3. Messages are properly formatted with expected roles and content
        """
        planning_interval = 1
        max_steps = 4
        task = "Continuous task"
        previous_task = "Previous user request"

        # Create agent with planning capability
        agent = CodeAgent(
            tools=[],
            planning_interval=planning_interval,
            model=FakeCodeModelPlanning(),
            max_steps=max_steps,
        )

        # Inject memory before run to simulate existing conversation history
        previous_step = TaskStep(task=previous_task)
        agent.memory.steps.append(previous_step)

        # Run the agent
        agent.run(task, reset=False)

        # Extract and validate planning steps
        planning_steps = [step for step in agent.memory.steps if isinstance(step, PlanningStep)]
        assert len(planning_steps) > 2, "Expected multiple planning steps to be generated"

        # Verify first planning step incorporates injected memory
        first_planning_step = planning_steps[0]
        input_messages = first_planning_step.model_input_messages

        # Check message structure and content
        assert len(input_messages) == 4, (
            "First planning step should have 4 messages: system-plan-pre-update + memory + task + user-plan-post-update"
        )

        # Verify system message contains current task
        system_message = input_messages[0]
        assert system_message.role == "system", "First message should have system role"
        assert task in system_message.content[0]["text"], f"System message should contain the current task: '{task}'"

        # Verify memory message contains previous task
        memory_message = input_messages[1]
        assert previous_task in memory_message.content[0]["text"], (
            f"Memory message should contain previous task: '{previous_task}'"
        )

        # Verify task message contains current task
        task_message = input_messages[2]
        assert task in task_message.content[0]["text"], f"Task message should contain current task: '{task}'"

        # Verify user message for planning
        user_message = input_messages[3]
        assert user_message.role == "user", "Fourth message should have user role"

        # Verify second planning step has more context from first agent actions
        second_planning_step = planning_steps[1]
        second_messages = second_planning_step.model_input_messages

        # Check that conversation history is growing appropriately
        assert len(second_messages) == 6, "Second planning step should have 6 messages including tool interactions"

        # Verify all conversation elements are present
        conversation_text = "".join([msg.content[0]["text"] for msg in second_messages if hasattr(msg, "content")])
        assert previous_task in conversation_text, "Previous task should be included in the conversation history"
        assert task in conversation_text, "Current task should be included in the conversation history"
        assert "tools" in conversation_text, "Tool interactions should be included in the conversation history"
746
+
747
+
class CustomFinalAnswerTool(FinalAnswerTool):
    """FinalAnswerTool subclass that appends 'CUSTOM' to the answer, so tests can detect overrides."""

    def forward(self, answer) -> str:
        return answer + "CUSTOM"
751
+
752
+
class MockTool(Tool):
    """Minimal Tool stub: configurable name, no inputs, fixed string output."""

    def __init__(self, name):
        self.name = name
        self.description = "Mock tool description"
        self.inputs = {}
        self.output_type = "string"

    def forward(self):
        return "Mock tool output"
762
+
763
+
class MockAgent:
    """Minimal managed-agent stand-in exposing name, a name->tool mapping, and a description."""

    def __init__(self, name, tools, description="Mock agent description"):
        self.name = name
        self.description = description
        # Index tools by their name, mirroring how real agents expose their toolbox.
        self.tools = {each.name: each for each in tools}
769
+
770
+
class DummyMultiStepAgent(MultiStepAgent):
    """Concrete MultiStepAgent with a no-op step and empty system prompt, for base-class tests."""

    def step(self, memory_step: ActionStep) -> Generator[None]:
        # Single no-op yield: satisfies the generator contract without doing work.
        yield None

    def initialize_system_prompt(self):
        pass
777
+
778
+
779
class TestMultiStepAgent:
    """Tests for MultiStepAgent construction, prompting, planning, interruption,
    tool/managed-agent validation, and dict (de)serialization."""

    def test_instantiation_disables_logging_to_terminal(self):
        fake_model = MagicMock()
        agent = DummyMultiStepAgent(tools=[], model=fake_model)
        assert agent.logger.level == -1, "logging to terminal should be disabled for testing using a fixture"

    def test_instantiation_with_prompt_templates(self, prompt_templates):
        # `prompt_templates` comes from the shared fixtures plugin (see conftest).
        agent = DummyMultiStepAgent(tools=[], model=MagicMock(), prompt_templates=prompt_templates)
        assert agent.prompt_templates == prompt_templates
        assert agent.prompt_templates["system_prompt"] == "This is a test system prompt."
        assert "managed_agent" in agent.prompt_templates
        assert agent.prompt_templates["managed_agent"]["task"] == "Task for {{name}}: {{task}}"
        assert agent.prompt_templates["managed_agent"]["report"] == "Report for {{name}}: {{final_answer}}"

    @pytest.mark.parametrize(
        "tools, expected_final_answer_tool",
        [([], FinalAnswerTool), ([CustomFinalAnswerTool()], CustomFinalAnswerTool)],
    )
    def test_instantiation_with_final_answer_tool(self, tools, expected_final_answer_tool):
        # A default FinalAnswerTool is injected when none is given; a user-supplied
        # one must be kept as-is.
        agent = DummyMultiStepAgent(tools=tools, model=MagicMock())
        assert "final_answer" in agent.tools
        assert isinstance(agent.tools["final_answer"], expected_final_answer_tool)

    def test_instantiation_with_deprecated_grammar(self):
        class SimpleAgent(MultiStepAgent):
            def initialize_system_prompt(self) -> str:
                return "Test system prompt"

        # Test with a non-None grammar parameter
        with pytest.warns(
            FutureWarning, match="Parameter 'grammar' is deprecated and will be removed in version 1.20."
        ):
            SimpleAgent(tools=[], model=MagicMock(), grammar={"format": "json"}, verbosity_level=LogLevel.DEBUG)

        # Verify no warning when grammar is None
        with warnings.catch_warnings():
            warnings.simplefilter("error")  # Turn warnings into errors
            SimpleAgent(tools=[], model=MagicMock(), grammar=None, verbosity_level=LogLevel.DEBUG)

    def test_system_prompt_property(self):
        """Test that system_prompt property is read-only and calls initialize_system_prompt."""

        class SimpleAgent(MultiStepAgent):
            def initialize_system_prompt(self) -> str:
                return "Test system prompt"

            def step(self, memory_step: ActionStep) -> Generator[None]:
                yield None

        # Create a simple agent with mocked model
        model = MagicMock()
        agent = SimpleAgent(tools=[], model=model)

        # Test reading the property works and calls initialize_system_prompt
        assert agent.system_prompt == "Test system prompt"

        # Test setting the property raises AttributeError with correct message
        with pytest.raises(
            AttributeError,
            match=re.escape(
                """The 'system_prompt' property is read-only. Use 'self.prompt_templates["system_prompt"]' instead."""
            ),
        ):
            agent.system_prompt = "New system prompt"

        # assert "read-only" in str(exc_info.value)
        # assert "Use 'self.prompt_templates[\"system_prompt\"]' instead" in str(exc_info.value)

    def test_logs_display_thoughts_even_if_error(self):
        # Even when the model produces no parsable tool call / code action, its
        # raw "thoughts" must still be surfaced in the logs.
        class FakeJsonModelNoCall(Model):
            def generate(self, messages, stop_sequences=None, tools_to_call_from=None):
                return ChatMessage(
                    role=MessageRole.ASSISTANT,
                    content="""I don't want to call tools today""",
                    tool_calls=None,
                    raw="""I don't want to call tools today""",
                )

        agent_toolcalling = ToolCallingAgent(model=FakeJsonModelNoCall(), tools=[], max_steps=1, verbosity_level=10)
        with agent_toolcalling.logger.console.capture() as capture:
            agent_toolcalling.run("Dummy task")
        assert "don't" in capture.get() and "want" in capture.get()

        class FakeCodeModelNoCall(Model):
            def generate(self, messages, stop_sequences=None):
                return ChatMessage(
                    role=MessageRole.ASSISTANT,
                    content="""I don't want to write an action today""",
                )

        agent_code = CodeAgent(model=FakeCodeModelNoCall(), tools=[], max_steps=1, verbosity_level=10)
        with agent_code.logger.console.capture() as capture:
            agent_code.run("Dummy task")
        assert "don't" in capture.get() and "want" in capture.get()

    def test_step_number(self):
        fake_model = MagicMock()
        fake_model.generate.return_value = ChatMessage(
            role=MessageRole.ASSISTANT,
            content="Model output.",
            tool_calls=None,
            raw="Model output.",
            token_usage=None,
        )
        max_steps = 2
        agent = CodeAgent(tools=[], model=fake_model, max_steps=max_steps)
        assert hasattr(agent, "step_number"), "step_number attribute should be defined"
        assert agent.step_number == 0, "step_number should be initialized to 0"
        agent.run("Test task")
        assert hasattr(agent, "step_number"), "step_number attribute should be defined"
        assert agent.step_number == max_steps + 1, "step_number should be max_steps + 1 after run method is called"

    @pytest.mark.parametrize(
        "step, expected_messages_list",
        [
            # step=1: initial plan is built from a single USER message.
            (
                1,
                [
                    [
                        ChatMessage(
                            role=MessageRole.USER, content=[{"type": "text", "text": "INITIAL_PLAN_USER_PROMPT"}]
                        ),
                    ],
                ],
            ),
            # step=2: plan update uses a SYSTEM pre-message plus a USER post-message.
            (
                2,
                [
                    [
                        ChatMessage(
                            role=MessageRole.SYSTEM,
                            content=[{"type": "text", "text": "UPDATE_PLAN_SYSTEM_PROMPT"}],
                        ),
                        ChatMessage(
                            role=MessageRole.USER,
                            content=[{"type": "text", "text": "UPDATE_PLAN_USER_PROMPT"}],
                        ),
                    ],
                ],
            ),
        ],
    )
    def test_planning_step(self, step, expected_messages_list):
        fake_model = MagicMock()
        agent = CodeAgent(
            tools=[],
            model=fake_model,
        )
        task = "Test task"

        planning_step = list(agent._generate_planning_step(task, is_first_step=(step == 1), step=step))[-1]
        # Placeholder texts in the parametrized messages are resolved here,
        # after the planning step ran, because they depend on its model output.
        expected_message_texts = {
            "INITIAL_PLAN_USER_PROMPT": populate_template(
                agent.prompt_templates["planning"]["initial_plan"],
                variables=dict(
                    task=task,
                    tools=agent.tools,
                    managed_agents=agent.managed_agents,
                    answer_facts=planning_step.model_output_message.content,
                ),
            ),
            "UPDATE_PLAN_SYSTEM_PROMPT": populate_template(
                agent.prompt_templates["planning"]["update_plan_pre_messages"], variables=dict(task=task)
            ),
            "UPDATE_PLAN_USER_PROMPT": populate_template(
                agent.prompt_templates["planning"]["update_plan_post_messages"],
                variables=dict(
                    task=task,
                    tools=agent.tools,
                    managed_agents=agent.managed_agents,
                    facts_update=planning_step.model_output_message.content,
                    remaining_steps=agent.max_steps - step,
                ),
            ),
        }
        for expected_messages in expected_messages_list:
            for expected_message in expected_messages:
                expected_message.content[0]["text"] = expected_message_texts[expected_message.content[0]["text"]]
        assert isinstance(planning_step, PlanningStep)
        expected_model_input_messages = expected_messages_list[0]
        model_input_messages = planning_step.model_input_messages
        assert isinstance(model_input_messages, list)
        assert len(model_input_messages) == len(expected_model_input_messages)  # 2
        for message, expected_message in zip(model_input_messages, expected_model_input_messages):
            assert isinstance(message, ChatMessage)
            assert message.role in MessageRole.__members__.values()
            assert message.role == expected_message.role
            assert isinstance(message.content, list)
            for content, expected_content in zip(message.content, expected_message.content):
                assert content == expected_content
        # Test calls to model
        assert len(fake_model.generate.call_args_list) == 1
        for call_args, expected_messages in zip(fake_model.generate.call_args_list, expected_messages_list):
            assert len(call_args.args) == 1
            messages = call_args.args[0]
            assert isinstance(messages, list)
            assert len(messages) == len(expected_messages)
            for message, expected_message in zip(messages, expected_messages):
                assert isinstance(message, ChatMessage)
                assert message.role in MessageRole.__members__.values()
                assert message.role == expected_message.role
                assert isinstance(message.content, list)
                for content, expected_content in zip(message.content, expected_message.content):
                    assert content == expected_content

    @pytest.mark.parametrize(
        "images, expected_messages_list",
        [
            # No images: plain system + user messages.
            (
                None,
                [
                    [
                        ChatMessage(
                            role=MessageRole.SYSTEM,
                            content=[{"type": "text", "text": "FINAL_ANSWER_SYSTEM_PROMPT"}],
                        ),
                        ChatMessage(
                            role=MessageRole.USER,
                            content=[{"type": "text", "text": "FINAL_ANSWER_USER_PROMPT"}],
                        ),
                    ]
                ],
            ),
            # With an image: it is attached to the system message content.
            (
                ["image1.png"],
                [
                    [
                        ChatMessage(
                            role=MessageRole.SYSTEM,
                            content=[
                                {"type": "text", "text": "FINAL_ANSWER_SYSTEM_PROMPT"},
                                {"type": "image", "image": "image1.png"},
                            ],
                        ),
                        ChatMessage(
                            role=MessageRole.USER,
                            content=[{"type": "text", "text": "FINAL_ANSWER_USER_PROMPT"}],
                        ),
                    ]
                ],
            ),
        ],
    )
    def test_provide_final_answer(self, images, expected_messages_list):
        fake_model = MagicMock()
        fake_model.generate.return_value = ChatMessage(
            role=MessageRole.ASSISTANT,
            content="Final answer.",
            tool_calls=None,
            raw="Final answer.",
            token_usage=None,
        )
        agent = CodeAgent(
            tools=[],
            model=fake_model,
        )
        task = "Test task"
        final_answer = agent.provide_final_answer(task, images=images).content
        expected_message_texts = {
            "FINAL_ANSWER_SYSTEM_PROMPT": agent.prompt_templates["final_answer"]["pre_messages"],
            "FINAL_ANSWER_USER_PROMPT": populate_template(
                agent.prompt_templates["final_answer"]["post_messages"], variables=dict(task=task)
            ),
        }
        for expected_messages in expected_messages_list:
            for expected_message in expected_messages:
                for expected_content in expected_message.content:
                    if "text" in expected_content:
                        expected_content["text"] = expected_message_texts[expected_content["text"]]
        assert final_answer == "Final answer."
        # Test calls to model
        assert len(fake_model.generate.call_args_list) == 1
        for call_args, expected_messages in zip(fake_model.generate.call_args_list, expected_messages_list):
            assert len(call_args.args) == 1
            messages = call_args.args[0]
            assert isinstance(messages, list)
            assert len(messages) == len(expected_messages)
            for message, expected_message in zip(messages, expected_messages):
                assert isinstance(message, ChatMessage)
                assert message.role in MessageRole.__members__.values()
                assert message.role == expected_message.role
                assert isinstance(message.content, list)
                for content, expected_content in zip(message.content, expected_message.content):
                    assert content == expected_content

    def test_interrupt(self):
        fake_model = MagicMock()
        fake_model.generate.return_value = ChatMessage(
            role=MessageRole.ASSISTANT,
            content="Model output.",
            tool_calls=None,
            raw="Model output.",
            token_usage=None,
        )

        # Step callback that interrupts the agent after the first step.
        def interrupt_callback(memory_step, agent):
            agent.interrupt()

        agent = CodeAgent(
            tools=[],
            model=fake_model,
            step_callbacks=[interrupt_callback],
        )
        with pytest.raises(AgentError) as e:
            agent.run("Test task")
        assert "Agent interrupted" in str(e)

    @pytest.mark.parametrize(
        "tools, managed_agents, name, expectation",
        [
            # Valid case: no duplicates
            (
                [MockTool("tool1"), MockTool("tool2")],
                [MockAgent("agent1", [MockTool("tool3")])],
                "test_agent",
                does_not_raise(),
            ),
            # Invalid case: duplicate tool names
            ([MockTool("tool1"), MockTool("tool1")], [], "test_agent", pytest.raises(ValueError)),
            # Invalid case: tool name same as managed agent name
            (
                [MockTool("tool1")],
                [MockAgent("tool1", [MockTool("final_answer")])],
                "test_agent",
                pytest.raises(ValueError),
            ),
            # Valid case: tool name same as managed agent's tool name
            ([MockTool("tool1")], [MockAgent("agent1", [MockTool("tool1")])], "test_agent", does_not_raise()),
            # Invalid case: duplicate managed agent name and managed agent tool name
            ([MockTool("tool1")], [], "tool1", pytest.raises(ValueError)),
            # Valid case: duplicate tool names across managed agents
            (
                [MockTool("tool1")],
                [
                    MockAgent("agent1", [MockTool("tool2"), MockTool("final_answer")]),
                    MockAgent("agent2", [MockTool("tool2"), MockTool("final_answer")]),
                ],
                "test_agent",
                does_not_raise(),
            ),
        ],
    )
    def test_validate_tools_and_managed_agents(self, tools, managed_agents, name, expectation):
        fake_model = MagicMock()
        with expectation:
            DummyMultiStepAgent(
                tools=tools,
                model=fake_model,
                name=name,
                managed_agents=managed_agents,
            )

    def test_from_dict(self):
        # Create a test agent dictionary
        agent_dict = {
            "model": {"class": "TransformersModel", "data": {"model_id": "test/model"}},
            "tools": [
                {
                    "name": "valid_tool_function",
                    # NOTE(review): inner indentation of this code string was
                    # reconstructed from a whitespace-collapsed source — verify
                    # against the canonical file.
                    "code": 'from smolagents import Tool\nfrom typing import Any, Optional\n\nclass SimpleTool(Tool):\n    name = "valid_tool_function"\n    description = "A valid tool function."\n    inputs = {"input":{"type":"string","description":"Input string."}}\n    output_type = "string"\n\n    def forward(self, input: str) -> str:\n        """A valid tool function.\n\n        Args:\n            input (str): Input string.\n        """\n        return input.upper()',
                    "requirements": {"smolagents"},
                }
            ],
            "managed_agents": {},
            "prompt_templates": EMPTY_PROMPT_TEMPLATES,
            "max_steps": 15,
            "verbosity_level": 2,
            "planning_interval": 3,
            "name": "test_agent",
            "description": "Test agent description",
        }

        # Call from_dict
        with patch("smolagents.models.TransformersModel") as mock_model_class:
            mock_model_instance = mock_model_class.from_dict.return_value
            agent = DummyMultiStepAgent.from_dict(agent_dict)

        # Verify the agent was created correctly
        assert agent.model == mock_model_instance
        assert mock_model_class.from_dict.call_args.args[0] == {"model_id": "test/model"}
        assert agent.max_steps == 15
        assert agent.logger.level == 2
        assert agent.planning_interval == 3
        assert agent.name == "test_agent"
        assert agent.description == "Test agent description"
        # Verify the tool was created correctly
        assert sorted(agent.tools.keys()) == ["final_answer", "valid_tool_function"]
        assert agent.tools["valid_tool_function"].name == "valid_tool_function"
        assert agent.tools["valid_tool_function"].description == "A valid tool function."
        assert agent.tools["valid_tool_function"].inputs == {
            "input": {"type": "string", "description": "Input string."}
        }
        assert agent.tools["valid_tool_function"]("test") == "TEST"

        # Test overriding with kwargs
        with patch("smolagents.models.TransformersModel") as mock_model_class:
            agent = DummyMultiStepAgent.from_dict(agent_dict, max_steps=30)
        assert agent.max_steps == 30
1177
+
1178
+
1179
class TestToolCallingAgent:
    """Tests for ToolCallingAgent: tool-call parsing, streaming, error recovery,
    custom final-answer tools, and process_tool_calls behavior."""

    def test_toolcalling_agent_instructions(self):
        agent = ToolCallingAgent(tools=[], model=MagicMock(), instructions="Test instructions")
        assert agent.instructions == "Test instructions"
        assert "Test instructions" in agent.system_prompt

    def test_toolcalling_agent_passes_both_tools_and_managed_agents(self, test_tool):
        """Test that both tools and managed agents are passed to the model."""
        managed_agent = MagicMock()
        managed_agent.name = "managed_agent"
        model = MagicMock()
        model.generate.return_value = ChatMessage(
            role=MessageRole.ASSISTANT,
            content="",
            tool_calls=[
                ChatMessageToolCall(
                    id="call_0",
                    type="function",
                    function=ChatMessageToolCallFunction(name="test_tool", arguments={"input": "test_value"}),
                )
            ],
        )
        agent = ToolCallingAgent(tools=[test_tool], managed_agents=[managed_agent], model=model)
        # Run the agent one step to trigger the model call
        next(agent.run("Test task", stream=True))
        # Check that the model was called with both tools and managed agents:
        # - Get all tool_to_call_from names passed to the model
        tools_to_call_from_names = [tool.name for tool in model.generate.call_args.kwargs["tools_to_call_from"]]
        # - Verify both regular tools and managed agents are included
        assert "test_tool" in tools_to_call_from_names  # The regular tool
        assert "managed_agent" in tools_to_call_from_names  # The managed agent
        assert "final_answer" in tools_to_call_from_names  # The final_answer tool (added by default)

    @patch("huggingface_hub.InferenceClient")
    def test_toolcalling_agent_api(self, mock_inference_client):
        # First run: the model answers with a JSON blob in `content` (no
        # structured tool_calls); second run: proper tool_calls are returned.
        # Both forms must be parsed into the same tool invocation.
        mock_client = mock_inference_client.return_value
        mock_response = mock_client.chat_completion.return_value
        mock_response.choices[0].message = ChatCompletionOutputMessage(
            role=MessageRole.ASSISTANT,
            content='{"name": "weather_api", "arguments": {"location": "Paris", "date": "today"}}',
        )
        mock_response.usage.prompt_tokens = 10
        mock_response.usage.completion_tokens = 20

        model = InferenceClientModel(model_id="test-model")

        from smolagents import tool

        @tool
        def weather_api(location: str, date: str) -> str:
            """
            Gets the weather in the next days at given location.
            Args:
                location: the location
                date: the date
            """
            return f"The weather in {location} on date:{date} is sunny."

        agent = ToolCallingAgent(model=model, tools=[weather_api], max_steps=1)
        agent.run("What's the weather in Paris?")
        assert agent.memory.steps[0].task == "What's the weather in Paris?"
        assert agent.memory.steps[1].tool_calls[0].name == "weather_api"
        assert agent.memory.steps[1].tool_calls[0].arguments == {"location": "Paris", "date": "today"}
        assert agent.memory.steps[1].observations == "The weather in Paris on date:today is sunny."

        mock_response.choices[0].message = ChatCompletionOutputMessage(
            role=MessageRole.ASSISTANT,
            content=None,
            tool_calls=[
                ChatCompletionOutputToolCall(
                    function=ChatCompletionOutputFunctionDefinition(
                        name="weather_api", arguments='{"location": "Paris", "date": "today"}'
                    ),
                    id="call_0",
                    type="function",
                )
            ],
        )

        agent.run("What's the weather in Paris?")
        assert agent.memory.steps[0].task == "What's the weather in Paris?"
        assert agent.memory.steps[1].tool_calls[0].name == "weather_api"
        assert agent.memory.steps[1].tool_calls[0].arguments == {"location": "Paris", "date": "today"}
        assert agent.memory.steps[1].observations == "The weather in Paris on date:today is sunny."

    @patch("openai.OpenAI")
    def test_toolcalling_agent_stream_outputs_multiple_tool_calls(self, mock_openai_client, test_tool):
        """Test that ToolCallingAgent with stream_outputs=True returns the first final_answer when multiple are called."""
        mock_client = mock_openai_client.return_value
        from smolagents import OpenAIServerModel

        # Mock streaming response with multiple final_answer calls
        # (arguments are split across deltas to exercise incremental reassembly).
        mock_deltas = [
            ChoiceDelta(role=MessageRole.ASSISTANT),
            ChoiceDelta(
                tool_calls=[
                    ChoiceDeltaToolCall(
                        index=0,
                        id="call_1",
                        function=ChoiceDeltaToolCallFunction(name="final_answer"),
                        type="function",
                    )
                ]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=0, function=ChoiceDeltaToolCallFunction(arguments='{"an'))]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=0, function=ChoiceDeltaToolCallFunction(arguments='swer"'))]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=0, function=ChoiceDeltaToolCallFunction(arguments=': "out'))]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=0, function=ChoiceDeltaToolCallFunction(arguments="put1"))]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=0, function=ChoiceDeltaToolCallFunction(arguments='"}'))]
            ),
            ChoiceDelta(
                tool_calls=[
                    ChoiceDeltaToolCall(
                        index=1,
                        id="call_2",
                        function=ChoiceDeltaToolCallFunction(name="test_tool"),
                        type="function",
                    )
                ]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=1, function=ChoiceDeltaToolCallFunction(arguments='{"in'))]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=1, function=ChoiceDeltaToolCallFunction(arguments='put"'))]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=1, function=ChoiceDeltaToolCallFunction(arguments=': "out'))]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=1, function=ChoiceDeltaToolCallFunction(arguments="put2"))]
            ),
            ChoiceDelta(
                tool_calls=[ChoiceDeltaToolCall(index=1, function=ChoiceDeltaToolCallFunction(arguments='"}'))]
            ),
        ]

        class MockChoice:
            def __init__(self, delta):
                self.delta = delta

        class MockChunk:
            def __init__(self, delta):
                self.choices = [MockChoice(delta)]
                self.usage = None

        mock_client.chat.completions.create.return_value = (MockChunk(delta) for delta in mock_deltas)

        # Mock usage for non-streaming fallback
        mock_usage = MagicMock()
        mock_usage.prompt_tokens = 10
        mock_usage.completion_tokens = 20

        model = OpenAIServerModel(model_id="fakemodel")

        agent = ToolCallingAgent(model=model, tools=[test_tool], max_steps=1, stream_outputs=True)
        result = agent.run("Make 2 calls to final answer: return both 'output1' and 'output2'")
        assert len(agent.memory.steps[-1].model_output_message.tool_calls) == 2
        assert agent.memory.steps[-1].model_output_message.tool_calls[0].function.name == "final_answer"
        assert agent.memory.steps[-1].model_output_message.tool_calls[1].function.name == "test_tool"

        # The agent should return the final answer call
        assert result == "output1"

    @patch("huggingface_hub.InferenceClient")
    def test_toolcalling_agent_api_misformatted_output(self, mock_inference_client):
        """Test that even misformatted json blobs don't interrupt the run for a ToolCallingAgent."""
        mock_client = mock_inference_client.return_value
        mock_response = mock_client.chat_completion.return_value
        # Deliberately broken JSON: missing opening quote before weather_api.
        mock_response.choices[0].message = ChatCompletionOutputMessage(
            role=MessageRole.ASSISTANT,
            content='{"name": weather_api", "arguments": {"location": "Paris", "date": "today"}}',
        )

        mock_response.usage.prompt_tokens = 10
        mock_response.usage.completion_tokens = 20

        model = InferenceClientModel(model_id="test-model")

        logger = AgentLogger(console=Console(markup=False, no_color=True))

        agent = ToolCallingAgent(model=model, tools=[], max_steps=2, verbosity_level=1, logger=logger)
        with agent.logger.console.capture() as capture:
            agent.run("What's the weather in Paris?")
        assert agent.memory.steps[0].task == "What's the weather in Paris?"
        assert agent.memory.steps[1].tool_calls is None
        assert "The JSON blob you used is invalid" in agent.memory.steps[1].error.message
        assert "Error while parsing" in capture.get()
        assert len(agent.memory.steps) == 4

    def test_change_tools_after_init(self):
        from smolagents import tool

        @tool
        def fake_tool_1() -> str:
            """Fake tool"""
            return "1"

        @tool
        def fake_tool_2() -> str:
            """Fake tool"""
            return "2"

        class FakeCodeModel(Model):
            def generate(self, messages, stop_sequences=None):
                return ChatMessage(role=MessageRole.ASSISTANT, content="<code>\nfinal_answer(fake_tool_1())\n</code>")

        agent = CodeAgent(tools=[fake_tool_1], model=FakeCodeModel())

        # Swap tools after construction; the run must use the replacements.
        agent.tools["final_answer"] = CustomFinalAnswerTool()
        agent.tools["fake_tool_1"] = fake_tool_2

        answer = agent.run("Fake task.")
        assert answer == "2CUSTOM"

    def test_custom_final_answer_with_custom_inputs(self, test_tool):
        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
            inputs = {
                "answer1": {"type": "string", "description": "First part of the answer."},
                "answer2": {"type": "string", "description": "Second part of the answer."},
            }

            def forward(self, answer1: str, answer2: str) -> str:
                return answer1 + " and " + answer2

        model = MagicMock()
        model.generate.return_value = ChatMessage(
            role=MessageRole.ASSISTANT,
            content=None,
            tool_calls=[
                ChatMessageToolCall(
                    id="call_0",
                    type="function",
                    function=ChatMessageToolCallFunction(
                        name="final_answer", arguments={"answer1": "1", "answer2": "2"}
                    ),
                ),
                ChatMessageToolCall(
                    id="call_1",
                    type="function",
                    function=ChatMessageToolCallFunction(name="test_tool", arguments={"input": "3"}),
                ),
            ],
        )
        agent = ToolCallingAgent(tools=[test_tool, CustomFinalAnswerToolWithCustomInputs()], model=model)
        answer = agent.run("Fake task.")
        assert answer == "1 and 2"
        assert agent.memory.steps[-1].model_output_message.tool_calls[0].function.name == "final_answer"
        assert agent.memory.steps[-1].model_output_message.tool_calls[1].function.name == "test_tool"

    @pytest.mark.parametrize(
        "test_case",
        [
            # Case 0: Single valid tool call
            {
                "tool_calls": [
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(name="test_tool", arguments={"input": "test_value"}),
                    )
                ],
                "expected_model_output": "Tool call call_1: calling 'test_tool' with arguments: {'input': 'test_value'}",
                "expected_observations": "Processed: test_value",
                "expected_final_outputs": ["Processed: test_value"],
                "expected_error": None,
            },
            # Case 1: Multiple tool calls
            {
                "tool_calls": [
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(name="test_tool", arguments={"input": "value1"}),
                    ),
                    ChatMessageToolCall(
                        id="call_2",
                        type="function",
                        function=ChatMessageToolCallFunction(name="test_tool", arguments={"input": "value2"}),
                    ),
                ],
                "expected_model_output": "Tool call call_1: calling 'test_tool' with arguments: {'input': 'value1'}\nTool call call_2: calling 'test_tool' with arguments: {'input': 'value2'}",
                "expected_observations": "Processed: value1\nProcessed: value2",
                "expected_final_outputs": ["Processed: value1", "Processed: value2"],
                "expected_error": None,
            },
            # Case 2: Invalid tool name
            {
                "tool_calls": [
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(name="nonexistent_tool", arguments={"input": "test"}),
                    )
                ],
                "expected_error": AgentToolExecutionError,
            },
            # Case 3: Tool execution error
            {
                "tool_calls": [
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(name="test_tool", arguments={"input": "error"}),
                    )
                ],
                "expected_error": AgentToolExecutionError,
            },
            # Case 4: Empty tool calls list
            {
                "tool_calls": [],
                "expected_model_output": "",
                "expected_observations": "",
                "expected_final_outputs": [],
                "expected_error": None,
            },
            # Case 5: Final answer call
            {
                "tool_calls": [
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(
                            name="final_answer", arguments={"answer": "This is the final answer"}
                        ),
                    )
                ],
                "expected_model_output": "Tool call call_1: calling 'final_answer' with arguments: {'answer': 'This is the final answer'}",
                "expected_observations": "This is the final answer",
                "expected_final_outputs": ["This is the final answer"],
                "expected_error": None,
            },
            # Case 6: Invalid arguments
            {
                "tool_calls": [
                    ChatMessageToolCall(
                        id="call_1",
                        type="function",
                        function=ChatMessageToolCallFunction(name="test_tool", arguments={"wrong_param": "value"}),
                    )
                ],
                "expected_error": AgentToolCallError,
            },
        ],
    )
    def test_process_tool_calls(self, test_case, test_tool):
        # Create a ToolCallingAgent instance with the test tool
        agent = ToolCallingAgent(tools=[test_tool], model=MagicMock())
        # Create chat message with the specified tool calls for process_tool_calls
        chat_message = ChatMessage(role=MessageRole.ASSISTANT, content="", tool_calls=test_case["tool_calls"])
        # Create a memory step for process_tool_calls
        memory_step = ActionStep(step_number=10, timing="mock_timing")

        # Process tool calls
        if test_case["expected_error"]:
            with pytest.raises(test_case["expected_error"]):
                list(agent.process_tool_calls(chat_message, memory_step))
        else:
            final_outputs = list(agent.process_tool_calls(chat_message, memory_step))
            assert memory_step.model_output == test_case["expected_model_output"]
            assert memory_step.observations == test_case["expected_observations"]
            assert [
                final_output.output for final_output in final_outputs if isinstance(final_output, ToolOutput)
            ] == test_case["expected_final_outputs"]
            # Verify memory step tool calls were updated correctly
            if test_case["tool_calls"]:
                assert memory_step.tool_calls == [
                    ToolCall(name=tool_call.function.name, arguments=tool_call.function.arguments, id=tool_call.id)
                    for tool_call in test_case["tool_calls"]
                ]
1558
+
1559
+
1560
class TestCodeAgent:
    """Unit tests for CodeAgent: initialization options, error logging, code-block
    post-processing, serialization round-trips, and custom final-answer tools."""

    def test_code_agent_instructions(self):
        """`instructions` must be stored and injected into the system prompt, with and without structured outputs."""
        agent = CodeAgent(tools=[], model=MagicMock(), instructions="Test instructions")
        assert agent.instructions == "Test instructions"
        assert "Test instructions" in agent.system_prompt

        agent = CodeAgent(
            tools=[], model=MagicMock(), instructions="Test instructions", use_structured_outputs_internally=True
        )
        assert agent.instructions == "Test instructions"
        assert "Test instructions" in agent.system_prompt

    @pytest.mark.filterwarnings("ignore")  # Ignore FutureWarning for deprecated grammar parameter
    def test_init_with_incompatible_grammar_and_use_structured_outputs_internally(self):
        """`grammar` and `use_structured_outputs_internally` are mutually exclusive."""
        # Test that using both parameters raises ValueError with correct message
        with pytest.raises(
            ValueError, match="You cannot use 'grammar' and 'use_structured_outputs_internally' at the same time."
        ):
            CodeAgent(
                tools=[],
                model=MagicMock(),
                grammar={"format": "json"},
                use_structured_outputs_internally=True,
                verbosity_level=LogLevel.DEBUG,
            )

        # Verify no error when only one option is used
        # Only grammar
        agent_with_grammar = CodeAgent(
            tools=[],
            model=MagicMock(),
            grammar={"format": "json"},
            use_structured_outputs_internally=False,
            verbosity_level=LogLevel.DEBUG,
        )
        assert agent_with_grammar.grammar is not None
        assert agent_with_grammar._use_structured_outputs_internally is False

        # Only structured output
        agent_with_structured = CodeAgent(
            tools=[],
            model=MagicMock(),
            grammar=None,
            use_structured_outputs_internally=True,
            verbosity_level=LogLevel.DEBUG,
        )
        assert agent_with_structured.grammar is None
        assert agent_with_structured._use_structured_outputs_internally is True

    @pytest.mark.parametrize("provide_run_summary", [False, True])
    def test_call_with_provide_run_summary(self, provide_run_summary):
        """Calling a managed agent appends a work summary only when provide_run_summary is True."""
        agent = CodeAgent(tools=[], model=MagicMock(), provide_run_summary=provide_run_summary)
        assert agent.provide_run_summary is provide_run_summary
        agent.name = "test_agent"
        agent.run = MagicMock(return_value="Test output")
        agent.write_memory_to_messages = MagicMock(return_value=[{"content": "Test summary"}])

        result = agent("Test request")
        expected_summary = "Here is the final answer from your managed agent 'test_agent':\nTest output"
        if provide_run_summary:
            expected_summary += (
                "\n\nFor more detail, find below a summary of this agent's work:\n"
                "<summary_of_work>\n\nTest summary\n---\n</summary_of_work>"
            )
        assert result == expected_summary

    def test_errors_logging(self):
        """Runtime errors raised by generated code must appear in the console log."""
        class FakeCodeModel(Model):
            def generate(self, messages, stop_sequences=None):
                return ChatMessage(role=MessageRole.ASSISTANT, content="<code>\nsecret=3;['1', '2'][secret]\n</code>")

        agent = CodeAgent(tools=[], model=FakeCodeModel(), verbosity_level=1)

        with agent.logger.console.capture() as capture:
            agent.run("Test request")
        assert "secret\\\\" in repr(capture.get())

    def test_missing_import_triggers_advice_in_error_log(self):
        """An unauthorized import should log advice mentioning `additional_authorized_imports`."""
        # Set explicit verbosity level to 1 to override the default verbosity level of -1 set in CI fixture
        agent = CodeAgent(tools=[], model=FakeCodeModelImport(), verbosity_level=1)

        with agent.logger.console.capture() as capture:
            agent.run("Count to 3")
        str_output = capture.get()
        assert "`additional_authorized_imports`" in str_output.replace("\n", "")

    def test_errors_show_offending_line_and_error(self):
        """The failing line and exception type must be recorded in the memory steps."""
        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelError())
        output = agent.run("What is 2 multiplied by 3.6452?")
        assert isinstance(output, AgentText)
        assert output == "got an error"
        assert "Code execution failed at line 'error_function()'" in str(agent.memory.steps[1].error)
        assert "ValueError" in str(agent.memory.steps)

    def test_error_saves_previous_print_outputs(self):
        """Print output produced before a crash is preserved in the step observations."""
        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelError(), verbosity_level=10)
        agent.run("What is 2 multiplied by 3.6452?")
        assert "Flag!" in str(agent.memory.steps[1].observations)

    def test_syntax_error_show_offending_lines(self):
        """Syntax errors report the offending line and keep the raw code_action."""
        agent = CodeAgent(tools=[PythonInterpreterTool()], model=FakeCodeModelSyntaxError())
        output = agent.run("What is 2 multiplied by 3.6452?")
        assert isinstance(output, AgentText)
        assert output == "got an error"
        # NOTE(review): leading whitespace inside these literals was reconstructed — confirm against upstream.
        assert '    print("Failing due to unexpected indent")' in str(agent.memory.steps)
        assert isinstance(agent.memory.steps[-2], ActionStep)
        assert agent.memory.steps[-2].code_action == dedent("""a = 2
b = a * 2
    print("Failing due to unexpected indent")
print("Ok, calculation done!")""")

    def test_end_code_appending(self):
        """Model outputs missing the `<end_code>` marker get it appended on the stored step."""
        # Checking original output message
        orig_output = FakeCodeModelNoReturn().generate([])
        assert not orig_output.content.endswith("<end_code>")

        # Checking the step output
        agent = CodeAgent(
            tools=[PythonInterpreterTool()],
            model=FakeCodeModelNoReturn(),
            max_steps=1,
        )
        answer = agent.run("What is 2 multiplied by 3.6452?")
        assert answer

        memory_steps = agent.memory.steps
        actions_steps = [s for s in memory_steps if isinstance(s, ActionStep)]

        outputs = [s.model_output for s in actions_steps if s.model_output]
        assert outputs
        assert all(o.endswith("<end_code>") for o in outputs)

        messages = [s.model_output_message for s in actions_steps if s.model_output_message]
        assert messages
        assert all(m.content.endswith("<end_code>") for m in messages)

    def test_change_tools_after_init(self):
        """Tools swapped into `agent.tools` after construction are used at run time."""
        from smolagents import tool

        @tool
        def fake_tool_1() -> str:
            """Fake tool"""
            return "1"

        @tool
        def fake_tool_2() -> str:
            """Fake tool"""
            return "2"

        class FakeCodeModel(Model):
            def generate(self, messages, stop_sequences=None):
                return ChatMessage(role=MessageRole.ASSISTANT, content="<code>\nfinal_answer(fake_tool_1())\n</code>")

        agent = CodeAgent(tools=[fake_tool_1], model=FakeCodeModel())

        agent.tools["final_answer"] = CustomFinalAnswerTool()
        agent.tools["fake_tool_1"] = fake_tool_2

        answer = agent.run("Fake task.")
        assert answer == "2CUSTOM"

    def test_local_python_executor_with_custom_functions(self):
        """`executor_kwargs["additional_functions"]` entries become static tools of the executor."""
        model = MagicMock()
        model.generate.return_value = ChatMessage(
            role=MessageRole.ASSISTANT,
            content="",
            tool_calls=None,
            raw="",
            token_usage=None,
        )
        agent = CodeAgent(tools=[], model=model, executor_kwargs={"additional_functions": {"open": open}})
        agent.run("Test run")
        assert "open" in agent.python_executor.static_tools

    @pytest.mark.parametrize("agent_dict_version", ["v1.9", "v1.10"])
    def test_from_folder(self, agent_dict_version, get_agent_dict):
        """from_folder must restore agents saved with older (v1.9/v1.10) agent.json schemas."""
        agent_dict = get_agent_dict(agent_dict_version)
        with (
            patch("smolagents.agents.Path") as mock_path,
            patch("smolagents.models.InferenceClientModel") as mock_model,
        ):
            import json

            mock_path.return_value.__truediv__.return_value.read_text.return_value = json.dumps(agent_dict)
            mock_model.from_dict.return_value.model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
            agent = CodeAgent.from_folder("ignored_dummy_folder")
            assert isinstance(agent, CodeAgent)
            assert agent.name == "test_agent"
            assert agent.description == "dummy description"
            assert agent.max_steps == 10
            assert agent.planning_interval == 2
            assert agent.additional_authorized_imports == ["pandas"]
            assert "pandas" in agent.authorized_imports
            assert agent.executor_type == "local"
            assert agent.executor_kwargs == {}
            assert agent.max_print_outputs_length is None
            assert agent.managed_agents == {}
            assert set(agent.tools.keys()) == {"final_answer"}
            assert agent.model == mock_model.from_dict.return_value
            assert mock_model.from_dict.call_args.args[0]["model_id"] == "Qwen/Qwen2.5-Coder-32B-Instruct"
            assert agent.model.model_id == "Qwen/Qwen2.5-Coder-32B-Instruct"
            assert agent.logger.level == 2
            assert agent.prompt_templates["system_prompt"] == "dummy system prompt"

    def test_from_dict(self):
        """from_dict restores CodeAgent-specific fields, applies defaults, and honors kwargs overrides."""
        # Create a test agent dictionary
        agent_dict = {
            "model": {"class": "InferenceClientModel", "data": {"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"}},
            "tools": [
                {
                    "name": "valid_tool_function",
                    "code": 'from smolagents import Tool\nfrom typing import Any, Optional\n\nclass SimpleTool(Tool):\n    name = "valid_tool_function"\n    description = "A valid tool function."\n    inputs = {"input":{"type":"string","description":"Input string."}}\n    output_type = "string"\n\n    def forward(self, input: str) -> str:\n        """A valid tool function.\n\n        Args:\n            input (str): Input string.\n        """\n        return input.upper()',
                    "requirements": {"smolagents"},
                }
            ],
            "managed_agents": {},
            "prompt_templates": EMPTY_PROMPT_TEMPLATES,
            "max_steps": 15,
            "verbosity_level": 2,
            "use_structured_output": False,
            "planning_interval": 3,
            "name": "test_code_agent",
            "description": "Test code agent description",
            "authorized_imports": ["pandas", "numpy"],
            "executor_type": "local",
            "executor_kwargs": {"max_print_outputs_length": 10_000},
            "max_print_outputs_length": 1000,
        }

        # Call from_dict
        with patch("smolagents.models.InferenceClientModel") as mock_model_class:
            mock_model_instance = mock_model_class.from_dict.return_value
            agent = CodeAgent.from_dict(agent_dict)

            # Verify the agent was created correctly with CodeAgent-specific parameters
            assert agent.model == mock_model_instance
            assert agent.additional_authorized_imports == ["pandas", "numpy"]
            assert agent.executor_type == "local"
            assert agent.executor_kwargs == {"max_print_outputs_length": 10_000}
            assert agent.max_print_outputs_length == 1000

        # Test with missing optional parameters
        minimal_agent_dict = {
            "model": {"class": "InferenceClientModel", "data": {"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"}},
            "tools": [],
            "managed_agents": {},
        }

        with patch("smolagents.models.InferenceClientModel"):
            agent = CodeAgent.from_dict(minimal_agent_dict)
            # Verify defaults are used
            assert agent.max_steps == 20  # default from MultiStepAgent.__init__

        # Test overriding with kwargs
        with patch("smolagents.models.InferenceClientModel"):
            agent = CodeAgent.from_dict(
                agent_dict,
                additional_authorized_imports=["matplotlib"],
                executor_kwargs={"max_print_outputs_length": 5_000},
            )
            assert agent.additional_authorized_imports == ["matplotlib"]
            assert agent.executor_kwargs == {"max_print_outputs_length": 5_000}

    def test_custom_final_answer_with_custom_inputs(self):
        """A FinalAnswerTool subclass with multiple inputs can be invoked from generated code."""
        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
            inputs = {
                "answer1": {"type": "string", "description": "First part of the answer."},
                "answer2": {"type": "string", "description": "Second part of the answer."},
            }

            def forward(self, answer1: str, answer2: str) -> str:
                return answer1 + "CUSTOM" + answer2

        model = MagicMock()
        model.generate.return_value = ChatMessage(
            role=MessageRole.ASSISTANT, content="<code>\nfinal_answer(answer1='1', answer2='2')\n</code>"
        )
        agent = CodeAgent(tools=[CustomFinalAnswerToolWithCustomInputs()], model=model)
        answer = agent.run("Fake task.")
        assert answer == "1CUSTOM2"
1840
+
1841
+
1842
class TestMultiAgents:
    """Tests for manager/managed agent hierarchies: saving to disk, reloading, and end-to-end runs."""

    def test_multiagents_save(self, tmp_path):
        """Saving a manager with managed agents produces the expected folder layout and reloads correctly."""
        model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=2096, temperature=0.5)

        web_agent = ToolCallingAgent(
            model=model,
            tools=[DuckDuckGoSearchTool(max_results=2), VisitWebpageTool()],
            name="web_agent",
            description="does web searches",
        )
        code_agent = CodeAgent(model=model, tools=[], name="useless", description="does nothing in particular")

        agent = CodeAgent(
            model=model,
            tools=[],
            additional_authorized_imports=["pandas", "datetime"],
            managed_agents=[web_agent, code_agent],
            max_print_outputs_length=1000,
            executor_type="local",
            executor_kwargs={"max_print_outputs_length": 10_000},
        )
        agent.save(tmp_path)

        expected_structure = {
            "managed_agents": {
                "useless": {"tools": {"files": ["final_answer.py"]}, "files": ["agent.json", "prompts.yaml"]},
                "web_agent": {
                    "tools": {"files": ["final_answer.py", "visit_webpage.py", "web_search.py"]},
                    "files": ["agent.json", "prompts.yaml"],
                },
            },
            "tools": {"files": ["final_answer.py"]},
            "files": ["app.py", "requirements.txt", "agent.json", "prompts.yaml"],
        }

        def verify_structure(current_path: Path, structure: dict):
            # Recursively check that every expected directory and file was written.
            for dir_name, contents in structure.items():
                if dir_name != "files":
                    # For directories, verify they exist and recurse into them
                    dir_path = current_path / dir_name
                    assert dir_path.exists(), f"Directory {dir_path} does not exist"
                    assert dir_path.is_dir(), f"{dir_path} is not a directory"
                    verify_structure(dir_path, contents)
                else:
                    # For files, verify each exists in the current path
                    for file_name in contents:
                        file_path = current_path / file_name
                        assert file_path.exists(), f"File {file_path} does not exist"
                        assert file_path.is_file(), f"{file_path} is not a file"

        verify_structure(tmp_path, expected_structure)

        # Test that re-loaded agents work as expected.
        agent2 = CodeAgent.from_folder(tmp_path, planning_interval=5)
        assert agent2.planning_interval == 5  # Check that kwargs are used
        assert set(agent2.authorized_imports) == set(["pandas", "datetime"] + BASE_BUILTIN_MODULES)
        assert agent2.max_print_outputs_length == 1000
        assert agent2.executor_type == "local"
        assert agent2.executor_kwargs == {"max_print_outputs_length": 10_000}
        assert (
            agent2.managed_agents["web_agent"].tools["web_search"].max_results == 10
        )  # For now tool init parameters are forgotten
        assert agent2.model.kwargs["temperature"] == pytest.approx(0.5)

    def test_multiagents(self):
        """A manager (code or tool-calling) can delegate to a managed agent and return its report."""
        class FakeModelMultiagentsManagerAgent(Model):
            model_id = "fake_model"

            def generate(
                self,
                messages,
                stop_sequences=None,
                tools_to_call_from=None,
            ):
                # First turn: call the managed search agent; second turn: return the final answer.
                if tools_to_call_from is not None:
                    if len(messages) < 3:
                        return ChatMessage(
                            role=MessageRole.ASSISTANT,
                            content="",
                            tool_calls=[
                                ChatMessageToolCall(
                                    id="call_0",
                                    type="function",
                                    function=ChatMessageToolCallFunction(
                                        name="search_agent",
                                        arguments="Who is the current US president?",
                                    ),
                                )
                            ],
                        )
                    else:
                        assert "Report on the current US president" in str(messages)
                        return ChatMessage(
                            role=MessageRole.ASSISTANT,
                            content="",
                            tool_calls=[
                                ChatMessageToolCall(
                                    id="call_0",
                                    type="function",
                                    function=ChatMessageToolCallFunction(
                                        name="final_answer", arguments="Final report."
                                    ),
                                )
                            ],
                        )
                else:
                    if len(messages) < 3:
                        return ChatMessage(
                            role=MessageRole.ASSISTANT,
                            content="""
Thought: Let's call our search agent.
<code>
result = search_agent("Who is the current US president?")
</code>
""",
                        )
                    else:
                        assert "Report on the current US president" in str(messages)
                        return ChatMessage(
                            role=MessageRole.ASSISTANT,
                            content="""
Thought: Let's return the report.
<code>
final_answer("Final report.")
</code>
""",
                        )

        manager_model = FakeModelMultiagentsManagerAgent()

        class FakeModelMultiagentsManagedAgent(Model):
            model_id = "fake_model"

            def generate(
                self,
                messages,
                tools_to_call_from=None,
                stop_sequences=None,
            ):
                # Always answer immediately with a final_answer tool call.
                return ChatMessage(
                    role=MessageRole.ASSISTANT,
                    content="Here is the secret content: FLAG1",
                    tool_calls=[
                        ChatMessageToolCall(
                            id="call_0",
                            type="function",
                            function=ChatMessageToolCallFunction(
                                name="final_answer",
                                arguments="Report on the current US president",
                            ),
                        )
                    ],
                )

        managed_model = FakeModelMultiagentsManagedAgent()

        web_agent = ToolCallingAgent(
            tools=[],
            model=managed_model,
            max_steps=10,
            name="search_agent",
            description="Runs web searches for you. Give it your request as an argument. Make the request as detailed as needed, you can ask for thorough reports",
            verbosity_level=2,
        )

        manager_code_agent = CodeAgent(
            tools=[],
            model=manager_model,
            managed_agents=[web_agent],
            additional_authorized_imports=["time", "numpy", "pandas"],
        )

        report = manager_code_agent.run("Fake question.")
        assert report == "Final report."

        manager_toolcalling_agent = ToolCallingAgent(
            tools=[],
            model=manager_model,
            managed_agents=[web_agent],
        )

        with web_agent.logger.console.capture() as capture:
            report = manager_toolcalling_agent.run("Fake question.")
        assert report == "Final report."
        assert "FLAG1" in capture.get()  # Check that managed agent's output is properly logged

        # Test that visualization works
        with manager_toolcalling_agent.logger.console.capture() as capture:
            manager_toolcalling_agent.visualize()
        assert "├──" in capture.get()
2032
+
2033
+
2034
@pytest.fixture
def prompt_templates():
    """Minimal prompt-template dict covering each template group referenced by the agents under test."""
    return {
        "system_prompt": "This is a test system prompt.",
        "managed_agent": {"task": "Task for {{name}}: {{task}}", "report": "Report for {{name}}: {{final_answer}}"},
        "planning": {
            "initial_plan": "The plan.",
            "update_plan_pre_messages": "custom",
            "update_plan_post_messages": "custom",
        },
        "final_answer": {"pre_messages": "custom", "post_messages": "custom"},
    }
2046
+
2047
+
2048
@pytest.mark.parametrize(
    "arguments",
    [
        {},
        {"arg": "bar"},
        {None: None},
        [1, 2, 3],
    ],
)
def test_tool_calling_agents_raises_tool_call_error_being_invoked_with_wrong_arguments(arguments):
    """Invalid argument payloads (wrong keys, wrong types) must raise AgentToolCallError."""
    @tool
    def _sample_tool(prompt: str) -> str:
        """Tool that returns same string
        Args:
            prompt: The string to return
        Returns:
            The same string
        """

        return prompt

    agent = ToolCallingAgent(model=FakeToolCallModel(), tools=[_sample_tool])
    with pytest.raises(AgentToolCallError):
        agent.execute_tool_call(_sample_tool.name, arguments)
2072
+
2073
+
2074
def test_tool_calling_agents_raises_agent_execution_error_when_tool_raises():
    """An exception raised inside a tool body surfaces as AgentExecutionError."""
    @tool
    def _sample_tool(_: str) -> float:
        """Tool that fails

        Args:
            _: The pointless string
        Returns:
            Some number
        """

        return 1 / 0

    agent = ToolCallingAgent(model=FakeToolCallModel(), tools=[_sample_tool])
    with pytest.raises(AgentExecutionError):
        agent.execute_tool_call(_sample_tool.name, "sample")
tests/test_all_docs.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import ast
17
+ import os
18
+ import re
19
+ import shutil
20
+ import subprocess
21
+ import tempfile
22
+ import traceback
23
+ from pathlib import Path
24
+
25
+ import pytest
26
+ from dotenv import load_dotenv
27
+
28
+ from .utils.markers import require_run_all
29
+
30
+
31
class SubprocessCallException(Exception):
    """Raised by run_command when the child process exits with a non-zero status."""

    pass
33
+
34
+
35
def run_command(command: list[str], return_stdout=False, env=None):
    """
    Run ``command`` with ``subprocess.check_output`` and return stdout if requested.

    Args:
        command: Program and arguments; ``Path`` entries are converted to strings.
        return_stdout: When True, return the captured stdout (decoded as UTF-8 when bytes).
        env: Environment mapping for the child process; defaults to a copy of ``os.environ``.

    Returns:
        The decoded stdout when ``return_stdout`` is True, otherwise ``None``.

    Raises:
        SubprocessCallException: If the command exits with a non-zero status; the message
            includes the command line and its combined stdout/stderr output.
    """
    # Build a normalized copy instead of mutating the caller's list in place
    # (the original implementation rewrote `command[i]`, a surprising side effect).
    command = [str(part) if isinstance(part, Path) else part for part in command]

    if env is None:
        env = os.environ.copy()

    try:
        output = subprocess.check_output(command, stderr=subprocess.STDOUT, env=env)
        if return_stdout:
            if hasattr(output, "decode"):
                output = output.decode("utf-8")
            return output
    except subprocess.CalledProcessError as e:
        raise SubprocessCallException(
            f"Command `{' '.join(command)}` failed with the following error:\n\n{e.output.decode()}"
        ) from e
57
+
58
+
59
class DocCodeExtractor:
    """Handles extraction and validation of Python code from markdown files."""

    @staticmethod
    def extract_python_code(content: str) -> list[str]:
        """Return the stripped contents of every ```python/```py fenced block in `content`."""
        fence_pattern = r"```(?:python|py)\n(.*?)\n```"
        return [snippet.strip() for snippet in re.findall(fence_pattern, content, re.DOTALL)]

    @staticmethod
    def create_test_script(code_blocks: list[str], tmp_dir: str) -> Path:
        """Join `code_blocks` into one script, write it under `tmp_dir`, and return its path."""
        script_source = "\n\n".join(code_blocks)
        assert len(script_source) > 0, "Code is empty!"
        script_path = Path(tmp_dir) / "test_script.py"
        script_path.write_text(script_source, encoding="utf-8")
        return script_path
80
+
81
+
82
# Skip: slow tests + require API keys
@require_run_all
class TestDocs:
    """Test case for documentation code testing."""

    @classmethod
    def setup_class(cls):
        """Collect the markdown docs and prepare a shared temp directory."""
        cls._tmpdir = tempfile.mkdtemp()
        cls.launch_args = ["python3"]
        cls.docs_dir = Path(__file__).parent.parent / "docs" / "source" / "en"
        cls.extractor = DocCodeExtractor()

        if not cls.docs_dir.exists():
            raise ValueError(f"Docs directory not found at {cls.docs_dir}")

        load_dotenv()

        cls.md_files = list(cls.docs_dir.rglob("*.md")) + list(cls.docs_dir.rglob("*.mdx"))
        if not cls.md_files:
            raise ValueError(f"No markdown files found in {cls.docs_dir}")

    @classmethod
    def teardown_class(cls):
        """Remove the shared temp directory."""
        shutil.rmtree(cls._tmpdir)

    @pytest.mark.timeout(100)
    def test_single_doc(self, doc_path: Path):
        """Test a single documentation file."""
        with open(doc_path, "r", encoding="utf-8") as f:
            content = f.read()

        code_blocks = self.extractor.extract_python_code(content)
        excluded_snippets = [
            "ToolCollection",
            "image_generation_tool",  # We don't want to run this expensive operation
            "from_langchain",  # Langchain is not a dependency
            "while llm_should_continue(memory):",  # This is pseudo code
            "ollama_chat/llama3.2",  # Exclude ollama building in guided tour
            "model = TransformersModel(model_id=model_id)",  # Exclude testing with transformers model
            "SmolagentsInstrumentor",  # Exclude telemetry since it needs additional installs
        ]
        code_blocks = [
            block
            for block in code_blocks
            if not any(
                [snippet in block for snippet in excluded_snippets]
            )  # Exclude these tools that take longer to run and add dependencies
        ]
        if len(code_blocks) == 0:
            pytest.skip(f"No Python code blocks found in {doc_path.name}")

        # Validate syntax of each block individually by parsing it
        for i, block in enumerate(code_blocks, 1):
            ast.parse(block)

        # Create and execute test script
        print("\n\nCollected code block:==========\n".join(code_blocks))
        try:
            # NOTE(review): os.getenv(...) returns None when the variable is unset, which would
            # make str.replace raise TypeError — confirm HF_TOKEN/ANTHROPIC_API_KEY are always
            # set when these run-all tests are executed.
            code_blocks = [
                (
                    block.replace("<YOUR_HUGGINGFACEHUB_API_TOKEN>", os.getenv("HF_TOKEN"))
                    .replace("YOUR_ANTHROPIC_API_KEY", os.getenv("ANTHROPIC_API_KEY"))
                    .replace("{your_username}", "m-ric")
                )
                for block in code_blocks
            ]
            test_script = self.extractor.create_test_script(code_blocks, self._tmpdir)
            run_command(self.launch_args + [str(test_script)])

        except SubprocessCallException as e:
            pytest.fail(f"\nError while testing {doc_path.name}:\n{str(e)}")
        except Exception:
            pytest.fail(f"\nUnexpected error while testing {doc_path.name}:\n{traceback.format_exc()}")

    @pytest.fixture(autouse=True)
    def _setup(self):
        """Fixture to ensure temporary directory exists for each test."""
        os.makedirs(self._tmpdir, exist_ok=True)
        yield
        # Clean up test files after each test
        # NOTE(review): glob("*") would also match subdirectories, on which unlink() fails —
        # presumably only flat files are ever written here; verify if scripts create dirs.
        for file in Path(self._tmpdir).glob("*"):
            file.unlink()
164
+
165
+
166
def pytest_generate_tests(metafunc):
    """Generate test cases for each markdown file.

    Pytest hook: parametrizes any test requesting a `doc_path` fixture with
    one case per markdown file discovered by TestDocs.setup_class.
    """
    if "doc_path" in metafunc.fixturenames:
        test_class = metafunc.cls

        # Initialize the class if needed (collection can run before setup_class)
        if not hasattr(test_class, "md_files"):
            test_class.setup_class()

        # Parameterize with the markdown files
        metafunc.parametrize("doc_path", test_class.md_files, ids=[f.stem for f in test_class.md_files])
tests/test_cli.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from unittest.mock import patch
2
+
3
+ import pytest
4
+
5
+ from smolagents.cli import load_model
6
+ from smolagents.local_python_executor import LocalPythonExecutor
7
+ from smolagents.models import InferenceClientModel, LiteLLMModel, OpenAIServerModel, TransformersModel
8
+
9
+
10
@pytest.fixture
def set_env_vars(monkeypatch):
    """Provide dummy API keys via the environment for load_model tests."""
    monkeypatch.setenv("FIREWORKS_API_KEY", "test_fireworks_api_key")
    monkeypatch.setenv("HF_TOKEN", "test_hf_api_key")
14
+
15
+
16
def test_load_model_openai_server_model(set_env_vars):
    """load_model builds an OpenAIServerModel pointed at the Fireworks endpoint using FIREWORKS_API_KEY."""
    with patch("openai.OpenAI") as MockOpenAI:
        model = load_model("OpenAIServerModel", "test_model_id")
        assert isinstance(model, OpenAIServerModel)
        assert model.model_id == "test_model_id"
        assert MockOpenAI.call_count == 1
        assert MockOpenAI.call_args.kwargs["base_url"] == "https://api.fireworks.ai/inference/v1"
        assert MockOpenAI.call_args.kwargs["api_key"] == "test_fireworks_api_key"
24
+
25
+
26
def test_load_model_litellm_model():
    """load_model forwards api_key/api_base/model_id to LiteLLMModel."""
    model = load_model("LiteLLMModel", "test_model_id", api_key="test_api_key", api_base="https://api.test.com")
    assert isinstance(model, LiteLLMModel)
    assert model.api_key == "test_api_key"
    assert model.api_base == "https://api.test.com"
    assert model.model_id == "test_model_id"
32
+
33
+
34
def test_load_model_transformers_model():
    """load_model falls back to AutoModelForCausalLM when the image-text-to-text class rejects the config."""
    with (
        patch(
            "transformers.AutoModelForImageTextToText.from_pretrained",
            side_effect=ValueError("Unrecognized configuration class"),
        ),
        patch("transformers.AutoModelForCausalLM.from_pretrained"),
        patch("transformers.AutoTokenizer.from_pretrained"),
    ):
        model = load_model("TransformersModel", "test_model_id")
        assert isinstance(model, TransformersModel)
        assert model.model_id == "test_model_id"
46
+
47
+
48
def test_load_model_hf_api_model(set_env_vars):
    """load_model builds an InferenceClientModel authenticated with the HF_TOKEN env var."""
    with patch("huggingface_hub.InferenceClient") as huggingface_hub_InferenceClient:
        model = load_model("InferenceClientModel", "test_model_id")
        assert isinstance(model, InferenceClientModel)
        assert model.model_id == "test_model_id"
        assert huggingface_hub_InferenceClient.call_count == 1
        assert huggingface_hub_InferenceClient.call_args.kwargs["token"] == "test_hf_api_key"
55
+
56
+
57
def test_load_model_invalid_model_type():
    """Unknown model type names raise ValueError."""
    with pytest.raises(ValueError, match="Unsupported model type: InvalidModel"):
        load_model("InvalidModel", "test_model_id")
60
+
61
+
62
def test_cli_main(capsys):
    """run_smolagent wires load_model, CodeAgent and agent.run together and prints the tool list."""
    with patch("smolagents.cli.load_model") as mock_load_model:
        mock_load_model.return_value = "mock_model"
        with patch("smolagents.cli.CodeAgent") as mock_code_agent:
            from smolagents.cli import run_smolagent

            run_smolagent("test_prompt", [], "InferenceClientModel", "test_model_id", provider="hf-inference")
            # load_model
            assert len(mock_load_model.call_args_list) == 1
            assert mock_load_model.call_args.args == ("InferenceClientModel", "test_model_id")
            assert mock_load_model.call_args.kwargs == {"api_base": None, "api_key": None, "provider": "hf-inference"}
            # CodeAgent
            assert len(mock_code_agent.call_args_list) == 1
            assert mock_code_agent.call_args.args == ()
            assert mock_code_agent.call_args.kwargs == {
                "tools": [],
                "model": "mock_model",
                "additional_authorized_imports": None,
            }
            # agent.run
            assert len(mock_code_agent.return_value.run.call_args_list) == 1
            assert mock_code_agent.return_value.run.call_args.args == ("test_prompt",)
            # print
            captured = capsys.readouterr()
            assert "Running agent with these tools: []" in captured.out
87
+
88
+
89
def test_vision_web_browser_main():
    """run_webagent sets up a helium-enabled CodeAgent and appends helium_instructions to the prompt."""
    with patch("smolagents.vision_web_browser.helium"):
        with patch("smolagents.vision_web_browser.load_model") as mock_load_model:
            mock_load_model.return_value = "mock_model"
            with patch("smolagents.vision_web_browser.CodeAgent") as mock_code_agent:
                from smolagents.vision_web_browser import helium_instructions, run_webagent

                run_webagent("test_prompt", "InferenceClientModel", "test_model_id", provider="hf-inference")
                # load_model
                assert len(mock_load_model.call_args_list) == 1
                assert mock_load_model.call_args.args == ("InferenceClientModel", "test_model_id")
                # CodeAgent
                assert len(mock_code_agent.call_args_list) == 1
                assert mock_code_agent.call_args.args == ()
                assert len(mock_code_agent.call_args.kwargs["tools"]) == 4
                assert mock_code_agent.call_args.kwargs["model"] == "mock_model"
                assert mock_code_agent.call_args.kwargs["additional_authorized_imports"] == ["helium"]
                # agent.python_executor
                assert len(mock_code_agent.return_value.python_executor.call_args_list) == 1
                assert mock_code_agent.return_value.python_executor.call_args.args == ("from helium import *",)
                assert LocalPythonExecutor(["helium"])("from helium import *") == (None, "", False)
                # agent.run
                assert len(mock_code_agent.return_value.run.call_args_list) == 1
                assert mock_code_agent.return_value.run.call_args.args == ("test_prompt" + helium_instructions,)
tests/test_default_tools.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import unittest
16
+
17
+ import pytest
18
+
19
+ from smolagents.agent_types import _AGENT_TYPE_MAPPING
20
+ from smolagents.default_tools import (
21
+ DuckDuckGoSearchTool,
22
+ PythonInterpreterTool,
23
+ SpeechToTextTool,
24
+ VisitWebpageTool,
25
+ WikipediaSearchTool,
26
+ )
27
+
28
+ from .test_tools import ToolTesterMixin
29
+ from .utils.markers import require_run_all
30
+
31
+
32
+ class DefaultToolTests(unittest.TestCase):
33
+ def test_visit_webpage(self):
34
+ arguments = {"url": "https://en.wikipedia.org/wiki/United_States_Secretary_of_Homeland_Security"}
35
+ result = VisitWebpageTool()(arguments)
36
+ assert isinstance(result, str)
37
+ assert "* [About Wikipedia](/wiki/Wikipedia:About)" in result # Proper wikipedia pages have an About
38
+
39
+ @require_run_all
40
+ def test_ddgs_with_kwargs(self):
41
+ result = DuckDuckGoSearchTool(timeout=20)("DeepSeek parent company")
42
+ assert isinstance(result, str)
43
+
44
+
45
+ class TestPythonInterpreterTool(ToolTesterMixin):
46
+ def setup_method(self):
47
+ self.tool = PythonInterpreterTool(authorized_imports=["numpy"])
48
+ self.tool.setup()
49
+
50
+ def test_exact_match_arg(self):
51
+ result = self.tool("(2 / 2) * 4")
52
+ assert result == "Stdout:\n\nOutput: 4.0"
53
+
54
+ def test_exact_match_kwarg(self):
55
+ result = self.tool(code="(2 / 2) * 4")
56
+ assert result == "Stdout:\n\nOutput: 4.0"
57
+
58
+ def test_agent_type_output(self):
59
+ inputs = ["2 * 2"]
60
+ output = self.tool(*inputs, sanitize_inputs_outputs=True)
61
+ output_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
62
+ assert isinstance(output, output_type)
63
+
64
+ def test_agent_types_inputs(self):
65
+ inputs = ["2 * 2"]
66
+ _inputs = []
67
+
68
+ for _input, expected_input in zip(inputs, self.tool.inputs.values()):
69
+ input_type = expected_input["type"]
70
+ if isinstance(input_type, list):
71
+ _inputs.append([_AGENT_TYPE_MAPPING[_input_type](_input) for _input_type in input_type])
72
+ else:
73
+ _inputs.append(_AGENT_TYPE_MAPPING[input_type](_input))
74
+
75
+ # Should not raise an error
76
+ output = self.tool(*inputs, sanitize_inputs_outputs=True)
77
+ output_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
78
+ assert isinstance(output, output_type)
79
+
80
+ def test_imports_work(self):
81
+ result = self.tool("import numpy as np")
82
+ assert "import from numpy is not allowed" not in result.lower()
83
+
84
+ def test_unauthorized_imports_fail(self):
85
+ with pytest.raises(Exception) as e:
86
+ self.tool("import sympy as sp")
87
+ assert "sympy" in str(e).lower()
88
+
89
+
90
+ class TestSpeechToTextTool:
91
+ def test_new_instance(self):
92
+ from transformers.models.whisper import WhisperForConditionalGeneration, WhisperProcessor
93
+
94
+ tool = SpeechToTextTool()
95
+ assert tool is not None
96
+ assert tool.pre_processor_class == WhisperProcessor
97
+ assert tool.model_class == WhisperForConditionalGeneration
98
+
99
+ def test_initialization(self):
100
+ from transformers.models.whisper import WhisperForConditionalGeneration, WhisperProcessor
101
+
102
+ tool = SpeechToTextTool(model="dummy_model_id")
103
+ assert tool is not None
104
+ assert tool.pre_processor_class == WhisperProcessor
105
+ assert tool.model_class == WhisperForConditionalGeneration
106
+
107
+
108
+ @pytest.mark.parametrize(
109
+ "language, content_type, extract_format, query",
110
+ [
111
+ ("en", "summary", "HTML", "Python_(programming_language)"), # English, Summary Mode, HTML format
112
+ ("en", "text", "WIKI", "Python_(programming_language)"), # English, Full Text Mode, WIKI format
113
+ ("es", "summary", "HTML", "Python_(lenguaje_de_programación)"), # Spanish, Summary Mode, HTML format
114
+ ("es", "text", "WIKI", "Python_(lenguaje_de_programación)"), # Spanish, Full Text Mode, WIKI format
115
+ ],
116
+ )
117
+ def test_wikipedia_search(language, content_type, extract_format, query):
118
+ tool = WikipediaSearchTool(
119
+ user_agent="TestAgent ([email protected])",
120
+ language=language,
121
+ content_type=content_type,
122
+ extract_format=extract_format,
123
+ )
124
+
125
+ result = tool.forward(query)
126
+
127
+ assert isinstance(result, str), "Output should be a string"
128
+ assert "✅ **Wikipedia Page:**" in result, "Response should contain Wikipedia page title"
129
+ assert "🔗 **Read more:**" in result, "Response should contain Wikipedia page URL"
130
+
131
+ if content_type == "summary":
132
+ assert len(result.split()) < 1000, "Summary mode should return a shorter text"
133
+ if content_type == "text":
134
+ assert len(result.split()) > 1000, "Full text mode should return a longer text"
tests/test_final_answer.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+
17
+ import numpy as np
18
+ import PIL.Image
19
+ import pytest
20
+
21
+ from smolagents.agent_types import _AGENT_TYPE_MAPPING
22
+ from smolagents.default_tools import FinalAnswerTool
23
+
24
+ from .test_tools import ToolTesterMixin
25
+ from .utils.markers import require_torch
26
+
27
+
28
+ class TestFinalAnswerTool(ToolTesterMixin):
29
+ def setup_method(self):
30
+ self.inputs = {"answer": "Final answer"}
31
+ self.tool = FinalAnswerTool()
32
+
33
+ def test_exact_match_arg(self):
34
+ result = self.tool("Final answer")
35
+ assert result == "Final answer"
36
+
37
+ def test_exact_match_kwarg(self):
38
+ result = self.tool(answer=self.inputs["answer"])
39
+ assert result == "Final answer"
40
+
41
+ @require_torch
42
+ def test_agent_type_output(self, inputs):
43
+ for input_type, input in inputs.items():
44
+ output = self.tool(**input, sanitize_inputs_outputs=True)
45
+ agent_type = _AGENT_TYPE_MAPPING[input_type]
46
+ assert isinstance(output, agent_type)
47
+
48
+ @pytest.fixture
49
+ def inputs(self, shared_datadir):
50
+ import torch
51
+
52
+ return {
53
+ "string": {"answer": "Text input"},
54
+ "image": {"answer": PIL.Image.open(shared_datadir / "000000039769.png").resize((512, 512))},
55
+ "audio": {"answer": torch.Tensor(np.ones(3000))},
56
+ }
tests/test_function_type_hints_utils.py ADDED
@@ -0,0 +1,514 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ from typing import Any
16
+
17
+ import pytest
18
+
19
+ from smolagents._function_type_hints_utils import DocstringParsingException, get_imports, get_json_schema
20
+
21
+
22
+ @pytest.fixture
23
+ def valid_func():
24
+ """A well-formed function with docstring, type hints, and return block."""
25
+
26
+ def multiply(x: int, y: float) -> float:
27
+ """
28
+ Multiplies two numbers.
29
+
30
+ Args:
31
+ x: The first number.
32
+ y: The second number.
33
+ Returns:
34
+ Product of x and y.
35
+ """
36
+ return x * y
37
+
38
+ return multiply
39
+
40
+
41
+ @pytest.fixture
42
+ def no_docstring_func():
43
+ """Function with no docstring."""
44
+
45
+ def sample(x: int):
46
+ return x
47
+
48
+ return sample
49
+
50
+
51
+ @pytest.fixture
52
+ def missing_arg_doc_func():
53
+ """Function with docstring but missing an argument description."""
54
+
55
+ def add(x: int, y: int):
56
+ """
57
+ Adds two numbers.
58
+
59
+ Args:
60
+ x: The first number.
61
+ """
62
+ return x + y
63
+
64
+ return add
65
+
66
+
67
+ @pytest.fixture
68
+ def bad_return_func():
69
+ """Function docstring with missing return description (allowed)."""
70
+
71
+ def do_nothing(x: str | None = None):
72
+ """
73
+ Does nothing.
74
+
75
+ Args:
76
+ x: Some optional string.
77
+ """
78
+ pass
79
+
80
+ return do_nothing
81
+
82
+
83
+ @pytest.fixture
84
+ def complex_types_func():
85
+ def process_data(items: list[str], config: dict[str, float], point: tuple[int, int]) -> dict:
86
+ """
87
+ Process some data.
88
+
89
+ Args:
90
+ items: List of items to process.
91
+ config: Configuration parameters.
92
+ point: A position as (x,y).
93
+
94
+ Returns:
95
+ Processed data result.
96
+ """
97
+ return {"result": True}
98
+
99
+ return process_data
100
+
101
+
102
+ @pytest.fixture
103
+ def optional_types_func():
104
+ def process_with_optional(required_arg: str, optional_arg: int | None = None) -> str:
105
+ """
106
+ Process with optional argument.
107
+
108
+ Args:
109
+ required_arg: A required string argument.
110
+ optional_arg: An optional integer argument.
111
+
112
+ Returns:
113
+ Processing result.
114
+ """
115
+ return "processed"
116
+
117
+ return process_with_optional
118
+
119
+
120
+ @pytest.fixture
121
+ def enum_choices_func():
122
+ def select_color(color: str) -> str:
123
+ """
124
+ Select a color.
125
+
126
+ Args:
127
+ color: The color to select (choices: ["red", "green", "blue"])
128
+
129
+ Returns:
130
+ Selected color.
131
+ """
132
+ return color
133
+
134
+ return select_color
135
+
136
+
137
+ @pytest.fixture
138
+ def union_types_func():
139
+ def process_union(value: int | str) -> bool | str:
140
+ """
141
+ Process a value that can be either int or string.
142
+
143
+ Args:
144
+ value: An integer or string value.
145
+
146
+ Returns:
147
+ Processing result.
148
+ """
149
+ return True if isinstance(value, int) else "string result"
150
+
151
+ return process_union
152
+
153
+
154
+ @pytest.fixture
155
+ def nested_types_func():
156
+ def process_nested_data(data: list[dict[str, Any]]) -> list[str]:
157
+ """
158
+ Process nested data structure.
159
+
160
+ Args:
161
+ data: List of dictionaries to process.
162
+
163
+ Returns:
164
+ List of processed results.
165
+ """
166
+ return ["result"]
167
+
168
+ return process_nested_data
169
+
170
+
171
+ @pytest.fixture
172
+ def typed_docstring_func():
173
+ def calculate(x: int, y: float) -> float:
174
+ """
175
+ Calculate something.
176
+
177
+ Args:
178
+ x (int): An integer parameter with type in docstring.
179
+ y (float): A float parameter with type in docstring.
180
+
181
+ Returns:
182
+ float: The calculated result.
183
+ """
184
+ return x * y
185
+
186
+ return calculate
187
+
188
+
189
+ @pytest.fixture
190
+ def mismatched_types_func():
191
+ def convert(value: int) -> str:
192
+ """
193
+ Convert a value.
194
+
195
+ Args:
196
+ value (str): A string value (type mismatch with hint).
197
+
198
+ Returns:
199
+ int: Converted value (type mismatch with hint).
200
+ """
201
+ return str(value)
202
+
203
+ return convert
204
+
205
+
206
+ @pytest.fixture
207
+ def complex_docstring_types_func():
208
+ def process(data: dict[str, list[int]]) -> list[dict[str, Any]]:
209
+ """
210
+ Process complex data.
211
+
212
+ Args:
213
+ data (Dict[str, List[int]]): Nested structure with types.
214
+
215
+ Returns:
216
+ List[Dict[str, Any]]: Processed results with types.
217
+ """
218
+ return [{"result": sum(v) for k, v in data.items()}]
219
+
220
+ return process
221
+
222
+
223
+ @pytest.fixture
224
+ def keywords_in_description_func():
225
+ def process(value: str) -> str:
226
+ """
227
+ Function with Args: or Returns: keywords in its description.
228
+
229
+ Args:
230
+ value: A string value.
231
+
232
+ Returns:
233
+ str: Processed value.
234
+ """
235
+ return value.upper()
236
+
237
+ return process
238
+
239
+
240
+ class TestGetJsonSchema:
241
+ def test_get_json_schema_example(self):
242
+ def fn(x: int, y: tuple[str, str, float] | None = None) -> None:
243
+ """
244
+ Test function
245
+ Args:
246
+ x: The first input
247
+ y: The second input
248
+ """
249
+ pass
250
+
251
+ schema = get_json_schema(fn)
252
+ expected_schema = {
253
+ "name": "fn",
254
+ "description": "Test function",
255
+ "parameters": {
256
+ "type": "object",
257
+ "properties": {
258
+ "x": {"type": "integer", "description": "The first input"},
259
+ "y": {
260
+ "type": "array",
261
+ "description": "The second input",
262
+ "nullable": True,
263
+ "prefixItems": [{"type": "string"}, {"type": "string"}, {"type": "number"}],
264
+ },
265
+ },
266
+ "required": ["x"],
267
+ },
268
+ "return": {"type": "null"},
269
+ }
270
+ assert schema["function"]["parameters"]["properties"]["y"] == expected_schema["parameters"]["properties"]["y"]
271
+ assert schema["function"] == expected_schema
272
+
273
+ @pytest.mark.parametrize(
274
+ "fixture_name,should_fail",
275
+ [
276
+ ("valid_func", False),
277
+ # ('no_docstring_func', True),
278
+ # ('missing_arg_doc_func', True),
279
+ ("bad_return_func", False),
280
+ ],
281
+ )
282
+ def test_get_json_schema(self, request, fixture_name, should_fail):
283
+ func = request.getfixturevalue(fixture_name)
284
+ schema = get_json_schema(func)
285
+ assert schema["type"] == "function"
286
+ assert "function" in schema
287
+ assert "parameters" in schema["function"]
288
+
289
+ @pytest.mark.parametrize(
290
+ "fixture_name,should_fail",
291
+ [
292
+ # ('valid_func', False),
293
+ ("no_docstring_func", True),
294
+ ("missing_arg_doc_func", True),
295
+ # ('bad_return_func', False),
296
+ ],
297
+ )
298
+ def test_get_json_schema_raises(self, request, fixture_name, should_fail):
299
+ func = request.getfixturevalue(fixture_name)
300
+ with pytest.raises(DocstringParsingException):
301
+ get_json_schema(func)
302
+
303
+ @pytest.mark.parametrize(
304
+ "fixture_name,expected_properties",
305
+ [
306
+ ("valid_func", {"x": "integer", "y": "number"}),
307
+ ("bad_return_func", {"x": "string"}),
308
+ ],
309
+ )
310
+ def test_property_types(self, request, fixture_name, expected_properties):
311
+ """Test that property types are correctly mapped."""
312
+ func = request.getfixturevalue(fixture_name)
313
+ schema = get_json_schema(func)
314
+
315
+ properties = schema["function"]["parameters"]["properties"]
316
+ for prop_name, expected_type in expected_properties.items():
317
+ assert properties[prop_name]["type"] == expected_type
318
+
319
+ def test_schema_basic_structure(self, valid_func):
320
+ """Test that basic schema structure is correct."""
321
+ schema = get_json_schema(valid_func)
322
+ # Check schema type
323
+ assert schema["type"] == "function"
324
+ assert "function" in schema
325
+ # Check function schema
326
+ function_schema = schema["function"]
327
+ assert function_schema["name"] == "multiply"
328
+ assert "description" in function_schema
329
+ assert function_schema["description"] == "Multiplies two numbers."
330
+ # Check parameters schema
331
+ assert "parameters" in function_schema
332
+ params = function_schema["parameters"]
333
+ assert params["type"] == "object"
334
+ assert "properties" in params
335
+ assert "required" in params
336
+ assert set(params["required"]) == {"x", "y"}
337
+ properties = params["properties"]
338
+ assert properties["x"]["type"] == "integer"
339
+ assert properties["y"]["type"] == "number"
340
+ # Check return schema
341
+ assert "return" in function_schema
342
+ return_schema = function_schema["return"]
343
+ assert return_schema["type"] == "number"
344
+ assert return_schema["description"] == "Product of x and y."
345
+
346
+ def test_complex_types(self, complex_types_func):
347
+ """Test schema generation for complex types."""
348
+ schema = get_json_schema(complex_types_func)
349
+ properties = schema["function"]["parameters"]["properties"]
350
+ # Check list type
351
+ assert properties["items"]["type"] == "array"
352
+ # Check dict type
353
+ assert properties["config"]["type"] == "object"
354
+ # Check tuple type
355
+ assert properties["point"]["type"] == "array"
356
+ assert len(properties["point"]["prefixItems"]) == 2
357
+ assert properties["point"]["prefixItems"][0]["type"] == "integer"
358
+ assert properties["point"]["prefixItems"][1]["type"] == "integer"
359
+
360
+ def test_optional_types(self, optional_types_func):
361
+ """Test schema generation for optional arguments."""
362
+ schema = get_json_schema(optional_types_func)
363
+ params = schema["function"]["parameters"]
364
+ # Required argument should be in required list
365
+ assert "required_arg" in params["required"]
366
+ # Optional argument should not be in required list
367
+ assert "optional_arg" not in params["required"]
368
+ # Optional argument should be nullable
369
+ assert params["properties"]["optional_arg"]["nullable"] is True
370
+ assert params["properties"]["optional_arg"]["type"] == "integer"
371
+
372
+ def test_enum_choices(self, enum_choices_func):
373
+ """Test schema generation for enum choices in docstring."""
374
+ schema = get_json_schema(enum_choices_func)
375
+ color_prop = schema["function"]["parameters"]["properties"]["color"]
376
+ assert "enum" in color_prop
377
+ assert color_prop["enum"] == ["red", "green", "blue"]
378
+
379
+ def test_union_types(self, union_types_func):
380
+ """Test schema generation for union types."""
381
+ schema = get_json_schema(union_types_func)
382
+ value_prop = schema["function"]["parameters"]["properties"]["value"]
383
+ return_prop = schema["function"]["return"]
384
+ # Check union in parameter
385
+ assert len(value_prop["type"]) == 2
386
+ # Check union in return type: should be converted to "any"
387
+ assert return_prop["type"] == "any"
388
+
389
+ def test_nested_types(self, nested_types_func):
390
+ """Test schema generation for nested complex types."""
391
+ schema = get_json_schema(nested_types_func)
392
+ data_prop = schema["function"]["parameters"]["properties"]["data"]
393
+ assert data_prop["type"] == "array"
394
+
395
+ def test_typed_docstring_parsing(self, typed_docstring_func):
396
+ """Test parsing of docstrings with type annotations."""
397
+ schema = get_json_schema(typed_docstring_func)
398
+ # Type hints should take precedence over docstring types
399
+ assert schema["function"]["parameters"]["properties"]["x"]["type"] == "integer"
400
+ assert schema["function"]["parameters"]["properties"]["y"]["type"] == "number"
401
+ # Description should be extracted correctly
402
+ assert (
403
+ schema["function"]["parameters"]["properties"]["x"]["description"]
404
+ == "An integer parameter with type in docstring."
405
+ )
406
+ assert (
407
+ schema["function"]["parameters"]["properties"]["y"]["description"]
408
+ == "A float parameter with type in docstring."
409
+ )
410
+ # Return type and description should be correct
411
+ assert schema["function"]["return"]["type"] == "number"
412
+ assert schema["function"]["return"]["description"] == "The calculated result."
413
+
414
+ def test_mismatched_docstring_types(self, mismatched_types_func):
415
+ """Test that type hints take precedence over docstring types when they conflict."""
416
+ schema = get_json_schema(mismatched_types_func)
417
+ # Type hints should take precedence over docstring types
418
+ assert schema["function"]["parameters"]["properties"]["value"]["type"] == "integer"
419
+ # Return type from type hint should be used, not docstring
420
+ assert schema["function"]["return"]["type"] == "string"
421
+
422
+ def test_complex_docstring_types(self, complex_docstring_types_func):
423
+ """Test parsing of complex type annotations in docstrings."""
424
+ schema = get_json_schema(complex_docstring_types_func)
425
+ # Check that complex nested type is parsed correctly from type hints
426
+ data_prop = schema["function"]["parameters"]["properties"]["data"]
427
+ assert data_prop["type"] == "object"
428
+ # Check return type
429
+ return_prop = schema["function"]["return"]
430
+ assert return_prop["type"] == "array"
431
+ # Description should include the type information from docstring
432
+ assert data_prop["description"] == "Nested structure with types."
433
+ assert return_prop["description"] == "Processed results with types."
434
+
435
+ @pytest.mark.parametrize(
436
+ "fixture_name,expected_description",
437
+ [
438
+ ("typed_docstring_func", "An integer parameter with type in docstring."),
439
+ ("complex_docstring_types_func", "Nested structure with types."),
440
+ ],
441
+ )
442
+ def test_type_in_description_handling(self, request, fixture_name, expected_description):
443
+ """Test that type information in docstrings is preserved in description."""
444
+ func = request.getfixturevalue(fixture_name)
445
+ schema = get_json_schema(func)
446
+ # First parameter description should contain the expected text
447
+ first_param_name = list(schema["function"]["parameters"]["properties"].keys())[0]
448
+ assert schema["function"]["parameters"]["properties"][first_param_name]["description"] == expected_description
449
+
450
+ def test_with_special_words_in_description_func(self, keywords_in_description_func):
451
+ schema = get_json_schema(keywords_in_description_func)
452
+ assert schema["function"]["description"] == "Function with Args: or Returns: keywords in its description."
453
+
454
+
455
+ class TestGetCode:
456
+ @pytest.mark.parametrize(
457
+ "code, expected",
458
+ [
459
+ (
460
+ """
461
+ import numpy
462
+ import pandas
463
+ """,
464
+ ["numpy", "pandas"],
465
+ ),
466
+ # From imports
467
+ (
468
+ """
469
+ from torch import nn
470
+ from transformers import AutoModel
471
+ """,
472
+ ["torch", "transformers"],
473
+ ),
474
+ # Mixed case with nested imports
475
+ (
476
+ """
477
+ import numpy as np
478
+ from torch.nn import Linear
479
+ import os.path
480
+ """,
481
+ ["numpy", "torch", "os"],
482
+ ),
483
+ # Try/except block (should be filtered)
484
+ (
485
+ """
486
+ try:
487
+ import torch
488
+ except ImportError:
489
+ pass
490
+ import numpy
491
+ """,
492
+ ["numpy"],
493
+ ),
494
+ # Flash attention block (should be filtered)
495
+ (
496
+ """
497
+ if is_flash_attn_2_available():
498
+ from flash_attn import flash_attn_func
499
+ import transformers
500
+ """,
501
+ ["transformers"],
502
+ ),
503
+ # Relative imports (should be excluded)
504
+ (
505
+ """
506
+ from .utils import helper
507
+ from ..models import transformer
508
+ """,
509
+ [],
510
+ ),
511
+ ],
512
+ )
513
+ def test_get_imports(self, code: str, expected: list[str]):
514
+ assert sorted(get_imports(code)) == sorted(expected)
tests/test_gradio_ui.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import os
17
+ import shutil
18
+ import tempfile
19
+ import unittest
20
+ from unittest.mock import Mock, patch
21
+
22
+ import pytest
23
+
24
+ from smolagents.agent_types import AgentAudio, AgentImage, AgentText
25
+ from smolagents.gradio_ui import GradioUI, pull_messages_from_step, stream_to_gradio
26
+ from smolagents.memory import ActionStep, FinalAnswerStep, PlanningStep, ToolCall
27
+ from smolagents.models import ChatMessageStreamDelta
28
+ from smolagents.monitoring import Timing, TokenUsage
29
+
30
+
31
+ class GradioUITester(unittest.TestCase):
32
+ def setUp(self):
33
+ """Initialize test environment"""
34
+ self.temp_dir = tempfile.mkdtemp()
35
+ self.mock_agent = Mock()
36
+ self.ui = GradioUI(agent=self.mock_agent, file_upload_folder=self.temp_dir)
37
+ self.allowed_types = [".pdf", ".docx", ".txt"]
38
+
39
+ def tearDown(self):
40
+ """Clean up test environment"""
41
+ shutil.rmtree(self.temp_dir)
42
+
43
+ def test_upload_file_default_types(self):
44
+ """Test default allowed file types"""
45
+ default_types = [".pdf", ".docx", ".txt"]
46
+ for file_type in default_types:
47
+ with tempfile.NamedTemporaryFile(suffix=file_type) as temp_file:
48
+ mock_file = Mock()
49
+ mock_file.name = temp_file.name
50
+
51
+ textbox, uploads_log = self.ui.upload_file(mock_file, [])
52
+
53
+ self.assertIn("File uploaded:", textbox.value)
54
+ self.assertEqual(len(uploads_log), 1)
55
+ self.assertTrue(os.path.exists(os.path.join(self.temp_dir, os.path.basename(temp_file.name))))
56
+
57
+ def test_upload_file_default_types_disallowed(self):
58
+ """Test default disallowed file types"""
59
+ disallowed_types = [".exe", ".sh", ".py", ".jpg"]
60
+ for file_type in disallowed_types:
61
+ with tempfile.NamedTemporaryFile(suffix=file_type) as temp_file:
62
+ mock_file = Mock()
63
+ mock_file.name = temp_file.name
64
+
65
+ textbox, uploads_log = self.ui.upload_file(mock_file, [])
66
+
67
+ self.assertEqual(textbox.value, "File type disallowed")
68
+ self.assertEqual(len(uploads_log), 0)
69
+
70
+ def test_upload_file_success(self):
71
+ """Test successful file upload scenario"""
72
+ with tempfile.NamedTemporaryFile(suffix=".txt") as temp_file:
73
+ mock_file = Mock()
74
+ mock_file.name = temp_file.name
75
+
76
+ textbox, uploads_log = self.ui.upload_file(mock_file, [])
77
+
78
+ self.assertIn("File uploaded:", textbox.value)
79
+ self.assertEqual(len(uploads_log), 1)
80
+ self.assertTrue(os.path.exists(os.path.join(self.temp_dir, os.path.basename(temp_file.name))))
81
+ self.assertEqual(uploads_log[0], os.path.join(self.temp_dir, os.path.basename(temp_file.name)))
82
+
83
+ def test_upload_file_none(self):
84
+ """Test scenario when no file is selected"""
85
+ textbox, uploads_log = self.ui.upload_file(None, [])
86
+
87
+ self.assertEqual(textbox.value, "No file uploaded")
88
+ self.assertEqual(len(uploads_log), 0)
89
+
90
+ def test_upload_file_invalid_type(self):
91
+ """Test disallowed file type"""
92
+ with tempfile.NamedTemporaryFile(suffix=".exe") as temp_file:
93
+ mock_file = Mock()
94
+ mock_file.name = temp_file.name
95
+
96
+ textbox, uploads_log = self.ui.upload_file(mock_file, [])
97
+
98
+ self.assertEqual(textbox.value, "File type disallowed")
99
+ self.assertEqual(len(uploads_log), 0)
100
+
101
+ def test_upload_file_special_chars(self):
102
+ """Test scenario with special characters in filename"""
103
+ with tempfile.NamedTemporaryFile(suffix=".txt") as temp_file:
104
+ # Create a new temporary file with special characters
105
+ special_char_name = os.path.join(os.path.dirname(temp_file.name), "test@#$%^&*.txt")
106
+ shutil.copy(temp_file.name, special_char_name)
107
+ try:
108
+ mock_file = Mock()
109
+ mock_file.name = special_char_name
110
+
111
+ with patch("shutil.copy"):
112
+ textbox, uploads_log = self.ui.upload_file(mock_file, [])
113
+
114
+ self.assertIn("File uploaded:", textbox.value)
115
+ self.assertEqual(len(uploads_log), 1)
116
+ self.assertIn("test_____", uploads_log[0])
117
+ finally:
118
+ # Clean up the special character file
119
+ if os.path.exists(special_char_name):
120
+ os.remove(special_char_name)
121
+
122
+ def test_upload_file_custom_types(self):
123
+ """Test custom allowed file types"""
124
+ with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file:
125
+ mock_file = Mock()
126
+ mock_file.name = temp_file.name
127
+
128
+ textbox, uploads_log = self.ui.upload_file(mock_file, [], allowed_file_types=[".csv"])
129
+
130
+ self.assertIn("File uploaded:", textbox.value)
131
+ self.assertEqual(len(uploads_log), 1)
132
+
133
+
134
+ class TestStreamToGradio:
135
+ """Tests for the stream_to_gradio function."""
136
+
137
+ @patch("smolagents.gradio_ui.pull_messages_from_step")
138
+ def test_stream_to_gradio_memory_step(self, mock_pull_messages):
139
+ """Test streaming a memory step"""
140
+ # Create mock agent and memory step
141
+ mock_agent = Mock()
142
+ mock_agent.run = Mock(return_value=[Mock(spec=ActionStep)])
143
+ mock_agent.model = Mock()
144
+ mock_agent.model.last_input_token_count = 100
145
+ mock_agent.model.last_output_token_count = 200
146
+ # Mock the pull_messages_from_step function to return some messages
147
+ mock_message = Mock()
148
+ mock_pull_messages.return_value = [mock_message]
149
+ # Call stream_to_gradio
150
+ result = list(stream_to_gradio(mock_agent, "test task"))
151
+ # Verify that pull_messages_from_step was called and the message was yielded
152
+ mock_pull_messages.assert_called_once()
153
+ assert result == [mock_message]
154
+
155
+ def test_stream_to_gradio_stream_delta(self):
156
+ """Test streaming a ChatMessageStreamDelta"""
157
+ # Create mock agent and stream delta
158
+ mock_agent = Mock()
159
+ mock_delta = ChatMessageStreamDelta(content="Hello")
160
+ mock_agent.run = Mock(return_value=[mock_delta])
161
+ mock_agent.model = Mock()
162
+ mock_agent.model.last_input_token_count = 100
163
+ mock_agent.model.last_output_token_count = 200
164
+ # Call stream_to_gradio
165
+ result = list(stream_to_gradio(mock_agent, "test task"))
166
+ # Verify that the content was yielded
167
+ assert result == ["Hello"]
168
+
169
+ def test_stream_to_gradio_multiple_deltas(self):
170
+ """Test streaming multiple ChatMessageStreamDeltas"""
171
+ # Create mock agent and stream deltas
172
+ mock_agent = Mock()
173
+ mock_delta1 = ChatMessageStreamDelta(content="Hello")
174
+ mock_delta2 = ChatMessageStreamDelta(content=" world")
175
+ mock_agent.run = Mock(return_value=[mock_delta1, mock_delta2])
176
+ mock_agent.model = Mock()
177
+ mock_agent.model.last_input_token_count = 100
178
+ mock_agent.model.last_output_token_count = 200
179
+ # Call stream_to_gradio
180
+ result = list(stream_to_gradio(mock_agent, "test task"))
181
+ # Verify that the content was accumulated and yielded
182
+ assert result == ["Hello", "Hello world"]
183
+
184
+ @pytest.mark.parametrize(
185
+ "task,task_images,reset_memory,additional_args",
186
+ [
187
+ ("simple task", None, False, None),
188
+ ("task with images", ["image1.png", "image2.png"], False, None),
189
+ ("task with reset", None, True, None),
190
+ ("task with args", None, False, {"arg1": "value1"}),
191
+ ("complex task", ["image.png"], True, {"arg1": "value1", "arg2": "value2"}),
192
+ ],
193
+ )
194
+ def test_stream_to_gradio_parameters(self, task, task_images, reset_memory, additional_args):
195
+ """Test that stream_to_gradio passes parameters correctly to agent.run"""
196
+ # Create mock agent
197
+ mock_agent = Mock()
198
+ mock_agent.run = Mock(return_value=[])
199
+ # Call stream_to_gradio
200
+ list(
201
+ stream_to_gradio(
202
+ mock_agent,
203
+ task=task,
204
+ task_images=task_images,
205
+ reset_agent_memory=reset_memory,
206
+ additional_args=additional_args,
207
+ )
208
+ )
209
+ # Verify that agent.run was called with the right parameters
210
+ mock_agent.run.assert_called_once_with(
211
+ task, images=task_images, stream=True, reset=reset_memory, additional_args=additional_args
212
+ )
213
+
214
+
215
+ class TestPullMessagesFromStep:
216
+ def test_action_step_basic(
217
+ self,
218
+ ):
219
+ """Test basic ActionStep processing."""
220
+ step = ActionStep(
221
+ step_number=1,
222
+ model_output="This is the model output",
223
+ observations="Some execution logs",
224
+ error=None,
225
+ timing=Timing(start_time=1.0, end_time=3.5),
226
+ token_usage=TokenUsage(input_tokens=100, output_tokens=50),
227
+ )
228
+ messages = list(pull_messages_from_step(step))
229
+ assert len(messages) == 5 # step number, model_output, logs, footnote, divider
230
+ for message, expected_content in zip(
231
+ messages,
232
+ [
233
+ "**Step 1**",
234
+ "This is the model output",
235
+ "execution logs",
236
+ "Input tokens: 100 | Output tokens: 50 | Duration: 2.5",
237
+ "-----",
238
+ ],
239
+ ):
240
+ assert expected_content in message.content
241
+
242
+ def test_action_step_with_tool_calls(self):
243
+ """Test ActionStep with tool calls."""
244
+ step = ActionStep(
245
+ step_number=2,
246
+ tool_calls=[ToolCall(name="test_tool", arguments={"answer": "Test answer"}, id="tool_call_1")],
247
+ observations="Tool execution logs",
248
+ timing=Timing(start_time=1.0, end_time=2.5),
249
+ token_usage=TokenUsage(input_tokens=100, output_tokens=50),
250
+ )
251
+ messages = list(pull_messages_from_step(step))
252
+ assert len(messages) == 5 # step, tool call, logs, footnote, divider
253
+ assert messages[1].content == "Test answer"
254
+ assert "Used tool test_tool" in messages[1].metadata["title"]
255
+
256
+ @pytest.mark.parametrize(
257
+ "tool_name, args, expected",
258
+ [
259
+ ("python_interpreter", "print('Hello')", "```python\nprint('Hello')\n```"),
260
+ ("regular_tool", {"key": "value"}, "{'key': 'value'}"),
261
+ ("string_args_tool", "simple string", "simple string"),
262
+ ],
263
+ )
264
+ def test_action_step_tool_call_formats(self, tool_name, args, expected):
265
+ """Test different formats of tool calls."""
266
+ tool_call = Mock()
267
+ tool_call.name = tool_name
268
+ tool_call.arguments = args
269
+ step = ActionStep(
270
+ step_number=1,
271
+ tool_calls=[tool_call],
272
+ timing=Timing(start_time=1.0, end_time=2.5),
273
+ token_usage=TokenUsage(input_tokens=100, output_tokens=50),
274
+ )
275
+ messages = list(pull_messages_from_step(step))
276
+ tool_message = next(
277
+ msg
278
+ for msg in messages
279
+ if msg.role == "assistant" and msg.metadata and msg.metadata.get("title", "").startswith("🛠️")
280
+ )
281
+ assert expected in tool_message.content
282
+
283
+ def test_action_step_with_error(self):
284
+ """Test ActionStep with error."""
285
+ step = ActionStep(
286
+ step_number=3,
287
+ error="This is an error message",
288
+ timing=Timing(start_time=1.0, end_time=2.0),
289
+ token_usage=TokenUsage(input_tokens=100, output_tokens=200),
290
+ )
291
+ messages = list(pull_messages_from_step(step))
292
+ error_message = next((m for m in messages if "error" in str(m.content).lower()), None)
293
+ assert error_message is not None
294
+ assert "This is an error message" in error_message.content
295
+
296
+ def test_action_step_with_images(self):
297
+ """Test ActionStep with observation images."""
298
+ step = ActionStep(
299
+ step_number=4,
300
+ observations_images=["image1.png", "image2.jpg"],
301
+ token_usage=TokenUsage(input_tokens=100, output_tokens=200),
302
+ timing=Timing(start_time=1.0, end_time=2.0),
303
+ )
304
+ with patch("smolagents.gradio_ui.AgentImage") as mock_agent_image:
305
+ mock_agent_image.return_value.to_string.side_effect = lambda: "path/to/image.png"
306
+ messages = list(pull_messages_from_step(step))
307
+ image_messages = [m for m in messages if "image" in str(m).lower()]
308
+ assert len(image_messages) == 2
309
+ assert "path/to/image.png" in str(image_messages[0])
310
+
311
+ @pytest.mark.parametrize(
312
+ "skip_model_outputs, expected_messages_length, token_usage",
313
+ [(False, 4, TokenUsage(input_tokens=80, output_tokens=30)), (True, 2, None)],
314
+ )
315
+ def test_planning_step(self, skip_model_outputs, expected_messages_length, token_usage):
316
+ """Test PlanningStep processing."""
317
+ step = PlanningStep(
318
+ plan="1. First step\n2. Second step",
319
+ model_input_messages=Mock(),
320
+ model_output_message=Mock(),
321
+ token_usage=token_usage,
322
+ timing=Timing(start_time=1.0, end_time=2.0),
323
+ )
324
+ messages = list(pull_messages_from_step(step, skip_model_outputs=skip_model_outputs))
325
+ assert len(messages) == expected_messages_length # [header, plan,] footnote, divider
326
+ expected_contents = [
327
+ "**Planning step**",
328
+ "1. First step\n2. Second step",
329
+ "Input tokens: 80 | Output tokens: 30" if token_usage else "",
330
+ "-----",
331
+ ]
332
+ for message, expected_content in zip(messages, expected_contents[-expected_messages_length:]):
333
+ assert expected_content in message.content
334
+
335
+ if not token_usage:
336
+ assert "Input tokens: 80 | Output tokens: 30" not in message.content
337
+
338
+ @pytest.mark.parametrize(
339
+ "answer_type, answer_value, expected_content",
340
+ [
341
+ (AgentText, "This is a text answer", "**Final answer:**\nThis is a text answer\n"),
342
+ (lambda: "Plain string", "Plain string", "**Final answer:** Plain string"),
343
+ ],
344
+ )
345
+ def test_final_answer_step(self, answer_type, answer_value, expected_content):
346
+ """Test FinalAnswerStep with different answer types."""
347
+ try:
348
+ final_answer = answer_type()
349
+ except TypeError:
350
+ with patch.object(answer_type, "to_string", return_value=answer_value):
351
+ final_answer = answer_type(answer_value)
352
+ step = FinalAnswerStep(
353
+ output=final_answer,
354
+ )
355
+ messages = list(pull_messages_from_step(step))
356
+ assert len(messages) == 1
357
+ assert messages[0].content == expected_content
358
+
359
+ def test_final_answer_step_image(self):
360
+ """Test FinalAnswerStep with image answer."""
361
+ with patch.object(AgentImage, "to_string", return_value="path/to/image.png"):
362
+ step = FinalAnswerStep(output=AgentImage("path/to/image.png"))
363
+ messages = list(pull_messages_from_step(step))
364
+ assert len(messages) == 1
365
+ assert messages[0].content["path"] == "path/to/image.png"
366
+ assert messages[0].content["mime_type"] == "image/png"
367
+
368
+ def test_final_answer_step_audio(self):
369
+ """Test FinalAnswerStep with audio answer."""
370
+ with patch.object(AgentAudio, "to_string", return_value="path/to/audio.wav"):
371
+ step = FinalAnswerStep(output=AgentAudio("path/to/audio.wav"))
372
+ messages = list(pull_messages_from_step(step))
373
+ assert len(messages) == 1
374
+ assert messages[0].content["path"] == "path/to/audio.wav"
375
+ assert messages[0].content["mime_type"] == "audio/wav"
376
+
377
+ def test_unsupported_step_type(self):
378
+ """Test handling of unsupported step types."""
379
+
380
+ class UnsupportedStep(Mock):
381
+ pass
382
+
383
+ step = UnsupportedStep()
384
+ with pytest.raises(ValueError, match="Unsupported step type"):
385
+ list(pull_messages_from_step(step))
tests/test_import.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import tempfile
4
+
5
+
6
+ def test_import_smolagents_without_extras(monkeypatch):
7
+ monkeypatch.delenv("VIRTUAL_ENV", raising=False)
8
+ with tempfile.TemporaryDirectory() as temp_dir:
9
+ # Create a virtual environment
10
+ venv_dir = os.path.join(temp_dir, "venv")
11
+ subprocess.run(["uv", "venv", venv_dir], check=True)
12
+
13
+ # Install smolagents in the virtual environment
14
+ subprocess.run(
15
+ ["uv", "pip", "install", "--python", os.path.join(venv_dir, "bin", "python"), "smolagents @ ."], check=True
16
+ )
17
+
18
+ # Run the import test in the virtual environment
19
+ result = subprocess.run(
20
+ [os.path.join(venv_dir, "bin", "python"), "-c", "import smolagents"],
21
+ capture_output=True,
22
+ text=True,
23
+ )
24
+
25
+ # Check if the import was successful
26
+ assert result.returncode == 0, (
27
+ "Import failed with error: "
28
+ + (result.stderr.splitlines()[-1] if result.stderr else "No error message")
29
+ + "\n"
30
+ + result.stderr
31
+ )
tests/test_local_python_executor.py ADDED
@@ -0,0 +1,2353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import ast
17
+ import types
18
+ from contextlib import nullcontext as does_not_raise
19
+ from textwrap import dedent
20
+ from unittest.mock import patch
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+ import pytest
25
+
26
+ from smolagents.default_tools import BASE_PYTHON_TOOLS, FinalAnswerTool
27
+ from smolagents.local_python_executor import (
28
+ DANGEROUS_FUNCTIONS,
29
+ DANGEROUS_MODULES,
30
+ InterpreterError,
31
+ LocalPythonExecutor,
32
+ PrintContainer,
33
+ check_import_authorized,
34
+ evaluate_boolop,
35
+ evaluate_condition,
36
+ evaluate_delete,
37
+ evaluate_python_code,
38
+ evaluate_subscript,
39
+ fix_final_answer_code,
40
+ get_safe_module,
41
+ )
42
+
43
+
44
+ # Fake function we will use as tool
45
+ def add_two(x):
46
+ return x + 2
47
+
48
+
49
+ class TestEvaluatePythonCode:
50
+ def assertDictEqualNoPrint(self, dict1, dict2):
51
+ assert {k: v for k, v in dict1.items() if k != "_print_outputs"} == {
52
+ k: v for k, v in dict2.items() if k != "_print_outputs"
53
+ }
54
+
55
+ def test_evaluate_assign(self):
56
+ code = "x = 3"
57
+ state = {}
58
+ result, _ = evaluate_python_code(code, {}, state=state)
59
+ assert result == 3
60
+ self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": {"counter": 2}})
61
+
62
+ code = "x = y"
63
+ state = {"y": 5}
64
+ result, _ = evaluate_python_code(code, {}, state=state)
65
+ # evaluate returns the value of the last assignment.
66
+ assert result == 5
67
+ self.assertDictEqualNoPrint(state, {"x": 5, "y": 5, "_operations_count": {"counter": 2}})
68
+
69
+ code = "a=1;b=None"
70
+ result, _ = evaluate_python_code(code, {}, state={})
71
+ # evaluate returns the value of the last assignment.
72
+ assert result is None
73
+
74
+ def test_assignment_cannot_overwrite_tool(self):
75
+ code = "print = '3'"
76
+ with pytest.raises(InterpreterError) as e:
77
+ evaluate_python_code(code, {"print": print}, state={})
78
+ assert "Cannot assign to name 'print': doing this would erase the existing tool!" in str(e)
79
+
80
+ def test_subscript_call(self):
81
+ code = """def foo(x,y):return x*y\n\ndef boo(y):\n\treturn y**3\nfun = [foo, boo]\nresult_foo = fun[0](4,2)\nresult_boo = fun[1](4)"""
82
+ state = {}
83
+ result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
84
+ assert result == 64
85
+ assert state["result_foo"] == 8
86
+ assert state["result_boo"] == 64
87
+
88
+ def test_evaluate_call(self):
89
+ code = "y = add_two(x)"
90
+ state = {"x": 3}
91
+ result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
92
+ assert result == 5
93
+ self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": {"counter": 3}})
94
+
95
+ # Should not work without the tool
96
+ with pytest.raises(InterpreterError, match="Forbidden function evaluation: 'add_two'"):
97
+ evaluate_python_code(code, {}, state=state)
98
+
99
+ def test_evaluate_class_def(self):
100
+ code = dedent('''\
101
+ class MyClass:
102
+ """A class with a value."""
103
+
104
+ def __init__(self, value):
105
+ self.value = value
106
+
107
+ def get_value(self):
108
+ return self.value
109
+
110
+ instance = MyClass(42)
111
+ result = instance.get_value()
112
+ ''')
113
+ state = {}
114
+ result, _ = evaluate_python_code(code, {}, state=state)
115
+ assert result == 42
116
+ assert state["instance"].__doc__ == "A class with a value."
117
+
118
+ def test_evaluate_class_def_with_assign_attribute_target(self):
119
+ """
120
+ Test evaluate_class_def function when stmt is an instance of ast.Assign with ast.Attribute target.
121
+ """
122
+ code = dedent("""
123
+ class TestSubClass:
124
+ attr1 = 1
125
+ class TestClass:
126
+ data = TestSubClass()
127
+ data.attr1 = "value1"
128
+ data.attr2 = "value2"
129
+ result = (TestClass.data.attr1, TestClass.data.attr2)
130
+ """)
131
+
132
+ state = {}
133
+ result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
134
+
135
+ assert result == ("value1", "value2")
136
+ assert isinstance(state["TestClass"], type)
137
+ assert state["TestClass"].data.attr1 == "value1"
138
+ assert state["TestClass"].data.attr2 == "value2"
139
+
140
+ def test_evaluate_constant(self):
141
+ code = "x = 3"
142
+ state = {}
143
+ result, _ = evaluate_python_code(code, {}, state=state)
144
+ assert result == 3
145
+ self.assertDictEqualNoPrint(state, {"x": 3, "_operations_count": {"counter": 2}})
146
+
147
+ def test_evaluate_dict(self):
148
+ code = "test_dict = {'x': x, 'y': add_two(x)}"
149
+ state = {"x": 3}
150
+ result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
151
+ assert result == {"x": 3, "y": 5}
152
+ self.assertDictEqualNoPrint(
153
+ state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": {"counter": 7}}
154
+ )
155
+
156
+ def test_evaluate_expression(self):
157
+ code = "x = 3\ny = 5"
158
+ state = {}
159
+ result, _ = evaluate_python_code(code, {}, state=state)
160
+ # evaluate returns the value of the last assignment.
161
+ assert result == 5
162
+ self.assertDictEqualNoPrint(state, {"x": 3, "y": 5, "_operations_count": {"counter": 4}})
163
+
164
+ def test_evaluate_f_string(self):
165
+ code = "text = f'This is x: {x}.'"
166
+ state = {"x": 3}
167
+ result, _ = evaluate_python_code(code, {}, state=state)
168
+ # evaluate returns the value of the last assignment.
169
+ assert result == "This is x: 3."
170
+ self.assertDictEqualNoPrint(state, {"x": 3, "text": "This is x: 3.", "_operations_count": {"counter": 6}})
171
+
172
+ def test_evaluate_f_string_with_format(self):
173
+ code = "text = f'This is x: {x:.2f}.'"
174
+ state = {"x": 3.336}
175
+ result, _ = evaluate_python_code(code, {}, state=state)
176
+ assert result == "This is x: 3.34."
177
+ self.assertDictEqualNoPrint(
178
+ state, {"x": 3.336, "text": "This is x: 3.34.", "_operations_count": {"counter": 8}}
179
+ )
180
+
181
+ def test_evaluate_f_string_with_complex_format(self):
182
+ code = "text = f'This is x: {x:>{width}.{precision}f}.'"
183
+ state = {"x": 3.336, "width": 10, "precision": 2}
184
+ result, _ = evaluate_python_code(code, {}, state=state)
185
+ assert result == "This is x: 3.34."
186
+ self.assertDictEqualNoPrint(
187
+ state,
188
+ {
189
+ "x": 3.336,
190
+ "width": 10,
191
+ "precision": 2,
192
+ "text": "This is x: 3.34.",
193
+ "_operations_count": {"counter": 14},
194
+ },
195
+ )
196
+
197
+ def test_evaluate_if(self):
198
+ code = "if x <= 3:\n y = 2\nelse:\n y = 5"
199
+ state = {"x": 3}
200
+ result, _ = evaluate_python_code(code, {}, state=state)
201
+ # evaluate returns the value of the last assignment.
202
+ assert result == 2
203
+ self.assertDictEqualNoPrint(state, {"x": 3, "y": 2, "_operations_count": {"counter": 6}})
204
+
205
+ state = {"x": 8}
206
+ result, _ = evaluate_python_code(code, {}, state=state)
207
+ # evaluate returns the value of the last assignment.
208
+ assert result == 5
209
+ self.assertDictEqualNoPrint(state, {"x": 8, "y": 5, "_operations_count": {"counter": 6}})
210
+
211
+ def test_evaluate_list(self):
212
+ code = "test_list = [x, add_two(x)]"
213
+ state = {"x": 3}
214
+ result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
215
+ assert result == [3, 5]
216
+ self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": {"counter": 5}})
217
+
218
+ def test_evaluate_name(self):
219
+ code = "y = x"
220
+ state = {"x": 3}
221
+ result, _ = evaluate_python_code(code, {}, state=state)
222
+ assert result == 3
223
+ self.assertDictEqualNoPrint(state, {"x": 3, "y": 3, "_operations_count": {"counter": 2}})
224
+
225
+ def test_evaluate_subscript(self):
226
+ code = "test_list = [x, add_two(x)]\ntest_list[1]"
227
+ state = {"x": 3}
228
+ result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
229
+ assert result == 5
230
+ self.assertDictEqualNoPrint(state, {"x": 3, "test_list": [3, 5], "_operations_count": {"counter": 9}})
231
+
232
+ code = "test_dict = {'x': x, 'y': add_two(x)}\ntest_dict['y']"
233
+ state = {"x": 3}
234
+ result, _ = evaluate_python_code(code, {"add_two": add_two}, state=state)
235
+ assert result == 5
236
+ self.assertDictEqualNoPrint(
237
+ state, {"x": 3, "test_dict": {"x": 3, "y": 5}, "_operations_count": {"counter": 11}}
238
+ )
239
+
240
+ code = "vendor = {'revenue': 31000, 'rent': 50312}; vendor['ratio'] = round(vendor['revenue'] / vendor['rent'], 2)"
241
+ state = {}
242
+ evaluate_python_code(code, {"min": min, "print": print, "round": round}, state=state)
243
+ assert state["vendor"] == {"revenue": 31000, "rent": 50312, "ratio": 0.62}
244
+
245
+ def test_subscript_string_with_string_index_raises_appropriate_error(self):
246
+ code = """
247
+ search_results = "[{'title': 'Paris, Ville de Paris, France Weather Forecast | AccuWeather', 'href': 'https://www.accuweather.com/en/fr/paris/623/weather-forecast/623', 'body': 'Get the latest weather forecast for Paris, Ville de Paris, France , including hourly, daily, and 10-day outlooks. AccuWeather provides you with reliable and accurate information on temperature ...'}]"
248
+ for result in search_results:
249
+ if 'current' in result['title'].lower() or 'temperature' in result['title'].lower():
250
+ current_weather_url = result['href']
251
+ print(current_weather_url)
252
+ break"""
253
+ with pytest.raises(InterpreterError) as e:
254
+ evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
255
+ assert "You're trying to subscript a string with a string index" in e
256
+
257
+ def test_evaluate_for(self):
258
+ code = "x = 0\nfor i in range(3):\n x = i"
259
+ state = {}
260
+ result, _ = evaluate_python_code(code, {"range": range}, state=state)
261
+ assert result == 2
262
+ self.assertDictEqualNoPrint(state, {"x": 2, "i": 2, "_operations_count": {"counter": 11}})
263
+
264
+ def test_evaluate_binop(self):
265
+ code = "y + x"
266
+ state = {"x": 3, "y": 6}
267
+ result, _ = evaluate_python_code(code, {}, state=state)
268
+ assert result == 9
269
+ self.assertDictEqualNoPrint(state, {"x": 3, "y": 6, "_operations_count": {"counter": 4}})
270
+
271
+ def test_recursive_function(self):
272
+ code = """
273
+ def recur_fibo(n):
274
+ if n <= 1:
275
+ return n
276
+ else:
277
+ return(recur_fibo(n-1) + recur_fibo(n-2))
278
+ recur_fibo(6)"""
279
+ result, _ = evaluate_python_code(code, {}, state={})
280
+ assert result == 8
281
+
282
+ def test_max_operations(self):
283
+ # Check that operation counter is not reset in functions
284
+ code = dedent(
285
+ """
286
+ def func(a):
287
+ for j in range(10):
288
+ a += j
289
+ return a
290
+
291
+ for i in range(5):
292
+ func(i)
293
+ """
294
+ )
295
+ with patch("smolagents.local_python_executor.MAX_OPERATIONS", 100):
296
+ with pytest.raises(InterpreterError) as exception_info:
297
+ evaluate_python_code(code, {"range": range}, state={})
298
+ assert "Reached the max number of operations" in str(exception_info.value)
299
+
300
+ def test_operations_count(self):
301
+ # Check that operation counter is not reset in functions
302
+ code = dedent(
303
+ """
304
+ def func():
305
+ return 0
306
+
307
+ func()
308
+ """
309
+ )
310
+ state = {}
311
+ evaluate_python_code(code, {"range": range}, state=state)
312
+ assert state["_operations_count"]["counter"] == 5
313
+
314
+ def test_evaluate_string_methods(self):
315
+ code = "'hello'.replace('h', 'o').split('e')"
316
+ result, _ = evaluate_python_code(code, {}, state={})
317
+ assert result == ["o", "llo"]
318
+
319
+ def test_evaluate_slicing(self):
320
+ code = "'hello'[1:3][::-1]"
321
+ result, _ = evaluate_python_code(code, {}, state={})
322
+ assert result == "le"
323
+
324
+ def test_access_attributes(self):
325
+ class A:
326
+ attr = 2
327
+
328
+ code = "A.attr"
329
+ result, _ = evaluate_python_code(code, {}, state={"A": A})
330
+ assert result == 2
331
+
332
+ def test_list_comprehension(self):
333
+ code = "sentence = 'THESEAGULL43'\nmeaningful_sentence = '-'.join([char.lower() for char in sentence if char.isalpha()])"
334
+ result, _ = evaluate_python_code(code, {}, state={})
335
+ assert result == "t-h-e-s-e-a-g-u-l-l"
336
+
337
+ def test_string_indexing(self):
338
+ code = """text_block = [
339
+ "THESE",
340
+ "AGULL"
341
+ ]
342
+ sentence = ""
343
+ for block in text_block:
344
+ for col in range(len(text_block[0])):
345
+ sentence += block[col]
346
+ """
347
+ result, _ = evaluate_python_code(code, {"len": len, "range": range}, state={})
348
+ assert result == "THESEAGULL"
349
+
350
+ def test_tuples(self):
351
+ code = "x = (1, 2, 3)\nx[1]"
352
+ result, _ = evaluate_python_code(code, {}, state={})
353
+ assert result == 2
354
+
355
+ code = """
356
+ digits, i = [1, 2, 3], 1
357
+ digits[i], digits[i + 1] = digits[i + 1], digits[i]"""
358
+ evaluate_python_code(code, {"range": range, "print": print, "int": int}, {})
359
+
360
+ code = """
361
+ def calculate_isbn_10_check_digit(number):
362
+ total = sum((10 - i) * int(digit) for i, digit in enumerate(number))
363
+ remainder = total % 11
364
+ check_digit = 11 - remainder
365
+ if check_digit == 10:
366
+ return 'X'
367
+ elif check_digit == 11:
368
+ return '0'
369
+ else:
370
+ return str(check_digit)
371
+
372
+ # Given 9-digit numbers
373
+ numbers = [
374
+ "478225952",
375
+ "643485613",
376
+ "739394228",
377
+ "291726859",
378
+ "875262394",
379
+ "542617795",
380
+ "031810713",
381
+ "957007669",
382
+ "871467426"
383
+ ]
384
+
385
+ # Calculate check digits for each number
386
+ check_digits = [calculate_isbn_10_check_digit(number) for number in numbers]
387
+ print(check_digits)
388
+ """
389
+ state = {}
390
+ evaluate_python_code(
391
+ code,
392
+ {
393
+ "range": range,
394
+ "print": print,
395
+ "sum": sum,
396
+ "enumerate": enumerate,
397
+ "int": int,
398
+ "str": str,
399
+ },
400
+ state,
401
+ )
402
+
403
+ def test_listcomp(self):
404
+ code = "x = [i for i in range(3)]"
405
+ result, _ = evaluate_python_code(code, {"range": range}, state={})
406
+ assert result == [0, 1, 2]
407
+
408
+ def test_setcomp(self):
409
+ code = "batman_times = {entry['time'] for entry in [{'time': 10}, {'time': 19}, {'time': 20}]}"
410
+ result, _ = evaluate_python_code(code, {}, state={})
411
+ assert result == {10, 19, 20}
412
+
413
+ def test_break_continue(self):
414
+ code = "for i in range(10):\n if i == 5:\n break\ni"
415
+ result, _ = evaluate_python_code(code, {"range": range}, state={})
416
+ assert result == 5
417
+
418
+ code = "for i in range(10):\n if i == 5:\n continue\ni"
419
+ result, _ = evaluate_python_code(code, {"range": range}, state={})
420
+ assert result == 9
421
+
422
+ def test_call_int(self):
423
+ code = "import math\nstr(math.ceil(149))"
424
+ result, _ = evaluate_python_code(code, {"str": lambda x: str(x)}, state={})
425
+ assert result == "149"
426
+
427
+ def test_lambda(self):
428
+ code = "f = lambda x: x + 2\nf(3)"
429
+ result, _ = evaluate_python_code(code, {}, state={})
430
+ assert result == 5
431
+
432
+ def test_dictcomp(self):
433
+ code = "x = {i: i**2 for i in range(3)}"
434
+ result, _ = evaluate_python_code(code, {"range": range}, state={})
435
+ assert result == {0: 0, 1: 1, 2: 4}
436
+
437
+ code = "{num: name for num, name in {101: 'a', 102: 'b'}.items() if name not in ['a']}"
438
+ result, _ = evaluate_python_code(code, {"print": print}, state={}, authorized_imports=["pandas"])
439
+ assert result == {102: "b"}
440
+
441
+ code = """
442
+ shifts = {'A': ('6:45', '8:00'), 'B': ('10:00', '11:45')}
443
+ shift_minutes = {worker: ('a', 'b') for worker, (start, end) in shifts.items()}
444
+ """
445
+ result, _ = evaluate_python_code(code, {}, state={})
446
+ assert result == {"A": ("a", "b"), "B": ("a", "b")}
447
+
448
+ def test_tuple_assignment(self):
449
+ code = "a, b = 0, 1\nb"
450
+ result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
451
+ assert result == 1
452
+
453
    def test_while(self):
        """While loops: normal termination, iteration cap on infinite loops, and lazy condition evaluation."""
        code = "i = 0\nwhile i < 3:\n    i += 1\ni"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == 3

        # test infinite loop
        code = "i = 0\nwhile i < 3:\n    i -= 1\ni"
        with patch("smolagents.local_python_executor.MAX_WHILE_ITERATIONS", 100):
            with pytest.raises(InterpreterError, match=".*Maximum number of 100 iterations in While loop exceeded"):
                evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})

        # test lazy evaluation
        # `house_positions[i]` must not be evaluated once `i < n` is False,
        # otherwise the final iteration would raise an IndexError.
        code = dedent(
            """
            house_positions = [0, 7, 10, 15, 18, 22, 22]
            i, n, loc = 0, 7, 30
            while i < n and house_positions[i] <= loc:
                i += 1
            """
        )
        state = {}
        evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
475
+
476
    def test_generator(self):
        """Generator expressions can be created and consumed with `list()`."""
        code = "a = [1, 2, 3, 4, 5]; b = (i**2 for i in a); list(b)"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == [1, 4, 9, 16, 25]
480
+
481
    def test_boolops(self):
        """Boolean operators in conditions, `elif` chains, and Python's value-returning and/or semantics."""
        code = """if (not (a > b and a > c)) or d > e:
    best_city = "Brooklyn"
else:
    best_city = "Manhattan"
best_city
"""
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": 1, "b": 2, "c": 3, "d": 4, "e": 5})
        assert result == "Brooklyn"

        code = """if d > e and a < b:
    best_city = "Brooklyn"
elif d < e and a < b:
    best_city = "Sacramento"
else:
    best_city = "Manhattan"
best_city
"""
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": 1, "b": 2, "c": 3, "d": 4, "e": 5})
        assert result == "Sacramento"

        # Short-circuit evaluation:
        # (T and 0) or (T and T) => 0 or True => True
        code = "result = (x > 3 and y) or (z == 10 and not y)\nresult"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"x": 5, "y": 0, "z": 10})
        assert result

        # (None or "") or "Found" => "" or "Found" => "Found"
        code = "result = (a or c) or b\nresult"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": None, "b": "Found", "c": ""})
        assert result == "Found"

        # ("First" and "") or "Third" => "" or "Third" -> "Third"
        code = "result = (a and b) or c\nresult"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={"a": "First", "b": "", "c": "Third"})
        assert result == "Third"
517
+
518
    def test_if_conditions(self):
        """An `if` whose condition is a method call executes its body; print output is captured in state."""
        code = """char='a'
if char.isalpha():
    print('2')"""
        state = {}
        evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
        assert state["_print_outputs"].value == "2\n"
525
+
526
    def test_imports(self):
        """Default-authorized stdlib imports (math, random, time, re, queue, itertools, stat,
        statistics, unicodedata) work, including `from ... import` and multi-module imports."""
        code = "import math\nmath.sqrt(4)"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == 2.0

        code = "from random import choice, seed\nseed(12)\nchoice(['win', 'lose', 'draw'])"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == "lose"

        code = "import time, re\ntime.sleep(0.1)"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result is None

        code = "from queue import Queue\nq = Queue()\nq.put(1)\nq.get()"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == 1

        code = "import itertools\nlist(itertools.islice(range(10), 3))"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == [0, 1, 2]

        code = "import re\nre.search('a', 'abc').group()"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == "a"

        code = "import stat\nstat.S_ISREG(0o100644)"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result

        code = "import statistics\nstatistics.mean([1, 2, 3, 4, 4])"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == 2.8

        code = "import unicodedata\nunicodedata.name('A')"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == "LATIN CAPITAL LETTER A"

        # Test submodules are handled properly, thus not raising error
        code = "import numpy.random as rd\nrng = rd.default_rng(12345)\nrng.random()"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.random"])

        code = "from numpy.random import default_rng as d_rng\nrng = d_rng(12345)\nrng.random()"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.random"])
569
+
570
    def test_additional_imports(self):
        """`authorized_imports` semantics: exact names, `pkg.*` wildcards, global `*`, and rejections."""
        code = "import numpy as np"
        evaluate_python_code(code, authorized_imports=["numpy"], state={})

        # Test that allowing 'numpy.*' allows numpy root package and its submodules
        code = "import numpy as np\nnp.random.default_rng(123)\nnp.array([1, 2])"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.*"])

        # Test that allowing 'numpy.*' allows importing a submodule
        code = "import numpy.random as rd\nrd.default_rng(12345)"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={}, authorized_imports=["numpy.*"])

        code = "import numpy.random as rd"
        evaluate_python_code(code, authorized_imports=["numpy.random"], state={})
        evaluate_python_code(code, authorized_imports=["numpy.*"], state={})
        evaluate_python_code(code, authorized_imports=["*"], state={})
        # Unrelated or non-existent authorizations must not allow the import.
        with pytest.raises(InterpreterError):
            evaluate_python_code(code, authorized_imports=["random"], state={})

        with pytest.raises(InterpreterError):
            evaluate_python_code(code, authorized_imports=["numpy.a"], state={})
        with pytest.raises(InterpreterError):
            evaluate_python_code(code, authorized_imports=["numpy.a.*"], state={})
593
+
594
    def test_multiple_comparators(self):
        """Chained comparisons (`a <= b < c`) combined with `and`."""
        code = "0 <= -1 < 4 and 0 <= -5 < 4"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert not result

        code = "0 <= 1 < 4 and 0 <= -5 < 4"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert not result

        code = "0 <= 4 < 4 and 0 <= 3 < 4"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert not result

        code = "0 <= 3 < 4 and 0 <= 3 < 4"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result
610
+
611
    def test_print_output(self):
        """`print` output accumulates in state["_print_outputs"], including from function
        bodies and list comprehensions (which run on copied state)."""
        code = "print('Hello world!')\nprint('Ok no one cares')"
        state = {}
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
        assert result is None
        assert state["_print_outputs"].value == "Hello world!\nOk no one cares\n"

        # Test print in function (state copy)
        code = """
print("1")
def function():
    print("2")
function()"""
        state = {}
        evaluate_python_code(code, {"print": print}, state=state)
        assert state["_print_outputs"].value == "1\n2\n"

        # Test print in list comprehension (state copy)
        code = """
print("1")
def function():
    print("2")
[function() for i in range(10)]"""
        state = {}
        evaluate_python_code(code, {"print": print, "range": range}, state=state)
        assert state["_print_outputs"].value == "1\n2\n2\n2\n2\n2\n2\n2\n2\n2\n2\n"
637
+
638
    def test_tuple_target_in_iterator(self):
        """Tuple unpacking as a for-loop target; result is from the last iteration."""
        code = "for a, b in [('Ralf Weikert', 'Austria'), ('Samuel Seungwon Lee', 'South Korea')]:res = a.split()[0]"
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert result == "Samuel"
642
+
643
    def test_classes(self):
        """End-to-end class support: class attributes, inheritance with `super()`, method
        overriding, `__str__`, multiple instances, and exceptions raised from methods."""
        code = """
class Animal:
    species = "Generic Animal"

    def __init__(self, name, age):
        self.name = name
        self.age = age

    def sound(self):
        return "The animal makes a sound."

    def __str__(self):
        return f"{self.name}, {self.age} years old"

class Dog(Animal):
    species = "Canine"

    def __init__(self, name, age, breed):
        super().__init__(name, age)
        self.breed = breed

    def sound(self):
        return "The dog barks."

    def __str__(self):
        return f"{self.name}, {self.age} years old, {self.breed}"

class Cat(Animal):
    def sound(self):
        return "The cat meows."

    def __str__(self):
        return f"{self.name}, {self.age} years old, {self.species}"


# Testing multiple instances
dog1 = Dog("Fido", 3, "Labrador")
dog2 = Dog("Buddy", 5, "Golden Retriever")

# Testing method with built-in function
animals = [dog1, dog2, Cat("Whiskers", 2)]
num_animals = len(animals)

# Testing exceptions in methods
class ExceptionTest:
    def method_that_raises(self):
        raise ValueError("An error occurred")

try:
    exc_test = ExceptionTest()
    exc_test.method_that_raises()
except ValueError as e:
    exception_message = str(e)


# Collecting results
dog1_sound = dog1.sound()
dog1_str = str(dog1)
dog2_sound = dog2.sound()
dog2_str = str(dog2)
cat = Cat("Whiskers", 2)
cat_sound = cat.sound()
cat_str = str(cat)
"""
        state = {}
        evaluate_python_code(
            code,
            {"print": print, "len": len, "super": super, "str": str, "sum": sum},
            state=state,
        )

        # Assert results
        assert state["dog1_sound"] == "The dog barks."
        assert state["dog1_str"] == "Fido, 3 years old, Labrador"
        assert state["dog2_sound"] == "The dog barks."
        assert state["dog2_str"] == "Buddy, 5 years old, Golden Retriever"
        assert state["cat_sound"] == "The cat meows."
        assert state["cat_str"] == "Whiskers, 2 years old, Generic Animal"
        assert state["num_animals"] == 3
        assert state["exception_message"] == "An error occurred"
724
+
725
    def test_variable_args(self):
        """`*args` and `**kwargs` in a user-defined function."""
        code = """
def var_args_method(self, *args, **kwargs):
    return sum(args) + sum(kwargs.values())

var_args_method(1, 2, 3, x=4, y=5)
"""
        state = {}
        result, _ = evaluate_python_code(code, {"sum": sum}, state=state)
        assert result == 15
735
+
736
    def test_exceptions(self):
        """try/except catches an exception raised inside a user-defined function."""
        code = """
def method_that_raises(self):
    raise ValueError("An error occurred")

try:
    method_that_raises()
except ValueError as e:
    exception_message = str(e)
"""
        state = {}
        evaluate_python_code(
            code,
            {"print": print, "len": len, "super": super, "str": str, "sum": sum},
            state=state,
        )
        assert state["exception_message"] == "An error occurred"
753
+
754
    def test_print(self):
        """`print` of a builtin call's result is captured in the state print buffer."""
        code = "print(min([1, 2, 3]))"
        state = {}
        evaluate_python_code(code, {"min": min, "print": print}, state=state)
        assert state["_print_outputs"].value == "1\n"
759
+
760
    def test_types_as_objects(self):
        """Type objects (`str`, `int`) can be assigned as values and come back unwrapped."""
        code = "type_a = float(2); type_b = str; type_c = int"
        state = {}
        result, is_final_answer = evaluate_python_code(code, {"float": float, "str": str, "int": int}, state=state)
        # Type objects are not wrapped by safer_func
        assert not hasattr(result, "__wrapped__")
        assert result is int
767
+
768
+ def test_tuple_id(self):
769
+ code = """
770
+ food_items = {"apple": 2, "banana": 3, "orange": 1, "pear": 1}
771
+ unique_food_items = [item for item, count in food_item_counts.items() if count == 1]
772
+ """
773
+ state = {}
774
+ result, is_final_answer = evaluate_python_code(code, {}, state=state)
775
+ assert result == ["orange", "pear"]
776
+
777
    def test_nonsimple_augassign(self):
        """Augmented assignment to subscript, list, and attribute targets."""
        code = """
counts_dict = {'a': 0}
counts_dict['a'] += 1
counts_list = [1, 2, 3]
counts_list += [4, 5, 6]

class Counter:
    def __init__(self):
        self.count = 0

a = Counter()
a.count += 1
"""
        state = {}
        evaluate_python_code(code, {}, state=state)
        assert state["counts_dict"] == {"a": 1}
        assert state["counts_list"] == [1, 2, 3, 4, 5, 6]
        assert state["a"].count == 1
796
+
797
    def test_adding_int_to_list_raises_error(self):
        """`list += int` must raise an InterpreterError with a helpful message."""
        code = """
counts = [1, 2, 3]
counts += 1"""
        with pytest.raises(InterpreterError) as e:
            evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert "Cannot add non-list value 1 to a list." in str(e)
804
+
805
    def test_error_highlights_correct_line_of_code(self):
        """The error message points at the exact offending source line."""
        code = """a = 1
b = 2

counts = [1, 2, 3]
counts += 1
b += 1"""
        with pytest.raises(InterpreterError) as e:
            evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert "Code execution failed at line 'counts += 1" in str(e)
815
+
816
    def test_error_type_returned_in_function_call(self):
        """Errors raised inside user-defined functions surface both message and exception type."""
        code = """def error_function():
    raise ValueError("error")

error_function()"""
        with pytest.raises(InterpreterError) as e:
            evaluate_python_code(code)
        assert "error" in str(e)
        assert "ValueError" in str(e)
825
+
826
    def test_assert(self):
        """A failing `assert` raises and reports the failing expression only."""
        code = """
assert 1 == 1
assert 1 == 2
"""
        with pytest.raises(InterpreterError) as e:
            evaluate_python_code(code, BASE_PYTHON_TOOLS, state={})
        assert "1 == 2" in str(e) and "1 == 1" not in str(e)
834
+
835
    def test_with_context_manager(self):
        """`with ... as ...` calls `__enter__`/`__exit__` on a user-defined context manager."""
        code = """
class SimpleLock:
    def __init__(self):
        self.locked = False

    def __enter__(self):
        self.locked = True
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.locked = False

lock = SimpleLock()

with lock as l:
    assert l.locked == True

assert lock.locked == False
"""
        state = {}
        tools = {}
        evaluate_python_code(code, tools, state=state)
858
+
859
    def test_default_arg_in_function(self):
        """Default parameter values combine with keyword arguments as in CPython."""
        code = """
def f(a, b=333, n=1000):
    return b + n
n = f(1, n=667)
"""
        res, is_final_answer = evaluate_python_code(code, {}, {})
        assert res == 1000
        assert not is_final_answer
868
+
869
    def test_set(self):
        """Set literals and set methods (`difference`, `intersection`)."""
        code = """
S1 = {'a', 'b', 'c'}
S2 = {'b', 'c', 'd'}
S3 = S1.difference(S2)
S4 = S1.intersection(S2)
"""
        state = {}
        evaluate_python_code(code, {}, state=state)
        assert state["S3"] == {"a"}
        assert state["S4"] == {"b", "c"}
880
+
881
    def test_break(self):
        """`break` exits a `while True` loop."""
        code = """
i = 0

while True:
    i+= 1
    if i==3:
        break

i"""
        result, is_final_answer = evaluate_python_code(code, {"print": print, "round": round}, state={})
        assert result == 3
        assert not is_final_answer
894
+
895
    def test_return(self):
        """`return` semantics: early return from a branch, and a bare `return` yielding None."""
        # test early returns
        code = """
def add_one(n, shift):
    if True:
        return n + shift
    return n

add_one(1, 1)
"""
        state = {}
        result, is_final_answer = evaluate_python_code(
            code, {"print": print, "range": range, "ord": ord, "chr": chr}, state=state
        )
        assert result == 2

        # test returning None
        code = """
def returns_none(a):
    return

returns_none(1)
"""
        state = {}
        result, is_final_answer = evaluate_python_code(
            code, {"print": print, "range": range, "ord": ord, "chr": chr}, state=state
        )
        assert result is None
923
+
924
    def test_nested_for_loop(self):
        """Nested for loops plus a double-for list comprehension to flatten the result."""
        code = """
all_res = []
for i in range(10):
    subres = []
    for j in range(i):
        subres.append(j)
    all_res.append(subres)

out = [i for sublist in all_res for i in sublist]
out[:10]
"""
        state = {}
        result, is_final_answer = evaluate_python_code(code, {"print": print, "range": range}, state=state)
        assert result == [0, 0, 1, 0, 1, 2, 0, 1, 2, 3]
939
+
940
    def test_pandas(self):
        """pandas interop: boolean-mask filtering, `to_numeric`, `isin`, `groupby`, `loc`/`iloc`."""
        code = """
import pandas as pd

df = pd.DataFrame.from_dict({'SetCount': ['5', '4', '5'], 'Quantity': [1, 0, -1]})

df['SetCount'] = pd.to_numeric(df['SetCount'], errors='coerce')

parts_with_5_set_count = df[df['SetCount'] == 5.0]
parts_with_5_set_count[['Quantity', 'SetCount']].values[1]
"""
        state = {}
        result, _ = evaluate_python_code(code, {}, state=state, authorized_imports=["pandas"])
        assert np.array_equal(result, [-1, 5])

        code = """
import pandas as pd

df = pd.DataFrame.from_dict({"AtomicNumber": [111, 104, 105], "ok": [0, 1, 2]})

# Filter the DataFrame to get only the rows with outdated atomic numbers
filtered_df = df.loc[df['AtomicNumber'].isin([104])]
"""
        result, _ = evaluate_python_code(code, {"print": print}, state={}, authorized_imports=["pandas"])
        assert np.array_equal(result.values[0], [104, 1])

        # Test groupby
        code = """import pandas as pd
data = pd.DataFrame.from_dict([
    {"Pclass": 1, "Survived": 1},
    {"Pclass": 2, "Survived": 0},
    {"Pclass": 2, "Survived": 1}
])
survival_rate_by_class = data.groupby('Pclass')['Survived'].mean()
"""
        result, _ = evaluate_python_code(code, {}, state={}, authorized_imports=["pandas"])
        assert result.values[1] == 0.5

        # Test loc and iloc
        code = """import pandas as pd
data = pd.DataFrame.from_dict([
    {"Pclass": 1, "Survived": 1},
    {"Pclass": 2, "Survived": 0},
    {"Pclass": 2, "Survived": 1}
])
survival_rate_biased = data.loc[data['Survived']==1]['Survived'].mean()
survival_rate_biased = data.loc[data['Survived']==1]['Survived'].mean()
survival_rate_sorted = data.sort_values(by='Survived', ascending=False).iloc[0]
"""
        result, _ = evaluate_python_code(code, {}, state={}, authorized_imports=["pandas"])
990
+
991
    def test_starred(self):
        """Star-unpacking of tuples into a function call (`f(*a, *b)`)."""
        code = """
from math import radians, sin, cos, sqrt, atan2

def haversine(lat1, lon1, lat2, lon2):
    R = 6371000  # Radius of the Earth in meters
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    distance = R * c
    return distance

coords_geneva = (46.1978, 6.1342)
coords_barcelona = (41.3869, 2.1660)

distance_geneva_barcelona = haversine(*coords_geneva, *coords_barcelona)
"""
        result, _ = evaluate_python_code(code, {"print": print, "map": map}, state={}, authorized_imports=["math"])
        assert round(result, 1) == 622395.4
1012
+
1013
    def test_for(self):
        """For loop over `dict.items()` with nested tuple unpacking in the target."""
        code = """
shifts = {
    "Worker A": ("6:45 pm", "8:00 pm"),
    "Worker B": ("10:00 am", "11:45 am")
}

shift_intervals = {}
for worker, (start, end) in shifts.items():
    shift_intervals[worker] = end
shift_intervals
"""
        result, _ = evaluate_python_code(code, {"print": print, "map": map}, state={})
        assert result == {"Worker A": "8:00 pm", "Worker B": "11:45 am"}
1027
+
1028
    def test_syntax_error_points_error(self):
        """Syntax errors are reported with a caret pointing at the offending token."""
        code = "a = ;"
        with pytest.raises(InterpreterError) as e:
            evaluate_python_code(code)
        assert "SyntaxError" in str(e)
        assert "       ^" in str(e)
1034
+
1035
    def test_close_matches_subscript(self):
        """A missing dict key suggests close-matching existing keys in the error message."""
        code = 'capitals = {"Czech Republic": "Prague", "Monaco": "Monaco", "Bhutan": "Thimphu"};capitals["Butan"]'
        with pytest.raises(Exception) as e:
            evaluate_python_code(code)
        assert "Maybe you meant one of these indexes instead" in str(e) and "['Bhutan']" in str(e).replace("\\", "")
1040
+
1041
    def test_dangerous_builtins_calls_are_blocked(self):
        """Both a direct unauthorized import and an `exec`/`compile` escape through
        `callable.__self__` (the builtins module) must be blocked."""
        unsafe_code = "import os"
        dangerous_code = f"""
exec = callable.__self__.exec
compile = callable.__self__.compile
exec(compile('{unsafe_code}', 'no filename', 'exec'))
"""

        with pytest.raises(InterpreterError):
            evaluate_python_code(unsafe_code, static_tools=BASE_PYTHON_TOOLS)

        with pytest.raises(InterpreterError):
            evaluate_python_code(dangerous_code, static_tools=BASE_PYTHON_TOOLS)
1054
+
1055
    def test_final_answer_accepts_kwarg_answer(self):
        """`final_answer` can be called with its argument passed by keyword."""
        code = "final_answer(answer=2)"
        result, _ = evaluate_python_code(code, {"final_answer": (lambda answer: 2 * answer)}, state={})
        assert result == 4
1059
+
1060
    def test_dangerous_builtins_are_callable_if_explicitly_added(self):
        """`eval`/`exec`/`compile` work when the caller explicitly provides them as static tools."""
        dangerous_code = dedent("""
            eval("1 + 1")
            exec(compile("1 + 1", "no filename", "exec"))
        """)
        evaluate_python_code(
            dangerous_code, static_tools={"compile": compile, "eval": eval, "exec": exec} | BASE_PYTHON_TOOLS
        )
1068
+
1069
    def test_can_import_os_if_explicitly_authorized(self):
        """`os` is importable when explicitly listed in authorized_imports."""
        dangerous_code = "import os; os.listdir('./')"
        evaluate_python_code(dangerous_code, authorized_imports=["os"])
1072
+
1073
    def test_can_import_os_if_all_imports_authorized(self):
        """`os` is importable under the wildcard authorization `*`."""
        dangerous_code = "import os; os.listdir('./')"
        evaluate_python_code(dangerous_code, authorized_imports=["*"])
1076
+
1077
    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
    def test_can_import_scipy_if_explicitly_authorized(self):
        """scipy imports cleanly when explicitly authorized."""
        code = "import scipy"
        evaluate_python_code(code, authorized_imports=["scipy"])
1081
+
1082
    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
    def test_can_import_sklearn_if_explicitly_authorized(self):
        """sklearn imports cleanly when explicitly authorized."""
        code = "import sklearn"
        evaluate_python_code(code, authorized_imports=["sklearn"])
1086
+
1087
    def test_function_def_recovers_source_code(self):
        """A function returned via `final_answer` carries its name and unparsed source
        on `__name__`/`__source__`."""
        executor = LocalPythonExecutor([])

        executor.send_tools({"final_answer": FinalAnswerTool()})

        res, _, _ = executor(
            dedent(
                """
                def target_function():
                    return "Hello world"

                final_answer(target_function)
                """
            )
        )
        assert res.__name__ == "target_function"
        # NOTE(review): expected source is the ast-unparsed form (single quotes, 4-space indent).
        assert res.__source__ == "def target_function():\n    return 'Hello world'"
1104
+
1105
    def test_evaluate_class_def_with_pass(self):
        """A class body consisting only of `pass` still yields a usable, mutable instance."""
        code = dedent("""
            class TestClass:
                pass

            instance = TestClass()
            instance.attr = "value"
            result = instance.attr
        """)
        state = {}
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
        assert result == "value"
1117
+
1118
    def test_evaluate_class_def_with_ann_assign_name(self):
        """
        Test evaluate_class_def function when stmt is an instance of ast.AnnAssign with ast.Name target.

        This test verifies that annotated assignments within a class definition are correctly evaluated.
        """
        code = dedent("""
            class TestClass:
                x: int = 5
                y: str = "test"

            instance = TestClass()
            result = (instance.x, instance.y)
        """)

        state = {}
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)

        assert result == (5, "test")
        assert isinstance(state["TestClass"], type)
        # Type objects are not wrapped by safer_func
        for value in state["TestClass"].__annotations__.values():
            assert not hasattr(value, "__wrapped__")
        assert state["TestClass"].__annotations__ == {"x": int, "y": str}
        assert state["TestClass"].x == 5
        assert state["TestClass"].y == "test"
        assert isinstance(state["instance"], state["TestClass"])
        assert state["instance"].x == 5
        assert state["instance"].y == "test"
1147
+
1148
    def test_evaluate_class_def_with_ann_assign_attribute(self):
        """
        Test evaluate_class_def function when stmt is an instance of ast.AnnAssign with ast.Attribute target.

        This test ensures that class attributes using attribute notation are correctly handled.
        """
        code = dedent("""
            class TestSubClass:
                attr = 1
            class TestClass:
                data: TestSubClass = TestSubClass()
                data.attr: str = "value"

            result = TestClass.data.attr
        """)

        state = {}
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)

        assert result == "value"
        assert isinstance(state["TestClass"], type)
        assert state["TestClass"].__annotations__.keys() == {"data"}
        assert isinstance(state["TestClass"].__annotations__["data"], type)
        assert state["TestClass"].__annotations__["data"].__name__ == "TestSubClass"
        assert state["TestClass"].data.attr == "value"
1173
+
1174
    def test_evaluate_class_def_with_ann_assign_subscript(self):
        """
        Test evaluate_class_def function when stmt is an instance of ast.AnnAssign with ast.Subscript target.

        This test ensures that class attributes using subscript notation are correctly handled.
        """
        code = dedent("""
            class TestClass:
                key_data: dict = {}
                key_data["key"]: str = "value"
                index_data: list = [10, 20, 30]
                index_data[0:2]: list[str] = ["a", "b"]

            result = (TestClass.key_data['key'], TestClass.index_data[1:])
        """)

        state = {}
        result, _ = evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)

        assert result == ("value", ["b", 30])
        assert isinstance(state["TestClass"], type)
        # Type objects are not wrapped by safer_func
        for value in state["TestClass"].__annotations__.values():
            assert not hasattr(value, "__wrapped__")
        assert state["TestClass"].__annotations__ == {"key_data": dict, "index_data": list}
        assert state["TestClass"].key_data == {"key": "value"}
        assert state["TestClass"].index_data == ["a", "b", 30]
1201
+
1202
    def test_evaluate_annassign(self):
        """Annotated assignments at module level: values bind, bare annotations don't,
        and annotations never restrict runtime values."""
        code = dedent("""\
            # Basic annotated assignment
            x: int = 42

            # Type annotations with expressions
            y: float = x / 2

            # Type annotation without assignment
            z: list

            # Type annotation with complex value
            names: list = ["Alice", "Bob", "Charlie"]

            # Type hint shouldn't restrict values at runtime
            s: str = 123  # Would be a type error in static checking, but valid at runtime

            # Access the values
            result = (x, y, names, s)
        """)
        state = {}
        evaluate_python_code(code, BASE_PYTHON_TOOLS, state=state)
        assert state["x"] == 42
        assert state["y"] == 21.0
        assert "z" not in state  # z should be not be defined
        assert state["names"] == ["Alice", "Bob", "Charlie"]
        assert state["s"] == 123  # Type hints don't restrict at runtime
        assert state["result"] == (42, 21.0, ["Alice", "Bob", "Charlie"], 123)
1230
+
1231
    @pytest.mark.parametrize(
        "code, expected_result",
        [
            (
                dedent("""\
                    x = 1
                    x += 2
                """),
                3,
            ),
            (
                dedent("""\
                    x = "a"
                    x += "b"
                """),
                "ab",
            ),
            (
                dedent("""\
                    class Custom:
                        def __init__(self, value):
                            self.value = value
                        def __iadd__(self, other):
                            self.value += other * 10
                            return self

                    x = Custom(1)
                    x += 2
                    x.value
                """),
                21,
            ),
        ],
    )
    def test_evaluate_augassign(self, code, expected_result):
        """Augmented assignment on ints, strings, and a class defining `__iadd__`."""
        state = {}
        result, _ = evaluate_python_code(code, {}, state=state)
        assert result == expected_result
1269
+
1270
    @pytest.mark.parametrize(
        "operator, expected_result",
        [
            ("+=", 7),
            ("-=", 3),
            ("*=", 10),
            ("/=", 2.5),
            ("//=", 2),
            ("%=", 1),
            ("**=", 25),
            ("&=", 0),
            ("|=", 7),
            ("^=", 7),
            (">>=", 1),
            ("<<=", 20),
        ],
    )
    def test_evaluate_augassign_number(self, operator, expected_result):
        """Every augmented-assignment operator applied to `x = 5` with operand 2."""
        code = dedent("""\
            x = 5
            x {operator} 2
        """).format(operator=operator)
        state = {}
        result, _ = evaluate_python_code(code, {}, state=state)
        assert result == expected_result
1295
+
1296
+ @pytest.mark.parametrize(
1297
+ "operator, expected_result",
1298
+ [
1299
+ ("+=", 7),
1300
+ ("-=", 3),
1301
+ ("*=", 10),
1302
+ ("/=", 2.5),
1303
+ ("//=", 2),
1304
+ ("%=", 1),
1305
+ ("**=", 25),
1306
+ ("&=", 0),
1307
+ ("|=", 7),
1308
+ ("^=", 7),
1309
+ (">>=", 1),
1310
+ ("<<=", 20),
1311
+ ],
1312
+ )
1313
+ def test_evaluate_augassign_custom(self, operator, expected_result):
1314
+ operator_names = {
1315
+ "+=": "iadd",
1316
+ "-=": "isub",
1317
+ "*=": "imul",
1318
+ "/=": "itruediv",
1319
+ "//=": "ifloordiv",
1320
+ "%=": "imod",
1321
+ "**=": "ipow",
1322
+ "&=": "iand",
1323
+ "|=": "ior",
1324
+ "^=": "ixor",
1325
+ ">>=": "irshift",
1326
+ "<<=": "ilshift",
1327
+ }
1328
+ code = dedent("""\
1329
+ class Custom:
1330
+ def __init__(self, value):
1331
+ self.value = value
1332
+ def __{operator_name}__(self, other):
1333
+ self.value {operator} other
1334
+ return self
1335
+
1336
+ x = Custom(5)
1337
+ x {operator} 2
1338
+ x.value
1339
+ """).format(operator=operator, operator_name=operator_names[operator])
1340
+ state = {}
1341
+ result, _ = evaluate_python_code(code, {}, state=state)
1342
+ assert result == expected_result
1343
+
1344
    @pytest.mark.parametrize(
        "code, expected_error_message",
        [
            (
                dedent("""\
                    x = 5
                    del x
                    x
                """),
                "The variable `x` is not defined",
            ),
            (
                dedent("""\
                    x = [1, 2, 3]
                    del x[2]
                    x[2]
                """),
                "IndexError: list index out of range",
            ),
            (
                dedent("""\
                    x = {"key": "value"}
                    del x["key"]
                    x["key"]
                """),
                "Could not index {} with 'key'",
            ),
            (
                dedent("""\
                    del x
                """),
                "Cannot delete name 'x': name is not defined",
            ),
        ],
    )
    def test_evaluate_delete(self, code, expected_error_message):
        """`del` on names, list indices, and dict keys; subsequent access raises the expected error."""
        state = {}
        with pytest.raises(InterpreterError) as exception_info:
            evaluate_python_code(code, {}, state=state)
        assert expected_error_message in str(exception_info.value)
1384
+
1385
    def test_non_standard_comparisons(self):
        """`__eq__` returning a non-bool object must be propagated, not coerced to bool."""
        code = dedent("""\
            class NonStdEqualsResult:
                def __init__(self, left:object, right:object):
                    self._left = left
                    self._right = right
                def __str__(self) -> str:
                    return f'{self._left} == {self._right}'

            class NonStdComparisonClass:
                def __init__(self, value: str ):
                    self._value = value
                def __str__(self):
                    return self._value
                def __eq__(self, other):
                    return NonStdEqualsResult(self, other)
            a = NonStdComparisonClass("a")
            b = NonStdComparisonClass("b")
            result = a == b
        """)
        result, _ = evaluate_python_code(code, state={})
        assert not isinstance(result, bool)
        assert str(result) == "a == b"
1408
+
1409
+
1410
class TestEvaluateBoolop:
    """Unit tests for `evaluate_boolop`, exercising every truthiness combination of three operands."""

    @pytest.mark.parametrize("a", [1, 0])
    @pytest.mark.parametrize("b", [2, 0])
    @pytest.mark.parametrize("c", [3, 0])
    def test_evaluate_boolop_and(self, a, b, c):
        # Parse the `and` chain and check the evaluator matches native Python semantics.
        node = ast.parse("a and b and c").body[0].value
        variables = {"a": a, "b": b, "c": c}
        assert evaluate_boolop(node, variables, {}, {}, []) == (a and b and c)

    @pytest.mark.parametrize("a", [1, 0])
    @pytest.mark.parametrize("b", [2, 0])
    @pytest.mark.parametrize("c", [3, 0])
    def test_evaluate_boolop_or(self, a, b, c):
        # Parse the `or` chain and check the evaluator matches native Python semantics.
        node = ast.parse("a or b or c").body[0].value
        variables = {"a": a, "b": b, "c": c}
        assert evaluate_boolop(node, variables, {}, {}, []) == (a or b or c)
1428
+
1429
+
1430
class TestEvaluateDelete:
    """Unit tests for evaluate_delete on `del` statements over names, list indices and dict keys."""

    @pytest.mark.parametrize(
        "code, state, expectation",
        [
            ("del x", {"x": 1}, {}),
            ("del x[1]", {"x": [1, 2, 3]}, {"x": [1, 3]}),
            ("del x['key']", {"x": {"key": "value"}}, {"x": {}}),
            ("del x", {}, InterpreterError("Cannot delete name 'x': name is not defined")),
        ],
    )
    def test_evaluate_delete(self, code, state, expectation):
        """`expectation` is either the expected post-delete state dict or the expected exception."""
        delete_node = ast.parse(code).body[0]
        if isinstance(expectation, Exception):
            with pytest.raises(type(expectation)) as exception_info:
                evaluate_delete(delete_node, state, {}, {}, [])
            assert str(expectation) in str(exception_info.value)
        else:
            evaluate_delete(delete_node, state, {}, {}, [])
            # The interpreter may record an internal operation counter in state; drop it before comparing.
            _ = state.pop("_operations_count", None)
            assert state == expectation
1450
+
1451
+
1452
class TestEvaluateCondition:
    """Unit tests for evaluate_condition covering all comparison operators, chaining, and pandas operands."""

    @pytest.mark.parametrize(
        "condition, state, expected_result",
        [
            ("a == b", {"a": 1, "b": 1}, True),
            ("a == b", {"a": 1, "b": 2}, False),
            ("a != b", {"a": 1, "b": 1}, False),
            ("a != b", {"a": 1, "b": 2}, True),
            ("a < b", {"a": 1, "b": 1}, False),
            ("a < b", {"a": 1, "b": 2}, True),
            ("a < b", {"a": 2, "b": 1}, False),
            ("a <= b", {"a": 1, "b": 1}, True),
            ("a <= b", {"a": 1, "b": 2}, True),
            ("a <= b", {"a": 2, "b": 1}, False),
            ("a > b", {"a": 1, "b": 1}, False),
            ("a > b", {"a": 1, "b": 2}, False),
            ("a > b", {"a": 2, "b": 1}, True),
            ("a >= b", {"a": 1, "b": 1}, True),
            ("a >= b", {"a": 1, "b": 2}, False),
            ("a >= b", {"a": 2, "b": 1}, True),
            ("a is b", {"a": 1, "b": 1}, True),
            ("a is b", {"a": 1, "b": 2}, False),
            ("a is not b", {"a": 1, "b": 1}, False),
            ("a is not b", {"a": 1, "b": 2}, True),
            ("a in b", {"a": 1, "b": [1, 2, 3]}, True),
            ("a in b", {"a": 4, "b": [1, 2, 3]}, False),
            ("a not in b", {"a": 1, "b": [1, 2, 3]}, False),
            ("a not in b", {"a": 4, "b": [1, 2, 3]}, True),
            # Chained conditions:
            ("a == b == c", {"a": 1, "b": 1, "c": 1}, True),
            ("a == b == c", {"a": 1, "b": 2, "c": 1}, False),
            ("a == b < c", {"a": 2, "b": 2, "c": 2}, False),
            ("a == b < c", {"a": 0, "b": 0, "c": 1}, True),
        ],
    )
    def test_evaluate_condition(self, condition, state, expected_result):
        """Scalar comparisons (including chained ones) must match native Python results."""
        condition_ast = ast.parse(condition, mode="eval").body
        result = evaluate_condition(condition_ast, state, {}, {}, [])
        assert result == expected_result

    @pytest.mark.parametrize(
        "condition, state, expected_result",
        [
            ("a == b", {"a": pd.Series([1, 2, 3]), "b": pd.Series([2, 2, 2])}, pd.Series([False, True, False])),
            ("a != b", {"a": pd.Series([1, 2, 3]), "b": pd.Series([2, 2, 2])}, pd.Series([True, False, True])),
            ("a < b", {"a": pd.Series([1, 2, 3]), "b": pd.Series([2, 2, 2])}, pd.Series([True, False, False])),
            ("a <= b", {"a": pd.Series([1, 2, 3]), "b": pd.Series([2, 2, 2])}, pd.Series([True, True, False])),
            ("a > b", {"a": pd.Series([1, 2, 3]), "b": pd.Series([2, 2, 2])}, pd.Series([False, False, True])),
            ("a >= b", {"a": pd.Series([1, 2, 3]), "b": pd.Series([2, 2, 2])}, pd.Series([False, True, True])),
            (
                "a == b",
                {"a": pd.DataFrame({"x": [1, 2], "y": [3, 4]}), "b": pd.DataFrame({"x": [1, 2], "y": [3, 5]})},
                pd.DataFrame({"x": [True, True], "y": [True, False]}),
            ),
            (
                "a != b",
                {"a": pd.DataFrame({"x": [1, 2], "y": [3, 4]}), "b": pd.DataFrame({"x": [1, 2], "y": [3, 5]})},
                pd.DataFrame({"x": [False, False], "y": [False, True]}),
            ),
            (
                "a < b",
                {"a": pd.DataFrame({"x": [1, 2], "y": [3, 4]}), "b": pd.DataFrame({"x": [2, 2], "y": [2, 2]})},
                pd.DataFrame({"x": [True, False], "y": [False, False]}),
            ),
            (
                "a <= b",
                {"a": pd.DataFrame({"x": [1, 2], "y": [3, 4]}), "b": pd.DataFrame({"x": [2, 2], "y": [2, 2]})},
                pd.DataFrame({"x": [True, True], "y": [False, False]}),
            ),
            (
                "a > b",
                {"a": pd.DataFrame({"x": [1, 2], "y": [3, 4]}), "b": pd.DataFrame({"x": [2, 2], "y": [2, 2]})},
                pd.DataFrame({"x": [False, False], "y": [True, True]}),
            ),
            (
                "a >= b",
                {"a": pd.DataFrame({"x": [1, 2], "y": [3, 4]}), "b": pd.DataFrame({"x": [2, 2], "y": [2, 2]})},
                pd.DataFrame({"x": [False, True], "y": [True, True]}),
            ),
        ],
    )
    def test_evaluate_condition_with_pandas(self, condition, state, expected_result):
        """Comparisons on pandas objects must return elementwise Series/DataFrame results."""
        condition_ast = ast.parse(condition, mode="eval").body
        result = evaluate_condition(condition_ast, state, {}, {}, [])
        if isinstance(result, pd.Series):
            pd.testing.assert_series_equal(result, expected_result)
        else:
            pd.testing.assert_frame_equal(result, expected_result)

    @pytest.mark.parametrize(
        "condition, state, expected_exception",
        [
            # Chained conditions: chaining needs a truth value of the intermediate result, which pandas refuses.
            (
                "a == b == c",
                {
                    "a": pd.Series([1, 2, 3]),
                    "b": pd.Series([2, 2, 2]),
                    "c": pd.Series([3, 3, 3]),
                },
                ValueError(
                    "The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
                ),
            ),
            (
                "a == b == c",
                {
                    "a": pd.DataFrame({"x": [1, 2], "y": [3, 4]}),
                    "b": pd.DataFrame({"x": [2, 2], "y": [2, 2]}),
                    "c": pd.DataFrame({"x": [3, 3], "y": [3, 3]}),
                },
                ValueError(
                    "The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()."
                ),
            ),
        ],
    )
    def test_evaluate_condition_with_pandas_exceptions(self, condition, state, expected_exception):
        """Chained comparisons over pandas objects must surface pandas' ambiguous-truth-value error."""
        condition_ast = ast.parse(condition, mode="eval").body
        with pytest.raises(type(expected_exception)) as exception_info:
            _ = evaluate_condition(condition_ast, state, {}, {}, [])
        assert str(expected_exception) in str(exception_info.value)
1574
+
1575
+
1576
class TestEvaluateSubscript:
    """Unit tests for evaluate_subscript over dict/list/tuple/str/numpy/pandas indexing."""

    @pytest.mark.parametrize(
        "subscript, state, expected_result",
        [
            ("dct[1]", {"dct": {1: 11, 2: 22}}, 11),
            ("dct[2]", {"dct": {1: "a", 2: "b"}}, "b"),
            ("dct['b']", {"dct": {"a": 1, "b": 2}}, 2),
            ("dct['a']", {"dct": {"a": "aa", "b": "bb"}}, "aa"),
            ("dct[1, 2]", {"dct": {(1, 2): 3}}, 3),  # tuple-index
            ("dct['a']['b']", {"dct": {"a": {"b": 1}}}, 1),  # nested
            ("lst[0]", {"lst": [1, 2, 3]}, 1),
            ("lst[-1]", {"lst": [1, 2, 3]}, 3),
            ("lst[1:3]", {"lst": [1, 2, 3, 4]}, [2, 3]),
            ("lst[:]", {"lst": [1, 2, 3]}, [1, 2, 3]),
            ("lst[::2]", {"lst": [1, 2, 3, 4]}, [1, 3]),
            ("lst[::-1]", {"lst": [1, 2, 3]}, [3, 2, 1]),
            ("tup[1]", {"tup": (1, 2, 3)}, 2),
            ("tup[-1]", {"tup": (1, 2, 3)}, 3),
            ("tup[1:3]", {"tup": (1, 2, 3, 4)}, (2, 3)),
            ("tup[:]", {"tup": (1, 2, 3)}, (1, 2, 3)),
            ("tup[::2]", {"tup": (1, 2, 3, 4)}, (1, 3)),
            ("tup[::-1]", {"tup": (1, 2, 3)}, (3, 2, 1)),
            # Fix: the state key must match the expression name "st" ("str" left `st` undefined,
            # so these cases could never evaluate the intended string subscript).
            ("st[1]", {"st": "abc"}, "b"),
            ("st[-1]", {"st": "abc"}, "c"),
            ("st[1:3]", {"st": "abcd"}, "bc"),
            ("st[:]", {"st": "abc"}, "abc"),
            ("st[::2]", {"st": "abcd"}, "ac"),
            ("st[::-1]", {"st": "abc"}, "cba"),
            ("arr[1]", {"arr": np.array([1, 2, 3])}, 2),
            ("arr[1:3]", {"arr": np.array([1, 2, 3, 4])}, np.array([2, 3])),
            ("arr[:]", {"arr": np.array([1, 2, 3])}, np.array([1, 2, 3])),
            ("arr[::2]", {"arr": np.array([1, 2, 3, 4])}, np.array([1, 3])),
            ("arr[::-1]", {"arr": np.array([1, 2, 3])}, np.array([3, 2, 1])),
            ("arr[1, 2]", {"arr": np.array([[1, 2, 3], [4, 5, 6]])}, 6),
            ("ser[1]", {"ser": pd.Series([1, 2, 3])}, 2),
            ("ser.loc[1]", {"ser": pd.Series([1, 2, 3])}, 2),
            ("ser.loc[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 3),
            ("ser.iloc[1]", {"ser": pd.Series([1, 2, 3])}, 2),
            ("ser.iloc[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 2),
            ("ser.at[1]", {"ser": pd.Series([1, 2, 3])}, 2),
            ("ser.at[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 3),
            ("ser.iat[1]", {"ser": pd.Series([1, 2, 3])}, 2),
            ("ser.iat[1]", {"ser": pd.Series([1, 2, 3], index=[2, 3, 1])}, 2),
            ("ser[1:3]", {"ser": pd.Series([1, 2, 3, 4])}, pd.Series([2, 3], index=[1, 2])),
            ("ser[:]", {"ser": pd.Series([1, 2, 3])}, pd.Series([1, 2, 3])),
            ("ser[::2]", {"ser": pd.Series([1, 2, 3, 4])}, pd.Series([1, 3], index=[0, 2])),
            ("ser[::-1]", {"ser": pd.Series([1, 2, 3])}, pd.Series([3, 2, 1], index=[2, 1, 0])),
            ("df['y'][1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
            ("df['y'][5]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3),
            ("df.loc[1, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
            ("df.loc[5, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3),
            ("df.iloc[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
            ("df.iloc[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 4),
            ("df.at[1, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
            ("df.at[5, 'y']", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 3),
            ("df.iat[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]})}, 4),
            ("df.iat[1, 1]", {"df": pd.DataFrame({"x": [1, 2], "y": [3, 4]}, index=[5, 6])}, 4),
        ],
    )
    def test_evaluate_subscript(self, subscript, state, expected_result):
        """Indexing expressions must return the same value as native Python/numpy/pandas indexing."""
        subscript_ast = ast.parse(subscript).body[0].value
        result = evaluate_subscript(subscript_ast, state, {}, {}, [])
        try:
            assert result == expected_result
        except ValueError:
            # numpy/pandas `==` is elementwise and has no scalar truth value: compare elementwise instead.
            assert (result == expected_result).all()

    @pytest.mark.parametrize(
        "subscript, state, expected_error_message",
        [
            ("dct['a']", {"dct": {}}, "KeyError: 'a'"),
            ("dct[0]", {"dct": {}}, "KeyError: 0"),
            ("dct['c']", {"dct": {"a": 1, "b": 2}}, "KeyError: 'c'"),
            ("dct[1, 2, 3]", {"dct": {(1, 2): 3}}, "KeyError: (1, 2, 3)"),
            ("lst[0]", {"lst": []}, "IndexError: list index out of range"),
            ("lst[3]", {"lst": [1, 2, 3]}, "IndexError: list index out of range"),
            ("lst[-4]", {"lst": [1, 2, 3]}, "IndexError: list index out of range"),
            ("value[0]", {"value": 1}, "TypeError: 'int' object is not subscriptable"),
        ],
    )
    def test_evaluate_subscript_error(self, subscript, state, expected_error_message):
        """Failed indexing must raise InterpreterError wrapping the underlying Key/Index/TypeError text."""
        subscript_ast = ast.parse(subscript).body[0].value
        with pytest.raises(InterpreterError, match="Could not index") as exception_info:
            _ = evaluate_subscript(subscript_ast, state, {}, {}, [])
        assert expected_error_message in str(exception_info.value)

    @pytest.mark.parametrize(
        "subscriptable_class, expectation",
        [
            (True, 20),
            (False, InterpreterError("TypeError: 'Custom' object is not subscriptable")),
        ],
    )
    def test_evaluate_subscript_with_custom_class(self, subscriptable_class, expectation):
        """A user class is subscriptable iff it defines __getitem__; otherwise InterpreterError is raised."""
        if subscriptable_class:

            class Custom:
                def __getitem__(self, key):
                    return key * 10
        else:

            class Custom:
                pass

        state = {"obj": Custom()}
        subscript = "obj[2]"
        subscript_ast = ast.parse(subscript).body[0].value
        if isinstance(expectation, Exception):
            with pytest.raises(type(expectation), match="Could not index") as exception_info:
                evaluate_subscript(subscript_ast, state, {}, {}, [])
            assert "TypeError: 'Custom' object is not subscriptable" in str(exception_info.value)
        else:
            result = evaluate_subscript(subscript_ast, state, {}, {}, [])
            assert result == expectation
1690
+
1691
+
1692
def test_get_safe_module_handle_lazy_imports():
    """get_safe_module must skip attributes whose access raises ImportError (lazy imports) and keep the rest."""

    class FakeModule(types.ModuleType):
        def __init__(self, name):
            super().__init__(name)
            self.non_lazy_attribute = "ok"

        def __getattr__(self, name):
            # Simulate a lazily-imported attribute whose import fails on first access.
            if name == "lazy_attribute":
                raise ImportError("lazy import failure")
            return super().__getattr__(name)

        def __dir__(self):
            # Advertise the lazy attribute so get_safe_module tries to copy it.
            return super().__dir__() + ["lazy_attribute"]

    fake_module = FakeModule("fake_module")
    safe_module = get_safe_module(fake_module, authorized_imports=set())
    # The failing lazy attribute is dropped; ordinary attributes survive.
    assert not hasattr(safe_module, "lazy_attribute")
    assert getattr(safe_module, "non_lazy_attribute") == "ok"
1710
+
1711
+
1712
class TestPrintContainer:
    """Behavioral tests for the PrintContainer text accumulator."""

    def test_initial_value(self):
        """A freshly constructed container holds the empty string."""
        container = PrintContainer()
        assert container.value == ""

    def test_append(self):
        """append() stores the given text in `value`."""
        container = PrintContainer()
        container.append("Hello")
        assert container.value == "Hello"

    def test_iadd(self):
        """In-place `+=` accumulates text like append()."""
        container = PrintContainer()
        container += "World"
        assert container.value == "World"

    def test_str(self):
        """str() yields the accumulated text verbatim."""
        container = PrintContainer()
        container.append("Hello")
        assert str(container) == "Hello"

    def test_repr(self):
        """repr() wraps the accumulated text as PrintContainer(...)."""
        container = PrintContainer()
        container.append("Hello")
        assert repr(container) == "PrintContainer(Hello)"

    def test_len(self):
        """len() reports the number of accumulated characters."""
        container = PrintContainer()
        container.append("Hello")
        assert len(container) == 5
1741
+
1742
+
1743
def test_fix_final_answer_code():
    """fix_final_answer_code must rename `final_answer` *variables* (assignment targets) to
    `final_answer_variable` while leaving `final_answer(...)` calls and attribute accesses untouched."""
    # (input_code, expected_rewritten_code) pairs.
    test_cases = [
        (
            "final_answer = 3.21\nfinal_answer(final_answer)",
            "final_answer_variable = 3.21\nfinal_answer(final_answer_variable)",
        ),
        (
            "x = final_answer(5)\nfinal_answer = x + 1\nfinal_answer(final_answer)",
            "x = final_answer(5)\nfinal_answer_variable = x + 1\nfinal_answer(final_answer_variable)",
        ),
        (
            "def func():\n    final_answer = 42\n    return final_answer(final_answer)",
            "def func():\n    final_answer_variable = 42\n    return final_answer(final_answer_variable)",
        ),
        (
            "final_answer(5)  # Should not change function calls",
            "final_answer(5)  # Should not change function calls",
        ),
        (
            "obj.final_answer = 5  # Should not change object attributes",
            "obj.final_answer = 5  # Should not change object attributes",
        ),
        (
            "final_answer=3.21;final_answer(final_answer)",
            "final_answer_variable=3.21;final_answer(final_answer_variable)",
        ),
    ]

    for i, (input_code, expected) in enumerate(test_cases, 1):
        result = fix_final_answer_code(input_code)
        assert result == expected, f"""
Test case {i} failed:
Input:    {input_code}
Expected: {expected}
Got:      {result}
"""
1779
+
1780
+
1781
@pytest.mark.parametrize(
    "module,authorized_imports,expected",
    [
        ("os", ["other", "*"], True),
        ("AnyModule", ["*"], True),
        ("os", ["os"], True),
        ("AnyModule", ["AnyModule"], True),
        ("Module.os", ["Module"], False),
        ("Module.os", ["Module", "Module.os"], True),
        ("os.path", ["os.*"], True),
        ("os", ["os.path"], True),
    ],
)
def test_check_import_authorized(module: str, authorized_imports: list[str], expected: bool):
    """check_import_authorized must honor exact names, `*` wildcards, and parent/submodule rules."""
    assert check_import_authorized(module, authorized_imports) == expected
1796
+
1797
+
1798
class TestLocalPythonExecutor:
    """Functional tests for LocalPythonExecutor: state setup, calls, assignments, builtins."""

    def test_state_name(self):
        """The executor's state must expose __name__ == "__main__" like a top-level script."""
        executor = LocalPythonExecutor(additional_authorized_imports=[])
        assert executor.state.get("__name__") == "__main__"

    @pytest.mark.parametrize(
        "code",
        [
            "d = {'func': lambda x: x + 10}; func = d['func']; func(1)",
            "d = {'func': lambda x: x + 10}; d['func'](1)",
        ],
    )
    def test_call_from_dict(self, code):
        """Callables stored in dict values must be callable both directly and via a bound name."""
        executor = LocalPythonExecutor([])
        result, _, _ = executor(code)
        assert result == 11

    @pytest.mark.parametrize(
        "code",
        [
            "a = b = 1; a",
            "a = b = 1; b",
            "a, b = c, d = 1, 1; a",
            "a, b = c, d = 1, 1; b",
            "a, b = c, d = 1, 1; c",
            "a, b = c, d = {1, 2}; a",
            "a, b = c, d = {1, 2}; c",
            "a, b = c, d = {1: 10, 2: 20}; a",
            "a, b = c, d = {1: 10, 2: 20}; c",
            "a = b = (lambda: 1)(); b",
            "a = b = (lambda: 1)(); lambda x: 10; b",
            "a = b = (lambda x: lambda y: x + y)(0)(1); b",
            dedent("""
                def foo():
                    return 1;
                a = b = foo(); b"""),
            dedent("""
                def foo(*args, **kwargs):
                    return sum(args)
                a = b = foo(1,-1,1); b"""),
            "a, b = 1, 2; a, b = b, a; b",
        ],
    )
    def test_chained_assignments(self, code):
        """Chained and unpacking assignments must bind every target; each snippet evaluates to 1."""
        executor = LocalPythonExecutor([])
        executor.send_tools({})
        result, _, _ = executor(code)
        assert result == 1

    def test_evaluate_assign_error(self):
        """Unpacking a tuple into the wrong number of targets must raise InterpreterError."""
        code = "a, b = 1, 2, 3; a"
        executor = LocalPythonExecutor([])
        with pytest.raises(InterpreterError, match=".*Cannot unpack tuple of wrong size"):
            executor(code)

    def test_function_def_recovers_source_code(self):
        """A function returned via final_answer must carry its name and recovered source in __source__."""
        executor = LocalPythonExecutor([])
        executor.send_tools({"final_answer": FinalAnswerTool()})
        res, _, _ = executor(
            dedent(
                """
                def target_function():
                    return "Hello world"

                final_answer(target_function)
                """
            )
        )
        assert res.__name__ == "target_function"
        assert res.__source__ == "def target_function():\n    return 'Hello world'"

    @pytest.mark.parametrize(
        "code, expected_result",
        [("isinstance(5, int)", True), ("isinstance('foo', str)", True), ("isinstance(5, str)", False)],
    )
    def test_isinstance_builtin_type(self, code, expected_result):
        """isinstance with builtin types must work inside the sandboxed interpreter."""
        executor = LocalPythonExecutor([])
        executor.send_tools({})
        result, _, _ = executor(code)
        assert result is expected_result
1878
+
1879
+
1880
+ class TestLocalPythonExecutorSecurity:
1881
    @pytest.mark.parametrize(
        "additional_authorized_imports, expected_error",
        [([], InterpreterError("Import of os is not allowed")), (["os"], None)],
    )
    def test_vulnerability_import(self, additional_authorized_imports, expected_error):
        """`import os` must be rejected unless "os" is explicitly authorized."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        # Expect the error when one is configured, otherwise expect clean execution.
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor("import os")
1893
+
1894
    @pytest.mark.parametrize(
        "additional_authorized_imports, expected_error",
        [([], InterpreterError("Import of builtins is not allowed")), (["builtins"], None)],
    )
    def test_vulnerability_builtins(self, additional_authorized_imports, expected_error):
        """`import builtins` must be rejected unless "builtins" is explicitly authorized."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor("import builtins")
1906
+
1907
    @pytest.mark.parametrize(
        "additional_authorized_imports, expected_error",
        [([], InterpreterError("Import of builtins is not allowed")), (["builtins"], None)],
    )
    def test_vulnerability_builtins_safe_functions(self, additional_authorized_imports, expected_error):
        """Safe builtins (print) must remain usable once the builtins import is authorized."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor("import builtins; builtins.print(1)")
1919
+
1920
    @pytest.mark.parametrize(
        "additional_authorized_imports, additional_tools, expected_error",
        [
            ([], [], InterpreterError("Import of builtins is not allowed")),
            (["builtins"], [], InterpreterError("Forbidden access to function: exec")),
            (["builtins"], ["exec"], None),
        ],
    )
    def test_vulnerability_builtins_dangerous_functions(
        self, additional_authorized_imports, additional_tools, expected_error
    ):
        """builtins.exec must stay blocked unless it is explicitly exposed as a tool."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        if additional_tools:
            from builtins import exec

            executor.send_tools({"exec": exec})
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor("import builtins; builtins.exec")
1942
+
1943
    @pytest.mark.parametrize(
        "additional_authorized_imports, additional_tools, expected_error",
        [
            ([], [], InterpreterError("Import of os is not allowed")),
            (["os"], [], InterpreterError("Forbidden access to function: popen")),
            (["os"], ["popen"], None),
        ],
    )
    def test_vulnerability_dangerous_functions(self, additional_authorized_imports, additional_tools, expected_error):
        """os.popen must stay blocked even when "os" is importable, unless exposed as a tool."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        if additional_tools:
            from os import popen

            executor.send_tools({"popen": popen})
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor("import os; os.popen")
1963
+
1964
    @pytest.mark.parametrize("dangerous_function", DANGEROUS_FUNCTIONS)
    def test_vulnerability_for_all_dangerous_functions(self, dangerous_function):
        """Every entry of DANGEROUS_FUNCTIONS must be blocked even when its module is authorized."""
        dangerous_module_name, dangerous_function_name = dangerous_function.rsplit(".", 1)
        # Skip test if module is not installed: posix module is not installed on Windows
        pytest.importorskip(dangerous_module_name)
        executor = LocalPythonExecutor([dangerous_module_name])
        # Dunder names are reported through the dunder-attribute guard, plain names through the function guard.
        if "__" in dangerous_function_name:
            error_match = f".*Forbidden access to dunder attribute: {dangerous_function_name}"
        else:
            error_match = f".*Forbidden access to function: {dangerous_function_name}.*"
        with pytest.raises(InterpreterError, match=error_match):
            executor(f"import {dangerous_module_name}; {dangerous_function}")
1976
+
1977
    @pytest.mark.parametrize(
        "additional_authorized_imports, expected_error",
        [
            ([], InterpreterError("Import of sys is not allowed")),
            (["sys"], InterpreterError("Forbidden access to module: os")),
            (["sys", "os"], None),
        ],
    )
    def test_vulnerability_via_sys(self, additional_authorized_imports, expected_error):
        """Reaching os through sys.modules must require both "sys" and "os" to be authorized."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor(
                dedent(
                    """
                    import sys
                    sys.modules["os"].system(":")
                    """
                )
            )
2000
+
2001
    @pytest.mark.parametrize("dangerous_module", DANGEROUS_MODULES)
    def test_vulnerability_via_sys_for_all_dangerous_modules(self, dangerous_module):
        """Every DANGEROUS_MODULES entry must be unreachable through sys.modules lookups."""
        import sys

        # "sys" itself is authorized below, and absent modules cannot be looked up anyway.
        if dangerous_module not in sys.modules or dangerous_module == "sys":
            pytest.skip("module not present in sys.modules")
        executor = LocalPythonExecutor(["sys"])
        with pytest.raises(InterpreterError) as exception_info:
            executor(
                dedent(
                    f"""
                    import sys
                    sys.modules["{dangerous_module}"]
                    """
                )
            )
        assert f"Forbidden access to module: {dangerous_module}" in str(exception_info.value)
2018
+
2019
    @pytest.mark.parametrize(
        "additional_authorized_imports, expected_error",
        [(["importlib"], InterpreterError("Forbidden access to module: os")), (["importlib", "os"], None)],
    )
    def test_vulnerability_via_importlib(self, additional_authorized_imports, expected_error):
        """importlib.import_module must not bypass the import allow-list."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor(
                dedent(
                    """
                    import importlib
                    importlib.import_module("os").system(":")
                    """
                )
            )
2038
+
2039
    @pytest.mark.parametrize(
        "code, additional_authorized_imports, expected_error",
        [
            # os submodule
            (
                "import queue; queue.threading._os.system(':')",
                [],
                InterpreterError("Forbidden access to module: threading"),
            ),
            (
                "import queue; queue.threading._os.system(':')",
                ["threading"],
                InterpreterError("Forbidden access to module: os"),
            ),
            ("import random; random._os.system(':')", [], InterpreterError("Forbidden access to module: os")),
            (
                "import random; random.__dict__['_os'].system(':')",
                [],
                InterpreterError("Forbidden access to dunder attribute: __dict__"),
            ),
            (
                "import doctest; doctest.inspect.os.system(':')",
                ["doctest"],
                InterpreterError("Forbidden access to module: inspect"),
            ),
            (
                "import doctest; doctest.inspect.os.system(':')",
                ["doctest", "inspect"],
                InterpreterError("Forbidden access to module: os"),
            ),
            # subprocess submodule
            (
                "import asyncio; asyncio.base_events.events.subprocess",
                ["asyncio"],
                InterpreterError("Forbidden access to module: asyncio.base_events"),
            ),
            (
                "import asyncio; asyncio.base_events.events.subprocess",
                ["asyncio", "asyncio.base_events"],
                InterpreterError("Forbidden access to module: asyncio.events"),
            ),
            (
                "import asyncio; asyncio.base_events.events.subprocess",
                ["asyncio", "asyncio.base_events", "asyncio.base_events.events"],
                InterpreterError("Forbidden access to module: asyncio.events"),
            ),
            # sys submodule
            (
                "import queue; queue.threading._sys.modules['os'].system(':')",
                [],
                InterpreterError("Forbidden access to module: threading"),
            ),
            (
                "import queue; queue.threading._sys.modules['os'].system(':')",
                ["threading"],
                InterpreterError("Forbidden access to module: sys"),
            ),
            ("import warnings; warnings.sys", ["warnings"], InterpreterError("Forbidden access to module: sys")),
            # Allowed
            ("import pandas; pandas.io", ["pandas", "pandas.io"], None),
        ],
    )
    def test_vulnerability_via_submodules(self, code, additional_authorized_imports, expected_error):
        """Unauthorized modules must stay unreachable via attribute chains through authorized modules."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor(code)
2109
+
2110
    @pytest.mark.parametrize(
        "code, additional_authorized_imports, expected_error",
        [
            # Using filter with functools.partial
            (
                dedent(
                    """
                    import functools
                    import warnings
                    list(filter(functools.partial(getattr, warnings), ["sys"]))
                    """
                ),
                ["warnings", "functools"],
                InterpreterError("Forbidden access to module: sys"),
            ),
            # Using map
            (
                dedent(
                    """
                    import warnings
                    list(map(getattr, [warnings], ["sys"]))
                    """
                ),
                ["warnings"],
                InterpreterError("Forbidden access to module: sys"),
            ),
            # Using map with functools.partial
            (
                dedent(
                    """
                    import functools
                    import warnings
                    list(map(functools.partial(getattr, warnings), ["sys"]))
                    """
                ),
                ["warnings", "functools"],
                InterpreterError("Forbidden access to module: sys"),
            ),
        ],
    )
    def test_vulnerability_via_submodules_through_indirect_attribute_access(
        self, code, additional_authorized_imports, expected_error
    ):
        """getattr smuggled through filter/map/partial must not reach forbidden submodules (e.g. warnings.sys)."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        executor.send_tools({})
        with pytest.raises(type(expected_error), match=f".*{expected_error}"):
            executor(code)
2158
+
2159
    @pytest.mark.parametrize(
        "additional_authorized_imports, additional_tools, expected_error",
        [
            ([], [], InterpreterError("Import of sys is not allowed")),
            (["sys"], [], InterpreterError("Forbidden access to module: builtins")),
            (
                ["sys", "builtins"],
                [],
                InterpreterError("Forbidden access to function: __import__"),
            ),
            (["sys", "builtins"], ["__import__"], InterpreterError("Forbidden access to module: os")),
            (["sys", "builtins", "os"], ["__import__"], None),
        ],
    )
    def test_vulnerability_builtins_via_sys(self, additional_authorized_imports, additional_tools, expected_error):
        """The sys._getframe().f_builtins escape must be blocked at every layer (sys, builtins, __import__, os)."""
        executor = LocalPythonExecutor(additional_authorized_imports)
        if additional_tools:
            from builtins import __import__

            executor.send_tools({"__import__": __import__})
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor(
                dedent(
                    """
                    import sys
                    builtins = sys._getframe().f_builtins
                    builtins_import = builtins["__import__"]
                    os_module = builtins_import("os")
                    os_module.system(":")
                    """
                )
            )
2195
+
2196
    @pytest.mark.parametrize("patch_builtin_import_module", [False, True])  # builtins_import.__module__ = None
    @pytest.mark.parametrize(
        "additional_authorized_imports, additional_tools, expected_error",
        [
            ([], [], InterpreterError("Forbidden access to dunder attribute: __traceback__")),
            (
                ["builtins", "os"],
                ["__import__"],
                InterpreterError("Forbidden access to dunder attribute: __traceback__"),
            ),
        ],
    )
    def test_vulnerability_builtins_via_traceback(
        self, patch_builtin_import_module, additional_authorized_imports, additional_tools, expected_error, monkeypatch
    ):
        """Escaping through an exception's __traceback__ frame globals must always be blocked at the dunder guard."""
        if patch_builtin_import_module:
            monkeypatch.setattr("builtins.__import__.__module__", None)  # inspect.getmodule(func) = None
        executor = LocalPythonExecutor(additional_authorized_imports)
        if additional_tools:
            from builtins import __import__

            executor.send_tools({"__import__": __import__})
        with (
            pytest.raises(type(expected_error), match=f".*{expected_error}")
            if isinstance(expected_error, Exception)
            else does_not_raise()
        ):
            executor(
                dedent(
                    """
                    try:
                        1 / 0
                    except Exception as e:
                        builtins = e.__traceback__.tb_frame.f_back.f_globals["__builtins__"]
                        builtins_import = builtins["__import__"]
                        os_module = builtins_import("os")
                        os_module.system(":")
                    """
                )
            )
2236
+
2237
    @pytest.mark.parametrize("patch_builtin_import_module", [False, True])  # builtins_import.__module__ = None
    @pytest.mark.parametrize(
        "additional_authorized_imports, additional_tools, expected_error",
        [
            ([], [], InterpreterError("Forbidden access to dunder attribute: __base__")),
            (["warnings"], [], InterpreterError("Forbidden access to dunder attribute: __base__")),
            (
                ["warnings", "builtins"],
                [],
                InterpreterError("Forbidden access to dunder attribute: __base__"),
            ),
            (["warnings", "builtins", "os"], [], InterpreterError("Forbidden access to dunder attribute: __base__")),
            (
                ["warnings", "builtins", "os"],
                ["__import__"],
                InterpreterError("Forbidden access to dunder attribute: __base__"),
            ),
        ],
    )
    def test_vulnerability_builtins_via_class_catch_warnings(
        self, patch_builtin_import_module, additional_authorized_imports, additional_tools, expected_error, monkeypatch
    ):
        """The classic __subclasses__/catch_warnings escape must be stopped at the __base__ dunder guard."""
        if patch_builtin_import_module:
            monkeypatch.setattr("builtins.__import__.__module__", None)  # inspect.getmodule(func) = None
        executor = LocalPythonExecutor(additional_authorized_imports)
        if additional_tools:
            from builtins import __import__

            executor.send_tools({"__import__": __import__})
        if isinstance(expected_error, tuple):  # different error depending on patch status
            expected_error = expected_error[patch_builtin_import_module]
        if isinstance(expected_error, Exception):
            expectation = pytest.raises(type(expected_error), match=f".*{expected_error}")
        elif expected_error is None:
            expectation = does_not_raise()
        with expectation:
            executor(
                dedent(
                    """
                    classes = {}.__class__.__base__.__subclasses__()
                    for cls in classes:
                        if cls.__name__ == "catch_warnings":
                            break
                    builtins = cls()._module.__builtins__
                    builtins_import = builtins["__import__"]
                    os_module = builtins_import('os')
                    os_module.system(":")
                    """
                )
            )
2287
+
2288
+ @pytest.mark.filterwarnings("ignore::DeprecationWarning")
2289
+ @pytest.mark.parametrize(
2290
+ "additional_authorized_imports, expected_error",
2291
+ [
2292
+ ([], InterpreterError("Forbidden access to dunder attribute: __base__")),
2293
+ (["os"], InterpreterError("Forbidden access to dunder attribute: __base__")),
2294
+ ],
2295
+ )
2296
+ def test_vulnerability_load_module_via_builtin_importer(self, additional_authorized_imports, expected_error):
2297
+ executor = LocalPythonExecutor(additional_authorized_imports)
2298
+ with (
2299
+ pytest.raises(type(expected_error), match=f".*{expected_error}")
2300
+ if isinstance(expected_error, Exception)
2301
+ else does_not_raise()
2302
+ ):
2303
+ executor(
2304
+ dedent(
2305
+ """
2306
+ classes = {}.__class__.__base__.__subclasses__()
2307
+ for cls in classes:
2308
+ if cls.__name__ == "BuiltinImporter":
2309
+ break
2310
+ os_module = cls().load_module("os")
2311
+ os_module.system(":")
2312
+ """
2313
+ )
2314
+ )
2315
+
2316
+ def test_vulnerability_class_via_subclasses(self):
2317
+ # Subclass: subprocess.Popen
2318
+ executor = LocalPythonExecutor([])
2319
+ code = dedent(
2320
+ """
2321
+ for cls in ().__class__.__base__.__subclasses__():
2322
+ if 'Popen' in cls.__class__.__repr__(cls):
2323
+ break
2324
+ cls(["sh", "-c", ":"]).wait()
2325
+ """
2326
+ )
2327
+ with pytest.raises(InterpreterError, match="Forbidden access to dunder attribute: __base__"):
2328
+ executor(code)
2329
+
2330
+ code = dedent(
2331
+ """
2332
+ [c for c in ().__class__.__base__.__subclasses__() if "Popen" in c.__class__.__repr__(c)][0](
2333
+ ["sh", "-c", ":"]
2334
+ ).wait()
2335
+ """
2336
+ )
2337
+ with pytest.raises(InterpreterError, match="Forbidden access to dunder attribute: __base__"):
2338
+ executor(code)
2339
+
2340
+ @pytest.mark.parametrize(
2341
+ "code, dunder_attribute",
2342
+ [("a = (); b = a.__class__", "__class__"), ("class A:\n attr=1\nx = A()\nx_dict = x.__dict__", "__dict__")],
2343
+ )
2344
+ def test_vulnerability_via_dunder_access(self, code, dunder_attribute):
2345
+ executor = LocalPythonExecutor([])
2346
+ with pytest.raises(InterpreterError, match=f"Forbidden access to dunder attribute: {dunder_attribute}"):
2347
+ executor(code)
2348
+
2349
+ def test_vulnerability_via_dunder_indirect_access(self):
2350
+ executor = LocalPythonExecutor([])
2351
+ code = "a = (); b = getattr(a, '__class__')"
2352
+ with pytest.raises(InterpreterError, match="Forbidden function evaluation: 'getattr'"):
2353
+ executor(code)
tests/test_mcp_client.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from textwrap import dedent
2
+
3
+ import pytest
4
+ from mcp import StdioServerParameters
5
+
6
+ from smolagents.mcp_client import MCPClient
7
+
8
+
9
+ @pytest.fixture
10
+ def echo_server_script():
11
+ return dedent(
12
+ '''
13
+ from mcp.server.fastmcp import FastMCP
14
+
15
+ mcp = FastMCP("Echo Server")
16
+
17
+ @mcp.tool()
18
+ def echo_tool(text: str) -> str:
19
+ """Echo the input text"""
20
+ return f"Echo: {text}"
21
+
22
+ mcp.run()
23
+ '''
24
+ )
25
+
26
+
27
+ def test_mcp_client_with_syntax(echo_server_script: str):
28
+ """Test the MCPClient with the context manager syntax."""
29
+ server_parameters = StdioServerParameters(command="python", args=["-c", echo_server_script])
30
+ with MCPClient(server_parameters) as tools:
31
+ assert len(tools) == 1
32
+ assert tools[0].name == "echo_tool"
33
+ assert tools[0].forward(**{"text": "Hello, world!"}) == "Echo: Hello, world!"
34
+
35
+
36
+ def test_mcp_client_try_finally_syntax(echo_server_script: str):
37
+ """Test the MCPClient with the try ... finally syntax."""
38
+ server_parameters = StdioServerParameters(command="python", args=["-c", echo_server_script])
39
+ mcp_client = MCPClient(server_parameters)
40
+ try:
41
+ tools = mcp_client.get_tools()
42
+ assert len(tools) == 1
43
+ assert tools[0].name == "echo_tool"
44
+ assert tools[0].forward(**{"text": "Hello, world!"}) == "Echo: Hello, world!"
45
+ finally:
46
+ mcp_client.disconnect()
47
+
48
+
49
+ def test_multiple_servers(echo_server_script: str):
50
+ """Test the MCPClient with multiple servers."""
51
+ server_parameters = [
52
+ StdioServerParameters(command="python", args=["-c", echo_server_script]),
53
+ StdioServerParameters(command="python", args=["-c", echo_server_script]),
54
+ ]
55
+ with MCPClient(server_parameters) as tools:
56
+ assert len(tools) == 2
57
+ assert tools[0].name == "echo_tool"
58
+ assert tools[1].name == "echo_tool"
59
+ assert tools[0].forward(**{"text": "Hello, world!"}) == "Echo: Hello, world!"
60
+ assert tools[1].forward(**{"text": "Hello, world!"}) == "Echo: Hello, world!"
tests/test_memory.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from PIL import Image
3
+
4
+ from smolagents.agents import ToolCall
5
+ from smolagents.memory import (
6
+ ActionStep,
7
+ AgentMemory,
8
+ ChatMessage,
9
+ MemoryStep,
10
+ MessageRole,
11
+ PlanningStep,
12
+ SystemPromptStep,
13
+ TaskStep,
14
+ )
15
+ from smolagents.monitoring import Timing, TokenUsage
16
+
17
+
18
+ class TestAgentMemory:
19
+ def test_initialization(self):
20
+ system_prompt = "This is a system prompt."
21
+ memory = AgentMemory(system_prompt=system_prompt)
22
+ assert memory.system_prompt.system_prompt == system_prompt
23
+ assert memory.steps == []
24
+
25
+ def test_return_all_code_actions(self):
26
+ memory = AgentMemory(system_prompt="This is a system prompt.")
27
+ memory.steps = [
28
+ ActionStep(step_number=1, timing=Timing(start_time=0.0, end_time=1.0), code_action="print('Hello')"),
29
+ ActionStep(step_number=2, timing=Timing(start_time=0.0, end_time=1.0), code_action=None),
30
+ ActionStep(step_number=3, timing=Timing(start_time=0.0, end_time=1.0), code_action="print('World')"),
31
+ ] # type: ignore
32
+ assert memory.return_full_code() == "print('Hello')\n\nprint('World')"
33
+
34
+
35
+ class TestMemoryStep:
36
+ def test_initialization(self):
37
+ step = MemoryStep()
38
+ assert isinstance(step, MemoryStep)
39
+
40
+ def test_dict(self):
41
+ step = MemoryStep()
42
+ assert step.dict() == {}
43
+
44
+ def test_to_messages(self):
45
+ step = MemoryStep()
46
+ with pytest.raises(NotImplementedError):
47
+ step.to_messages()
48
+
49
+
50
+ def test_action_step_dict():
51
+ action_step = ActionStep(
52
+ model_input_messages=[ChatMessage(role=MessageRole.USER, content="Hello")],
53
+ tool_calls=[
54
+ ToolCall(id="id", name="get_weather", arguments={"location": "Paris"}),
55
+ ],
56
+ timing=Timing(start_time=0.0, end_time=1.0),
57
+ step_number=1,
58
+ error=None,
59
+ model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Hi"),
60
+ model_output="Hi",
61
+ observations="This is a nice observation",
62
+ observations_images=[Image.new("RGB", (100, 100))],
63
+ action_output="Output",
64
+ token_usage=TokenUsage(input_tokens=10, output_tokens=20),
65
+ )
66
+ action_step_dict = action_step.dict()
67
+ # Check each key individually for better test failure messages
68
+ assert "model_input_messages" in action_step_dict
69
+ assert action_step_dict["model_input_messages"] == [ChatMessage(role=MessageRole.USER, content="Hello")]
70
+
71
+ assert "tool_calls" in action_step_dict
72
+ assert len(action_step_dict["tool_calls"]) == 1
73
+ assert action_step_dict["tool_calls"][0] == {
74
+ "id": "id",
75
+ "type": "function",
76
+ "function": {
77
+ "name": "get_weather",
78
+ "arguments": {"location": "Paris"},
79
+ },
80
+ }
81
+
82
+ assert "timing" in action_step_dict
83
+ assert action_step_dict["timing"] == {"start_time": 0.0, "end_time": 1.0, "duration": 1.0}
84
+
85
+ assert "token_usage" in action_step_dict
86
+ assert action_step_dict["token_usage"] == {"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}
87
+
88
+ assert "step_number" in action_step_dict
89
+ assert action_step_dict["step_number"] == 1
90
+
91
+ assert "error" in action_step_dict
92
+ assert action_step_dict["error"] is None
93
+
94
+ assert "model_output_message" in action_step_dict
95
+ assert action_step_dict["model_output_message"] == {
96
+ "role": "assistant",
97
+ "content": "Hi",
98
+ "tool_calls": None,
99
+ "raw": None,
100
+ "token_usage": None,
101
+ }
102
+
103
+ assert "model_output" in action_step_dict
104
+ assert action_step_dict["model_output"] == "Hi"
105
+
106
+ assert "observations" in action_step_dict
107
+ assert action_step_dict["observations"] == "This is a nice observation"
108
+
109
+ assert "observations_images" in action_step_dict
110
+
111
+ assert "action_output" in action_step_dict
112
+ assert action_step_dict["action_output"] == "Output"
113
+
114
+
115
+ def test_action_step_to_messages():
116
+ action_step = ActionStep(
117
+ model_input_messages=[ChatMessage(role=MessageRole.USER, content="Hello")],
118
+ tool_calls=[
119
+ ToolCall(id="id", name="get_weather", arguments={"location": "Paris"}),
120
+ ],
121
+ timing=Timing(start_time=0.0, end_time=1.0),
122
+ step_number=1,
123
+ error=None,
124
+ model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Hi"),
125
+ model_output="Hi",
126
+ observations="This is a nice observation",
127
+ observations_images=[Image.new("RGB", (100, 100))],
128
+ action_output="Output",
129
+ token_usage=TokenUsage(input_tokens=10, output_tokens=20),
130
+ )
131
+ messages = action_step.to_messages()
132
+ assert len(messages) == 4
133
+ for message in messages:
134
+ assert isinstance(message, ChatMessage)
135
+ assistant_message = messages[0]
136
+ assert assistant_message.role == MessageRole.ASSISTANT
137
+ assert len(assistant_message.content) == 1
138
+ assert assistant_message.content[0]["type"] == "text"
139
+ assert assistant_message.content[0]["text"] == "Hi"
140
+ message = messages[1]
141
+ assert message.role == MessageRole.TOOL_CALL
142
+
143
+ assert len(message.content) == 1
144
+ assert message.content[0]["type"] == "text"
145
+ assert "Calling tools:" in message.content[0]["text"]
146
+
147
+ image_message = messages[2]
148
+ assert image_message.content[0]["type"] == "image" # type: ignore
149
+
150
+ observation_message = messages[3]
151
+ assert observation_message.role == MessageRole.TOOL_RESPONSE
152
+ assert "Observation:\nThis is a nice observation" in observation_message.content[0]["text"]
153
+
154
+
155
+ def test_action_step_to_messages_no_tool_calls_with_observations():
156
+ action_step = ActionStep(
157
+ model_input_messages=None,
158
+ tool_calls=None,
159
+ timing=Timing(start_time=0.0, end_time=1.0),
160
+ step_number=1,
161
+ error=None,
162
+ model_output_message=None,
163
+ model_output=None,
164
+ observations="This is an observation.",
165
+ observations_images=None,
166
+ action_output=None,
167
+ token_usage=TokenUsage(input_tokens=10, output_tokens=20),
168
+ )
169
+ messages = action_step.to_messages()
170
+ assert len(messages) == 1
171
+ observation_message = messages[0]
172
+ assert observation_message.role == MessageRole.TOOL_RESPONSE
173
+ assert "Observation:\nThis is an observation." in observation_message.content[0]["text"]
174
+
175
+
176
+ def test_planning_step_to_messages():
177
+ planning_step = PlanningStep(
178
+ model_input_messages=[ChatMessage(role=MessageRole.USER, content="Hello")],
179
+ model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Plan"),
180
+ plan="This is a plan.",
181
+ timing=Timing(start_time=0.0, end_time=1.0),
182
+ )
183
+ messages = planning_step.to_messages(summary_mode=False)
184
+ assert len(messages) == 2
185
+ for message in messages:
186
+ assert isinstance(message, ChatMessage)
187
+ assert isinstance(message.content, list)
188
+ assert len(message.content) == 1
189
+ for content in message.content:
190
+ assert isinstance(content, dict)
191
+ assert "type" in content
192
+ assert "text" in content
193
+ assert messages[0].role == MessageRole.ASSISTANT
194
+ assert messages[1].role == MessageRole.USER
195
+
196
+
197
+ def test_task_step_to_messages():
198
+ task_step = TaskStep(task="This is a task.", task_images=[Image.new("RGB", (100, 100))])
199
+ messages = task_step.to_messages(summary_mode=False)
200
+ assert len(messages) == 1
201
+ for message in messages:
202
+ assert isinstance(message, ChatMessage)
203
+ assert message.role == MessageRole.USER
204
+ assert isinstance(message.content, list)
205
+ assert len(message.content) == 2
206
+ text_content = message.content[0]
207
+ assert isinstance(text_content, dict)
208
+ assert "type" in text_content
209
+ assert "text" in text_content
210
+ for image_content in message.content[1:]:
211
+ assert isinstance(image_content, dict)
212
+ assert "type" in image_content
213
+ assert "image" in image_content
214
+
215
+
216
+ def test_system_prompt_step_to_messages():
217
+ system_prompt_step = SystemPromptStep(system_prompt="This is a system prompt.")
218
+ messages = system_prompt_step.to_messages(summary_mode=False)
219
+ assert len(messages) == 1
220
+ for message in messages:
221
+ assert isinstance(message, ChatMessage)
222
+ assert message.role == MessageRole.SYSTEM
223
+ assert isinstance(message.content, list)
224
+ assert len(message.content) == 1
225
+ for content in message.content:
226
+ assert isinstance(content, dict)
227
+ assert "type" in content
228
+ assert "text" in content
tests/test_models.py ADDED
@@ -0,0 +1,763 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import json
16
+ import sys
17
+ import unittest
18
+ from contextlib import ExitStack
19
+ from unittest.mock import MagicMock, patch
20
+
21
+ import pytest
22
+ from huggingface_hub import ChatCompletionOutputMessage
23
+
24
+ from smolagents.default_tools import FinalAnswerTool
25
+ from smolagents.models import (
26
+ AmazonBedrockServerModel,
27
+ AzureOpenAIServerModel,
28
+ ChatMessage,
29
+ ChatMessageToolCall,
30
+ InferenceClientModel,
31
+ LiteLLMModel,
32
+ LiteLLMRouterModel,
33
+ MessageRole,
34
+ MLXModel,
35
+ Model,
36
+ OpenAIServerModel,
37
+ TransformersModel,
38
+ get_clean_message_list,
39
+ get_tool_call_from_text,
40
+ get_tool_json_schema,
41
+ parse_json_if_needed,
42
+ supports_stop_parameter,
43
+ )
44
+ from smolagents.tools import tool
45
+
46
+ from .utils.markers import require_run_all
47
+
48
+
49
+ class TestModel:
50
+ def test_agglomerate_stream_deltas(self):
51
+ from smolagents.models import (
52
+ ChatMessageStreamDelta,
53
+ ChatMessageToolCallFunction,
54
+ ChatMessageToolCallStreamDelta,
55
+ TokenUsage,
56
+ agglomerate_stream_deltas,
57
+ )
58
+
59
+ stream_deltas = [
60
+ ChatMessageStreamDelta(
61
+ content="Hi",
62
+ tool_calls=[
63
+ ChatMessageToolCallStreamDelta(
64
+ index=0,
65
+ type="function",
66
+ function=ChatMessageToolCallFunction(arguments="", name="web_search", description=None),
67
+ )
68
+ ],
69
+ token_usage=None,
70
+ ),
71
+ ChatMessageStreamDelta(
72
+ content=" everyone",
73
+ tool_calls=[
74
+ ChatMessageToolCallStreamDelta(
75
+ index=0,
76
+ type="function",
77
+ function=ChatMessageToolCallFunction(arguments=' {"', name="web_search", description=None),
78
+ )
79
+ ],
80
+ token_usage=None,
81
+ ),
82
+ ChatMessageStreamDelta(
83
+ content=", it's",
84
+ tool_calls=[
85
+ ChatMessageToolCallStreamDelta(
86
+ index=0,
87
+ type="function",
88
+ function=ChatMessageToolCallFunction(
89
+ arguments='query": "current pope name and date of birth"}',
90
+ name="web_search",
91
+ description=None,
92
+ ),
93
+ )
94
+ ],
95
+ token_usage=None,
96
+ ),
97
+ ChatMessageStreamDelta(
98
+ content="",
99
+ tool_calls=None,
100
+ token_usage=TokenUsage(input_tokens=1348, output_tokens=24),
101
+ ),
102
+ ]
103
+ agglomerated_stream_delta = agglomerate_stream_deltas(stream_deltas)
104
+ assert agglomerated_stream_delta.content == "Hi everyone, it's"
105
+ assert (
106
+ agglomerated_stream_delta.tool_calls[0].function.arguments
107
+ == ' {"query": "current pope name and date of birth"}'
108
+ )
109
+ assert agglomerated_stream_delta.token_usage.total_tokens == 1372
110
+
111
+ @pytest.mark.parametrize(
112
+ "model_id, stop_sequences, should_contain_stop",
113
+ [
114
+ ("regular-model", ["stop1", "stop2"], True), # Regular model should include stop
115
+ ("openai/o3", ["stop1", "stop2"], False), # o3 model should not include stop
116
+ ("openai/o4-mini", ["stop1", "stop2"], False), # o4-mini model should not include stop
117
+ ("something/else/o3", ["stop1", "stop2"], False), # Path ending with o3 should not include stop
118
+ ("something/else/o4-mini", ["stop1", "stop2"], False), # Path ending with o4-mini should not include stop
119
+ ("o3", ["stop1", "stop2"], False), # Exact o3 model should not include stop
120
+ ("o4-mini", ["stop1", "stop2"], False), # Exact o4-mini model should not include stop
121
+ ("regular-model", None, False), # None stop_sequences should not add stop parameter
122
+ ],
123
+ )
124
+ def test_prepare_completion_kwargs_stop_sequences(self, model_id, stop_sequences, should_contain_stop):
125
+ model = Model()
126
+ model.model_id = model_id
127
+ completion_kwargs = model._prepare_completion_kwargs(
128
+ messages=[
129
+ ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello"}]),
130
+ ],
131
+ stop_sequences=stop_sequences,
132
+ )
133
+ # Verify that the stop parameter is only included when appropriate
134
+ if should_contain_stop:
135
+ assert "stop" in completion_kwargs
136
+ assert completion_kwargs["stop"] == stop_sequences
137
+ else:
138
+ assert "stop" not in completion_kwargs
139
+
140
+ @pytest.mark.parametrize(
141
+ "with_tools, tool_choice, expected_result",
142
+ [
143
+ # Default behavior: With tools but no explicit tool_choice, should default to "required"
144
+ (True, ..., {"has_tool_choice": True, "value": "required"}),
145
+ # Custom value: With tools and explicit tool_choice="auto"
146
+ (True, "auto", {"has_tool_choice": True, "value": "auto"}),
147
+ # Tool name as string
148
+ (True, "valid_tool_function", {"has_tool_choice": True, "value": "valid_tool_function"}),
149
+ # Tool choice as dictionary
150
+ (
151
+ True,
152
+ {"type": "function", "function": {"name": "valid_tool_function"}},
153
+ {"has_tool_choice": True, "value": {"type": "function", "function": {"name": "valid_tool_function"}}},
154
+ ),
155
+ # With tools but explicit None tool_choice: should exclude tool_choice
156
+ (True, None, {"has_tool_choice": False, "value": None}),
157
+ # Without tools: tool_choice should never be included
158
+ (False, "required", {"has_tool_choice": False, "value": None}),
159
+ (False, "auto", {"has_tool_choice": False, "value": None}),
160
+ (False, None, {"has_tool_choice": False, "value": None}),
161
+ (False, ..., {"has_tool_choice": False, "value": None}),
162
+ ],
163
+ )
164
+ def test_prepare_completion_kwargs_tool_choice(self, with_tools, tool_choice, expected_result, example_tool):
165
+ model = Model()
166
+ kwargs = {"messages": [ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello"}])]}
167
+ if with_tools:
168
+ kwargs["tools_to_call_from"] = [example_tool]
169
+ if tool_choice is not ...:
170
+ kwargs["tool_choice"] = tool_choice
171
+
172
+ completion_kwargs = model._prepare_completion_kwargs(**kwargs)
173
+
174
+ if expected_result["has_tool_choice"]:
175
+ assert "tool_choice" in completion_kwargs
176
+ assert completion_kwargs["tool_choice"] == expected_result["value"]
177
+ else:
178
+ assert "tool_choice" not in completion_kwargs
179
+
180
+ def test_get_json_schema_has_nullable_args(self):
181
+ @tool
182
+ def get_weather(location: str, celsius: bool | None = False) -> str:
183
+ """
184
+ Get weather in the next days at given location.
185
+ Secretly this tool does not care about the location, it hates the weather everywhere.
186
+
187
+ Args:
188
+ location: the location
189
+ celsius: the temperature type
190
+ """
191
+ return "The weather is UNGODLY with torrential rains and temperatures below -10°C"
192
+
193
+ assert "nullable" in get_tool_json_schema(get_weather)["function"]["parameters"]["properties"]["celsius"]
194
+
195
+ def test_chatmessage_has_model_dumps_json(self):
196
+ message = ChatMessage("user", [{"type": "text", "text": "Hello!"}])
197
+ data = json.loads(message.model_dump_json())
198
+ assert data["content"] == [{"type": "text", "text": "Hello!"}]
199
+
200
+ @unittest.skipUnless(sys.platform.startswith("darwin"), "requires macOS")
201
+ def test_get_mlx_message_no_tool(self):
202
+ model = MLXModel(model_id="HuggingFaceTB/SmolLM2-135M-Instruct", max_tokens=10)
203
+ messages = [ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello!"}])]
204
+ output = model(messages, stop_sequences=["great"]).content
205
+ assert output.startswith("Hello")
206
+
207
+ @unittest.skipUnless(sys.platform.startswith("darwin"), "requires macOS")
208
+ def test_get_mlx_message_tricky_stop_sequence(self):
209
+ # In this test HuggingFaceTB/SmolLM2-135M-Instruct generates the token ">'"
210
+ # which is required to test capturing stop_sequences that have extra chars at the end.
211
+ model = MLXModel(model_id="HuggingFaceTB/SmolLM2-135M-Instruct", max_tokens=100)
212
+ stop_sequence = " print '>"
213
+ messages = [
214
+ ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": f"Please{stop_sequence}'"}]),
215
+ ]
216
+ # check our assumption that that ">" is followed by "'"
217
+ assert model.tokenizer.vocab[">'"]
218
+ assert model(messages, stop_sequences=[]).content == f"I'm ready to help you{stop_sequence}'"
219
+ # check stop_sequence capture when output has trailing chars
220
+ assert model(messages, stop_sequences=[stop_sequence]).content == "I'm ready to help you"
221
+
222
+ def test_transformers_message_no_tool(self, monkeypatch):
223
+ monkeypatch.setattr("huggingface_hub.constants.HF_HUB_DOWNLOAD_TIMEOUT", 30) # instead of 10
224
+ model = TransformersModel(
225
+ model_id="HuggingFaceTB/SmolLM2-135M-Instruct",
226
+ max_new_tokens=5,
227
+ device_map="cpu",
228
+ do_sample=False,
229
+ )
230
+ messages = [ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello!"}])]
231
+ output = model.generate(messages).content
232
+ assert output == "Hello! I'm here"
233
+
234
+ output = model.generate_stream(messages, stop_sequences=["great"])
235
+ output_str = ""
236
+ for el in output:
237
+ output_str += el.content
238
+ assert output_str == "Hello! I'm here"
239
+
240
+ def test_transformers_message_vl_no_tool(self, shared_datadir, monkeypatch):
241
+ monkeypatch.setattr("huggingface_hub.constants.HF_HUB_DOWNLOAD_TIMEOUT", 30) # instead of 10
242
+ import PIL.Image
243
+
244
+ img = PIL.Image.open(shared_datadir / "000000039769.png")
245
+ model = TransformersModel(
246
+ model_id="llava-hf/llava-interleave-qwen-0.5b-hf",
247
+ max_new_tokens=4,
248
+ device_map="cpu",
249
+ do_sample=False,
250
+ )
251
+ messages = [
252
+ ChatMessage(
253
+ role=MessageRole.USER,
254
+ content=[{"type": "text", "text": "What is this?"}, {"type": "image", "image": img}],
255
+ )
256
+ ]
257
+ output = model.generate(messages).content
258
+ assert output == "This is a very"
259
+
260
+ output = model.generate_stream(messages, stop_sequences=["great"])
261
+ output_str = ""
262
+ for el in output:
263
+ output_str += el.content
264
+ assert output_str == "This is a very"
265
+
266
+ def test_parse_json_if_needed(self):
267
+ args = "abc"
268
+ parsed_args = parse_json_if_needed(args)
269
+ assert parsed_args == "abc"
270
+
271
+ args = '{"a": 3}'
272
+ parsed_args = parse_json_if_needed(args)
273
+ assert parsed_args == {"a": 3}
274
+
275
+ args = "3"
276
+ parsed_args = parse_json_if_needed(args)
277
+ assert parsed_args == 3
278
+
279
+ args = 3
280
+ parsed_args = parse_json_if_needed(args)
281
+ assert parsed_args == 3
282
+
283
+
284
+ class TestInferenceClientModel:
285
+ def test_call_with_custom_role_conversions(self):
286
+ custom_role_conversions = {MessageRole.USER: MessageRole.SYSTEM}
287
+ model = InferenceClientModel(model_id="test-model", custom_role_conversions=custom_role_conversions)
288
+ model.client = MagicMock()
289
+ mock_response = model.client.chat_completion.return_value
290
+ mock_response.choices[0].message = ChatCompletionOutputMessage(role=MessageRole.ASSISTANT)
291
+ messages = [ChatMessage(role=MessageRole.USER, content="Test message")]
292
+ _ = model(messages)
293
+ # Verify that the role conversion was applied
294
+ assert model.client.chat_completion.call_args.kwargs["messages"][0]["role"] == "system", (
295
+ "role conversion should be applied"
296
+ )
297
+
298
+ def test_init_model_with_tokens(self):
299
+ model = InferenceClientModel(model_id="test-model", token="abc")
300
+ assert model.client.token == "abc"
301
+
302
+ model = InferenceClientModel(model_id="test-model", api_key="abc")
303
+ assert model.client.token == "abc"
304
+
305
+ with pytest.raises(ValueError, match="Received both `token` and `api_key` arguments."):
306
+ InferenceClientModel(model_id="test-model", token="abc", api_key="def")
307
+
308
+ def test_structured_outputs_with_unsupported_provider(self):
309
+ with pytest.raises(
310
+ ValueError, match="InferenceClientModel only supports structured outputs with these providers:"
311
+ ):
312
+ model = InferenceClientModel(model_id="test-model", token="abc", provider="some_provider")
313
+ model.generate(
314
+ messages=[ChatMessage(role=MessageRole.USER, content="Hello!")],
315
+ response_format={"type": "json_object"},
316
+ )
317
+
318
+ @require_run_all
319
+ def test_get_hfapi_message_no_tool(self):
320
+ model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10)
321
+ messages = [ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello!"}])]
322
+ model(messages, stop_sequences=["great"])
323
+
324
+ @require_run_all
325
+ def test_get_hfapi_message_no_tool_external_provider(self):
326
+ model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10)
327
+ messages = [ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello!"}])]
328
+ model(messages, stop_sequences=["great"])
329
+
330
+ @require_run_all
331
+ def test_get_hfapi_message_stream_no_tool(self):
332
+ model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", max_tokens=10)
333
+ messages = [ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello!"}])]
334
+ for el in model.generate_stream(messages, stop_sequences=["great"]):
335
+ assert el.content is not None
336
+
337
+ @require_run_all
338
+ def test_get_hfapi_message_stream_no_tool_external_provider(self):
339
+ model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=10)
340
+ messages = [ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello!"}])]
341
+ for el in model.generate_stream(messages, stop_sequences=["great"]):
342
+ assert el.content is not None
343
+
344
+
345
+ class TestLiteLLMModel:
346
+ @pytest.mark.parametrize(
347
+ "model_id, error_flag",
348
+ [
349
+ ("groq/llama-3.3-70b", "Invalid API Key"),
350
+ ("cerebras/llama-3.3-70b", "The api_key client option must be set"),
351
+ ("mistral/mistral-tiny", "The api_key client option must be set"),
352
+ ],
353
+ )
354
+ def test_call_different_providers_without_key(self, model_id, error_flag):
355
+ model = LiteLLMModel(model_id=model_id)
356
+ messages = [ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Test message"}])]
357
+ with pytest.raises(Exception) as e:
358
+ # This should raise 401 error because of missing API key, not fail for any "bad format" reason
359
+ model.generate(messages)
360
+ assert error_flag in str(e)
361
+ with pytest.raises(Exception) as e:
362
+ # This should raise 401 error because of missing API key, not fail for any "bad format" reason
363
+ for el in model.generate_stream(messages):
364
+ assert el.content is not None
365
+ assert error_flag in str(e)
366
+
367
+ def test_passing_flatten_messages(self):
368
+ model = LiteLLMModel(model_id="groq/llama-3.3-70b", flatten_messages_as_text=False)
369
+ assert not model.flatten_messages_as_text
370
+
371
+ model = LiteLLMModel(model_id="fal/llama-3.3-70b", flatten_messages_as_text=True)
372
+ assert model.flatten_messages_as_text
373
+
374
+
375
+ class TestLiteLLMRouterModel:
376
+ @pytest.mark.parametrize(
377
+ "model_id, expected",
378
+ [
379
+ ("llama-3.3-70b", False),
380
+ ("llama-3.3-70b", True),
381
+ ("mistral-tiny", True),
382
+ ],
383
+ )
384
+ def test_flatten_messages_as_text(self, model_id, expected):
385
+ model_list = [
386
+ {"model_name": "llama-3.3-70b", "litellm_params": {"model": "groq/llama-3.3-70b"}},
387
+ {"model_name": "llama-3.3-70b", "litellm_params": {"model": "cerebras/llama-3.3-70b"}},
388
+ {"model_name": "mistral-tiny", "litellm_params": {"model": "mistral/mistral-tiny"}},
389
+ ]
390
+ model = LiteLLMRouterModel(model_id=model_id, model_list=model_list, flatten_messages_as_text=expected)
391
+ assert model.flatten_messages_as_text is expected
392
+
393
+ def test_create_client(self):
394
+ model_list = [
395
+ {"model_name": "llama-3.3-70b", "litellm_params": {"model": "groq/llama-3.3-70b"}},
396
+ {"model_name": "llama-3.3-70b", "litellm_params": {"model": "cerebras/llama-3.3-70b"}},
397
+ ]
398
+ with patch("litellm.router.Router") as mock_router:
399
+ router_model = LiteLLMRouterModel(
400
+ model_id="model-group-1", model_list=model_list, client_kwargs={"routing_strategy": "simple-shuffle"}
401
+ )
402
+ # Ensure that the Router constructor was called with the expected keyword arguments
403
+ mock_router.assert_called_once()
404
+ assert mock_router.call_count == 1
405
+ assert mock_router.call_args.kwargs["model_list"] == model_list
406
+ assert mock_router.call_args.kwargs["routing_strategy"] == "simple-shuffle"
407
+ assert router_model.client == mock_router.return_value
408
+
409
+
410
+ class TestOpenAIServerModel:
411
+ def test_client_kwargs_passed_correctly(self):
412
+ model_id = "gpt-3.5-turbo"
413
+ api_base = "https://api.openai.com/v1"
414
+ api_key = "test_api_key"
415
+ organization = "test_org"
416
+ project = "test_project"
417
+ client_kwargs = {"max_retries": 5}
418
+
419
+ with patch("openai.OpenAI") as MockOpenAI:
420
+ model = OpenAIServerModel(
421
+ model_id=model_id,
422
+ api_base=api_base,
423
+ api_key=api_key,
424
+ organization=organization,
425
+ project=project,
426
+ client_kwargs=client_kwargs,
427
+ )
428
+ MockOpenAI.assert_called_once_with(
429
+ base_url=api_base, api_key=api_key, organization=organization, project=project, max_retries=5
430
+ )
431
+ assert model.client == MockOpenAI.return_value
432
+
433
+ @require_run_all
434
+ def test_streaming_tool_calls(self):
435
+ model = OpenAIServerModel(model_id="gpt-4o-mini")
436
+ messages = [
437
+ ChatMessage(
438
+ role=MessageRole.USER,
439
+ content=[
440
+ {
441
+ "type": "text",
442
+ "text": "Hello! Please return the final answer 'blob' and the final answer 'blob2' in two parallel tool calls",
443
+ }
444
+ ],
445
+ ),
446
+ ]
447
+ for el in model.generate_stream(messages, tools_to_call_from=[FinalAnswerTool()]):
448
+ if el.tool_calls:
449
+ assert el.tool_calls[0].function.name == "final_answer"
450
+ args = el.tool_calls[0].function.arguments
451
+ if len(el.tool_calls) > 1:
452
+ assert el.tool_calls[1].function.name == "final_answer"
453
+ args2 = el.tool_calls[1].function.arguments
454
+ assert args == '{"answer": "blob"}'
455
+ assert args2 == '{"answer": "blob2"}'
456
+
457
+
458
+ class TestAmazonBedrockServerModel:
459
+ def test_client_for_bedrock(self):
460
+ model_id = "us.amazon.nova-pro-v1:0"
461
+
462
+ with patch("boto3.client") as MockBoto3:
463
+ model = AmazonBedrockServerModel(
464
+ model_id=model_id,
465
+ )
466
+
467
+ assert model.client == MockBoto3.return_value
468
+
469
+
470
+ class TestAzureOpenAIServerModel:
471
+ def test_client_kwargs_passed_correctly(self):
472
+ model_id = "gpt-3.5-turbo"
473
+ api_key = "test_api_key"
474
+ api_version = "2023-12-01-preview"
475
+ azure_endpoint = "https://example-resource.azure.openai.com/"
476
+ organization = "test_org"
477
+ project = "test_project"
478
+ client_kwargs = {"max_retries": 5}
479
+
480
+ with patch("openai.OpenAI") as MockOpenAI, patch("openai.AzureOpenAI") as MockAzureOpenAI:
481
+ model = AzureOpenAIServerModel(
482
+ model_id=model_id,
483
+ api_key=api_key,
484
+ api_version=api_version,
485
+ azure_endpoint=azure_endpoint,
486
+ organization=organization,
487
+ project=project,
488
+ client_kwargs=client_kwargs,
489
+ )
490
+ assert MockOpenAI.call_count == 0
491
+ MockAzureOpenAI.assert_called_once_with(
492
+ base_url=None,
493
+ api_key=api_key,
494
+ api_version=api_version,
495
+ azure_endpoint=azure_endpoint,
496
+ organization=organization,
497
+ project=project,
498
+ max_retries=5,
499
+ )
500
+ assert model.client == MockAzureOpenAI.return_value
501
+
502
+
503
+ class TestTransformersModel:
504
+ @pytest.mark.parametrize(
505
+ "patching",
506
+ [
507
+ [
508
+ (
509
+ "transformers.AutoModelForImageTextToText.from_pretrained",
510
+ {"side_effect": ValueError("Unrecognized configuration class")},
511
+ ),
512
+ ("transformers.AutoModelForCausalLM.from_pretrained", {}),
513
+ ("transformers.AutoTokenizer.from_pretrained", {}),
514
+ ],
515
+ [
516
+ ("transformers.AutoModelForImageTextToText.from_pretrained", {}),
517
+ ("transformers.AutoProcessor.from_pretrained", {}),
518
+ ],
519
+ ],
520
+ )
521
+ def test_init(self, patching):
522
+ with ExitStack() as stack:
523
+ mocks = {target: stack.enter_context(patch(target, **kwargs)) for target, kwargs in patching}
524
+ model = TransformersModel(
525
+ model_id="test-model", device_map="cpu", torch_dtype="float16", trust_remote_code=True
526
+ )
527
+ assert model.model_id == "test-model"
528
+ if "transformers.AutoTokenizer.from_pretrained" in mocks:
529
+ assert model.model == mocks["transformers.AutoModelForCausalLM.from_pretrained"].return_value
530
+ assert mocks["transformers.AutoModelForCausalLM.from_pretrained"].call_args.kwargs == {
531
+ "device_map": "cpu",
532
+ "torch_dtype": "float16",
533
+ "trust_remote_code": True,
534
+ }
535
+ assert model.tokenizer == mocks["transformers.AutoTokenizer.from_pretrained"].return_value
536
+ assert mocks["transformers.AutoTokenizer.from_pretrained"].call_args.args == ("test-model",)
537
+ assert mocks["transformers.AutoTokenizer.from_pretrained"].call_args.kwargs == {"trust_remote_code": True}
538
+ elif "transformers.AutoProcessor.from_pretrained" in mocks:
539
+ assert model.model == mocks["transformers.AutoModelForImageTextToText.from_pretrained"].return_value
540
+ assert mocks["transformers.AutoModelForImageTextToText.from_pretrained"].call_args.kwargs == {
541
+ "device_map": "cpu",
542
+ "torch_dtype": "float16",
543
+ "trust_remote_code": True,
544
+ }
545
+ assert model.processor == mocks["transformers.AutoProcessor.from_pretrained"].return_value
546
+ assert mocks["transformers.AutoProcessor.from_pretrained"].call_args.args == ("test-model",)
547
+ assert mocks["transformers.AutoProcessor.from_pretrained"].call_args.kwargs == {"trust_remote_code": True}
548
+
549
+
550
+ def test_get_clean_message_list_basic():
551
+ messages = [
552
+ ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello!"}]),
553
+ ChatMessage(role=MessageRole.ASSISTANT, content=[{"type": "text", "text": "Hi there!"}]),
554
+ ]
555
+ result = get_clean_message_list(messages)
556
+ assert len(result) == 2
557
+ assert result[0]["role"] == "user"
558
+ assert result[0]["content"][0]["text"] == "Hello!"
559
+ assert result[1]["role"] == "assistant"
560
+ assert result[1]["content"][0]["text"] == "Hi there!"
561
+
562
+
563
+ def test_get_clean_message_list_role_conversions():
564
+ messages = [
565
+ ChatMessage(role=MessageRole.TOOL_CALL, content=[{"type": "text", "text": "Calling tool..."}]),
566
+ ChatMessage(role=MessageRole.TOOL_RESPONSE, content=[{"type": "text", "text": "Tool response"}]),
567
+ ]
568
+ result = get_clean_message_list(messages, role_conversions={"tool-call": "assistant", "tool-response": "user"})
569
+ assert len(result) == 2
570
+ assert result[0]["role"] == "assistant"
571
+ assert result[0]["content"][0]["text"] == "Calling tool..."
572
+ assert result[1]["role"] == "user"
573
+ assert result[1]["content"][0]["text"] == "Tool response"
574
+
575
+
576
+ @pytest.mark.parametrize(
577
+ "convert_images_to_image_urls, expected_clean_message",
578
+ [
579
+ (
580
+ False,
581
+ dict(
582
+ role=MessageRole.USER,
583
+ content=[
584
+ {"type": "image", "image": "encoded_image"},
585
+ {"type": "image", "image": "second_encoded_image"},
586
+ ],
587
+ ),
588
+ ),
589
+ (
590
+ True,
591
+ dict(
592
+ role=MessageRole.USER,
593
+ content=[
594
+ {"type": "image_url", "image_url": {"url": "data:image/png;base64,encoded_image"}},
595
+ {"type": "image_url", "image_url": {"url": "data:image/png;base64,second_encoded_image"}},
596
+ ],
597
+ ),
598
+ ),
599
+ ],
600
+ )
601
+ def test_get_clean_message_list_image_encoding(convert_images_to_image_urls, expected_clean_message):
602
+ message = ChatMessage(
603
+ role=MessageRole.USER,
604
+ content=[{"type": "image", "image": b"image_data"}, {"type": "image", "image": b"second_image_data"}],
605
+ )
606
+ with patch("smolagents.models.encode_image_base64") as mock_encode:
607
+ mock_encode.side_effect = ["encoded_image", "second_encoded_image"]
608
+ result = get_clean_message_list([message], convert_images_to_image_urls=convert_images_to_image_urls)
609
+ mock_encode.assert_any_call(b"image_data")
610
+ mock_encode.assert_any_call(b"second_image_data")
611
+ assert len(result) == 1
612
+ assert result[0] == expected_clean_message
613
+
614
+
615
+ def test_get_clean_message_list_flatten_messages_as_text():
616
+ messages = [
617
+ ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "Hello!"}]),
618
+ ChatMessage(role=MessageRole.USER, content=[{"type": "text", "text": "How are you?"}]),
619
+ ]
620
+ result = get_clean_message_list(messages, flatten_messages_as_text=True)
621
+ assert len(result) == 1
622
+ assert result[0]["role"] == "user"
623
+ assert result[0]["content"] == "Hello!\nHow are you?"
624
+
625
+
626
+ @pytest.mark.parametrize(
627
+ "model_class, model_kwargs, patching, expected_flatten_messages_as_text",
628
+ [
629
+ (AzureOpenAIServerModel, {}, ("openai.AzureOpenAI", {}), False),
630
+ (InferenceClientModel, {}, ("huggingface_hub.InferenceClient", {}), False),
631
+ (LiteLLMModel, {}, None, False),
632
+ (LiteLLMModel, {"model_id": "ollama"}, None, True),
633
+ (LiteLLMModel, {"model_id": "groq"}, None, True),
634
+ (LiteLLMModel, {"model_id": "cerebras"}, None, True),
635
+ (MLXModel, {}, ("mlx_lm.load", {"return_value": (MagicMock(), MagicMock())}), True),
636
+ (OpenAIServerModel, {}, ("openai.OpenAI", {}), False),
637
+ (OpenAIServerModel, {"flatten_messages_as_text": True}, ("openai.OpenAI", {}), True),
638
+ (
639
+ TransformersModel,
640
+ {},
641
+ [
642
+ (
643
+ "transformers.AutoModelForImageTextToText.from_pretrained",
644
+ {"side_effect": ValueError("Unrecognized configuration class")},
645
+ ),
646
+ ("transformers.AutoModelForCausalLM.from_pretrained", {}),
647
+ ("transformers.AutoTokenizer.from_pretrained", {}),
648
+ ],
649
+ True,
650
+ ),
651
+ (
652
+ TransformersModel,
653
+ {},
654
+ [
655
+ ("transformers.AutoModelForImageTextToText.from_pretrained", {}),
656
+ ("transformers.AutoProcessor.from_pretrained", {}),
657
+ ],
658
+ False,
659
+ ),
660
+ ],
661
+ )
662
+ def test_flatten_messages_as_text_for_all_models(
663
+ model_class, model_kwargs, patching, expected_flatten_messages_as_text
664
+ ):
665
+ with ExitStack() as stack:
666
+ if isinstance(patching, list):
667
+ for target, kwargs in patching:
668
+ stack.enter_context(patch(target, **kwargs))
669
+ elif patching:
670
+ target, kwargs = patching
671
+ stack.enter_context(patch(target, **kwargs))
672
+
673
+ model = model_class(**{"model_id": "test-model", **model_kwargs})
674
+ assert model.flatten_messages_as_text is expected_flatten_messages_as_text, f"{model_class.__name__} failed"
675
+
676
+
677
+ @pytest.mark.parametrize(
678
+ "model_id,expected",
679
+ [
680
+ # Unsupported base models
681
+ ("o3", False),
682
+ ("o4-mini", False),
683
+ # Unsupported versioned models
684
+ ("o3-2025-04-16", False),
685
+ ("o4-mini-2025-04-16", False),
686
+ # Unsupported models with path prefixes
687
+ ("openai/o3", False),
688
+ ("openai/o4-mini", False),
689
+ ("openai/o3-2025-04-16", False),
690
+ ("openai/o4-mini-2025-04-16", False),
691
+ # Supported models
692
+ ("o3-mini", True), # Different from o3
693
+ ("o3-mini-2025-01-31", True), # Different from o3
694
+ ("o4", True), # Different from o4-mini
695
+ ("o4-turbo", True), # Different from o4-mini
696
+ ("gpt-4", True),
697
+ ("claude-3-5-sonnet", True),
698
+ ("mistral-large", True),
699
+ # Supported models with path prefixes
700
+ ("openai/gpt-4", True),
701
+ ("anthropic/claude-3-5-sonnet", True),
702
+ ("mistralai/mistral-large", True),
703
+ # Edge cases
704
+ ("", True), # Empty string doesn't match pattern
705
+ ("o3x", True), # Not exactly o3
706
+ ("o3_mini", True), # Not o3-mini format
707
+ ("prefix-o3", True), # o3 not at start
708
+ ],
709
+ )
710
+ def test_supports_stop_parameter(model_id, expected):
711
+ """Test the supports_stop_parameter function with various model IDs"""
712
+ assert supports_stop_parameter(model_id) == expected, f"Failed for model_id: {model_id}"
713
+
714
+
715
+ class TestGetToolCallFromText:
716
+ @pytest.fixture(autouse=True)
717
+ def mock_uuid4(self):
718
+ with patch("uuid.uuid4", return_value="test-uuid"):
719
+ yield
720
+
721
+ def test_get_tool_call_from_text_basic(self):
722
+ text = '{"name": "weather_tool", "arguments": "New York"}'
723
+ result = get_tool_call_from_text(text, "name", "arguments")
724
+ assert isinstance(result, ChatMessageToolCall)
725
+ assert result.id == "test-uuid"
726
+ assert result.type == "function"
727
+ assert result.function.name == "weather_tool"
728
+ assert result.function.arguments == "New York"
729
+
730
+ def test_get_tool_call_from_text_name_key_missing(self):
731
+ text = '{"action": "weather_tool", "arguments": "New York"}'
732
+ with pytest.raises(ValueError) as exc_info:
733
+ get_tool_call_from_text(text, "name", "arguments")
734
+ error_msg = str(exc_info.value)
735
+ assert "Key tool_name_key='name' not found" in error_msg
736
+ assert "'action', 'arguments'" in error_msg
737
+
738
+ def test_get_tool_call_from_text_json_object_args(self):
739
+ text = '{"name": "weather_tool", "arguments": {"city": "New York"}}'
740
+ result = get_tool_call_from_text(text, "name", "arguments")
741
+ assert result.function.arguments == {"city": "New York"}
742
+
743
+ def test_get_tool_call_from_text_json_string_args(self):
744
+ text = '{"name": "weather_tool", "arguments": "{\\"city\\": \\"New York\\"}"}'
745
+ result = get_tool_call_from_text(text, "name", "arguments")
746
+ assert result.function.arguments == {"city": "New York"}
747
+
748
+ def test_get_tool_call_from_text_missing_args(self):
749
+ text = '{"name": "weather_tool"}'
750
+ result = get_tool_call_from_text(text, "name", "arguments")
751
+ assert result.function.arguments is None
752
+
753
+ def test_get_tool_call_from_text_custom_keys(self):
754
+ text = '{"tool": "weather_tool", "params": "New York"}'
755
+ result = get_tool_call_from_text(text, "tool", "params")
756
+ assert result.function.name == "weather_tool"
757
+ assert result.function.arguments == "New York"
758
+
759
+ def test_get_tool_call_from_text_numeric_args(self):
760
+ text = '{"name": "calculator", "arguments": 42}'
761
+ result = get_tool_call_from_text(text, "name", "arguments")
762
+ assert result.function.name == "calculator"
763
+ assert result.function.arguments == 42
tests/test_monitoring.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import unittest
17
+
18
+ import PIL.Image
19
+ import pytest
20
+
21
+ from smolagents import (
22
+ CodeAgent,
23
+ RunResult,
24
+ ToolCallingAgent,
25
+ stream_to_gradio,
26
+ )
27
+ from smolagents.models import (
28
+ ChatMessage,
29
+ ChatMessageToolCall,
30
+ ChatMessageToolCallFunction,
31
+ MessageRole,
32
+ Model,
33
+ TokenUsage,
34
+ )
35
+
36
+
37
+ class FakeLLMModel(Model):
38
+ def __init__(self, give_token_usage: bool = True):
39
+ self.give_token_usage = give_token_usage
40
+
41
+ def generate(self, prompt, tools_to_call_from=None, **kwargs):
42
+ if tools_to_call_from is not None:
43
+ return ChatMessage(
44
+ role=MessageRole.ASSISTANT,
45
+ content="",
46
+ tool_calls=[
47
+ ChatMessageToolCall(
48
+ id="fake_id",
49
+ type="function",
50
+ function=ChatMessageToolCallFunction(name="final_answer", arguments={"answer": "image"}),
51
+ )
52
+ ],
53
+ token_usage=TokenUsage(input_tokens=10, output_tokens=20) if self.give_token_usage else None,
54
+ )
55
+ else:
56
+ return ChatMessage(
57
+ role=MessageRole.ASSISTANT,
58
+ content="""<code>
59
+ final_answer('This is the final answer.')
60
+ </code>""",
61
+ token_usage=TokenUsage(input_tokens=10, output_tokens=20) if self.give_token_usage else None,
62
+ )
63
+
64
+
65
+ class MonitoringTester(unittest.TestCase):
66
+ def test_code_agent_metrics(self):
67
+ agent = CodeAgent(
68
+ tools=[],
69
+ model=FakeLLMModel(),
70
+ max_steps=1,
71
+ )
72
+ agent.run("Fake task")
73
+
74
+ self.assertEqual(agent.monitor.total_input_token_count, 10)
75
+ self.assertEqual(agent.monitor.total_output_token_count, 20)
76
+
77
+ def test_toolcalling_agent_metrics(self):
78
+ agent = ToolCallingAgent(
79
+ tools=[],
80
+ model=FakeLLMModel(),
81
+ max_steps=1,
82
+ )
83
+
84
+ agent.run("Fake task")
85
+
86
+ self.assertEqual(agent.monitor.total_input_token_count, 10)
87
+ self.assertEqual(agent.monitor.total_output_token_count, 20)
88
+
89
+ def test_code_agent_metrics_max_steps(self):
90
+ class FakeLLMModelMalformedAnswer(Model):
91
+ def generate(self, prompt, **kwargs):
92
+ return ChatMessage(
93
+ role=MessageRole.ASSISTANT,
94
+ content="Malformed answer",
95
+ token_usage=TokenUsage(input_tokens=10, output_tokens=20),
96
+ )
97
+
98
+ agent = CodeAgent(
99
+ tools=[],
100
+ model=FakeLLMModelMalformedAnswer(),
101
+ max_steps=1,
102
+ )
103
+
104
+ agent.run("Fake task")
105
+
106
+ self.assertEqual(agent.monitor.total_input_token_count, 20)
107
+ self.assertEqual(agent.monitor.total_output_token_count, 40)
108
+
109
+ def test_code_agent_metrics_generation_error(self):
110
+ class FakeLLMModelGenerationException(Model):
111
+ def generate(self, prompt, **kwargs):
112
+ raise Exception("Cannot generate")
113
+
114
+ agent = CodeAgent(
115
+ tools=[],
116
+ model=FakeLLMModelGenerationException(),
117
+ max_steps=1,
118
+ )
119
+ with pytest.raises(Exception) as e:
120
+ agent.run("Fake task")
121
+ assert "Cannot generate" in str(e.value)
122
+
123
+ def test_streaming_agent_text_output(self):
124
+ agent = CodeAgent(
125
+ tools=[],
126
+ model=FakeLLMModel(),
127
+ max_steps=1,
128
+ planning_interval=2,
129
+ )
130
+
131
+ # Use stream_to_gradio to capture the output
132
+ outputs = list(stream_to_gradio(agent, task="Test task"))
133
+
134
+ self.assertEqual(len(outputs), 11)
135
+ plan_message = outputs[1]
136
+ self.assertEqual(plan_message.role, "assistant")
137
+ self.assertIn("<code>", plan_message.content)
138
+ final_message = outputs[-1]
139
+ self.assertEqual(final_message.role, "assistant")
140
+ self.assertIn("This is the final answer.", final_message.content)
141
+
142
+ def test_streaming_agent_image_output(self):
143
+ agent = ToolCallingAgent(
144
+ tools=[],
145
+ model=FakeLLMModel(),
146
+ max_steps=1,
147
+ verbosity_level=100,
148
+ )
149
+
150
+ # Use stream_to_gradio to capture the output
151
+ outputs = list(
152
+ stream_to_gradio(
153
+ agent,
154
+ task="Test task",
155
+ additional_args=dict(image=PIL.Image.new("RGB", (100, 100))),
156
+ )
157
+ )
158
+
159
+ self.assertEqual(len(outputs), 7)
160
+ final_message = outputs[-1]
161
+ self.assertEqual(final_message.role, "assistant")
162
+ self.assertIsInstance(final_message.content, dict)
163
+ self.assertEqual(final_message.content["mime_type"], "image/png")
164
+
165
+ def test_streaming_with_agent_error(self):
166
+ class DummyModel(Model):
167
+ def generate(self, prompt, **kwargs):
168
+ return ChatMessage(role=MessageRole.ASSISTANT, content="Malformed call")
169
+
170
+ agent = CodeAgent(
171
+ tools=[],
172
+ model=DummyModel(),
173
+ max_steps=1,
174
+ )
175
+
176
+ # Use stream_to_gradio to capture the output
177
+ outputs = list(stream_to_gradio(agent, task="Test task"))
178
+
179
+ self.assertEqual(len(outputs), 11)
180
+ final_message = outputs[-1]
181
+ self.assertEqual(final_message.role, "assistant")
182
+ self.assertIn("Malformed call", final_message.content)
183
+
184
+ def test_run_return_full_result(self):
185
+ agent = CodeAgent(
186
+ tools=[],
187
+ model=FakeLLMModel(),
188
+ max_steps=1,
189
+ return_full_result=True,
190
+ )
191
+
192
+ result = agent.run("Fake task")
193
+
194
+ self.assertIsInstance(result, RunResult)
195
+ self.assertEqual(result.output, "This is the final answer.")
196
+ self.assertEqual(result.state, "success")
197
+ self.assertEqual(result.token_usage, TokenUsage(input_tokens=10, output_tokens=20))
198
+ self.assertIsInstance(result.messages, list)
199
+ self.assertGreater(result.timing.duration, 0)
200
+
201
+ agent = ToolCallingAgent(
202
+ tools=[],
203
+ model=FakeLLMModel(),
204
+ max_steps=1,
205
+ return_full_result=True,
206
+ )
207
+
208
+ result = agent.run("Fake task")
209
+
210
+ self.assertIsInstance(result, RunResult)
211
+ self.assertEqual(result.output, "image")
212
+ self.assertEqual(result.state, "success")
213
+ self.assertEqual(result.token_usage, TokenUsage(input_tokens=10, output_tokens=20))
214
+ self.assertIsInstance(result.messages, list)
215
+ self.assertGreater(result.timing.duration, 0)
216
+
217
+ # Below 2 lines should be removed when the attributes are removed
218
+ assert agent.monitor.total_input_token_count == 10
219
+ assert agent.monitor.total_output_token_count == 20
220
+
221
+ def test_run_result_no_token_usage(self):
222
+ agent = CodeAgent(
223
+ tools=[],
224
+ model=FakeLLMModel(give_token_usage=False),
225
+ max_steps=1,
226
+ return_full_result=True,
227
+ )
228
+
229
+ result = agent.run("Fake task")
230
+
231
+ self.assertIsInstance(result, RunResult)
232
+ self.assertEqual(result.output, "This is the final answer.")
233
+ self.assertEqual(result.state, "success")
234
+ self.assertIsNone(result.token_usage)
235
+ self.assertIsInstance(result.messages, list)
236
+ self.assertGreater(result.timing.duration, 0)
237
+
238
+ agent = ToolCallingAgent(
239
+ tools=[],
240
+ model=FakeLLMModel(give_token_usage=False),
241
+ max_steps=1,
242
+ return_full_result=True,
243
+ )
244
+
245
+ result = agent.run("Fake task")
246
+
247
+ self.assertIsInstance(result, RunResult)
248
+ self.assertEqual(result.output, "image")
249
+ self.assertEqual(result.state, "success")
250
+ self.assertIsNone(result.token_usage)
251
+ self.assertIsInstance(result.messages, list)
252
+ self.assertGreater(result.timing.duration, 0)
tests/test_remote_executors.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from textwrap import dedent
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ import docker
6
+ import PIL.Image
7
+ import pytest
8
+ from rich.console import Console
9
+
10
+ from smolagents.default_tools import FinalAnswerTool, WikipediaSearchTool
11
+ from smolagents.monitoring import AgentLogger, LogLevel
12
+ from smolagents.remote_executors import DockerExecutor, E2BExecutor, RemotePythonExecutor
13
+ from smolagents.utils import AgentError
14
+
15
+ from .utils.markers import require_run_all
16
+
17
+
18
+ class TestRemotePythonExecutor:
19
+ def test_send_tools_empty_tools(self):
20
+ executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
21
+ executor.run_code_raise_errors = MagicMock()
22
+ executor.send_tools({})
23
+ assert executor.run_code_raise_errors.call_count == 1
24
+ # No new packages should be installed
25
+ assert "!pip install" not in executor.run_code_raise_errors.call_args.args[0]
26
+
27
+ @require_run_all
28
+ def test_send_tools_with_default_wikipedia_search_tool(self):
29
+ tool = WikipediaSearchTool()
30
+ executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock())
31
+ executor.run_code_raise_errors = MagicMock()
32
+ executor.run_code_raise_errors.return_value = (None, "", False)
33
+ executor.send_tools({"wikipedia_search": tool})
34
+ assert executor.run_code_raise_errors.call_count == 2
35
+ assert "!pip install wikipedia-api" == executor.run_code_raise_errors.call_args_list[0].args[0]
36
+ assert "class WikipediaSearchTool(Tool)" in executor.run_code_raise_errors.call_args_list[1].args[0]
37
+
38
+
39
+ class TestE2BExecutorUnit:
40
+ def test_e2b_executor_instantiation(self):
41
+ logger = MagicMock()
42
+ with patch("e2b_code_interpreter.Sandbox") as mock_sandbox:
43
+ mock_sandbox.return_value.commands.run.return_value.error = None
44
+ mock_sandbox.return_value.run_code.return_value.error = None
45
+ executor = E2BExecutor(
46
+ additional_imports=[], logger=logger, api_key="dummy-api-key", template="dummy-template-id", timeout=60
47
+ )
48
+ assert isinstance(executor, E2BExecutor)
49
+ assert executor.logger == logger
50
+ assert executor.sandbox == mock_sandbox.return_value
51
+ assert mock_sandbox.call_count == 1
52
+ assert mock_sandbox.call_args.kwargs == {
53
+ "api_key": "dummy-api-key",
54
+ "template": "dummy-template-id",
55
+ "timeout": 60,
56
+ }
57
+
58
+ def test_cleanup(self):
59
+ """Test that the cleanup method properly shuts down the sandbox"""
60
+ logger = MagicMock()
61
+ with patch("e2b_code_interpreter.Sandbox") as mock_sandbox:
62
+ # Setup mock
63
+ mock_sandbox.return_value.kill = MagicMock()
64
+
65
+ # Create executor
66
+ executor = E2BExecutor(additional_imports=[], logger=logger, api_key="dummy-api-key")
67
+
68
+ # Call cleanup
69
+ executor.cleanup()
70
+
71
+ # Verify sandbox was killed
72
+ mock_sandbox.return_value.kill.assert_called_once()
73
+ assert logger.log.call_count >= 2 # Should log start and completion messages
74
+
75
+
76
+ @pytest.fixture
77
+ def e2b_executor():
78
+ executor = E2BExecutor(
79
+ additional_imports=["pillow", "numpy"],
80
+ logger=AgentLogger(LogLevel.INFO, Console(force_terminal=False, file=io.StringIO())),
81
+ )
82
+ yield executor
83
+ executor.cleanup()
84
+
85
+
86
+ @require_run_all
87
+ class TestE2BExecutorIntegration:
88
+ @pytest.fixture(autouse=True)
89
+ def set_executor(self, e2b_executor):
90
+ self.executor = e2b_executor
91
+
92
+ @pytest.mark.parametrize(
93
+ "code_action, expected_result",
94
+ [
95
+ (
96
+ dedent('''
97
+ final_answer("""This is
98
+ a multiline
99
+ final answer""")
100
+ '''),
101
+ "This is\na multiline\nfinal answer",
102
+ ),
103
+ (
104
+ dedent("""
105
+ text = '''Text containing
106
+ final_answer(5)
107
+ '''
108
+ final_answer(text)
109
+ """),
110
+ "Text containing\nfinal_answer(5)\n",
111
+ ),
112
+ (
113
+ dedent("""
114
+ num = 2
115
+ if num == 1:
116
+ final_answer("One")
117
+ elif num == 2:
118
+ final_answer("Two")
119
+ """),
120
+ "Two",
121
+ ),
122
+ ],
123
+ )
124
+ def test_final_answer_patterns(self, code_action, expected_result):
125
+ self.executor.send_tools({"final_answer": FinalAnswerTool()})
126
+ result, logs, final_answer = self.executor(code_action)
127
+ assert final_answer is True
128
+ assert result == expected_result
129
+
130
+ def test_custom_final_answer(self):
131
+ class CustomFinalAnswerTool(FinalAnswerTool):
132
+ def forward(self, answer: str) -> str:
133
+ return "CUSTOM" + answer
134
+
135
+ self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
136
+ code_action = dedent("""
137
+ final_answer(answer="_answer")
138
+ """)
139
+ result, logs, final_answer = self.executor(code_action)
140
+ assert final_answer is True
141
+ assert result == "CUSTOM_answer"
142
+
143
+ def test_custom_final_answer_with_custom_inputs(self):
144
+ class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
145
+ inputs = {
146
+ "answer1": {"type": "string", "description": "First part of the answer."},
147
+ "answer2": {"type": "string", "description": "Second part of the answer."},
148
+ }
149
+
150
+ def forward(self, answer1: str, answer2: str) -> str:
151
+ return answer1 + "CUSTOM" + answer2
152
+
153
+ self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
154
+ code_action = dedent("""
155
+ final_answer(
156
+ answer1="answer1_",
157
+ answer2="_answer2"
158
+ )
159
+ """)
160
+ result, logs, final_answer = self.executor(code_action)
161
+ assert final_answer is True
162
+ assert result == "answer1_CUSTOM_answer2"
163
+
164
+
165
+ @pytest.fixture
166
+ def docker_executor():
167
+ executor = DockerExecutor(
168
+ additional_imports=["pillow", "numpy"],
169
+ logger=AgentLogger(LogLevel.INFO, Console(force_terminal=False, file=io.StringIO())),
170
+ )
171
+ yield executor
172
+ executor.delete()
173
+
174
+
175
@require_run_all
class TestDockerExecutorIntegration:
    """End-to-end tests that execute real code snippets inside a Docker-backed executor."""

    @pytest.fixture(autouse=True)
    def set_executor(self, docker_executor):
        # Bind the module-level fixture to the instance so tests can use self.executor.
        self.executor = docker_executor

    def test_initialization(self):
        """Check if DockerExecutor initializes without errors"""
        assert self.executor.container is not None, "Container should be initialized"

    def test_state_persistence(self):
        """Test that variables and imports from one snippet persist in the next"""
        code_action = "import numpy as np; a = 2"
        self.executor(code_action)

        # The second snippet reuses both the import and the variable from the first.
        code_action = "print(np.sqrt(a))"
        result, logs, final_answer = self.executor(code_action)
        assert "1.41421" in logs

    def test_execute_output(self):
        """Test execution that returns a string"""
        code_action = 'final_answer("This is the final answer")'
        result, logs, final_answer = self.executor(code_action)
        assert result == "This is the final answer", "Result should be 'This is the final answer'"

    def test_execute_multiline_output(self):
        """Test a multi-line execution whose final_answer call returns a string"""
        code_action = 'result = "This is the final answer"\nfinal_answer(result)'
        result, logs, final_answer = self.executor(code_action)
        assert result == "This is the final answer", "Result should be 'This is the final answer'"

    def test_execute_image_output(self):
        """Test execution that returns a base64 image"""
        code_action = dedent("""
            import base64
            from PIL import Image
            from io import BytesIO
            image = Image.new("RGB", (10, 10), (255, 0, 0))
            final_answer(image)
        """)
        result, logs, final_answer = self.executor(code_action)
        # The image produced inside the container must round-trip back as a PIL Image.
        assert isinstance(result, PIL.Image.Image), "Result should be a PIL Image"

    def test_syntax_error_handling(self):
        """Test handling of syntax errors"""
        code_action = 'print("Missing Parenthesis'  # Syntax error
        with pytest.raises(AgentError) as exception_info:
            self.executor(code_action)
        assert "SyntaxError" in str(exception_info.value), "Should raise a syntax error"

    def test_cleanup_on_deletion(self):
        """Test if Docker container stops and removes on deletion"""
        container_id = self.executor.container.id
        self.executor.delete()  # Trigger cleanup

        client = docker.from_env()
        containers = [c.id for c in client.containers.list(all=True)]
        assert container_id not in containers, "Container should be removed"

    @pytest.mark.parametrize(
        "code_action, expected_result",
        [
            # final_answer with a multiline triple-quoted argument.
            (
                dedent('''
                    final_answer("""This is
                    a multiline
                    final answer""")
                '''),
                "This is\na multiline\nfinal answer",
            ),
            # The literal text "final_answer(5)" inside a string must not be executed.
            (
                dedent("""
                    text = '''Text containing
                    final_answer(5)
                    '''
                    final_answer(text)
                """),
                "Text containing\nfinal_answer(5)\n",
            ),
            # final_answer reached through branching control flow.
            (
                dedent("""
                    num = 2
                    if num == 1:
                        final_answer("One")
                    elif num == 2:
                        final_answer("Two")
                """),
                "Two",
            ),
        ],
    )
    def test_final_answer_patterns(self, code_action, expected_result):
        # Each snippet exercises a different way final_answer can appear in executed code.
        self.executor.send_tools({"final_answer": FinalAnswerTool()})
        result, logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == expected_result

    def test_custom_final_answer(self):
        # A FinalAnswerTool subclass overriding forward() must be used by the executor.
        class CustomFinalAnswerTool(FinalAnswerTool):
            def forward(self, answer: str) -> str:
                return "CUSTOM" + answer

        self.executor.send_tools({"final_answer": CustomFinalAnswerTool()})
        code_action = dedent("""
            final_answer(answer="_answer")
        """)
        result, logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == "CUSTOM_answer"

    def test_custom_final_answer_with_custom_inputs(self):
        # A FinalAnswerTool subclass with a custom input schema must receive both kwargs.
        class CustomFinalAnswerToolWithCustomInputs(FinalAnswerTool):
            inputs = {
                "answer1": {"type": "string", "description": "First part of the answer."},
                "answer2": {"type": "string", "description": "Second part of the answer."},
            }

            def forward(self, answer1: str, answer2: str) -> str:
                return answer1 + "CUSTOM" + answer2

        self.executor.send_tools({"final_answer": CustomFinalAnswerToolWithCustomInputs()})
        code_action = dedent("""
            final_answer(
                answer1="answer1_",
                answer2="_answer2"
            )
        """)
        result, logs, final_answer = self.executor(code_action)
        assert final_answer is True
        assert result == "answer1_CUSTOM_answer2"
305
+
306
+
307
class TestDockerExecutorUnit:
    """Unit tests for DockerExecutor using mocked Docker/HTTP/websocket layers (no real container)."""

    def test_cleanup(self):
        """Test that cleanup properly stops and removes the container"""
        logger = MagicMock()
        with (
            patch("docker.from_env") as mock_docker_client,
            patch("requests.post") as mock_post,
            patch("websocket.create_connection"),
        ):
            # Setup mocks: a fake running container returned by containers.run.
            mock_container = MagicMock()
            mock_container.status = "running"
            mock_container.short_id = "test123"

            mock_docker_client.return_value.containers.run.return_value = mock_container
            mock_docker_client.return_value.images.get.return_value = MagicMock()

            # HTTP mock: kernel creation responds 201 with a kernel id.
            mock_post.return_value.status_code = 201
            mock_post.return_value.json.return_value = {"id": "test-kernel-id"}

            # Create executor (mocks above must be in place before construction).
            executor = DockerExecutor(additional_imports=[], logger=logger, build_new_image=False)

            # Call cleanup
            executor.cleanup()

            # Verify container was stopped and removed
            mock_container.stop.assert_called_once()
            mock_container.remove.assert_called_once()
tests/test_search.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+
17
+ from smolagents import DuckDuckGoSearchTool
18
+
19
+ from .test_tools import ToolTesterMixin
20
+ from .utils.markers import require_run_all
21
+
22
+
23
class TestDuckDuckGoSearchTool(ToolTesterMixin):
    """Tests for DuckDuckGoSearchTool; network-dependent tests are gated behind require_run_all."""

    def setup_method(self):
        self.tool = DuckDuckGoSearchTool()
        self.tool.setup()

    @require_run_all
    def test_exact_match_arg(self):
        # A plain positional query must return a string result.
        result = self.tool("Agents")
        assert isinstance(result, str)

    @require_run_all
    def test_agent_type_output(self, create_inputs):
        # Fix: the mixin's test_agent_type_output requires the `create_inputs` fixture
        # argument; calling super() without it raised TypeError. The fixture is
        # inherited from ToolTesterMixin, so pytest injects it here and we forward it.
        super().test_agent_type_output(create_inputs)
tests/test_tool_validation.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ from textwrap import dedent
3
+
4
+ import pytest
5
+
6
+ from smolagents.default_tools import (
7
+ DuckDuckGoSearchTool,
8
+ GoogleSearchTool,
9
+ SpeechToTextTool,
10
+ VisitWebpageTool,
11
+ WebSearchTool,
12
+ )
13
+ from smolagents.tool_validation import MethodChecker, validate_tool_attributes
14
+ from smolagents.tools import Tool, tool
15
+
16
+
17
+ UNDEFINED_VARIABLE = "undefined_variable"
18
+
19
+
20
@pytest.mark.parametrize(
    "tool_class", [DuckDuckGoSearchTool, GoogleSearchTool, SpeechToTextTool, VisitWebpageTool, WebSearchTool]
)
def test_validate_tool_attributes_with_default_tools(tool_class):
    # Every bundled default tool must pass attribute validation (None means success).
    assert validate_tool_attributes(tool_class) is None, f"failed for {tool_class.name} tool"
25
+
26
+
27
class ValidTool(Tool):
    # Fixture: a fully compliant Tool. Note validate_tool_attributes inspects this
    # class's source, so its code must stay exactly as written.
    name = "valid_tool"
    description = "A valid tool"
    inputs = {"input": {"type": "string", "description": "input"}}
    output_type = "string"
    simple_attr = "string"  # literal class attribute — allowed by validation
    dict_attr = {"key": "value"}  # literal container — also allowed

    def __init__(self, optional_param="default"):
        # All __init__ parameters have literal defaults, as validation requires.
        super().__init__()
        self.param = optional_param

    def forward(self, input: str) -> str:
        return input.upper()
41
+
42
+
43
@tool
def valid_tool_function(input: str) -> str:
    """A valid tool function.

    Args:
        input (str): Input string.
    """
    # NOTE: the docstring above is parsed by @tool (description + per-arg docs);
    # it is part of the fixture's behavior, not free-form documentation.
    return input.upper()
51
+
52
+
53
@pytest.mark.parametrize("tool_class", [ValidTool, valid_tool_function.__class__])
def test_validate_tool_attributes_valid(tool_class):
    # Both a hand-written Tool subclass and a @tool-generated class must validate cleanly.
    assert validate_tool_attributes(tool_class) is None
56
+
57
+
58
class InvalidToolName(Tool):
    # Fixture: `name` contains a space, so it is not a valid Python identifier
    # and must be rejected by validate_tool_attributes.
    name = "invalid tool name"
    description = "Tool with invalid name"
    inputs = {"input": {"type": "string", "description": "input"}}
    output_type = "string"

    def __init__(self):
        super().__init__()

    def forward(self, input: str) -> str:
        return input
69
+
70
+
71
class InvalidToolComplexAttrs(Tool):
    # Fixture: a non-literal (computed) class attribute must be rejected —
    # complex values belong in __init__, not at class level.
    name = "invalid_tool"
    description = "Tool with complex class attributes"
    inputs = {"input": {"type": "string", "description": "input"}}
    output_type = "string"
    complex_attr = [x for x in range(3)]  # Complex class attribute

    def __init__(self):
        super().__init__()

    def forward(self, input: str) -> str:
        return input
83
+
84
+
85
class InvalidToolRequiredParams(Tool):
    # Fixture: __init__ has a parameter without a default value, which validation forbids.
    name = "invalid_tool"
    description = "Tool with required params"
    inputs = {"input": {"type": "string", "description": "input"}}
    output_type = "string"

    def __init__(self, required_param, kwarg1=1):  # No default value
        super().__init__()
        self.param = required_param

    def forward(self, input: str) -> str:
        return input
97
+
98
+
99
class InvalidToolNonLiteralDefaultParam(Tool):
    # Fixture: the __init__ default is a name (not a literal), which validation forbids.
    name = "invalid_tool"
    description = "Tool with non-literal default parameter value"
    inputs = {"input": {"type": "string", "description": "input"}}
    output_type = "string"

    def __init__(self, default_param=UNDEFINED_VARIABLE):  # UNDEFINED_VARIABLE as default is non-literal
        super().__init__()
        self.default_param = default_param

    def forward(self, input: str) -> str:
        return input
111
+
112
+
113
class InvalidToolUndefinedNames(Tool):
    # Fixture: forward() references a name defined outside the class source,
    # which the validator must report as undefined.
    name = "invalid_tool"
    description = "Tool with undefined names"
    inputs = {"input": {"type": "string", "description": "input"}}
    output_type = "string"

    def forward(self, input: str) -> str:
        return UNDEFINED_VARIABLE  # Undefined name
121
+
122
+
123
@pytest.mark.parametrize(
    "tool_class, expected_error",
    [
        (
            InvalidToolName,
            "Class attribute 'name' must be a valid Python identifier and not a reserved keyword, found 'invalid tool name'",
        ),
        (InvalidToolComplexAttrs, "Complex attributes should be defined in __init__, not as class attributes"),
        (InvalidToolRequiredParams, "Parameters in __init__ must have default values, found required parameters"),
        (
            InvalidToolNonLiteralDefaultParam,
            "Parameters in __init__ must have literal default values, found non-literal defaults",
        ),
        (InvalidToolUndefinedNames, "Name 'UNDEFINED_VARIABLE' is undefined"),
    ],
)
def test_validate_tool_attributes_exceptions(tool_class, expected_error):
    # Each invalid fixture must raise ValueError with its specific message
    # (expected_error is used as a pytest.raises match pattern).
    with pytest.raises(ValueError, match=expected_error):
        validate_tool_attributes(tool_class)
142
+
143
+
144
class MultipleAssignmentsTool(Tool):
    # Fixture: forward() uses tuple unpacking, which source validation must accept.
    name = "multiple_assignments_tool"
    description = "Tool with multiple assignments"
    inputs = {"input": {"type": "string", "description": "input"}}
    output_type = "string"

    def __init__(self):
        super().__init__()

    def forward(self, input: str) -> str:
        a, b = "1", "2"
        return a + b
156
+
157
+
158
def test_validate_tool_attributes_multiple_assignments():
    """Tuple-unpacking assignments inside forward() must not trip validation.

    Consistency fix: assert the explicit ``is None`` success contract, like the
    other validate_tool_attributes tests, instead of discarding the return value.
    """
    assert validate_tool_attributes(MultipleAssignmentsTool) is None
160
+
161
+
162
@tool
def tool_function_with_multiple_assignments(input: str) -> str:
    """A valid tool function.

    Args:
        input (str): Input string.
    """
    # Tuple unpacking here is the behavior under test; the docstring above is
    # parsed by @tool and must stay as-is.
    a, b = "1", "2"
    return input.upper() + a + b
171
+
172
+
173
@pytest.mark.parametrize("tool_instance", [MultipleAssignmentsTool(), tool_function_with_multiple_assignments])
def test_tool_to_dict_validation_with_multiple_assignments(tool_instance):
    # to_dict() re-validates the tool's source; it must not raise on tuple unpacking.
    tool_instance.to_dict()
176
+
177
+
178
class TestMethodChecker:
    """Unit tests for the MethodChecker AST visitor."""

    def test_multiple_assignments(self):
        """Tuple unpacking (`a, b = ...`) must not be reported as an error."""
        code = dedent(
            """
            def forward(self) -> str:
                a, b = "1", "2"
                return a + b
            """
        )
        checker = MethodChecker(set())
        tree = ast.parse(code)
        checker.visit(tree)
        assert checker.errors == []
tests/test_tools.py ADDED
@@ -0,0 +1,731 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import inspect
16
+ import os
17
+ from textwrap import dedent
18
+ from typing import Any, Literal
19
+ from unittest.mock import MagicMock, patch
20
+
21
+ import mcp
22
+ import numpy as np
23
+ import PIL.Image
24
+ import pytest
25
+
26
+ from smolagents.agent_types import _AGENT_TYPE_MAPPING
27
+ from smolagents.tools import AUTHORIZED_TYPES, Tool, ToolCollection, launch_gradio_demo, tool, validate_tool_arguments
28
+
29
+ from .utils.markers import require_run_all
30
+
31
+
32
class ToolTesterMixin:
    """Reusable checks for Tool subclasses; concrete test classes set self.tool in setup."""

    def test_inputs_output(self):
        """The tool must declare well-formed `inputs` and an authorized `output_type`."""
        assert hasattr(self.tool, "inputs")
        assert hasattr(self.tool, "output_type")

        inputs = self.tool.inputs
        assert isinstance(inputs, dict)

        # Every input spec needs a description and an authorized type.
        for input_spec in inputs.values():
            assert "type" in input_spec
            assert "description" in input_spec
            assert input_spec["type"] in AUTHORIZED_TYPES
            assert isinstance(input_spec["description"], str)

        assert self.tool.output_type in AUTHORIZED_TYPES

    def test_common_attributes(self):
        """Every tool exposes the four standard class attributes."""
        for attribute in ("description", "name", "inputs", "output_type"):
            assert hasattr(self.tool, attribute)

    def test_agent_type_output(self, create_inputs):
        """Sanitized tool output is wrapped in the agent type matching output_type."""
        tool_inputs = create_inputs(self.tool.inputs)
        output = self.tool(**tool_inputs, sanitize_inputs_outputs=True)
        if self.tool.output_type != "any":
            expected_type = _AGENT_TYPE_MAPPING[self.tool.output_type]
            assert isinstance(output, expected_type)

    @pytest.fixture
    def create_inputs(self, shared_datadir):
        """Factory fixture building dummy values matching a tool's declared input types."""

        def _create_inputs(tool_inputs: dict[str, dict[str | type, str]]) -> dict[str, Any]:
            inputs = {}
            for input_name, input_desc in tool_inputs.items():
                input_type = input_desc["type"]
                if input_type == "string":
                    inputs[input_name] = "Text input"
                elif input_type == "image":
                    # Small fixed-size image from the shared test data directory.
                    inputs[input_name] = PIL.Image.open(shared_datadir / "000000039769.png").resize((512, 512))
                elif input_type == "audio":
                    inputs[input_name] = np.ones(3000)
                else:
                    raise ValueError(f"Invalid type requested: {input_type}")
            return inputs

        return _create_inputs
82
+
83
+
84
class TestTool:
    """Tests for the Tool base class and the @tool decorator."""

    def test_tool_init_with_decorator(self):
        # The declared `-> float` return annotation drives output_type == "number".
        @tool
        def coolfunc(a: str, b: int) -> float:
            """Cool function

            Args:
                a: The first argument
                b: The second one
            """
            return b + 2, a

        assert coolfunc.output_type == "number"

    def test_tool_init_vanilla(self):
        # Classic subclassing path: attributes declared directly on the class.
        class HFModelDownloadsTool(Tool):
            name = "model_download_counter"
            description = """
            This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub.
            It returns the name of the checkpoint."""

            inputs = {
                "task": {
                    "type": "string",
                    "description": "the task category (such as text-classification, depth-estimation, etc)",
                }
            }
            output_type = "string"

            def forward(self, task: str) -> str:
                return "best model"

        tool = HFModelDownloadsTool()
        assert list(tool.inputs.keys())[0] == "task"

    def test_tool_init_decorator_raises_issues(self):
        # Missing return annotation must fail at decoration time.
        with pytest.raises(Exception) as e:

            @tool
            def coolfunc(a: str, b: int):
                """Cool function

                Args:
                    a: The first argument
                    b: The second one
                """
                return a + b

            # (unreached: the decorator raises at definition time)
            assert coolfunc.output_type == "number"
        assert "Tool return type not found" in str(e)

        # Missing docstring entry for an argument must also fail.
        with pytest.raises(Exception) as e:

            @tool
            def coolfunc(a: str, b: int) -> int:
                """Cool function

                Args:
                    a: The first argument
                """
                return b + a

            # (unreached: the decorator raises at definition time)
            assert coolfunc.output_type == "number"
        assert "docstring has no description for the argument" in str(e)

    def test_saving_tool_raises_error_imports_outside_function(self, tmp_path):
        # A tool whose body references a module imported outside it cannot be saved.
        with pytest.raises(Exception) as e:
            import numpy as np

            @tool
            def get_current_time() -> str:
                """
                Gets the current time.
                """
                return str(np.random.random())

            get_current_time.save(tmp_path)

        assert "np" in str(e)

        # Also test with classic definition
        with pytest.raises(Exception) as e:

            class GetCurrentTimeTool(Tool):
                name = "get_current_time_tool"
                description = "Gets the current time"
                inputs = {}
                output_type = "string"

                def forward(self):
                    return str(np.random.random())

            get_current_time = GetCurrentTimeTool()
            get_current_time.save(tmp_path)

        assert "np" in str(e)

    def test_tool_definition_raises_no_error_imports_in_function(self):
        # Imports placed inside the function/method body are self-contained and allowed.
        @tool
        def get_current_time() -> str:
            """
            Gets the current time.
            """
            from datetime import datetime

            return str(datetime.now())

        class GetCurrentTimeTool(Tool):
            name = "get_current_time_tool"
            description = "Gets the current time"
            inputs = {}
            output_type = "string"

            def forward(self):
                from datetime import datetime

                return str(datetime.now())

    def test_tool_to_dict_allows_no_arg_in_init(self):
        """Test that a tool cannot be saved with required args in init"""

        class FailTool(Tool):
            name = "specific"
            description = "test description"
            inputs = {"string_input": {"type": "string", "description": "input description"}}
            output_type = "string"

            def __init__(self, url):
                # NOTE(review): `self` is passed as an argument to Tool.__init__ here —
                # looks unintentional; confirm against Tool's signature upstream.
                super().__init__(self)
                self.url = url

            def forward(self, string_input: str) -> str:
                return self.url + string_input

        fail_tool = FailTool("dummy_url")
        with pytest.raises(Exception) as e:
            fail_tool.to_dict()
        assert "Parameters in __init__ must have default values, found required parameters" in str(e)

        class PassTool(Tool):
            name = "specific"
            description = "test description"
            inputs = {"string_input": {"type": "string", "description": "input description"}}
            output_type = "string"

            def __init__(self, url: str | None = "none"):
                super().__init__(self)
                self.url = url

            def forward(self, string_input: str) -> str:
                return self.url + string_input

        fail_tool = PassTool()
        fail_tool.to_dict()

    def test_saving_tool_allows_no_imports_from_outside_methods(self, tmp_path):
        # Test that using imports from outside functions fails
        import numpy as np

        class FailTool(Tool):
            name = "specific"
            description = "test description"
            inputs = {"string_input": {"type": "string", "description": "input description"}}
            output_type = "string"

            def useless_method(self):
                self.client = np.random.random()
                return ""

            def forward(self, string_input):
                return self.useless_method() + string_input

        fail_tool = FailTool()
        with pytest.raises(Exception) as e:
            fail_tool.save(tmp_path)
        assert "'np' is undefined" in str(e)

        # Test that putting these imports inside functions works
        class SuccessTool(Tool):
            name = "specific"
            description = "test description"
            inputs = {"string_input": {"type": "string", "description": "input description"}}
            output_type = "string"

            def useless_method(self):
                import numpy as np

                self.client = np.random.random()
                return ""

            def forward(self, string_input):
                return self.useless_method() + string_input

        success_tool = SuccessTool()
        success_tool.save(tmp_path)

    def test_tool_missing_class_attributes_raises_error(self):
        # The class below deliberately omits output_type.
        with pytest.raises(Exception) as e:

            class GetWeatherTool(Tool):
                name = "get_weather"
                description = "Get weather in the next days at given location."
                inputs = {
                    "location": {"type": "string", "description": "the location"},
                    "celsius": {
                        "type": "string",
                        "description": "the temperature type",
                    },
                }

                def forward(self, location: str, celsius: bool | None = False) -> str:
                    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"

            GetWeatherTool()
        assert "You must set an attribute output_type" in str(e)

    def test_tool_from_decorator_optional_args(self):
        # A parameter with a default becomes nullable; one without does not.
        @tool
        def get_weather(location: str, celsius: bool | None = False) -> str:
            """
            Get weather in the next days at given location.
            Secretly this tool does not care about the location, it hates the weather everywhere.

            Args:
                location: the location
                celsius: the temperature type
            """
            return "The weather is UNGODLY with torrential rains and temperatures below -10°C"

        assert "nullable" in get_weather.inputs["celsius"]
        assert get_weather.inputs["celsius"]["nullable"]
        assert "nullable" not in get_weather.inputs["location"]

    def test_tool_mismatching_nullable_args_raises_error(self):
        # Case 1: forward() default/Optional not reflected as nullable in inputs.
        with pytest.raises(Exception) as e:

            class GetWeatherTool(Tool):
                name = "get_weather"
                description = "Get weather in the next days at given location."
                inputs = {
                    "location": {"type": "string", "description": "the location"},
                    "celsius": {
                        "type": "string",
                        "description": "the temperature type",
                    },
                }
                output_type = "string"

                def forward(self, location: str, celsius: bool | None = False) -> str:
                    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"

            GetWeatherTool()
        assert "Nullable" in str(e)

        # Case 2: plain default value without nullable declaration.
        with pytest.raises(Exception) as e:

            class GetWeatherTool2(Tool):
                name = "get_weather"
                description = "Get weather in the next days at given location."
                inputs = {
                    "location": {"type": "string", "description": "the location"},
                    "celsius": {
                        "type": "string",
                        "description": "the temperature type",
                    },
                }
                output_type = "string"

                def forward(self, location: str, celsius: bool = False) -> str:
                    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"

            GetWeatherTool2()
        assert "Nullable" in str(e)

        # Case 3: inputs say nullable but forward() takes a required parameter.
        with pytest.raises(Exception) as e:

            class GetWeatherTool3(Tool):
                name = "get_weather"
                description = "Get weather in the next days at given location."
                inputs = {
                    "location": {"type": "string", "description": "the location"},
                    "celsius": {
                        "type": "string",
                        "description": "the temperature type",
                        "nullable": True,
                    },
                }
                output_type = "string"

                def forward(self, location, celsius: str) -> str:
                    return "The weather is UNGODLY with torrential rains and temperatures below -10°C"

            GetWeatherTool3()
        assert "Nullable" in str(e)

    def test_tool_default_parameters_is_nullable(self):
        @tool
        def get_weather(location: str, celsius: bool = False) -> str:
            """
            Get weather in the next days at given location.

            Args:
                location: The location to get the weather for.
                celsius: is the temperature given in celsius?
            """
            return "The weather is UNGODLY with torrential rains and temperatures below -10°C"

        assert get_weather.inputs["celsius"]["nullable"]

    def test_tool_supports_any_none(self, tmp_path):
        # `Any` maps to input type "any"; `None` return maps to output_type "null".
        @tool
        def get_weather(location: Any) -> None:
            """
            Get weather in the next days at given location.

            Args:
                location: The location to get the weather for.
            """
            return

        get_weather.save(tmp_path)
        assert get_weather.inputs["location"]["type"] == "any"
        assert get_weather.output_type == "null"

    def test_tool_supports_array(self):
        # Both list[...] and tuple[...] annotations map to input type "array".
        @tool
        def get_weather(locations: list[str], months: tuple[str, str] | None = None) -> dict[str, float]:
            """
            Get weather in the next days at given locations.

            Args:
                locations: The locations to get the weather for.
                months: The months to get the weather for
            """
            return

        assert get_weather.inputs["locations"]["type"] == "array"
        assert get_weather.inputs["months"]["type"] == "array"

    def test_tool_supports_string_literal(self):
        # Literal[...] of strings maps to type "string" with an enum of the options.
        @tool
        def get_weather(unit: Literal["celsius", "fahrenheit"] = "celsius") -> None:
            """
            Get weather in the next days at given location.

            Args:
                unit: The unit of temperature
            """
            return

        assert get_weather.inputs["unit"]["type"] == "string"
        assert get_weather.inputs["unit"]["enum"] == ["celsius", "fahrenheit"]

    def test_tool_supports_numeric_literal(self):
        # Literal[...] of ints maps to type "integer" with an enum of the options.
        @tool
        def get_choice(choice: Literal[1, 2, 3]) -> None:
            """
            Get choice based on the provided numeric literal.

            Args:
                choice: The numeric choice to be made.
            """
            return

        assert get_choice.inputs["choice"]["type"] == "integer"
        assert get_choice.inputs["choice"]["enum"] == [1, 2, 3]

    def test_tool_supports_nullable_literal(self):
        # A None member in the Literal makes the input nullable; None is dropped from the enum.
        @tool
        def get_choice(choice: Literal[1, 2, 3, None]) -> None:
            """
            Get choice based on the provided value.

            Args:
                choice: The numeric choice to be made.
            """
            return

        assert get_choice.inputs["choice"]["type"] == "integer"
        assert get_choice.inputs["choice"]["nullable"] is True
        assert get_choice.inputs["choice"]["enum"] == [1, 2, 3]

    # (sic: "pyhon" typo kept — renaming would change the collected test id)
    def test_saving_tool_produces_valid_pyhon_code_with_multiline_description(self, tmp_path):
        @tool
        def get_weather(location: Any) -> None:
            """
            Get weather in the next days at given location.
            And works pretty well.

            Args:
                location: The location to get the weather for.
            """
            return

        get_weather.save(tmp_path)
        # The generated tool.py must be syntactically valid Python.
        with open(os.path.join(tmp_path, "tool.py"), "r", encoding="utf-8") as f:
            source_code = f.read()
            compile(source_code, f.name, "exec")

    @pytest.mark.parametrize("fixture_name", ["boolean_default_tool_class", "boolean_default_tool_function"])
    def test_to_dict_boolean_default_input(self, fixture_name, request):
        """Test that boolean input parameter with default value is correctly represented in to_dict output"""
        tool = request.getfixturevalue(fixture_name)
        result = tool.to_dict()
        # Check that the boolean default annotation is preserved
        assert "flag: bool = False" in result["code"]
        # Check nullable attribute is set for the parameter with default value
        assert "'nullable': True" in result["code"]

    @pytest.mark.parametrize("fixture_name", ["optional_input_tool_class", "optional_input_tool_function"])
    def test_to_dict_optional_input(self, fixture_name, request):
        """Test that Optional/nullable input parameter is correctly represented in to_dict output"""
        tool = request.getfixturevalue(fixture_name)
        result = tool.to_dict()
        # Check the Optional type annotation is preserved
        assert "optional_text: str | None = None" in result["code"]
        # Check that the input is marked as nullable in the code
        assert "'nullable': True" in result["code"]

    def test_from_dict_roundtrip(self, example_tool):
        # Convert to dict
        tool_dict = example_tool.to_dict()
        # Create from dict
        recreated_tool = Tool.from_dict(tool_dict)
        # Verify properties
        assert recreated_tool.name == example_tool.name
        assert recreated_tool.description == example_tool.description
        assert recreated_tool.inputs == example_tool.inputs
        assert recreated_tool.output_type == example_tool.output_type
        # Verify functionality
        test_input = "Hello, world!"
        assert recreated_tool(test_input) == test_input.upper()

    def test_tool_from_dict_invalid(self):
        # Missing code key
        with pytest.raises(ValueError) as e:
            Tool.from_dict({"name": "invalid_tool"})
        assert "must contain 'code' key" in str(e)

    def test_tool_decorator_preserves_original_function(self):
        # Define a test function with type hints and docstring
        def test_function(items: list[str]) -> str:
            """Join a list of strings.
            Args:
                items: A list of strings to join
            Returns:
                The joined string
            """
            return ", ".join(items)

        # Store original function signature, name, and source
        original_signature = inspect.signature(test_function)
        original_name = test_function.__name__
        original_docstring = test_function.__doc__

        # Create a tool from the function
        test_tool = tool(test_function)

        # Check that the original function is unchanged
        assert original_signature == inspect.signature(test_function)
        assert original_name == test_function.__name__
        assert original_docstring == test_function.__doc__

        # Verify that the tool's forward method has a different signature (it has 'self')
        tool_forward_sig = inspect.signature(test_tool.forward)
        assert list(tool_forward_sig.parameters.keys())[0] == "self"

        # Original function should not have 'self' parameter
        assert "self" not in original_signature.parameters

    def test_tool_with_union_type_return(self):
        # A union return type cannot be narrowed, so output_type falls back to "any".
        @tool
        def union_type_return_tool_function(param: int) -> str | bool:
            """
            Tool with output union type.

            Args:
                param: Input parameter.
            """
            return str(param) if param > 0 else False

        assert isinstance(union_type_return_tool_function, Tool)
        assert union_type_return_tool_function.output_type == "any"
567
+
568
+
569
@pytest.fixture
def mock_server_parameters():
    """Stand-in MCP server parameters; never inspected because the adapter is mocked."""
    return MagicMock()
572
+
573
+
574
@pytest.fixture
def mock_mcp_adapt():
    """Patch MCPAdapt so entering its context manager yields two fake tool names."""
    with patch("mcpadapt.core.MCPAdapt") as adapt_mock:
        adapt_instance = adapt_mock.return_value
        adapt_instance.__enter__.return_value = ["tool1", "tool2"]
        adapt_instance.__exit__.return_value = None
        yield adapt_mock
580
+
581
+
582
@pytest.fixture
def mock_smolagents_adapter():
    """Patch the SmolAgentsAdapter used when converting MCP tools to smolagents tools."""
    with patch("mcpadapt.smolagents_adapter.SmolAgentsAdapter") as adapter_mock:
        yield adapter_mock
586
+
587
+
588
class TestToolCollection:
    """Tests for assembling ToolCollection objects from MCP servers."""

    def test_from_mcp(self, mock_server_parameters, mock_mcp_adapt, mock_smolagents_adapter):
        """With the MCP adapter fully mocked, from_mcp exposes the adapter's tools."""
        with ToolCollection.from_mcp(mock_server_parameters, trust_remote_code=True) as tool_collection:
            assert isinstance(tool_collection, ToolCollection)
            assert len(tool_collection.tools) == 2
            assert "tool1" in tool_collection.tools
            assert "tool2" in tool_collection.tools

    @require_run_all
    def test_integration_from_mcp(self):
        """End-to-end: spawn a stdio MCP echo server and call its single tool."""
        # define the most simple mcp server with one tool that echoes the input text
        mcp_server_script = dedent("""\
            from mcp.server.fastmcp import FastMCP

            mcp = FastMCP("Echo Server")

            @mcp.tool()
            def echo_tool(text: str) -> str:
                return text

            mcp.run()
        """).strip()

        mcp_server_params = mcp.StdioServerParameters(
            command="python",
            args=["-c", mcp_server_script],
        )

        with ToolCollection.from_mcp(mcp_server_params, trust_remote_code=True) as tool_collection:
            assert len(tool_collection.tools) == 1, "Expected 1 tool"
            assert tool_collection.tools[0].name == "echo_tool", "Expected tool name to be 'echo_tool'"
            assert tool_collection.tools[0](text="Hello") == "Hello", "Expected tool to echo the input text"

    def _run_http_echo_server_test(self, run_statement, server_config):
        """Shared body for the HTTP-based integration tests (deduplicates the
        previously copy-pasted streamable-http and SSE test bodies).

        Spawns an MCP echo server subprocess on 127.0.0.1:8000, then verifies that
        ``ToolCollection.from_mcp`` discovers and can invoke its single echo tool.

        Args:
            run_statement: The final ``mcp.run(...)`` line selecting the transport.
            server_config: ``from_mcp`` configuration dict pointing at the server URL.
        """
        import subprocess
        import time

        # define the most simple mcp server with one tool that echoes the input text
        mcp_server_script = dedent(f"""\
            from mcp.server.fastmcp import FastMCP

            mcp = FastMCP("Echo Server", host="127.0.0.1", port=8000)

            @mcp.tool()
            def echo_tool(text: str) -> str:
                return text

            {run_statement}
        """).strip()

        # start the mcp server in a subprocess
        server_process = subprocess.Popen(
            ["python", "-c", mcp_server_script],
        )

        # wait for the server to start
        time.sleep(1)

        try:
            with ToolCollection.from_mcp(server_config, trust_remote_code=True) as tool_collection:
                assert len(tool_collection.tools) == 1, "Expected 1 tool"
                assert tool_collection.tools[0].name == "echo_tool", "Expected tool name to be 'echo_tool'"
                assert tool_collection.tools[0](text="Hello") == "Hello", "Expected tool to echo the input text"
        finally:
            # clean up the process when test is done
            server_process.kill()
            server_process.wait()

    def test_integration_from_mcp_with_streamable_http(self):
        """End-to-end: echo server over the streamable-http transport."""
        self._run_http_echo_server_test(
            'mcp.run(transport="streamable-http")',
            {"url": "http://127.0.0.1:8000/mcp", "transport": "streamable-http"},
        )

    def test_integration_from_mcp_with_sse(self):
        """End-to-end: echo server over the SSE transport."""
        self._run_http_echo_server_test(
            'mcp.run("sse")',
            {"url": "http://127.0.0.1:8000/sse", "transport": "sse"},
        )
694
+
695
+
696
@pytest.mark.parametrize("tool_fixture_name", ["boolean_default_tool_class"])
def test_launch_gradio_demo_does_not_raise(tool_fixture_name, request):
    """launch_gradio_demo should build the interface and call launch exactly once."""
    # Local name avoids shadowing the `tool` decorator imported at module level.
    demo_tool = request.getfixturevalue(tool_fixture_name)
    # Stub out the blocking Gradio launch call so the test stays headless.
    with patch("gradio.Interface.launch") as mock_launch:
        launch_gradio_demo(demo_tool)
    mock_launch.assert_called_once()
702
+
703
+
704
@pytest.mark.parametrize(
    "tool_input_type, expected_input, expects_error",
    [
        (bool, True, False),
        (str, "b", False),
        (int, 1, False),
        (list, ["a", "b"], False),
        (list[str], ["a", "b"], False),
        (dict[str, str], {"a": "b"}, False),
        (dict[str, str], "b", True),
        (bool, "b", True),
    ],
)
def test_validate_tool_arguments(tool_input_type, expected_input, expects_error):
    """validate_tool_arguments should accept matching argument types and flag mismatches."""

    # The parametrized type object is injected as the annotation of the tool's
    # only argument, so each case exercises a different declared input type.
    @tool
    def test_tool(argument_a: tool_input_type) -> str:
        """Fake tool

        Args:
            argument_a: The input
        """
        return argument_a

    # validate_tool_arguments returns None on success, otherwise an error value.
    error = validate_tool_arguments(test_tool, {"argument_a": expected_input})
    if expects_error:
        assert error is not None
    else:
        assert error is None
tests/test_types.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import os
16
+ import tempfile
17
+ import unittest
18
+ import uuid
19
+
20
+ import PIL.Image
21
+ from transformers.testing_utils import (
22
+ require_soundfile,
23
+ )
24
+
25
+ from smolagents.agent_types import AgentAudio, AgentImage, AgentText
26
+
27
+ from .utils.markers import require_torch
28
+
29
+
30
def get_new_path(suffix="") -> str:
    """Return a unique file path (with optional suffix) inside a fresh temp directory."""
    # A brand-new directory per call guarantees the path does not exist yet.
    return os.path.join(tempfile.mkdtemp(), str(uuid.uuid4()) + suffix)
33
+
34
+
35
@require_soundfile
@require_torch
class AgentAudioTests(unittest.TestCase):
    """Tests for AgentAudio conversions between tensors, file paths, and strings."""

    def test_from_tensor(self):
        """An AgentAudio built from a tensor persists to disk and round-trips its samples."""
        import soundfile as sf
        import torch

        # Random audio samples shifted into [-0.5, 0.5).
        tensor = torch.rand(12, dtype=torch.float64) - 0.5
        agent_type = AgentAudio(tensor)
        path = str(agent_type.to_string())

        # Ensure that the tensor and the agent_type's tensor are the same
        self.assertTrue(torch.allclose(tensor, agent_type.to_raw(), atol=1e-4))

        del agent_type

        # Ensure the path remains even after the object deletion
        self.assertTrue(os.path.exists(path))

        # Ensure that the file contains the same value as the original tensor
        new_tensor, _ = sf.read(path)
        self.assertTrue(torch.allclose(tensor, torch.tensor(new_tensor), atol=1e-4))

    def test_from_string(self):
        """An AgentAudio built from a file path keeps the path and decodes the samples."""
        import soundfile as sf
        import torch

        tensor = torch.rand(12, dtype=torch.float64) - 0.5
        path = get_new_path(suffix=".wav")
        sf.write(path, tensor, 16000)

        agent_type = AgentAudio(path)

        # Raw view decodes back to the written samples; string view is the original path.
        self.assertTrue(torch.allclose(tensor, agent_type.to_raw(), atol=1e-4))
        self.assertEqual(agent_type.to_string(), path)
70
+
71
+
72
@require_torch
class TestAgentImage:
    """Tests for AgentImage construction from tensors, paths, and PIL images."""

    def test_from_tensor(self):
        """An AgentImage built from a tensor exposes a PIL image and persists to disk."""
        import torch

        # Random RGB image data (H, W, C) with 8-bit values.
        tensor = torch.randint(0, 256, (64, 64, 3))
        agent_type = AgentImage(tensor)
        path = str(agent_type.to_string())

        # Ensure that the tensor and the agent_type's tensor are the same
        assert torch.allclose(tensor, agent_type._tensor, atol=1e-4)

        assert isinstance(agent_type.to_raw(), PIL.Image.Image)

        # Ensure the path remains even after the object deletion
        del agent_type
        assert os.path.exists(path)

    def test_from_string(self, shared_datadir):
        """An AgentImage built from a path reuses that path and loads the same image."""
        path = shared_datadir / "000000039769.png"
        image = PIL.Image.open(path)
        agent_type = AgentImage(path)

        # to_string points back at the original file rather than a copy.
        assert path.samefile(agent_type.to_string())
        assert image == agent_type.to_raw()

        # Ensure the path remains even after the object deletion
        del agent_type
        assert os.path.exists(path)

    def test_from_image(self, shared_datadir):
        """An AgentImage built from a PIL image serializes to a new file, not the source."""
        path = shared_datadir / "000000039769.png"
        image = PIL.Image.open(path)
        agent_type = AgentImage(image)

        # A PIL-constructed image gets its own storage path, distinct from the source file.
        assert not path.samefile(agent_type.to_string())
        assert image == agent_type.to_raw()

        # Ensure the path remains even after the object deletion
        del agent_type
        assert os.path.exists(path)
113
+
114
+
115
class AgentTextTests(unittest.TestCase):
    """AgentText should behave as a plain string in both of its views."""

    def test_from_string(self):
        raw_text = "Hey!"
        agent_type = AgentText(raw_text)

        # Both the string view and the raw view are the unchanged input.
        self.assertEqual(raw_text, agent_type.to_string())
        self.assertEqual(raw_text, agent_type.to_raw())
tests/test_utils.py ADDED
@@ -0,0 +1,495 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 HuggingFace Inc.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import inspect
16
+ import os
17
+ import textwrap
18
+ import unittest
19
+
20
+ import pytest
21
+ from IPython.core.interactiveshell import InteractiveShell
22
+
23
+ from smolagents import Tool
24
+ from smolagents.tools import tool
25
+ from smolagents.utils import get_source, instance_to_source, is_valid_name, parse_code_blobs, parse_json_blob
26
+
27
+
28
# NOTE: instance_to_source() regenerates this class's source code, and
# test_instance_to_source compares the result against VALID_TOOL_SOURCE verbatim.
# Do not add docstrings or comments inside the class body — they would change
# the regenerated source and break that comparison.
class ValidTool(Tool):
    name = "valid_tool"
    description = "A valid tool"
    inputs = {"input": {"type": "string", "description": "input"}}
    output_type = "string"
    simple_attr = "string"
    dict_attr = {"key": "value"}

    def __init__(self, optional_param="default"):
        super().__init__()
        self.param = optional_param

    def forward(self, input: str) -> str:
        return input.upper()
42
+
43
+
44
# NOTE: the tool source generated from this function must match
# VALID_TOOL_FUNCTION_SOURCE verbatim (see test_instance_to_source), so the
# signature, docstring, and body below must stay unchanged.
@tool
def valid_tool_function(input: str) -> str:
    """A valid tool function.

    Args:
        input (str): Input string.
    """
    return input.upper()
52
+
53
+
54
# Expected source regenerated by instance_to_source(ValidTool()); compared
# byte-for-byte in test_instance_to_source.
VALID_TOOL_SOURCE = """\
from smolagents.tools import Tool

class ValidTool(Tool):
    name = "valid_tool"
    description = "A valid tool"
    inputs = {'input': {'type': 'string', 'description': 'input'}}
    output_type = "string"
    simple_attr = "string"
    dict_attr = {'key': 'value'}

    def __init__(self, optional_param="default"):
        super().__init__()
        self.param = optional_param

    def forward(self, input: str) -> str:
        return input.upper()
"""

# Expected source regenerated from the @tool-decorated valid_tool_function;
# function tools are wrapped in a generated class named SimpleTool.
VALID_TOOL_FUNCTION_SOURCE = '''\
from smolagents.tools import Tool

class SimpleTool(Tool):
    name = "valid_tool_function"
    description = "A valid tool function."
    inputs = {'input': {'type': 'string', 'description': 'Input string.'}}
    output_type = "string"

    def __init__(self):
        self.is_initialized = True

    def forward(self, input: str) -> str:
        """A valid tool function.

        Args:
            input (str): Input string.
        """
        return input.upper()
'''
93
+
94
+
95
class AgentTextTests(unittest.TestCase):
    """Tests for extracting code blobs from model output text."""

    def test_parse_code_blobs(self):
        """parse_code_blobs rejects blob-less text and extracts fenced code."""
        with pytest.raises(ValueError):
            parse_code_blobs("Wrong blob!")

        # Parsing markdown with code blobs should work
        output = parse_code_blobs("""
Here is how to solve the problem:
<code>
import numpy as np
</code>
""")
        assert output == "import numpy as np"

        # Parsing code blobs should work
        code_blob = "import numpy as np"
        output = parse_code_blobs(code_blob)
        assert output == code_blob

        # Allow whitespaces after header
        output = parse_code_blobs("<code> \ncode_a\n</code>")
        assert output == "code_a"

    def test_multiple_code_blobs(self):
        """Multiple <code> sections are concatenated, separated by blank lines."""
        test_input = "<code>\nFoo\n</code>\n\n<code>\ncode_a\n</code>\n\n<code>\ncode_b\n</code>"
        result = parse_code_blobs(test_input)
        assert result == "Foo\n\ncode_a\n\ncode_b"
122
+
123
+
124
@pytest.fixture(scope="function")
def ipython_shell():
    """Provide the singleton IPython shell, reset before and after each test."""
    ip = InteractiveShell.instance()
    ip.reset()  # clean state going into the test
    yield ip
    ip.reset()  # leave no state behind for the next test
131
+
132
+
133
@pytest.mark.parametrize(
    "obj_name, code_blob",
    [
        ("test_func", "def test_func():\n    return 42"),
        ("TestClass", "class TestClass:\n    ..."),
    ],
)
def test_get_source_ipython(ipython_shell, obj_name, code_blob):
    """get_source should recover definitions executed inside an IPython session."""
    # Execute the definition in the shell, then look the object up by name.
    ipython_shell.run_cell(code_blob, store_history=True)
    defined_obj = ipython_shell.user_ns[obj_name]
    assert get_source(defined_obj) == code_blob
144
+
145
+
146
def test_get_source_standard_class():
    """get_source on a regular class returns its dedented, stripped source."""
    class TestClass: ...

    source = get_source(TestClass)
    # Must match both the literal definition above and inspect's normalized output.
    assert source == "class TestClass: ..."
    assert source == textwrap.dedent(inspect.getsource(TestClass)).strip()
152
+
153
+
154
def test_get_source_standard_function():
    """get_source on a regular function returns its dedented, stripped source."""
    def test_func(): ...

    source = get_source(test_func)
    # Must match both the literal definition above and inspect's normalized output.
    assert source == "def test_func(): ..."
    assert source == textwrap.dedent(inspect.getsource(test_func)).strip()
160
+
161
+
162
def test_get_source_ipython_errors_empty_cells(ipython_shell):
    """get_source must raise when the IPython history contains no code cells."""
    test_code = textwrap.dedent("""class TestClass:\n    ...""").strip()
    # Overwrite the session history with a single empty cell before executing.
    ipython_shell.user_ns["In"] = [""]
    ipython_shell.run_cell(test_code, store_history=True)
    with pytest.raises(ValueError, match="No code cells found in IPython session"):
        get_source(ipython_shell.user_ns["TestClass"])
168
+
169
+
170
def test_get_source_ipython_errors_definition_not_found(ipython_shell):
    """get_source must raise when the class definition is absent from the history."""
    test_code = textwrap.dedent("""class TestClass:\n    ...""").strip()
    # History holds only an unrelated cell, so the source lookup must fail.
    ipython_shell.user_ns["In"] = ["", "print('No class definition here')"]
    ipython_shell.run_cell(test_code, store_history=True)
    with pytest.raises(ValueError, match="Could not find source code for TestClass in IPython history"):
        get_source(ipython_shell.user_ns["TestClass"])
176
+
177
+
178
def test_get_source_ipython_errors_type_error():
    """Passing something that is neither a class nor a callable raises TypeError."""
    with pytest.raises(TypeError, match="Expected class or callable"):
        get_source(None)
181
+
182
+
183
@pytest.mark.parametrize(
    "tool, expected_tool_source", [(ValidTool(), VALID_TOOL_SOURCE), (valid_tool_function, VALID_TOOL_FUNCTION_SOURCE)]
)
def test_instance_to_source(tool, expected_tool_source):
    """instance_to_source must regenerate the exact expected standalone class source."""
    tool_source = instance_to_source(tool, base_cls=Tool)
    assert tool_source == expected_tool_source
189
+
190
+
191
def test_e2e_class_tool_save(tmp_path):
    """Saving a class-based tool writes tool.py, app.py, and requirements.txt."""
    # NOTE: the saved tool.py is regenerated from this class's source, so its
    # body must stay exactly as written (no extra comments inside it).
    class TestTool(Tool):
        name = "test_tool"
        description = "Test tool description"
        inputs = {
            "task": {
                "type": "string",
                "description": "tool input",
            }
        }
        output_type = "string"

        def forward(self, task: str):
            import IPython  # noqa: F401

            return task

    test_tool = TestTool()
    test_tool.save(tmp_path, make_gradio_app=True)
    assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"}
    # tool.py must contain a standalone, importable version of the class.
    assert (tmp_path / "tool.py").read_text() == textwrap.dedent(
        """\
        from typing import Any, Optional
        from smolagents.tools import Tool
        import IPython

        class TestTool(Tool):
            name = "test_tool"
            description = "Test tool description"
            inputs = {'task': {'type': 'string', 'description': 'tool input'}}
            output_type = "string"

            def forward(self, task: str):
                import IPython  # noqa: F401

                return task

            def __init__(self, *args, **kwargs):
                self.is_initialized = False
        """
    )
    # The import used inside forward must be detected as a requirement.
    requirements = set((tmp_path / "requirements.txt").read_text().split())
    assert requirements == {"IPython", "smolagents"}
    assert (tmp_path / "app.py").read_text() == textwrap.dedent(
        """\
        from smolagents import launch_gradio_demo
        from tool import TestTool

        tool = TestTool()
        launch_gradio_demo(tool)
        """
    )
243
+
244
+
245
def test_e2e_ipython_class_tool_save(tmp_path):
    """Same as test_e2e_class_tool_save, but the tool is defined inside IPython."""
    shell = InteractiveShell.instance()
    # Doubled braces escape the literal dict braces inside this f-string.
    code_blob = textwrap.dedent(
        f"""\
        from smolagents.tools import Tool
        class TestTool(Tool):
            name = "test_tool"
            description = "Test tool description"
            inputs = {{"task": {{"type": "string",
                    "description": "tool input",
                }}
            }}
            output_type = "string"

            def forward(self, task: str):
                import IPython  # noqa: F401

                return task
        TestTool().save("{tmp_path}", make_gradio_app=True)
        """
    )
    assert shell.run_cell(code_blob, store_history=True).success
    assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"}
    # tool.py must contain a standalone, importable version of the class.
    assert (tmp_path / "tool.py").read_text() == textwrap.dedent(
        """\
        from typing import Any, Optional
        from smolagents.tools import Tool
        import IPython

        class TestTool(Tool):
            name = "test_tool"
            description = "Test tool description"
            inputs = {'task': {'type': 'string', 'description': 'tool input'}}
            output_type = "string"

            def forward(self, task: str):
                import IPython  # noqa: F401

                return task

            def __init__(self, *args, **kwargs):
                self.is_initialized = False
        """
    )
    # The import used inside forward must be detected as a requirement.
    requirements = set((tmp_path / "requirements.txt").read_text().split())
    assert requirements == {"IPython", "smolagents"}
    assert (tmp_path / "app.py").read_text() == textwrap.dedent(
        """\
        from smolagents import launch_gradio_demo
        from tool import TestTool

        tool = TestTool()
        launch_gradio_demo(tool)
        """
    )
300
+
301
+
302
def test_e2e_function_tool_save(tmp_path):
    """Saving a decorated function tool produces a SimpleTool wrapper on disk."""
    # NOTE: the saved tool.py embeds this function's docstring and body verbatim,
    # so they must stay exactly as written.
    @tool
    def test_tool(task: str) -> str:
        """
        Test tool description

        Args:
            task: tool input
        """
        import IPython  # noqa: F401

        return task

    test_tool.save(tmp_path, make_gradio_app=True)
    assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"}
    # Function tools are wrapped in a generated class named SimpleTool.
    assert (tmp_path / "tool.py").read_text() == textwrap.dedent(
        """\
        from smolagents import Tool
        from typing import Any, Optional

        class SimpleTool(Tool):
            name = "test_tool"
            description = "Test tool description"
            inputs = {'task': {'type': 'string', 'description': 'tool input'}}
            output_type = "string"

            def forward(self, task: str) -> str:
                \"""
                Test tool description

                Args:
                    task: tool input
                \"""
                import IPython  # noqa: F401

                return task"""
    )
    requirements = set((tmp_path / "requirements.txt").read_text().split())
    assert requirements == {"smolagents"}  # FIXME: IPython should be in the requirements
    assert (tmp_path / "app.py").read_text() == textwrap.dedent(
        """\
        from smolagents import launch_gradio_demo
        from tool import SimpleTool

        tool = SimpleTool()
        launch_gradio_demo(tool)
        """
    )
350
+
351
+
352
def test_e2e_ipython_function_tool_save(tmp_path):
    """Same as test_e2e_function_tool_save, but the tool is defined inside IPython."""
    shell = InteractiveShell.instance()
    code_blob = textwrap.dedent(
        f"""
        from smolagents import tool

        @tool
        def test_tool(task: str) -> str:
            \"""
            Test tool description

            Args:
                task: tool input
            \"""
            import IPython  # noqa: F401

            return task

        test_tool.save("{tmp_path}", make_gradio_app=True)
        """
    )
    assert shell.run_cell(code_blob, store_history=True).success
    assert set(os.listdir(tmp_path)) == {"requirements.txt", "app.py", "tool.py"}
    # Function tools are wrapped in a generated class named SimpleTool.
    assert (tmp_path / "tool.py").read_text() == textwrap.dedent(
        """\
        from smolagents import Tool
        from typing import Any, Optional

        class SimpleTool(Tool):
            name = "test_tool"
            description = "Test tool description"
            inputs = {'task': {'type': 'string', 'description': 'tool input'}}
            output_type = "string"

            def forward(self, task: str) -> str:
                \"""
                Test tool description

                Args:
                    task: tool input
                \"""
                import IPython  # noqa: F401

                return task"""
    )
    requirements = set((tmp_path / "requirements.txt").read_text().split())
    assert requirements == {"smolagents"}  # FIXME: IPython should be in the requirements
    assert (tmp_path / "app.py").read_text() == textwrap.dedent(
        """\
        from smolagents import launch_gradio_demo
        from tool import SimpleTool

        tool = SimpleTool()
        launch_gradio_demo(tool)
        """
    )
408
+
409
+
410
@pytest.mark.parametrize(
    "raw_json, expected_data, expected_blob",
    [
        (
            """{}""",
            {},
            "",
        ),
        (
            """Text{}""",
            {},
            "Text",
        ),
        (
            """{"simple": "json"}""",
            {"simple": "json"},
            "",
        ),
        (
            """With text here{"simple": "json"}""",
            {"simple": "json"},
            "With text here",
        ),
        (
            """{"simple": "json"}With text after""",
            {"simple": "json"},
            "",
        ),
        (
            """With text before{"simple": "json"}And text after""",
            {"simple": "json"},
            "With text before",
        ),
    ],
)
def test_parse_json_blob_with_valid_json(raw_json, expected_data, expected_blob):
    """parse_json_blob returns the parsed dict plus any text preceding the JSON.

    Note: per the cases above, text after the JSON object is discarded,
    not returned as part of the blob.
    """
    data, blob = parse_json_blob(raw_json)

    assert data == expected_data
    assert blob == expected_blob
450
+
451
+
452
@pytest.mark.parametrize(
    "raw_json",
    [
        """simple": "json"}""",
        """With text here"simple": "json"}""",
        """{"simple": ""json"}With text after""",
        """{"simple": "json"With text after""",
        "}}",
    ],
)
def test_parse_json_blob_with_invalid_json(raw_json):
    """Malformed or truncated JSON input must raise rather than return garbage."""
    with pytest.raises(Exception):
        parse_json_blob(raw_json)
465
+
466
+
467
@pytest.mark.parametrize(
    "name,expected",
    [
        # Valid identifiers
        ("valid_name", True),
        ("ValidName", True),
        ("valid123", True),
        ("_private", True),
        # Invalid identifiers
        ("", False),
        ("123invalid", False),
        ("invalid-name", False),
        ("invalid name", False),
        ("invalid.name", False),
        # Python keywords
        ("if", False),
        ("for", False),
        ("class", False),
        ("return", False),
        # Non-string inputs
        (123, False),
        (None, False),
        ([], False),
        ({}, False),
    ],
)
def test_is_valid_name(name, expected):
    """Test the is_valid_name function with various inputs.

    A valid name is a string that is a legal Python identifier and not a
    keyword; everything else (including non-string inputs) is rejected.
    """
    assert is_valid_name(name) is expected