github-actions[bot] committed
Commit 35b631a · 1 Parent(s): c4c48ee

Sync with https://github.com/mozilla-ai/surf-spot-finder

Files changed (3)
  1. app.py +2 -2
  2. components/inputs.py +2 -1
  3. services/agent.py +9 -13
app.py CHANGED
@@ -40,9 +40,9 @@ async def main():
     # Handle agent execution button click
     if run_button:
         agent, agent_config = await configure_agent(user_inputs)
-        agent_trace, execution_time = await run_agent(agent, agent_config)
+        agent_trace = await run_agent(agent, agent_config)

-        await display_output(agent_trace, execution_time)
+        await display_output(agent_trace)

         evaluation_result = await evaluate_agent(agent_config, agent_trace)
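Note on the app.py change: run_agent now returns a bare AgentTrace and display_output takes only the trace, so no execution-time float is threaded between the two; timing is recovered from the trace inside display_output (see services/agent.py below). If wall-clock timing were ever wanted at this call site again, it could be bracketed locally instead of re-widening both signatures. A minimal sketch of that alternative (hypothetical, inside async def main(), not part of this commit):

import time

start = time.time()
agent_trace = await run_agent(agent, agent_config)  # returns only the AgentTrace
elapsed = time.time() - start  # includes status-UI overhead, unlike the trace's own duration
await display_output(agent_trace)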
components/inputs.py CHANGED
@@ -8,6 +8,7 @@ from any_agent.evaluation import EvaluationCase
 from any_agent.evaluation.schemas import CheckpointCriteria
 import pandas as pd
 from constants import DEFAULT_EVALUATION_CASE, MODEL_OPTIONS
+import copy

 from pydantic import BaseModel, ConfigDict

@@ -98,7 +99,7 @@ def get_user_inputs() -> UserInputs:
         index=2,
         format_func=lambda x: "/".join(x.split("/")[-3:]),
     )
-    evaluation_case = DEFAULT_EVALUATION_CASE
+    evaluation_case = copy.deepcopy(DEFAULT_EVALUATION_CASE)
     evaluation_case.llm_judge = evaluation_model_id
     # make this an editable json section
     # convert the checkpoints to a df series so that it can be edited
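Note on the copy.deepcopy change: DEFAULT_EVALUATION_CASE is a module-level object, and binding it directly meant the evaluation_case.llm_judge assignment on the next line mutated the shared default, leaking the chosen judge model into every later rerun. A self-contained sketch of the bug and the fix, using a simplified stand-in for the real EvaluationCase model (assumed here to be a mutable pydantic model):

import copy
from pydantic import BaseModel

class EvaluationCase(BaseModel):  # simplified stand-in, not the real schema
    llm_judge: str | None = None

DEFAULT_EVALUATION_CASE = EvaluationCase()

# Before: the name aliased the module-level default, so the edit leaked.
case = DEFAULT_EVALUATION_CASE
case.llm_judge = "org/judge-model"  # hypothetical model id
assert DEFAULT_EVALUATION_CASE.llm_judge == "org/judge-model"  # default mutated!

# After: a deep copy isolates each run's edits from the shared default.
case = copy.deepcopy(DEFAULT_EVALUATION_CASE)
case.llm_judge = "org/other-judge-model"  # the default is untouched this time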
services/agent.py CHANGED
@@ -3,7 +3,6 @@ from components.inputs import UserInputs
 from constants import DEFAULT_TOOLS
 from components.agent_status import export_logs
 import streamlit as st
-import time
 from surf_spot_finder.config import Config
 from any_agent import AgentConfig, AnyAgent, TracingConfig, AgentFramework
 from any_agent.tracing.trace import AgentTrace, TotalTokenUseAndCost, AgentSpan

@@ -12,11 +11,10 @@ from any_agent.evaluation import evaluate, TraceEvaluationResult


 async def display_evaluation_results(result: TraceEvaluationResult):
-    all_results = (
-        result.checkpoint_results
-        + result.hypothesis_answer_results
-        + result.direct_results
-    )
+    if result.ground_truth_result is not None:
+        all_results = [*result.checkpoint_results, result.ground_truth_result]
+    else:
+        all_results = result.checkpoint_results

     # Create columns for better layout
     col1, col2 = st.columns(2)

@@ -102,7 +100,7 @@ async def configure_agent(user_inputs: UserInputs) -> tuple[AnyAgent, Config]:
     return agent, config


-async def display_output(agent_trace: AgentTrace, execution_time: float):
+async def display_output(agent_trace: AgentTrace):
     # Display the agent trace in a more organized way
     with st.expander("### 🧩 Agent Trace"):
         for span in agent_trace.spans:

@@ -142,8 +140,9 @@ async def display_output(agent_trace: AgentTrace, execution_time: float):
     cost: TotalTokenUseAndCost = agent_trace.get_total_cost()
     with st.expander("### 🏄 Results", expanded=True):
         time_col, cost_col, tokens_col = st.columns(3)
+        duration = agent_trace.duration.total_seconds()
         with time_col:
-            st.info(f"⏱️ Execution Time: {execution_time:.2f} seconds")
+            st.info(f"⏱️ Execution Time: {duration:0.2f} seconds")
         with cost_col:
             st.info(f"💰 Estimated Cost: ${cost.total_cost:.6f}")
         with tokens_col:

@@ -152,7 +151,7 @@ async def display_output(agent_trace: AgentTrace, execution_time: float):
     st.info(agent_trace.final_output)


-async def run_agent(agent, config) -> tuple[AgentTrace, float]:
+async def run_agent(agent, config) -> AgentTrace:
     st.markdown("#### 🔍 Running Surf Spot Finder with query")

     query = config.input_prompt_template.format(

@@ -222,11 +221,8 @@ async def run_agent(agent, config) -> tuple[AgentTrace, float]:
         status.update(label=message, expanded=False, state="running")

     export_logs(agent, update_span)
-    start_time = time.time()
     agent_trace: AgentTrace = await agent.run_async(query, **kwargs)
     status.update(label="Finished!", expanded=False, state="complete")
-    end_time = time.time()

     agent.exit()
-    execution_time = end_time - start_time
-    return agent_trace, execution_time
+    return agent_trace
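Note on the services/agent.py changes: display_evaluation_results now folds an optional ground_truth_result into the checkpoint results (guarding against None) instead of concatenating the removed hypothesis_answer_results and direct_results lists, and run_agent drops its time.time() bracketing because the trace records its own duration; the .total_seconds() call implies agent_trace.duration is a datetime.timedelta. A runnable sketch of the timing change, with stand-ins for the real agent call and trace:

import time
from datetime import timedelta

# Before: wall-clock bracketing inside run_agent, returned alongside the trace.
start = time.time()
time.sleep(0.01)  # stand-in for: await agent.run_async(query, **kwargs)
execution_time = time.time() - start

# After: the trace carries its own duration, so nothing extra is returned.
duration = timedelta(seconds=execution_time)  # stand-in for agent_trace.duration
print(f"⏱️ Execution Time: {duration.total_seconds():0.2f} seconds")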