github-actions[bot] committed
Commit 35b631a · 1 Parent(s): c4c48ee

Sync with https://github.com/mozilla-ai/surf-spot-finder

Files changed (3)
  1. app.py +2 -2
  2. components/inputs.py +2 -1
  3. services/agent.py +9 -13
app.py CHANGED
@@ -40,9 +40,9 @@ async def main():
     # Handle agent execution button click
     if run_button:
         agent, agent_config = await configure_agent(user_inputs)
-        agent_trace, execution_time = await run_agent(agent, agent_config)
+        agent_trace = await run_agent(agent, agent_config)

-        await display_output(agent_trace, execution_time)
+        await display_output(agent_trace)

         evaluation_result = await evaluate_agent(agent_config, agent_trace)
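Note on the app.py change: run_agent now returns a bare AgentTrace and display_output takes only the trace, so no execution-time float is threaded between the two; timing is recovered from the trace inside display_output (see services/agent.py below). If wall-clock timing were ever wanted at this call site again, it could be bracketed locally instead of re-widening both signatures. A minimal sketch of that alternative (hypothetical, inside async def main(), not part of this commit):

import time

start = time.time()
agent_trace = await run_agent(agent, agent_config)  # returns only the AgentTrace
elapsed = time.time() - start  # includes status-UI overhead, unlike the trace's own duration
await display_output(agent_trace)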
components/inputs.py CHANGED
@@ -8,6 +8,7 @@ from any_agent.evaluation import EvaluationCase
 from any_agent.evaluation.schemas import CheckpointCriteria
 import pandas as pd
 from constants import DEFAULT_EVALUATION_CASE, MODEL_OPTIONS
+import copy

 from pydantic import BaseModel, ConfigDict

@@ -98,7 +99,7 @@ def get_user_inputs() -> UserInputs:
         index=2,
         format_func=lambda x: "/".join(x.split("/")[-3:]),
     )
-    evaluation_case = DEFAULT_EVALUATION_CASE
+    evaluation_case = copy.deepcopy(DEFAULT_EVALUATION_CASE)
     evaluation_case.llm_judge = evaluation_model_id
     # make this an editable json section
     # convert the checkpoints to a df series so that it can be edited
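Note on the copy.deepcopy change: DEFAULT_EVALUATION_CASE is a module-level object, and binding it directly meant the evaluation_case.llm_judge assignment on the next line mutated the shared default, leaking the chosen judge model into every later rerun. A self-contained sketch of the bug and the fix, using a simplified stand-in for the real EvaluationCase model (assumed here to be a mutable pydantic model):

import copy
from pydantic import BaseModel

class EvaluationCase(BaseModel):  # simplified stand-in, not the real schema
    llm_judge: str | None = None

DEFAULT_EVALUATION_CASE = EvaluationCase()

# Before: the name aliased the module-level default, so the edit leaked.
case = DEFAULT_EVALUATION_CASE
case.llm_judge = "org/judge-model"  # hypothetical model id
assert DEFAULT_EVALUATION_CASE.llm_judge == "org/judge-model"  # default mutated!

# After: a deep copy isolates each run's edits from the shared default.
case = copy.deepcopy(DEFAULT_EVALUATION_CASE)
case.llm_judge = "org/other-judge-model"  # the default is untouched this time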
services/agent.py CHANGED
@@ -3,7 +3,6 @@ from components.inputs import UserInputs
 from constants import DEFAULT_TOOLS
 from components.agent_status import export_logs
 import streamlit as st
-import time
 from surf_spot_finder.config import Config
 from any_agent import AgentConfig, AnyAgent, TracingConfig, AgentFramework
 from any_agent.tracing.trace import AgentTrace, TotalTokenUseAndCost, AgentSpan

@@ -12,11 +11,10 @@ from any_agent.evaluation import evaluate, TraceEvaluationResult


 async def display_evaluation_results(result: TraceEvaluationResult):
-    all_results = (
-        result.checkpoint_results
-        + result.hypothesis_answer_results
-        + result.direct_results
-    )
+    if result.ground_truth_result is not None:
+        all_results = [*result.checkpoint_results, result.ground_truth_result]
+    else:
+        all_results = result.checkpoint_results

     # Create columns for better layout
     col1, col2 = st.columns(2)

@@ -102,7 +100,7 @@ async def configure_agent(user_inputs: UserInputs) -> tuple[AnyAgent, Config]:
     return agent, config


-async def display_output(agent_trace: AgentTrace, execution_time: float):
+async def display_output(agent_trace: AgentTrace):
     # Display the agent trace in a more organized way
     with st.expander("### 🧩 Agent Trace"):
         for span in agent_trace.spans:

@@ -142,8 +140,9 @@ async def display_output(agent_trace: AgentTrace, execution_time: float):
     cost: TotalTokenUseAndCost = agent_trace.get_total_cost()
     with st.expander("### 🏄 Results", expanded=True):
         time_col, cost_col, tokens_col = st.columns(3)
+        duration = agent_trace.duration.total_seconds()
         with time_col:
-            st.info(f"⏱️ Execution Time: {execution_time:.2f} seconds")
+            st.info(f"⏱️ Execution Time: {duration:0.2f} seconds")
         with cost_col:
             st.info(f"💰 Estimated Cost: ${cost.total_cost:.6f}")
         with tokens_col:

@@ -152,7 +151,7 @@ async def display_output(agent_trace: AgentTrace, execution_time: float):
     st.info(agent_trace.final_output)


-async def run_agent(agent, config) -> tuple[AgentTrace, float]:
+async def run_agent(agent, config) -> AgentTrace:
     st.markdown("#### 🔍 Running Surf Spot Finder with query")

     query = config.input_prompt_template.format(

@@ -222,11 +221,8 @@ async def run_agent(agent, config) -> tuple[AgentTrace, float]:
         status.update(label=message, expanded=False, state="running")

     export_logs(agent, update_span)
-    start_time = time.time()
     agent_trace: AgentTrace = await agent.run_async(query, **kwargs)
     status.update(label="Finished!", expanded=False, state="complete")
-    end_time = time.time()

     agent.exit()
-    execution_time = end_time - start_time
-    return agent_trace, execution_time
+    return agent_trace
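Note on the services/agent.py changes: display_evaluation_results now folds an optional ground_truth_result into the checkpoint results (guarding against None) instead of concatenating the removed hypothesis_answer_results and direct_results lists, and run_agent drops its time.time() bracketing because the trace records its own duration; the .total_seconds() call implies agent_trace.duration is a datetime.timedelta. A runnable sketch of the timing change, with stand-ins for the real agent call and trace:

import time
from datetime import timedelta

# Before: wall-clock bracketing inside run_agent, returned alongside the trace.
start = time.time()
time.sleep(0.01)  # stand-in for: await agent.run_async(query, **kwargs)
execution_time = time.time() - start

# After: the trace carries its own duration, so nothing extra is returned.
duration = timedelta(seconds=execution_time)  # stand-in for agent_trace.duration
print(f"⏱️ Execution Time: {duration.total_seconds():0.2f} seconds")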