Spaces:
Running
Running
github-actions[bot]
committed on
Commit
·
35b631a
1
Parent(s):
c4c48ee
Sync with https://github.com/mozilla-ai/surf-spot-finder
Browse files
- app.py +2 -2
- components/inputs.py +2 -1
- services/agent.py +9 -13
app.py
CHANGED
@@ -40,9 +40,9 @@ async def main():
|
|
40 |
# Handle agent execution button click
|
41 |
if run_button:
|
42 |
agent, agent_config = await configure_agent(user_inputs)
|
43 |
-
agent_trace
|
44 |
|
45 |
-
await display_output(agent_trace
|
46 |
|
47 |
evaluation_result = await evaluate_agent(agent_config, agent_trace)
|
48 |
|
|
|
40 |
# Handle agent execution button click
|
41 |
if run_button:
|
42 |
agent, agent_config = await configure_agent(user_inputs)
|
43 |
+
agent_trace = await run_agent(agent, agent_config)
|
44 |
|
45 |
+
await display_output(agent_trace)
|
46 |
|
47 |
evaluation_result = await evaluate_agent(agent_config, agent_trace)
|
48 |
|
components/inputs.py
CHANGED
@@ -8,6 +8,7 @@ from any_agent.evaluation import EvaluationCase
|
|
8 |
from any_agent.evaluation.schemas import CheckpointCriteria
|
9 |
import pandas as pd
|
10 |
from constants import DEFAULT_EVALUATION_CASE, MODEL_OPTIONS
|
|
|
11 |
|
12 |
from pydantic import BaseModel, ConfigDict
|
13 |
|
@@ -98,7 +99,7 @@ def get_user_inputs() -> UserInputs:
|
|
98 |
index=2,
|
99 |
format_func=lambda x: "/".join(x.split("/")[-3:]),
|
100 |
)
|
101 |
-
evaluation_case = DEFAULT_EVALUATION_CASE
|
102 |
evaluation_case.llm_judge = evaluation_model_id
|
103 |
# make this an editable json section
|
104 |
# convert the checkpoints to a df series so that it can be edited
|
|
|
8 |
from any_agent.evaluation.schemas import CheckpointCriteria
|
9 |
import pandas as pd
|
10 |
from constants import DEFAULT_EVALUATION_CASE, MODEL_OPTIONS
|
11 |
+
import copy
|
12 |
|
13 |
from pydantic import BaseModel, ConfigDict
|
14 |
|
|
|
99 |
index=2,
|
100 |
format_func=lambda x: "/".join(x.split("/")[-3:]),
|
101 |
)
|
102 |
+
evaluation_case = copy.deepcopy(DEFAULT_EVALUATION_CASE)
|
103 |
evaluation_case.llm_judge = evaluation_model_id
|
104 |
# make this an editable json section
|
105 |
# convert the checkpoints to a df series so that it can be edited
|
services/agent.py
CHANGED
@@ -3,7 +3,6 @@ from components.inputs import UserInputs
|
|
3 |
from constants import DEFAULT_TOOLS
|
4 |
from components.agent_status import export_logs
|
5 |
import streamlit as st
|
6 |
-
import time
|
7 |
from surf_spot_finder.config import Config
|
8 |
from any_agent import AgentConfig, AnyAgent, TracingConfig, AgentFramework
|
9 |
from any_agent.tracing.trace import AgentTrace, TotalTokenUseAndCost, AgentSpan
|
@@ -12,11 +11,10 @@ from any_agent.evaluation import evaluate, TraceEvaluationResult
|
|
12 |
|
13 |
|
14 |
async def display_evaluation_results(result: TraceEvaluationResult):
|
15 |
-
|
16 |
-
result.checkpoint_results
|
17 |
-
|
18 |
-
|
19 |
-
)
|
20 |
|
21 |
# Create columns for better layout
|
22 |
col1, col2 = st.columns(2)
|
@@ -102,7 +100,7 @@ async def configure_agent(user_inputs: UserInputs) -> tuple[AnyAgent, Config]:
|
|
102 |
return agent, config
|
103 |
|
104 |
|
105 |
-
async def display_output(agent_trace: AgentTrace, execution_time: float):
|
106 |
# Display the agent trace in a more organized way
|
107 |
with st.expander("### 🧩 Agent Trace"):
|
108 |
for span in agent_trace.spans:
|
@@ -142,8 +140,9 @@ async def display_output(agent_trace: AgentTrace, execution_time: float):
|
|
142 |
cost: TotalTokenUseAndCost = agent_trace.get_total_cost()
|
143 |
with st.expander("### π Results", expanded=True):
|
144 |
time_col, cost_col, tokens_col = st.columns(3)
|
|
|
145 |
with time_col:
|
146 |
-
st.info(f"⏱️ Execution Time: {
|
147 |
with cost_col:
|
148 |
st.info(f"💰 Estimated Cost: ${cost.total_cost:.6f}")
|
149 |
with tokens_col:
|
@@ -152,7 +151,7 @@ async def display_output(agent_trace: AgentTrace, execution_time: float):
|
|
152 |
st.info(agent_trace.final_output)
|
153 |
|
154 |
|
155 |
-
async def run_agent(agent, config) -> tuple[AgentTrace, float]:
|
156 |
st.markdown("#### π Running Surf Spot Finder with query")
|
157 |
|
158 |
query = config.input_prompt_template.format(
|
@@ -222,11 +221,8 @@ async def run_agent(agent, config) -> tuple[AgentTrace, float]:
|
|
222 |
status.update(label=message, expanded=False, state="running")
|
223 |
|
224 |
export_logs(agent, update_span)
|
225 |
-
start_time = time.time()
|
226 |
agent_trace: AgentTrace = await agent.run_async(query, **kwargs)
|
227 |
status.update(label="Finished!", expanded=False, state="complete")
|
228 |
-
end_time = time.time()
|
229 |
|
230 |
agent.exit()
|
231 |
-
|
232 |
-
return agent_trace, execution_time
|
|
|
3 |
from constants import DEFAULT_TOOLS
|
4 |
from components.agent_status import export_logs
|
5 |
import streamlit as st
|
|
|
6 |
from surf_spot_finder.config import Config
|
7 |
from any_agent import AgentConfig, AnyAgent, TracingConfig, AgentFramework
|
8 |
from any_agent.tracing.trace import AgentTrace, TotalTokenUseAndCost, AgentSpan
|
|
|
11 |
|
12 |
|
13 |
async def display_evaluation_results(result: TraceEvaluationResult):
|
14 |
+
if result.ground_truth_result is not None:
|
15 |
+
all_results = [*result.checkpoint_results, result.ground_truth_result]
|
16 |
+
else:
|
17 |
+
all_results = result.checkpoint_results
|
|
|
18 |
|
19 |
# Create columns for better layout
|
20 |
col1, col2 = st.columns(2)
|
|
|
100 |
return agent, config
|
101 |
|
102 |
|
103 |
+
async def display_output(agent_trace: AgentTrace):
|
104 |
# Display the agent trace in a more organized way
|
105 |
with st.expander("### 🧩 Agent Trace"):
|
106 |
for span in agent_trace.spans:
|
|
|
140 |
cost: TotalTokenUseAndCost = agent_trace.get_total_cost()
|
141 |
with st.expander("### π Results", expanded=True):
|
142 |
time_col, cost_col, tokens_col = st.columns(3)
|
143 |
+
duration = agent_trace.duration.total_seconds()
|
144 |
with time_col:
|
145 |
+
st.info(f"⏱️ Execution Time: {duration:0.2f} seconds")
|
146 |
with cost_col:
|
147 |
st.info(f"💰 Estimated Cost: ${cost.total_cost:.6f}")
|
148 |
with tokens_col:
|
|
|
151 |
st.info(agent_trace.final_output)
|
152 |
|
153 |
|
154 |
+
async def run_agent(agent, config) -> AgentTrace:
|
155 |
st.markdown("#### π Running Surf Spot Finder with query")
|
156 |
|
157 |
query = config.input_prompt_template.format(
|
|
|
221 |
status.update(label=message, expanded=False, state="running")
|
222 |
|
223 |
export_logs(agent, update_span)
|
|
|
224 |
agent_trace: AgentTrace = await agent.run_async(query, **kwargs)
|
225 |
status.update(label="Finished!", expanded=False, state="complete")
|
|
|
226 |
|
227 |
agent.exit()
|
228 |
+
return agent_trace
|
|