github-actions[bot] committed on
Commit
62cf4ef
·
1 Parent(s): 6b470cd

Sync with https://github.com/mozilla-ai/surf-spot-finder

Browse files
Files changed (6) hide show
  1. Dockerfile +1 -1
  2. README.md +1 -1
  3. app.py +40 -0
  4. constants.py +3 -0
  5. pickers.py +81 -16
  6. utils.py +90 -32
Dockerfile CHANGED
@@ -18,4 +18,4 @@ EXPOSE 8501
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
- ENTRYPOINT ["streamlit", "run", "demo/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
+ ENTRYPOINT ["streamlit", "run", "demo/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -14,7 +14,7 @@ license: apache-2.0
14
 
15
  # Welcome to Streamlit!
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
 
19
  If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
  forums](https://discuss.streamlit.io).
 
14
 
15
  # Welcome to Streamlit!
16
 
17
+ Edit `/src/app.py` to customize this app to your heart's desire. :heart:
18
 
19
  If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
  forums](https://discuss.streamlit.io).
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import nest_asyncio
4
+ from pickers import get_user_inputs
5
+ from utils import run_agent
6
+
7
+
8
+ nest_asyncio.apply()
9
+
10
+ # Set page config
11
+ st.set_page_config(page_title="Surf Spot Finder", page_icon="πŸ„", layout="wide")
12
+
13
+ # Add title and header
14
+ st.title("πŸ„ Surf Spot Finder")
15
+ st.markdown(
16
+ "Find the best surfing spots based on your location and preferences! [Github Repo](https://github.com/mozilla-ai/surf-spot-finder)"
17
+ )
18
+
19
+ # Sidebar
20
+ with st.sidebar:
21
+ st.markdown("### Configuration")
22
+ st.markdown("Built using [Any-Agent](https://github.com/mozilla-ai/any-agent)")
23
+ user_inputs = get_user_inputs()
24
+ is_valid = user_inputs is not None
25
+ run_button = st.button("Run", disabled=not is_valid, type="primary")
26
+
27
+
28
+ # Main content
29
+ async def main():
30
+ if run_button:
31
+ await run_agent(user_inputs)
32
+ else:
33
+ st.info(
34
+ "πŸ‘ˆ Configure your search parameters in the sidebar and click Run to start!"
35
+ )
36
+
37
+
38
+ if __name__ == "__main__":
39
+ loop = asyncio.new_event_loop()
40
+ loop.run_until_complete(main())
constants.py CHANGED
@@ -1,6 +1,9 @@
1
  MODEL_OPTIONS = [
2
  # "huggingface/novita/deepseek-ai/DeepSeek-V3",
3
  # "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
 
 
 
4
  "gemini/gemini-2.0-flash-lite",
5
  "gemini-2.0-flash",
6
  # "huggingface/Qwen/Qwen3-32B", # right now throwing an internal error, but novita qwen isn't supporting tool calling
 
1
  MODEL_OPTIONS = [
2
  # "huggingface/novita/deepseek-ai/DeepSeek-V3",
3
  # "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
4
+ "openai/gpt-4.1-nano",
5
+ "openai/gpt-4.1-mini",
6
+ "openai/gpt-4o",
7
  "gemini/gemini-2.0-flash-lite",
8
  "gemini-2.0-flash",
9
  # "huggingface/Qwen/Qwen3-32B", # right now throwing an internal error, but novita qwen isn't supporting tool calling
pickers.py CHANGED
@@ -4,10 +4,60 @@ import requests
4
  import streamlit as st
5
  from any_agent import AgentFramework
6
  from any_agent.tracing.trace import _is_tracing_supported
 
7
 
8
  from constants import MODEL_OPTIONS
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  @st.cache_resource
12
  def get_area(area_name: str) -> dict:
13
  """Get the area from Nominatim.
@@ -31,27 +81,34 @@ def get_area(area_name: str) -> dict:
31
 
32
 
33
  def get_user_inputs() -> dict:
34
- st.title("Surf Spot Finder")
35
- st.write(
36
- "This app finds the best surf spots in your area based on the weather forecast."
37
- )
38
  default_val = "Los Angeles California, US"
39
 
40
- location = st.text_input("Enter a location", value=default_val)
41
- if location:
42
- location_check = get_area(location)
43
- if not location_check:
44
- st.error("Invalid location. Please enter a valid location.")
45
- return None
46
- else:
47
- # display a checkmark saying that the location is valid
48
- st.success("Valid location")
 
 
49
  max_driving_hours = st.number_input(
50
  "Enter the maximum driving hours", min_value=1, value=2
51
  )
52
- date = st.date_input(
53
- "Select a date in the future", value=datetime.now() + timedelta(days=1)
54
- )
 
 
 
 
 
 
 
 
 
55
 
56
  supported_frameworks = [
57
  framework for framework in AgentFramework if _is_tracing_supported(framework)
@@ -71,10 +128,18 @@ def get_user_inputs() -> dict:
71
  format_func=lambda x: "/".join(x.split("/")[-3:]),
72
  )
73
 
 
 
 
 
 
74
  return {
75
  "location": location,
76
  "max_driving_hours": max_driving_hours,
77
  "date": date,
78
  "framework": framework,
79
  "model_id": model_id,
 
 
 
80
  }
 
4
  import streamlit as st
5
  from any_agent import AgentFramework
6
  from any_agent.tracing.trace import _is_tracing_supported
7
+ from any_agent.evaluation import EvaluationCase
8
 
9
  from constants import MODEL_OPTIONS
10
 
11
 
12
+ def create_evaluation_case() -> EvaluationCase:
13
+ """Create an EvaluationCase from the user configuration.
14
+
15
+ Args:
16
+ case_config (dict): The evaluation case configuration from the user
17
+
18
+ Returns:
19
+ EvaluationCase: The created evaluation case
20
+ """
21
+
22
+ return EvaluationCase(
23
+ llm_judge="openai/gpt-4.1-mini",
24
+ checkpoints=[
25
+ {
26
+ "criteria": "Check if the agent used the get_surfing_spots tool and it succeeded, and that the tool was used before the get_wave_forecast and get_wind_forecast tools",
27
+ "points": 1,
28
+ },
29
+ {
30
+ "criteria": "Check if the agent used the get_wave_forecast tool and it succeeded",
31
+ "points": 1,
32
+ },
33
+ {
34
+ "criteria": "Check if the agent used the get_wind_forecast tool and it succeeded",
35
+ "points": 1,
36
+ },
37
+ {
38
+ "criteria": "Check if the agent used the get_area_lat_lon tool and it succeeded",
39
+ "points": 1,
40
+ },
41
+ {
42
+ "criteria": "Check if the agent used the driving_hours_to_meters tool to convert the driving hours to meters and it succeeded",
43
+ "points": 1,
44
+ },
45
+ {
46
+ "criteria": "Check if the final answer contains any description about the weather at the chosen location",
47
+ "points": 1,
48
+ },
49
+ {
50
+ "criteria": "Check if the final answer contains one of the surf spots found by a call of the get_surfing_spots tool",
51
+ "points": 1,
52
+ },
53
+ {
54
+ "criteria": "Check that the agent completed in fewer than 10 steps",
55
+ "points": 1,
56
+ },
57
+ ],
58
+ )
59
+
60
+
61
  @st.cache_resource
62
  def get_area(area_name: str) -> dict:
63
  """Get the area from Nominatim.
 
81
 
82
 
83
  def get_user_inputs() -> dict:
 
 
 
 
84
  default_val = "Los Angeles California, US"
85
 
86
+ col1, col2 = st.columns([3, 1])
87
+ with col1:
88
+ location = st.text_input("Enter a location", value=default_val)
89
+ with col2:
90
+ if location:
91
+ location_check = get_area(location)
92
+ if not location_check:
93
+ st.error("❌")
94
+ else:
95
+ st.success("βœ…")
96
+
97
  max_driving_hours = st.number_input(
98
  "Enter the maximum driving hours", min_value=1, value=2
99
  )
100
+
101
+ col_date, col_time = st.columns([2, 1])
102
+ with col_date:
103
+ date = st.date_input(
104
+ "Select a date in the future", value=datetime.now() + timedelta(days=1)
105
+ )
106
+ with col_time:
107
+ # default to 9am
108
+ time = st.time_input(
109
+ "Select a time", value=datetime.now().time().replace(hour=9, minute=0)
110
+ )
111
+ date = datetime.combine(date, time)
112
 
113
  supported_frameworks = [
114
  framework for framework in AgentFramework if _is_tracing_supported(framework)
 
128
  format_func=lambda x: "/".join(x.split("/")[-3:]),
129
  )
130
 
131
+ # Add evaluation case section
132
+ with st.expander("Evaluation Case"):
133
+ evaluation_case = create_evaluation_case()
134
+ st.write(evaluation_case.model_dump(), expanded=True)
135
+
136
  return {
137
  "location": location,
138
  "max_driving_hours": max_driving_hours,
139
  "date": date,
140
  "framework": framework,
141
  "model_id": model_id,
142
+ "evaluation_case": evaluation_case
143
+ if st.checkbox("Run Evaluation", value=True)
144
+ else None,
145
  }
utils.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import streamlit as st
2
  from surf_spot_finder.tools import (
3
  driving_hours_to_meters,
@@ -8,17 +10,22 @@ from surf_spot_finder.tools import (
8
  )
9
  from surf_spot_finder.config import Config
10
  from any_agent import AgentConfig, AnyAgent, TracingConfig
 
 
11
  from any_agent.evaluation import evaluate, TraceEvaluationResult
12
 
13
 
14
- async def run_agent(user_inputs):
15
- st.write("Running surf spot finder...")
 
16
  if "huggingface" in user_inputs["model_id"]:
17
  model_args = {
18
  "extra_headers": {"X-HF-Bill-To": "mozilla-ai"},
 
19
  }
20
  else:
21
  model_args = {}
 
22
  agent_config = AgentConfig(
23
  model_id=user_inputs["model_id"],
24
  model_args=model_args,
@@ -30,6 +37,7 @@ async def run_agent(user_inputs):
30
  driving_hours_to_meters,
31
  ],
32
  )
 
33
  config = Config(
34
  location=user_inputs["location"],
35
  max_driving_hours=user_inputs["max_driving_hours"],
@@ -37,7 +45,9 @@ async def run_agent(user_inputs):
37
  framework=user_inputs["framework"],
38
  main_agent=agent_config,
39
  managed_agents=[],
40
- evaluation_cases=None,
 
 
41
  )
42
 
43
  agent = await AnyAgent.create_async(
@@ -52,43 +62,91 @@ async def run_agent(user_inputs):
52
  MAX_DRIVING_HOURS=config.max_driving_hours,
53
  DATE=config.date,
54
  )
55
- st.write("Running agent with query:\n", query)
56
 
57
- with st.spinner("Running..."):
58
- agent_trace = await agent.run_async(query)
 
 
 
59
  agent.exit()
60
 
61
- st.write("Final output from agent:\n", agent_trace.final_output)
 
 
62
 
63
- # Display the agent trace
64
- with st.expander("Agent Trace", expanded=True):
65
- st.write(agent_trace.spans)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  if config.evaluation_cases is not None:
68
- results = []
69
- st.write("Found evaluation cases, running trace evaluation")
70
- for i, case in enumerate(config.evaluation_cases):
71
- st.write("Evaluating case: ", case)
 
 
 
72
  result: TraceEvaluationResult = evaluate(
73
  evaluation_case=case,
74
  trace=agent_trace,
75
  agent_framework=config.framework,
76
  )
77
- for list_of_checkpoints in [
78
- result.checkpoint_results,
79
- result.direct_results,
80
- result.hypothesis_answer_results,
81
- ]:
82
- for checkpoint in list_of_checkpoints:
83
- msg = (
84
- f"Checkpoint: {checkpoint.criteria}\n"
85
- f"\tPassed: {checkpoint.passed}\n"
86
- f"\tReason: {checkpoint.reason}\n"
87
- f"\tScore: {'%d/%d' % (checkpoint.points, checkpoint.points) if checkpoint.passed else '0/%d' % checkpoint.points}"
88
- )
89
- st.write(msg)
90
- st.write("==========================")
91
- st.write("Overall Score: %d%%", 100 * result.score)
92
- st.write("==========================")
93
- results.append(result)
94
- st.write("Surf spot finder finished running.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Any
3
  import streamlit as st
4
  from surf_spot_finder.tools import (
5
  driving_hours_to_meters,
 
10
  )
11
  from surf_spot_finder.config import Config
12
  from any_agent import AgentConfig, AnyAgent, TracingConfig
13
+ from any_agent.tracing.trace import AgentTrace
14
+ from any_agent.tracing.otel_types import StatusCode
15
  from any_agent.evaluation import evaluate, TraceEvaluationResult
16
 
17
 
18
+ async def run_agent(user_inputs: dict[str, Any]):
19
+ st.markdown("### πŸ” Running Surf Spot Finder...")
20
+
21
  if "huggingface" in user_inputs["model_id"]:
22
  model_args = {
23
  "extra_headers": {"X-HF-Bill-To": "mozilla-ai"},
24
+ "temperature": 0.0,
25
  }
26
  else:
27
  model_args = {}
28
+
29
  agent_config = AgentConfig(
30
  model_id=user_inputs["model_id"],
31
  model_args=model_args,
 
37
  driving_hours_to_meters,
38
  ],
39
  )
40
+
41
  config = Config(
42
  location=user_inputs["location"],
43
  max_driving_hours=user_inputs["max_driving_hours"],
 
45
  framework=user_inputs["framework"],
46
  main_agent=agent_config,
47
  managed_agents=[],
48
+ evaluation_cases=[user_inputs.get("evaluation_case")]
49
+ if user_inputs.get("evaluation_case")
50
+ else None,
51
  )
52
 
53
  agent = await AnyAgent.create_async(
 
62
  MAX_DRIVING_HOURS=config.max_driving_hours,
63
  DATE=config.date,
64
  )
 
65
 
66
+ st.markdown("#### πŸ“ Query")
67
+ st.code(query, language="text")
68
+
69
+ with st.spinner("πŸ€” Analyzing surf spots..."):
70
+ agent_trace: AgentTrace = await agent.run_async(query)
71
  agent.exit()
72
 
73
+ st.markdown("### πŸ„ Results")
74
+ st.markdown("#### Final Output")
75
+ st.info(agent_trace.final_output)
76
 
77
+ # Display the agent trace in a more organized way
78
+ with st.expander("### 🧩 Agent Trace"):
79
+ for span in agent_trace.spans:
80
+ # Header with name and status
81
+ col1, col2 = st.columns([4, 1])
82
+ with col1:
83
+ st.markdown(f"**{span.name}**")
84
+ if span.attributes:
85
+ # st.json(span.attributes, expanded=False)
86
+ if "input.value" in span.attributes:
87
+ input_value = json.loads(span.attributes["input.value"])
88
+ if isinstance(input_value, list):
89
+ st.write(f"Input: {input_value[-1]}")
90
+ else:
91
+ st.write(f"Input: {input_value}")
92
+ if "output.value" in span.attributes:
93
+ output_value = json.loads(span.attributes["output.value"])
94
+ if isinstance(output_value, list):
95
+ st.write(f"Output: {output_value[-1]}")
96
+ else:
97
+ st.write(f"Output: {output_value}")
98
+ with col2:
99
+ status_color = (
100
+ "green" if span.status.status_code == StatusCode.OK else "red"
101
+ )
102
+ st.markdown(
103
+ f"<span style='color: {status_color}'>● {span.status.status_code.name}</span>",
104
+ unsafe_allow_html=True,
105
+ )
106
 
107
  if config.evaluation_cases is not None:
108
+ assert (
109
+ len(config.evaluation_cases) == 1
110
+ ), "Only one evaluation case is supported in the demo"
111
+ st.markdown("### πŸ“Š Evaluation Results")
112
+
113
+ with st.spinner("Evaluating results..."):
114
+ case = config.evaluation_cases[0]
115
  result: TraceEvaluationResult = evaluate(
116
  evaluation_case=case,
117
  trace=agent_trace,
118
  agent_framework=config.framework,
119
  )
120
+
121
+ all_results = (
122
+ result.checkpoint_results
123
+ + result.hypothesis_answer_results
124
+ + result.direct_results
125
+ )
126
+
127
+ # Create columns for better layout
128
+ col1, col2 = st.columns(2)
129
+
130
+ with col1:
131
+ st.markdown("#### Criteria Results")
132
+ for checkpoint in all_results:
133
+ if checkpoint.passed:
134
+ st.success(f"βœ… {checkpoint.criteria}")
135
+ else:
136
+ st.error(f"❌ {checkpoint.criteria}")
137
+
138
+ with col2:
139
+ st.markdown("#### Overall Score")
140
+ total_points = sum([result.points for result in all_results])
141
+ if total_points == 0:
142
+ msg = "Total points is 0, cannot calculate score."
143
+ raise ValueError(msg)
144
+ passed_points = sum(
145
+ [result.points for result in all_results if result.passed]
146
+ )
147
+
148
+ # Create a nice score display
149
+ st.markdown(f"### {passed_points}/{total_points}")
150
+ percentage = (passed_points / total_points) * 100
151
+ st.progress(percentage / 100)
152
+ st.markdown(f"**{percentage:.1f}%**")