Spaces:
Running
Running
github-actions[bot]
committed on
Commit
·
62cf4ef
1
Parent(s):
6b470cd
Sync with https://github.com/mozilla-ai/surf-spot-finder
Browse files- Dockerfile +1 -1
- README.md +1 -1
- app.py +40 -0
- constants.py +3 -0
- pickers.py +81 -16
- utils.py +90 -32
Dockerfile
CHANGED
@@ -18,4 +18,4 @@ EXPOSE 8501
|
|
18 |
|
19 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
20 |
|
21 |
-
ENTRYPOINT ["streamlit", "run", "demo/
|
|
|
18 |
|
19 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
20 |
|
21 |
+
ENTRYPOINT ["streamlit", "run", "demo/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
|
README.md
CHANGED
@@ -14,7 +14,7 @@ license: apache-2.0
|
|
14 |
|
15 |
# Welcome to Streamlit!
|
16 |
|
17 |
-
Edit `/src/
|
18 |
|
19 |
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
20 |
forums](https://discuss.streamlit.io).
|
|
|
14 |
|
15 |
# Welcome to Streamlit!
|
16 |
|
17 |
+
Edit `/src/app.py` to customize this app to your heart's desire. :heart:
|
18 |
|
19 |
If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
|
20 |
forums](https://discuss.streamlit.io).
|
app.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import asyncio
|
3 |
+
import nest_asyncio
|
4 |
+
from pickers import get_user_inputs
|
5 |
+
from utils import run_agent
|
6 |
+
|
7 |
+
|
8 |
+
nest_asyncio.apply()
|
9 |
+
|
10 |
+
# Set page config
|
11 |
+
st.set_page_config(page_title="Surf Spot Finder", page_icon="🏄", layout="wide")
|
12 |
+
|
13 |
+
# Add title and header
|
14 |
+
st.title("🏄 Surf Spot Finder")
|
15 |
+
st.markdown(
|
16 |
+
"Find the best surfing spots based on your location and preferences! [Github Repo](https://github.com/mozilla-ai/surf-spot-finder)"
|
17 |
+
)
|
18 |
+
|
19 |
+
# Sidebar
|
20 |
+
with st.sidebar:
|
21 |
+
st.markdown("### Configuration")
|
22 |
+
st.markdown("Built using [Any-Agent](https://github.com/mozilla-ai/any-agent)")
|
23 |
+
user_inputs = get_user_inputs()
|
24 |
+
is_valid = user_inputs is not None
|
25 |
+
run_button = st.button("Run", disabled=not is_valid, type="primary")
|
26 |
+
|
27 |
+
|
28 |
+
# Main content
|
29 |
+
async def main():
|
30 |
+
if run_button:
|
31 |
+
await run_agent(user_inputs)
|
32 |
+
else:
|
33 |
+
st.info(
|
34 |
+
"👈 Configure your search parameters in the sidebar and click Run to start!"
|
35 |
+
)
|
36 |
+
|
37 |
+
|
38 |
+
if __name__ == "__main__":
|
39 |
+
loop = asyncio.new_event_loop()
|
40 |
+
loop.run_until_complete(main())
|
constants.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1 |
MODEL_OPTIONS = [
|
2 |
# "huggingface/novita/deepseek-ai/DeepSeek-V3",
|
3 |
# "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
|
|
|
|
|
|
|
4 |
"gemini/gemini-2.0-flash-lite",
|
5 |
"gemini-2.0-flash",
|
6 |
# "huggingface/Qwen/Qwen3-32B", # right now throwing an internal error, but novita qwen isn't supporting tool calling
|
|
|
1 |
MODEL_OPTIONS = [
|
2 |
# "huggingface/novita/deepseek-ai/DeepSeek-V3",
|
3 |
# "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
|
4 |
+
"openai/gpt-4.1-nano",
|
5 |
+
"openai/gpt-4.1-mini",
|
6 |
+
"openai/gpt-4o",
|
7 |
"gemini/gemini-2.0-flash-lite",
|
8 |
"gemini-2.0-flash",
|
9 |
# "huggingface/Qwen/Qwen3-32B", # right now throwing an internal error, but novita qwen isn't supporting tool calling
|
pickers.py
CHANGED
@@ -4,10 +4,60 @@ import requests
|
|
4 |
import streamlit as st
|
5 |
from any_agent import AgentFramework
|
6 |
from any_agent.tracing.trace import _is_tracing_supported
|
|
|
7 |
|
8 |
from constants import MODEL_OPTIONS
|
9 |
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
@st.cache_resource
|
12 |
def get_area(area_name: str) -> dict:
|
13 |
"""Get the area from Nominatim.
|
@@ -31,27 +81,34 @@ def get_area(area_name: str) -> dict:
|
|
31 |
|
32 |
|
33 |
def get_user_inputs() -> dict:
|
34 |
-
st.title("Surf Spot Finder")
|
35 |
-
st.write(
|
36 |
-
"This app finds the best surf spots in your area based on the weather forecast."
|
37 |
-
)
|
38 |
default_val = "Los Angeles California, US"
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
49 |
max_driving_hours = st.number_input(
|
50 |
"Enter the maximum driving hours", min_value=1, value=2
|
51 |
)
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
supported_frameworks = [
|
57 |
framework for framework in AgentFramework if _is_tracing_supported(framework)
|
@@ -71,10 +128,18 @@ def get_user_inputs() -> dict:
|
|
71 |
format_func=lambda x: "/".join(x.split("/")[-3:]),
|
72 |
)
|
73 |
|
|
|
|
|
|
|
|
|
|
|
74 |
return {
|
75 |
"location": location,
|
76 |
"max_driving_hours": max_driving_hours,
|
77 |
"date": date,
|
78 |
"framework": framework,
|
79 |
"model_id": model_id,
|
|
|
|
|
|
|
80 |
}
|
|
|
4 |
import streamlit as st
|
5 |
from any_agent import AgentFramework
|
6 |
from any_agent.tracing.trace import _is_tracing_supported
|
7 |
+
from any_agent.evaluation import EvaluationCase
|
8 |
|
9 |
from constants import MODEL_OPTIONS
|
10 |
|
11 |
|
12 |
+
def create_evaluation_case() -> EvaluationCase:
|
13 |
+
"""Create an EvaluationCase from the user configuration.
|
14 |
+
|
15 |
+
Args:
|
16 |
+
case_config (dict): The evaluation case configuration from the user
|
17 |
+
|
18 |
+
Returns:
|
19 |
+
EvaluationCase: The created evaluation case
|
20 |
+
"""
|
21 |
+
|
22 |
+
return EvaluationCase(
|
23 |
+
llm_judge="openai/gpt-4.1-mini",
|
24 |
+
checkpoints=[
|
25 |
+
{
|
26 |
+
"criteria": "Check if the agent used the get_surfing_spots tool and it succeeded, and that the tool was used before the get_wave_forecast and get_wind_forecast tools",
|
27 |
+
"points": 1,
|
28 |
+
},
|
29 |
+
{
|
30 |
+
"criteria": "Check if the agent used the get_wave_forecast tool and it succeeded",
|
31 |
+
"points": 1,
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"criteria": "Check if the agent used the get_wind_forecast tool and it succeeded",
|
35 |
+
"points": 1,
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"criteria": "Check if the agent used the get_area_lat_lon tool and it succeeded",
|
39 |
+
"points": 1,
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"criteria": "Check if the agent used the driving_hours_to_meters tool to convert the driving hours to meters and it succeeded",
|
43 |
+
"points": 1,
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"criteria": "Check if the final answer contains any description about the weather at the chosen location",
|
47 |
+
"points": 1,
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"criteria": "Check if the final answer contains one of the surf spots found by a call of the get_surfing_spots tool",
|
51 |
+
"points": 1,
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"criteria": "Check that the agent completed in fewer than 10 steps",
|
55 |
+
"points": 1,
|
56 |
+
},
|
57 |
+
],
|
58 |
+
)
|
59 |
+
|
60 |
+
|
61 |
@st.cache_resource
|
62 |
def get_area(area_name: str) -> dict:
|
63 |
"""Get the area from Nominatim.
|
|
|
81 |
|
82 |
|
83 |
def get_user_inputs() -> dict:
|
|
|
|
|
|
|
|
|
84 |
default_val = "Los Angeles California, US"
|
85 |
|
86 |
+
col1, col2 = st.columns([3, 1])
|
87 |
+
with col1:
|
88 |
+
location = st.text_input("Enter a location", value=default_val)
|
89 |
+
with col2:
|
90 |
+
if location:
|
91 |
+
location_check = get_area(location)
|
92 |
+
if not location_check:
|
93 |
+
st.error("❌")
|
94 |
+
else:
|
95 |
+
st.success("✅")
|
96 |
+
|
97 |
max_driving_hours = st.number_input(
|
98 |
"Enter the maximum driving hours", min_value=1, value=2
|
99 |
)
|
100 |
+
|
101 |
+
col_date, col_time = st.columns([2, 1])
|
102 |
+
with col_date:
|
103 |
+
date = st.date_input(
|
104 |
+
"Select a date in the future", value=datetime.now() + timedelta(days=1)
|
105 |
+
)
|
106 |
+
with col_time:
|
107 |
+
# default to 9am
|
108 |
+
time = st.time_input(
|
109 |
+
"Select a time", value=datetime.now().time().replace(hour=9, minute=0)
|
110 |
+
)
|
111 |
+
date = datetime.combine(date, time)
|
112 |
|
113 |
supported_frameworks = [
|
114 |
framework for framework in AgentFramework if _is_tracing_supported(framework)
|
|
|
128 |
format_func=lambda x: "/".join(x.split("/")[-3:]),
|
129 |
)
|
130 |
|
131 |
+
# Add evaluation case section
|
132 |
+
with st.expander("Evaluation Case"):
|
133 |
+
evaluation_case = create_evaluation_case()
|
134 |
+
st.write(evaluation_case.model_dump(), expanded=True)
|
135 |
+
|
136 |
return {
|
137 |
"location": location,
|
138 |
"max_driving_hours": max_driving_hours,
|
139 |
"date": date,
|
140 |
"framework": framework,
|
141 |
"model_id": model_id,
|
142 |
+
"evaluation_case": evaluation_case
|
143 |
+
if st.checkbox("Run Evaluation", value=True)
|
144 |
+
else None,
|
145 |
}
|
utils.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
from surf_spot_finder.tools import (
|
3 |
driving_hours_to_meters,
|
@@ -8,17 +10,22 @@ from surf_spot_finder.tools import (
|
|
8 |
)
|
9 |
from surf_spot_finder.config import Config
|
10 |
from any_agent import AgentConfig, AnyAgent, TracingConfig
|
|
|
|
|
11 |
from any_agent.evaluation import evaluate, TraceEvaluationResult
|
12 |
|
13 |
|
14 |
-
async def run_agent(user_inputs):
|
15 |
-
st.
|
|
|
16 |
if "huggingface" in user_inputs["model_id"]:
|
17 |
model_args = {
|
18 |
"extra_headers": {"X-HF-Bill-To": "mozilla-ai"},
|
|
|
19 |
}
|
20 |
else:
|
21 |
model_args = {}
|
|
|
22 |
agent_config = AgentConfig(
|
23 |
model_id=user_inputs["model_id"],
|
24 |
model_args=model_args,
|
@@ -30,6 +37,7 @@ async def run_agent(user_inputs):
|
|
30 |
driving_hours_to_meters,
|
31 |
],
|
32 |
)
|
|
|
33 |
config = Config(
|
34 |
location=user_inputs["location"],
|
35 |
max_driving_hours=user_inputs["max_driving_hours"],
|
@@ -37,7 +45,9 @@ async def run_agent(user_inputs):
|
|
37 |
framework=user_inputs["framework"],
|
38 |
main_agent=agent_config,
|
39 |
managed_agents=[],
|
40 |
-
evaluation_cases=
|
|
|
|
|
41 |
)
|
42 |
|
43 |
agent = await AnyAgent.create_async(
|
@@ -52,43 +62,91 @@ async def run_agent(user_inputs):
|
|
52 |
MAX_DRIVING_HOURS=config.max_driving_hours,
|
53 |
DATE=config.date,
|
54 |
)
|
55 |
-
st.write("Running agent with query:\n", query)
|
56 |
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
59 |
agent.exit()
|
60 |
|
61 |
-
st.
|
|
|
|
|
62 |
|
63 |
-
# Display the agent trace
|
64 |
-
with st.expander("Agent Trace"
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
if config.evaluation_cases is not None:
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
72 |
result: TraceEvaluationResult = evaluate(
|
73 |
evaluation_case=case,
|
74 |
trace=agent_trace,
|
75 |
agent_framework=config.framework,
|
76 |
)
|
77 |
-
|
78 |
-
|
79 |
-
result.
|
80 |
-
result.hypothesis_answer_results
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from typing import Any
|
3 |
import streamlit as st
|
4 |
from surf_spot_finder.tools import (
|
5 |
driving_hours_to_meters,
|
|
|
10 |
)
|
11 |
from surf_spot_finder.config import Config
|
12 |
from any_agent import AgentConfig, AnyAgent, TracingConfig
|
13 |
+
from any_agent.tracing.trace import AgentTrace
|
14 |
+
from any_agent.tracing.otel_types import StatusCode
|
15 |
from any_agent.evaluation import evaluate, TraceEvaluationResult
|
16 |
|
17 |
|
18 |
+
async def run_agent(user_inputs: dict[str, Any]):
|
19 |
+
st.markdown("### 🏄 Running Surf Spot Finder...")
|
20 |
+
|
21 |
if "huggingface" in user_inputs["model_id"]:
|
22 |
model_args = {
|
23 |
"extra_headers": {"X-HF-Bill-To": "mozilla-ai"},
|
24 |
+
"temperature": 0.0,
|
25 |
}
|
26 |
else:
|
27 |
model_args = {}
|
28 |
+
|
29 |
agent_config = AgentConfig(
|
30 |
model_id=user_inputs["model_id"],
|
31 |
model_args=model_args,
|
|
|
37 |
driving_hours_to_meters,
|
38 |
],
|
39 |
)
|
40 |
+
|
41 |
config = Config(
|
42 |
location=user_inputs["location"],
|
43 |
max_driving_hours=user_inputs["max_driving_hours"],
|
|
|
45 |
framework=user_inputs["framework"],
|
46 |
main_agent=agent_config,
|
47 |
managed_agents=[],
|
48 |
+
evaluation_cases=[user_inputs.get("evaluation_case")]
|
49 |
+
if user_inputs.get("evaluation_case")
|
50 |
+
else None,
|
51 |
)
|
52 |
|
53 |
agent = await AnyAgent.create_async(
|
|
|
62 |
MAX_DRIVING_HOURS=config.max_driving_hours,
|
63 |
DATE=config.date,
|
64 |
)
|
|
|
65 |
|
66 |
+
st.markdown("#### 📝 Query")
|
67 |
+
st.code(query, language="text")
|
68 |
+
|
69 |
+
with st.spinner("🤔 Analyzing surf spots..."):
|
70 |
+
agent_trace: AgentTrace = await agent.run_async(query)
|
71 |
agent.exit()
|
72 |
|
73 |
+
st.markdown("### 🏄 Results")
|
74 |
+
st.markdown("#### Final Output")
|
75 |
+
st.info(agent_trace.final_output)
|
76 |
|
77 |
+
# Display the agent trace in a more organized way
|
78 |
+
with st.expander("### 🧩 Agent Trace"):
|
79 |
+
for span in agent_trace.spans:
|
80 |
+
# Header with name and status
|
81 |
+
col1, col2 = st.columns([4, 1])
|
82 |
+
with col1:
|
83 |
+
st.markdown(f"**{span.name}**")
|
84 |
+
if span.attributes:
|
85 |
+
# st.json(span.attributes, expanded=False)
|
86 |
+
if "input.value" in span.attributes:
|
87 |
+
input_value = json.loads(span.attributes["input.value"])
|
88 |
+
if isinstance(input_value, list):
|
89 |
+
st.write(f"Input: {input_value[-1]}")
|
90 |
+
else:
|
91 |
+
st.write(f"Input: {input_value}")
|
92 |
+
if "output.value" in span.attributes:
|
93 |
+
output_value = json.loads(span.attributes["output.value"])
|
94 |
+
if isinstance(output_value, list):
|
95 |
+
st.write(f"Output: {output_value[-1]}")
|
96 |
+
else:
|
97 |
+
st.write(f"Output: {output_value}")
|
98 |
+
with col2:
|
99 |
+
status_color = (
|
100 |
+
"green" if span.status.status_code == StatusCode.OK else "red"
|
101 |
+
)
|
102 |
+
st.markdown(
|
103 |
+
f"<span style='color: {status_color}'>● {span.status.status_code.name}</span>",
|
104 |
+
unsafe_allow_html=True,
|
105 |
+
)
|
106 |
|
107 |
if config.evaluation_cases is not None:
|
108 |
+
assert (
|
109 |
+
len(config.evaluation_cases) == 1
|
110 |
+
), "Only one evaluation case is supported in the demo"
|
111 |
+
st.markdown("### 📊 Evaluation Results")
|
112 |
+
|
113 |
+
with st.spinner("Evaluating results..."):
|
114 |
+
case = config.evaluation_cases[0]
|
115 |
result: TraceEvaluationResult = evaluate(
|
116 |
evaluation_case=case,
|
117 |
trace=agent_trace,
|
118 |
agent_framework=config.framework,
|
119 |
)
|
120 |
+
|
121 |
+
all_results = (
|
122 |
+
result.checkpoint_results
|
123 |
+
+ result.hypothesis_answer_results
|
124 |
+
+ result.direct_results
|
125 |
+
)
|
126 |
+
|
127 |
+
# Create columns for better layout
|
128 |
+
col1, col2 = st.columns(2)
|
129 |
+
|
130 |
+
with col1:
|
131 |
+
st.markdown("#### Criteria Results")
|
132 |
+
for checkpoint in all_results:
|
133 |
+
if checkpoint.passed:
|
134 |
+
st.success(f"✅ {checkpoint.criteria}")
|
135 |
+
else:
|
136 |
+
st.error(f"❌ {checkpoint.criteria}")
|
137 |
+
|
138 |
+
with col2:
|
139 |
+
st.markdown("#### Overall Score")
|
140 |
+
total_points = sum([result.points for result in all_results])
|
141 |
+
if total_points == 0:
|
142 |
+
msg = "Total points is 0, cannot calculate score."
|
143 |
+
raise ValueError(msg)
|
144 |
+
passed_points = sum(
|
145 |
+
[result.points for result in all_results if result.passed]
|
146 |
+
)
|
147 |
+
|
148 |
+
# Create a nice score display
|
149 |
+
st.markdown(f"### {passed_points}/{total_points}")
|
150 |
+
percentage = (passed_points / total_points) * 100
|
151 |
+
st.progress(percentage / 100)
|
152 |
+
st.markdown(f"**{percentage:.1f}%**")
|