github-actions[bot] committed on
Commit
62cf4ef
·
1 Parent(s): 6b470cd

Sync with https://github.com/mozilla-ai/surf-spot-finder

Browse files
Files changed (6) hide show
  1. Dockerfile +1 -1
  2. README.md +1 -1
  3. app.py +40 -0
  4. constants.py +3 -0
  5. pickers.py +81 -16
  6. utils.py +90 -32
Dockerfile CHANGED
@@ -18,4 +18,4 @@ EXPOSE 8501
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
- ENTRYPOINT ["streamlit", "run", "demo/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
18
 
19
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
20
 
21
+ ENTRYPOINT ["streamlit", "run", "demo/app.py", "--server.port=8501", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -14,7 +14,7 @@ license: apache-2.0
14
 
15
  # Welcome to Streamlit!
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
18
 
19
  If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
  forums](https://discuss.streamlit.io).
 
14
 
15
  # Welcome to Streamlit!
16
 
17
+ Edit `/src/app.py` to customize this app to your heart's desire. :heart:
18
 
19
  If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
  forums](https://discuss.streamlit.io).
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import nest_asyncio
4
+ from pickers import get_user_inputs
5
+ from utils import run_agent
6
+
7
+
8
+ nest_asyncio.apply()
9
+
10
+ # Set page config
11
+ st.set_page_config(page_title="Surf Spot Finder", page_icon="πŸ„", layout="wide")
12
+
13
+ # Add title and header
14
+ st.title("πŸ„ Surf Spot Finder")
15
+ st.markdown(
16
+ "Find the best surfing spots based on your location and preferences! [Github Repo](https://github.com/mozilla-ai/surf-spot-finder)"
17
+ )
18
+
19
+ # Sidebar
20
+ with st.sidebar:
21
+ st.markdown("### Configuration")
22
+ st.markdown("Built using [Any-Agent](https://github.com/mozilla-ai/any-agent)")
23
+ user_inputs = get_user_inputs()
24
+ is_valid = user_inputs is not None
25
+ run_button = st.button("Run", disabled=not is_valid, type="primary")
26
+
27
+
28
+ # Main content
29
+ async def main():
30
+ if run_button:
31
+ await run_agent(user_inputs)
32
+ else:
33
+ st.info(
34
+ "πŸ‘ˆ Configure your search parameters in the sidebar and click Run to start!"
35
+ )
36
+
37
+
38
+ if __name__ == "__main__":
39
+ loop = asyncio.new_event_loop()
40
+ loop.run_until_complete(main())
constants.py CHANGED
@@ -1,6 +1,9 @@
1
  MODEL_OPTIONS = [
2
  # "huggingface/novita/deepseek-ai/DeepSeek-V3",
3
  # "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
 
 
 
4
  "gemini/gemini-2.0-flash-lite",
5
  "gemini-2.0-flash",
6
  # "huggingface/Qwen/Qwen3-32B", # right now throwing an internal error, but novita qwen isn't supporting tool calling
 
1
  MODEL_OPTIONS = [
2
  # "huggingface/novita/deepseek-ai/DeepSeek-V3",
3
  # "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
4
+ "openai/gpt-4.1-nano",
5
+ "openai/gpt-4.1-mini",
6
+ "openai/gpt-4o",
7
  "gemini/gemini-2.0-flash-lite",
8
  "gemini-2.0-flash",
9
  # "huggingface/Qwen/Qwen3-32B", # right now throwing an internal error, but novita qwen isn't supporting tool calling
pickers.py CHANGED
@@ -4,10 +4,60 @@ import requests
4
  import streamlit as st
5
  from any_agent import AgentFramework
6
  from any_agent.tracing.trace import _is_tracing_supported
 
7
 
8
  from constants import MODEL_OPTIONS
9
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  @st.cache_resource
12
  def get_area(area_name: str) -> dict:
13
  """Get the area from Nominatim.
@@ -31,27 +81,34 @@ def get_area(area_name: str) -> dict:
31
 
32
 
33
  def get_user_inputs() -> dict:
34
- st.title("Surf Spot Finder")
35
- st.write(
36
- "This app finds the best surf spots in your area based on the weather forecast."
37
- )
38
  default_val = "Los Angeles California, US"
39
 
40
- location = st.text_input("Enter a location", value=default_val)
41
- if location:
42
- location_check = get_area(location)
43
- if not location_check:
44
- st.error("Invalid location. Please enter a valid location.")
45
- return None
46
- else:
47
- # display a checkmark saying that the location is valid
48
- st.success("Valid location")
 
 
49
  max_driving_hours = st.number_input(
50
  "Enter the maximum driving hours", min_value=1, value=2
51
  )
52
- date = st.date_input(
53
- "Select a date in the future", value=datetime.now() + timedelta(days=1)
54
- )
 
 
 
 
 
 
 
 
 
55
 
56
  supported_frameworks = [
57
  framework for framework in AgentFramework if _is_tracing_supported(framework)
@@ -71,10 +128,18 @@ def get_user_inputs() -> dict:
71
  format_func=lambda x: "/".join(x.split("/")[-3:]),
72
  )
73
 
 
 
 
 
 
74
  return {
75
  "location": location,
76
  "max_driving_hours": max_driving_hours,
77
  "date": date,
78
  "framework": framework,
79
  "model_id": model_id,
 
 
 
80
  }
 
4
  import streamlit as st
5
  from any_agent import AgentFramework
6
  from any_agent.tracing.trace import _is_tracing_supported
7
+ from any_agent.evaluation import EvaluationCase
8
 
9
  from constants import MODEL_OPTIONS
10
 
11
 
12
+ def create_evaluation_case() -> EvaluationCase:
13
+ """Create an EvaluationCase from the user configuration.
14
+
15
+ Args:
16
+ case_config (dict): The evaluation case configuration from the user
17
+
18
+ Returns:
19
+ EvaluationCase: The created evaluation case
20
+ """
21
+
22
+ return EvaluationCase(
23
+ llm_judge="openai/gpt-4.1-mini",
24
+ checkpoints=[
25
+ {
26
+ "criteria": "Check if the agent used the get_surfing_spots tool and it succeeded, and that the tool was used before the get_wave_forecast and get_wind_forecast tools",
27
+ "points": 1,
28
+ },
29
+ {
30
+ "criteria": "Check if the agent used the get_wave_forecast tool and it succeeded",
31
+ "points": 1,
32
+ },
33
+ {
34
+ "criteria": "Check if the agent used the get_wind_forecast tool and it succeeded",
35
+ "points": 1,
36
+ },
37
+ {
38
+ "criteria": "Check if the agent used the get_area_lat_lon tool and it succeeded",
39
+ "points": 1,
40
+ },
41
+ {
42
+ "criteria": "Check if the agent used the driving_hours_to_meters tool to convert the driving hours to meters and it succeeded",
43
+ "points": 1,
44
+ },
45
+ {
46
+ "criteria": "Check if the final answer contains any description about the weather at the chosen location",
47
+ "points": 1,
48
+ },
49
+ {
50
+ "criteria": "Check if the final answer contains one of the surf spots found by a call of the get_surfing_spots tool",
51
+ "points": 1,
52
+ },
53
+ {
54
+ "criteria": "Check that the agent completed in fewer than 10 steps",
55
+ "points": 1,
56
+ },
57
+ ],
58
+ )
59
+
60
+
61
  @st.cache_resource
62
  def get_area(area_name: str) -> dict:
63
  """Get the area from Nominatim.
 
81
 
82
 
83
  def get_user_inputs() -> dict:
 
 
 
 
84
  default_val = "Los Angeles California, US"
85
 
86
+ col1, col2 = st.columns([3, 1])
87
+ with col1:
88
+ location = st.text_input("Enter a location", value=default_val)
89
+ with col2:
90
+ if location:
91
+ location_check = get_area(location)
92
+ if not location_check:
93
+ st.error("❌")
94
+ else:
95
+ st.success("βœ…")
96
+
97
  max_driving_hours = st.number_input(
98
  "Enter the maximum driving hours", min_value=1, value=2
99
  )
100
+
101
+ col_date, col_time = st.columns([2, 1])
102
+ with col_date:
103
+ date = st.date_input(
104
+ "Select a date in the future", value=datetime.now() + timedelta(days=1)
105
+ )
106
+ with col_time:
107
+ # default to 9am
108
+ time = st.time_input(
109
+ "Select a time", value=datetime.now().time().replace(hour=9, minute=0)
110
+ )
111
+ date = datetime.combine(date, time)
112
 
113
  supported_frameworks = [
114
  framework for framework in AgentFramework if _is_tracing_supported(framework)
 
128
  format_func=lambda x: "/".join(x.split("/")[-3:]),
129
  )
130
 
131
+ # Add evaluation case section
132
+ with st.expander("Evaluation Case"):
133
+ evaluation_case = create_evaluation_case()
134
+ st.write(evaluation_case.model_dump(), expanded=True)
135
+
136
  return {
137
  "location": location,
138
  "max_driving_hours": max_driving_hours,
139
  "date": date,
140
  "framework": framework,
141
  "model_id": model_id,
142
+ "evaluation_case": evaluation_case
143
+ if st.checkbox("Run Evaluation", value=True)
144
+ else None,
145
  }
utils.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import streamlit as st
2
  from surf_spot_finder.tools import (
3
  driving_hours_to_meters,
@@ -8,17 +10,22 @@ from surf_spot_finder.tools import (
8
  )
9
  from surf_spot_finder.config import Config
10
  from any_agent import AgentConfig, AnyAgent, TracingConfig
 
 
11
  from any_agent.evaluation import evaluate, TraceEvaluationResult
12
 
13
 
14
- async def run_agent(user_inputs):
15
- st.write("Running surf spot finder...")
 
16
  if "huggingface" in user_inputs["model_id"]:
17
  model_args = {
18
  "extra_headers": {"X-HF-Bill-To": "mozilla-ai"},
 
19
  }
20
  else:
21
  model_args = {}
 
22
  agent_config = AgentConfig(
23
  model_id=user_inputs["model_id"],
24
  model_args=model_args,
@@ -30,6 +37,7 @@ async def run_agent(user_inputs):
30
  driving_hours_to_meters,
31
  ],
32
  )
 
33
  config = Config(
34
  location=user_inputs["location"],
35
  max_driving_hours=user_inputs["max_driving_hours"],
@@ -37,7 +45,9 @@ async def run_agent(user_inputs):
37
  framework=user_inputs["framework"],
38
  main_agent=agent_config,
39
  managed_agents=[],
40
- evaluation_cases=None,
 
 
41
  )
42
 
43
  agent = await AnyAgent.create_async(
@@ -52,43 +62,91 @@ async def run_agent(user_inputs):
52
  MAX_DRIVING_HOURS=config.max_driving_hours,
53
  DATE=config.date,
54
  )
55
- st.write("Running agent with query:\n", query)
56
 
57
- with st.spinner("Running..."):
58
- agent_trace = await agent.run_async(query)
 
 
 
59
  agent.exit()
60
 
61
- st.write("Final output from agent:\n", agent_trace.final_output)
 
 
62
 
63
- # Display the agent trace
64
- with st.expander("Agent Trace", expanded=True):
65
- st.write(agent_trace.spans)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  if config.evaluation_cases is not None:
68
- results = []
69
- st.write("Found evaluation cases, running trace evaluation")
70
- for i, case in enumerate(config.evaluation_cases):
71
- st.write("Evaluating case: ", case)
 
 
 
72
  result: TraceEvaluationResult = evaluate(
73
  evaluation_case=case,
74
  trace=agent_trace,
75
  agent_framework=config.framework,
76
  )
77
- for list_of_checkpoints in [
78
- result.checkpoint_results,
79
- result.direct_results,
80
- result.hypothesis_answer_results,
81
- ]:
82
- for checkpoint in list_of_checkpoints:
83
- msg = (
84
- f"Checkpoint: {checkpoint.criteria}\n"
85
- f"\tPassed: {checkpoint.passed}\n"
86
- f"\tReason: {checkpoint.reason}\n"
87
- f"\tScore: {'%d/%d' % (checkpoint.points, checkpoint.points) if checkpoint.passed else '0/%d' % checkpoint.points}"
88
- )
89
- st.write(msg)
90
- st.write("==========================")
91
- st.write("Overall Score: %d%%", 100 * result.score)
92
- st.write("==========================")
93
- results.append(result)
94
- st.write("Surf spot finder finished running.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Any
3
  import streamlit as st
4
  from surf_spot_finder.tools import (
5
  driving_hours_to_meters,
 
10
  )
11
  from surf_spot_finder.config import Config
12
  from any_agent import AgentConfig, AnyAgent, TracingConfig
13
+ from any_agent.tracing.trace import AgentTrace
14
+ from any_agent.tracing.otel_types import StatusCode
15
  from any_agent.evaluation import evaluate, TraceEvaluationResult
16
 
17
 
18
+ async def run_agent(user_inputs: dict[str, Any]):
19
+ st.markdown("### πŸ” Running Surf Spot Finder...")
20
+
21
  if "huggingface" in user_inputs["model_id"]:
22
  model_args = {
23
  "extra_headers": {"X-HF-Bill-To": "mozilla-ai"},
24
+ "temperature": 0.0,
25
  }
26
  else:
27
  model_args = {}
28
+
29
  agent_config = AgentConfig(
30
  model_id=user_inputs["model_id"],
31
  model_args=model_args,
 
37
  driving_hours_to_meters,
38
  ],
39
  )
40
+
41
  config = Config(
42
  location=user_inputs["location"],
43
  max_driving_hours=user_inputs["max_driving_hours"],
 
45
  framework=user_inputs["framework"],
46
  main_agent=agent_config,
47
  managed_agents=[],
48
+ evaluation_cases=[user_inputs.get("evaluation_case")]
49
+ if user_inputs.get("evaluation_case")
50
+ else None,
51
  )
52
 
53
  agent = await AnyAgent.create_async(
 
62
  MAX_DRIVING_HOURS=config.max_driving_hours,
63
  DATE=config.date,
64
  )
 
65
 
66
+ st.markdown("#### πŸ“ Query")
67
+ st.code(query, language="text")
68
+
69
+ with st.spinner("πŸ€” Analyzing surf spots..."):
70
+ agent_trace: AgentTrace = await agent.run_async(query)
71
  agent.exit()
72
 
73
+ st.markdown("### πŸ„ Results")
74
+ st.markdown("#### Final Output")
75
+ st.info(agent_trace.final_output)
76
 
77
+ # Display the agent trace in a more organized way
78
+ with st.expander("### 🧩 Agent Trace"):
79
+ for span in agent_trace.spans:
80
+ # Header with name and status
81
+ col1, col2 = st.columns([4, 1])
82
+ with col1:
83
+ st.markdown(f"**{span.name}**")
84
+ if span.attributes:
85
+ # st.json(span.attributes, expanded=False)
86
+ if "input.value" in span.attributes:
87
+ input_value = json.loads(span.attributes["input.value"])
88
+ if isinstance(input_value, list):
89
+ st.write(f"Input: {input_value[-1]}")
90
+ else:
91
+ st.write(f"Input: {input_value}")
92
+ if "output.value" in span.attributes:
93
+ output_value = json.loads(span.attributes["output.value"])
94
+ if isinstance(output_value, list):
95
+ st.write(f"Output: {output_value[-1]}")
96
+ else:
97
+ st.write(f"Output: {output_value}")
98
+ with col2:
99
+ status_color = (
100
+ "green" if span.status.status_code == StatusCode.OK else "red"
101
+ )
102
+ st.markdown(
103
+ f"<span style='color: {status_color}'>● {span.status.status_code.name}</span>",
104
+ unsafe_allow_html=True,
105
+ )
106
 
107
  if config.evaluation_cases is not None:
108
+ assert (
109
+ len(config.evaluation_cases) == 1
110
+ ), "Only one evaluation case is supported in the demo"
111
+ st.markdown("### πŸ“Š Evaluation Results")
112
+
113
+ with st.spinner("Evaluating results..."):
114
+ case = config.evaluation_cases[0]
115
  result: TraceEvaluationResult = evaluate(
116
  evaluation_case=case,
117
  trace=agent_trace,
118
  agent_framework=config.framework,
119
  )
120
+
121
+ all_results = (
122
+ result.checkpoint_results
123
+ + result.hypothesis_answer_results
124
+ + result.direct_results
125
+ )
126
+
127
+ # Create columns for better layout
128
+ col1, col2 = st.columns(2)
129
+
130
+ with col1:
131
+ st.markdown("#### Criteria Results")
132
+ for checkpoint in all_results:
133
+ if checkpoint.passed:
134
+ st.success(f"βœ… {checkpoint.criteria}")
135
+ else:
136
+ st.error(f"❌ {checkpoint.criteria}")
137
+
138
+ with col2:
139
+ st.markdown("#### Overall Score")
140
+ total_points = sum([result.points for result in all_results])
141
+ if total_points == 0:
142
+ msg = "Total points is 0, cannot calculate score."
143
+ raise ValueError(msg)
144
+ passed_points = sum(
145
+ [result.points for result in all_results if result.passed]
146
+ )
147
+
148
+ # Create a nice score display
149
+ st.markdown(f"### {passed_points}/{total_points}")
150
+ percentage = (passed_points / total_points) * 100
151
+ st.progress(percentage / 100)
152
+ st.markdown(f"**{percentage:.1f}%**")