Spaces:
Running
Running
| import os | |
| from any_agent.evaluation import EvaluationCase | |
| from surf_spot_finder.tools import ( | |
| get_area_lat_lon, | |
| get_wave_forecast, | |
| get_wind_forecast, | |
| ) | |
| from any_agent.logging import logger | |
| from any_agent.tools.web_browsing import search_web, visit_webpage, search_tavily | |
# Candidate model identifiers, written as "<provider>/<model>".
# Order matters: DEFAULT_EVALUATION_CASE uses MODEL_OPTIONS[0] as its LLM judge.
#
# Why the Hugging Face entries are commented out:
#   * Novita was the only HF-based provider that worked at all.
#   * Hugging Face API provider error: messages must alternate between
#     assistant/user, so the 'tool' role caused the request to fail.
MODEL_OPTIONS = [
    # "huggingface/novita/deepseek-ai/DeepSeek-V3",
    # "huggingface/novita/meta-llama/Llama-3.3-70B-Instruct",
    "openai/gpt-4.1-nano",
    "openai/gpt-4.1-mini",
    "openai/gpt-4o",
    "gemini/gemini-2.0-flash-lite",
    "gemini/gemini-2.0-flash",
    # "huggingface/Qwen/Qwen3-32B",  # currently throws an internal error, and Novita's Qwen does not support tool calling
]
# Criteria text for the default evaluation checkpoints. Every checkpoint is
# worth exactly one point, so only the criteria strings are listed here and
# the {"criteria": ..., "points": 1} dicts are generated below.
_DEFAULT_CHECKPOINT_CRITERIA = (
    "Check if the agent considered at least three surf spot options",
    "Check if the agent gathered wind forecasts for each surf spot being evaluated.",
    "Check if the agent gathered wave forecasts for each surf spot being evaluated.",
    "Check if the agent used any web search tools to explore which surf spots should be considered",
    "Check if the final answer contains any description about the weather (air temp, chance of rain, etc) at the chosen location",
    "Check if the final answer includes one of the surf spots evaluated by tools",
    "Check if the final answer includes information about some alternative surf spots if the user is not satisfied with the chosen one",
)

# Default evaluation case: seven one-point checkpoints, with the first entry
# of MODEL_OPTIONS passed as the LLM judge.
# NOTE(review): how EvaluationCase scores checkpoints is defined in
# any_agent.evaluation and is not visible from this file.
DEFAULT_EVALUATION_CASE = EvaluationCase(
    llm_judge=MODEL_OPTIONS[0],
    checkpoints=[
        {"criteria": criteria, "points": 1}
        for criteria in _DEFAULT_CHECKPOINT_CRITERIA
    ],
)
# Default tool list: the surf_spot_finder forecast/location helpers plus the
# any_agent web-browsing tools imported at the top of this file.
DEFAULT_TOOLS = [
    get_wind_forecast,
    get_wave_forecast,
    get_area_lat_lon,
    search_web,
    visit_webpage,
]

# Tavily search is optional: it is registered only when TAVILY_API_KEY is set
# to a non-empty value. This check runs once, at import time, so changing the
# environment variable afterwards does not alter DEFAULT_TOOLS.
if os.getenv("TAVILY_API_KEY"):
    DEFAULT_TOOLS.append(search_tavily)
else:
    logger.warning("TAVILY_API_KEY not set, skipping Tavily search tool")