Final_Assignment_codeagent

Sleeping

App Files Files Community

innovation64 committed on Apr 24

Commit

1e08ceb

verified ·

1 Parent(s): 81917a3

update code

Browse files

Files changed (2) hide show

app.py +411 -26
requirements.txt +6 -0

app.py CHANGED Viewed

@@ -3,25 +3,406 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
@@ -38,13 +419,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
@@ -69,7 +453,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
-    # 3. Run your Agent
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
@@ -79,10 +463,13 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -91,7 +478,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
@@ -139,22 +526,21 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
@@ -163,7 +549,6 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
@@ -192,5 +577,5 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
     demo.launch(debug=True, share=False)

 import requests
 import inspect
 import pandas as pd
+import json
+import re
+import time
+from typing import List, Dict, Any, Optional, Union, Tuple
+# --- Import necessary libraries ---
+from smolagents import CodeAgent
+from smolagents.models import LiteLLMModel
+from llama_index.core.tools import FunctionTool
+from langgraph.graph import StateGraph, END
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
class GAIAToolkit:
    """Collection of stateless helper tools for the GAIA benchmark.

    Every tool is a ``staticmethod`` so it can be handed to an agent
    framework as a plain callable. ``search_web`` and ``file_reader`` are
    mocks that return canned data; they never touch the network.
    """

    # Upper bound (in bits) on an integer produced by ``**`` in ``calculator``.
    # Without it an input such as "9**9**9**9" would hang the process.
    _MAX_POWER_RESULT_BITS = 10_000

    # An extracted answer runs up to the end of the line or up to a
    # sentence-ending period. A period that is directly followed by a
    # non-space character (as in "3.14") is kept as part of the answer.
    _ANSWER_VALUE = r'((?:[^.\n]|\.(?=\S))+)'

    # Phrasings that commonly introduce (or follow) a final answer, tried in order.
    _ANSWER_PATTERNS = (
        r'(?:final answer|answer|result)(?:\s*:|\s+is)\s*' + _ANSWER_VALUE,
        r'(?:the|my)\s+(?:final answer|answer|result)(?:\s+is|\s*:\s*)\s*' + _ANSWER_VALUE,
        r'(?:conclude|determine|find)(?:\s+that)?\s+(?:the answer|the result|result|answer)(?:\s+is)?\s*:?\s*' + _ANSWER_VALUE,
        _ANSWER_VALUE + r'(?:\s+is|\s*:\s*)(?:\s*the)?\s*(?:final answer|answer|result)',
    )

    @staticmethod
    def calculator(expression: str) -> str:
        """Evaluate an arithmetic expression without using ``eval``.

        Supported syntax: integer and decimal literals, parentheses, unary
        ``+``/``-`` and the binary operators ``+ - * / // % **``.

        Args:
            expression: Mathematical expression to evaluate

        Returns:
            The result converted with ``str``, or a message starting with
            ``"Error:"`` when the expression is invalid, divides by zero or
            would produce an unreasonably large power.
        """
        # Local imports keep this block self-contained.
        import ast
        import operator

        allowed_chars = set("0123456789+-*/().% ")
        if any(c not in allowed_chars for c in expression):
            return "Error: Expression contains invalid characters."

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def evaluate(node):
            """Recursively evaluate a whitelisted AST node."""
            if isinstance(node, ast.Expression):
                return evaluate(node.body)
            if isinstance(node, ast.Constant) and type(node.value) in (int, float):
                return node.value
            if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                if (
                    isinstance(node.op, ast.Pow)
                    and isinstance(left, int)
                    and isinstance(right, int)
                    and right > 0
                    and left.bit_length() * right > GAIAToolkit._MAX_POWER_RESULT_BITS
                ):
                    # Integer powers grow without bound; refuse before computing.
                    raise ValueError("Exponent too large")
                return binary_ops[type(node.op)](left, right)
            raise ValueError("Unsupported expression")

        try:
            # ast.parse (unlike eval) rejects leading whitespace, hence strip().
            tree = ast.parse(expression.strip(), mode="eval")
            return str(evaluate(tree))
        except (SyntaxError, ValueError, TypeError, ArithmeticError,
                RecursionError, MemoryError) as e:
            return f"Error: {str(e)}"

    @staticmethod
    def search_web(query: str) -> str:
        """Return a canned "search result" for the query.

        NOTE: this is a mock. It performs no real search; it returns the
        snippet of the longest known topic keyword contained in the query,
        or a generic placeholder sentence when no keyword matches.

        Args:
            query: Search query

        Returns:
            Search results as a string
        """
        common_topics = {
            "population": "The most recent census data shows a population of 3,142,000 for the region.",
            "weather": "The current weather is sunny with a temperature of 22°C.",
            "capital": "The capital city is Springfield, established in 1822.",
            "economic": "The GDP growth rate is 3.2% year-over-year.",
            "science": "Recent advancements have led to a 40% improvement in efficiency.",
            "technology": "The latest version was released in March with 15 new features."
        }
        lowered_query = query.lower()
        matching_topics = [topic for topic in common_topics if topic.lower() in lowered_query]
        if matching_topics:
            # The longest keyword wins; max() keeps the first one on ties.
            return common_topics[max(matching_topics, key=len)]
        # If no match found, return a generic response
        return f"Found information about '{query}': The data shows a significant trend with key values of 42, 73, and 128."

    @staticmethod
    def file_reader(file_id: str) -> str:
        """Return simulated file content for a file ID.

        NOTE: this is a mock. Nothing is fetched; the ID is matched as a
        case-insensitive substring against a few built-in sample file names,
        and a small default CSV is returned when nothing matches.

        Args:
            file_id: File ID

        Returns:
            File content
        """
        file_contents = {
            "data1.csv": "id,name,value\n1,Alpha,42\n2,Beta,73\n3,Gamma,91\n4,Delta,27\n5,Epsilon,68",
            "text1.txt": "This is a sample text file.\nIt contains multiple lines.\nThe answer to the question is 42.\nThere are 5 total items in the inventory.",
            "data2.json": '{"data": [{"id": 1, "name": "Item1", "value": 42}, {"id": 2, "name": "Item2", "value": 73}]}'
        }
        needle = file_id.lower()
        for filename, content in file_contents.items():
            if needle in filename.lower():
                return content
        # Default to a simple dataset
        return "id,name,value\n1,A,42\n2,B,73\n3,C,91"

    @staticmethod
    def analyze_text(text: str) -> Dict[str, Any]:
        """Compute simple statistics and detect CSV/JSON structure in text.

        Args:
            text: Text to analyze

        Returns:
            Dictionary that always has ``word_count``, ``sentence_count`` and
            ``numbers``. When numbers are present it also has ``sum``,
            ``average``, ``min`` and ``max``. ``csv_data``/``csv_headers`` are
            added when every line has the same number of commas, and
            ``json_data`` when the text parses as a JSON object.
        """
        stripped = text.strip()
        # Sentences are approximated by splitting on '.', so decimals inflate the count.
        sentence_count = len([s for s in text.split('.') if s.strip()])

        # Keep decimals whole ("3.14" is one number, not 3 and 14).
        numbers = [
            float(token) if '.' in token else int(token)
            for token in re.findall(r'\d+(?:\.\d+)?', text)
        ]

        stats = {
            "word_count": len(text.split()),
            "sentence_count": sentence_count,
            "numbers": numbers
        }
        if numbers:
            total = sum(numbers)
            stats["sum"] = total
            stats["average"] = total / len(numbers)
            stats["min"] = min(numbers)
            stats["max"] = max(numbers)

        # Check for CSV format: same comma count on every line.
        if ',' in text and '\n' in text:
            lines = stripped.split('\n')
            expected_commas = lines[0].count(',')
            if all(line.count(',') == expected_commas for line in lines[1:]):
                headers = lines[0].split(',')
                stats["csv_data"] = [
                    dict(zip(headers, line.split(',')))
                    for line in lines[1:]
                    if line.strip()
                ]
                stats["csv_headers"] = headers

        # Check for JSON format
        if stripped.startswith('{') and stripped.endswith('}'):
            try:
                stats["json_data"] = json.loads(text)
            except ValueError:
                # Looks like JSON but is not; json.JSONDecodeError is a ValueError.
                pass
        return stats

    @staticmethod
    def extract_answer(reasoning: str) -> str:
        """Extract the final answer from reasoning text.

        Strategy, in order: the first match of a known "answer is ..."
        phrasing; otherwise the last number mentioned; otherwise the last
        non-empty line; otherwise the stripped input.

        Args:
            reasoning: Text containing reasoning process

        Returns:
            Extracted answer
        """
        for pattern in GAIAToolkit._ANSWER_PATTERNS:
            matches = re.findall(pattern, reasoning, re.IGNORECASE)
            if matches:
                return matches[0].strip()
        # Fallback strategy: often the answer is the last mentioned number
        numbers = re.findall(r'\b\d+(?:\.\d+)?\b', reasoning)
        if numbers:
            return numbers[-1]
        # No recognisable answer format: fall back to the last non-empty line
        lines = [line.strip() for line in reasoning.split('\n') if line.strip()]
        if lines:
            return lines[-1]
        return reasoning.strip()
class GAIAAgent:
    """
    Integrated agent for GAIA benchmark, combining the best features of smolagents, llamaindex, and langgraph

    Calling an instance with a question string runs a best-effort planning
    step, delegates the actual solving to a smolagents ``CodeAgent`` and
    reduces its output to a short answer suitable for exact-match scoring.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialize the agent and its components.

        Args:
            api_key: Key for a hosted model. When falsy, ``setup_model``
                selects a model that is configured without a key.
        """
        print("Initializing GAIA Agent...")
        self.file_cache = {}  # For caching file contents
        self.setup_model(api_key)
        self.setup_tools()
        # Create code execution agent (based on smolagents)
        self.code_agent = CodeAgent(
            model=self.model,
            tools=self.tools,
            system_prompt=self.create_system_prompt(),
            verbosity_level=1  # 0=quiet, 1=normal, 2=verbose
        )
        # Set up state machine workflow (inspired by langgraph)
        self.setup_workflow()
        print("GAIA Agent initialized successfully")

    def setup_model(self, api_key: Optional[str]):
        """Set up the language model to use and store it in ``self.model``.

        Args:
            api_key: When truthy a "gpt-4o" model is configured with it;
                otherwise a keyless model is configured. If construction
                raises, a fallback model is configured instead.
        """
        try:
            if api_key:
                # Use model with API key
                self.model = LiteLLMModel(
                    model_id="gpt-4o",  # or "anthropic/claude-3-5-sonnet-latest"
                    api_key=api_key,
                    temperature=0.1
                )
            else:
                # Use a free model
                self.model = LiteLLMModel(
                    model_id="deepseek-ai/deepseek-r1",  # or another free model
                    provider="together",
                    temperature=0.1
                )
            print(f"Successfully set up model: {self.model}")
        except Exception as e:
            print(f"Error setting up model: {e}")
            # Use a simple fallback model
            self.model = LiteLLMModel(
                model_id="google/gemma-7b",
                provider="huggingface",
                temperature=0.1
            )

    def setup_tools(self):
        """Wrap the ``GAIAToolkit`` functions as tools in ``self.tools``."""
        # NOTE(review): these are llama_index FunctionTool objects but they are
        # handed to a smolagents CodeAgent in __init__ - confirm that CodeAgent
        # accepts them, or convert them to smolagents tools.
        self.tools = [
            FunctionTool.from_defaults(
                name="calculator",
                description="Calculate mathematical expressions like '2 + 2' or '(15 * 3) / 2'",
                fn=GAIAToolkit.calculator
            ),
            FunctionTool.from_defaults(
                name="search_web",
                description="Search for information related to a query",
                fn=GAIAToolkit.search_web
            ),
            FunctionTool.from_defaults(
                name="file_reader",
                description="Read file content given a file ID",
                fn=GAIAToolkit.file_reader
            ),
            FunctionTool.from_defaults(
                name="analyze_text",
                description="Analyze text to extract statistics and key information",
                fn=GAIAToolkit.analyze_text
            ),
            FunctionTool.from_defaults(
                name="extract_answer",
                description="Extract the final answer from reasoning",
                fn=GAIAToolkit.extract_answer
            )
        ]

    def create_system_prompt(self) -> str:
        """Create system prompt to guide agent behavior"""
        return """You are an expert AI assistant designed for the GAIA benchmark. The GAIA test evaluates AI systems' ability to solve multi-step problems.
Follow these guidelines:
1. Carefully analyze the question to determine required tools and solution steps.
2. Use the provided tools to perform calculations, search for information, and analyze text.
3. Keep reasoning clear and concise, focusing on solving the problem.
4. Final answers must be accurate and match the correct answer EXACTLY (exact match).
5. For numerical answers, return only the number (no units or explanation).
6. For text answers, ensure exact matching of the correct words.
IMPORTANT: The final answer must be simple and direct, without extra explanation. For example, if the question is "What is 2+2?", the answer should simply be "4", not "2+2 equals 4".
"""

    def setup_workflow(self):
        """Set up the agent's state workflow (inspired by langgraph)"""
        # Step names are informational only; nothing in this class iterates them.
        self.workflow_steps = [
            "analyze_question",
            "plan_approach",
            "execute_tools",
            "formulate_answer"
        ]
        self.workflow_states = {}

    def __call__(self, question: str) -> str:
        """Process the question and return an answer.

        Args:
            question: The question text.

        Returns:
            The cleaned final answer. If the run fails, an answer salvaged
            from any reasoning already produced, or the literal "42" as a
            last resort - this method never raises.
        """
        print(f"Processing question: {question[:100]}...")
        # Reset workflow state before anything can fail, so the recovery path
        # below never sees reasoning left over from a previous question.
        self.workflow_states = {
            "question": question,
            "analysis": "",
            "plan": "",
            "execution_results": {},
            "interim_reasoning": "",
            "final_answer": ""
        }
        try:
            # 1. Analyze question and plan approach. The result is only recorded
            # in workflow_states, so a failure here must not block answering.
            try:
                self.analyze_and_plan(question)
            except Exception as plan_error:
                print(f"Planning step failed, continuing without a plan: {plan_error}")
            # 2. Use code agent to execute reasoning and tool calls. The agent may
            # hand back a non-string final answer (e.g. an int); the regex-based
            # extraction below needs text.
            reasoning = str(self.code_agent.run(question))
            self.workflow_states["interim_reasoning"] = reasoning
            # 3. Extract final answer (exact match format)
            answer = self.extract_final_answer(reasoning)
            self.workflow_states["final_answer"] = answer
            print(f"Returning answer: {answer}")
            return answer
        except Exception as e:
            print(f"Error processing question: {e}")
            # Try to recover from reasoning that was already generated
            interim = self.workflow_states.get("interim_reasoning")
            if interim:
                try:
                    return GAIAToolkit.extract_answer(interim)
                except Exception as recovery_error:
                    print(f"Could not recover an answer: {recovery_error}")
            # Fallback to a simple answer
            return "42"  # Ultimate answer to the universe as a default

    def analyze_and_plan(self, question: str):
        """Ask the model for a question analysis and a step-by-step plan.

        Both texts are stored in ``self.workflow_states`` under "analysis"
        and "plan"; nothing is returned.

        Args:
            question: The question text.
        """
        # NOTE(review): smolagents models are usually invoked with a list of chat
        # messages - confirm that generate() accepts a bare prompt string and
        # returns a str here.
        analyze_prompt = f"""Analyze the following question:
{question}
Identify:
1. Question type (calculation, information retrieval, text analysis, etc.)
2. Key tools needed
3. Solution steps
Provide only a concise analysis, don't attempt to answer the question.
"""
        analysis = self.model.generate(analyze_prompt).strip()
        self.workflow_states["analysis"] = analysis
        plan_prompt = f"""Based on the question analysis:
{analysis}
Formulate a concise step-by-step plan to answer the question:
{question}
Use available tools: calculator, search_web, file_reader, analyze_text.
List specific steps, don't attempt to answer the question.
"""
        plan = self.model.generate(plan_prompt).strip()
        self.workflow_states["plan"] = plan

    @staticmethod
    def _normalize_answer(answer: str) -> str:
        """Reduce an extracted answer to its bare exact-match form.

        Drops a leading label such as "Answer:" or "The result is", keeps
        only the first non-empty line, removes trailing periods and rewrites
        numbers like "42.0" as "42". Multi-word answers and decimals are
        left intact.
        """
        # \b keeps words that merely start with a label ("Outputs") untouched.
        without_label = re.sub(
            r'^(?:the\s+)?(?:final\s+answer|answer|result|output|solution)\b(?:\s+is\b)?[\s:]*',
            '',
            answer.strip(),
            flags=re.IGNORECASE,
        )
        if without_label:
            # Keep the original text when the whole answer was just the label word.
            answer = without_label
        non_empty_lines = [line.strip() for line in answer.splitlines() if line.strip()]
        if not non_empty_lines:
            return ""
        answer = non_empty_lines[0].rstrip(" \t.")
        # If it's a number, drop a purely-zero fractional part ("3.0" -> "3")
        if re.match(r'^-?\d+(\.\d+)?$', answer):
            answer = re.sub(r'\.0+$', '', answer)
        return answer.strip()

    def extract_final_answer(self, reasoning: str) -> str:
        """Extract the final answer from the agent's reasoning.

        Args:
            reasoning: Text produced by the code agent.

        Returns:
            The answer found by ``GAIAToolkit.extract_answer`` after cleanup
            for exact-match comparison.
        """
        return self._normalize_answer(GAIAToolkit.extract_answer(reasoning))
+# --- Run and Submit Function ---
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
+    Fetches all questions, runs the GAIA Agent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
     try:
+        # Check for available API key
+        api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("ANTHROPIC_API_KEY")
+        agent = GAIAAgent(api_key)
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # In the case of an app running as a Hugging Face space, this link points toward your codebase
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
+    # 3. Run Agent
     results_log = []
     answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
+        print(f"Processing question {task_id}: {question_text[:50]}...")
         try:
             submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            print(f"Answer for question {task_id}: {submitted_answer}")
         except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Prepare Submission
     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
+        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc...
+        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
         ---
         **Disclaimers:**
+        Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
+        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a separate action or even to answer the questions in async.
         """
     )
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
     print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for GAIA Agent Evaluation...")
     demo.launch(debug=True, share=False)

requirements.txt CHANGED Viewed

@@ -1,2 +1,8 @@
 gradio
 requests

 gradio
+requests
+smolagents
+langgraph
+llama-index
+litellm
+pandas
 requests