Spaces:

tecuts
/

chat

Running

App Files Files Community

tecuts committed on Jul 2

Commit

13046df

verified ·

1 Parent(s): db547a3

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -159

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import json
 import asyncio
 import requests
 from datetime import datetime
 from typing import List, Dict, Optional
 from fastapi import FastAPI, Request, HTTPException, Depends
@@ -48,22 +49,27 @@ GOOGLE_CX = os.getenv("GOOGLE_CX")
 LLM_API_KEY = os.getenv("LLM_API_KEY")
 LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
-# --- Simplified System Prompts ---
 SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to real-time web search capabilities.
-When search tools are available, use them for queries that need current, recent, or specific factual information.
 **Response Guidelines:**
-1. Use search tools when available and relevant
-2. Synthesize information from multiple sources
-3. Clearly indicate when information comes from search results
-4. Provide comprehensive, well-structured answers
-5. Cite sources appropriately
 Current date: {current_date}"""
 SYSTEM_PROMPT_NO_SEARCH = """You are an intelligent AI assistant. Provide helpful, accurate, and comprehensive responses based on your training data.
 Current date: {current_date}"""
 # --- Optimized Web Search Tool ---
@@ -132,6 +138,40 @@ def format_search_results_compact(search_results: List[Dict]) -> str:
     return "\n".join(formatted)
 # --- FastAPI Application Setup ---
 app = FastAPI(title="Streaming AI Chatbot", version="2.1.0")
@@ -156,7 +196,7 @@ else:
     client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
     logger.info("OpenAI client initialized successfully")
-# --- Tool Definition ---
 available_tools = [
     {
         "type": "function",
@@ -177,11 +217,40 @@ available_tools = [
     }
 ]
-# --- Streaming Response Generator ---
-async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float):
-    """Generate streaming response with optional search"""
     try:
         # Initial LLM call with streaming
         llm_kwargs = {
             "model": "unsloth/Qwen3-30B-A3B-GGUF",
@@ -191,15 +260,17 @@ async def generate_streaming_response(messages: List[Dict], use_search: bool, te
             "stream": True
         }
-        if use_search:
             llm_kwargs["tools"] = available_tools
             llm_kwargs["tool_choice"] = "auto"
-        source_links = []
         response_content = ""
         tool_calls_data = []
-        # First streaming call
         stream = client.chat.completions.create(**llm_kwargs)
         for chunk in stream:
@@ -209,9 +280,21 @@ async def generate_streaming_response(messages: List[Dict], use_search: bool, te
             if delta.content:
                 content_chunk = delta.content
                 response_content += content_chunk
                 yield f"data: {json.dumps({'type': 'content', 'data': content_chunk})}\n\n"
-            # Handle tool calls
             if delta.tool_calls:
                 for tool_call in delta.tool_calls:
                     if len(tool_calls_data) <= tool_call.index:
@@ -225,11 +308,56 @@ async def generate_streaming_response(messages: List[Dict], use_search: bool, te
                     if tool_call.function.arguments:
                         tool_calls_data[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
-        # Process tool calls if any
-        if tool_calls_data and any(tc["function"]["name"] for tc in tool_calls_data):
-            yield f"data: {json.dumps({'type': 'status', 'data': 'Searching...'})}\n\n"
-            # Execute searches concurrently for speed
             search_tasks = []
             for tool_call in tool_calls_data:
                 if tool_call["function"]["name"] == "google_search":
@@ -238,14 +366,13 @@ async def generate_streaming_response(messages: List[Dict], use_search: bool, te
                         query = args.get("query", "").strip()
                         if query:
                             search_tasks.append(google_search_tool_async(query))
                     except json.JSONDecodeError:
                         continue
-            # Run searches concurrently
             if search_tasks:
                 search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
-                # Combine all search results
                 all_results = []
                 for results in search_results_list:
                     if isinstance(results, list):
@@ -257,19 +384,14 @@ async def generate_streaming_response(messages: List[Dict], use_search: bool, te
                                 "domain": result["domain"]
                             })
-                # Format search results
                 if all_results:
                     search_context = format_search_results_compact(all_results)
-                    # Create new message with search context
                     search_messages = messages + [{
                         "role": "system",
                         "content": f"{search_context}\n\nPlease provide a comprehensive response based on the search results above."
                     }]
-                    yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
-                    # Generate final response with search context
                     final_stream = client.chat.completions.create(
                         model="unsloth/Qwen3-30B-A3B-GGUF",
                         temperature=temperature,
@@ -282,12 +404,14 @@ async def generate_streaming_response(messages: List[Dict], use_search: bool, te
                         if chunk.choices[0].delta.content:
                             content = chunk.choices[0].delta.content
                             yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
         # Send sources and completion
         if source_links:
             yield f"data: {json.dumps({'type': 'sources', 'data': source_links})}\n\n"
-        yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': bool(source_links)}})}\n\n"
     except Exception as e:
         logger.error(f"Streaming error: {e}")
@@ -302,8 +426,8 @@ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin
     try:
         data = await request.json()
         user_message = data.get("message", "").strip()
-        use_search = data.get("use_search", False)  # Default: False
-        temperature = max(0, min(2, data.get("temperature", 0.7)))  # Clamp to valid range
         conversation_history = data.get("history", [])
         if not user_message:
@@ -314,15 +438,15 @@ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin
         system_content = (SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH).format(current_date=current_date)
         messages = [{"role": "system", "content": system_content}] + conversation_history + [{"role": "user", "content": user_message}]
-        logger.info(f"Stream request - search: {use_search}, temp: {temperature}")
         return StreamingResponse(
-            generate_streaming_response(messages, use_search, temperature),
             media_type="text/plain",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
-                "X-Accel-Buffering": "no"  # Disable nginx buffering
             }
         )
@@ -330,129 +454,4 @@ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin
         raise HTTPException(status_code=400, detail="Invalid JSON")
     except Exception as e:
         logger.error(f"Stream endpoint error: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-# --- Regular Chat Endpoint (for backward compatibility) ---
-@app.post("/chat")
-async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
-    if not client:
-        raise HTTPException(status_code=500, detail="LLM client not configured")
-    try:
-        data = await request.json()
-        user_message = data.get("message", "").strip()
-        use_search = data.get("use_search", False)  # Default: False
-        temperature = max(0, min(2, data.get("temperature", 0.7)))
-        conversation_history = data.get("history", [])
-        if not user_message:
-            raise HTTPException(status_code=400, detail="No message provided")
-        # Prepare messages
-        current_date = datetime.now().strftime("%Y-%m-%d")
-        system_content = (SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH).format(current_date=current_date)
-        messages = [{"role": "system", "content": system_content}] + conversation_history + [{"role": "user", "content": user_message}]
-        source_links = []
-        if use_search:
-            # Search-enabled flow (non-streaming for compatibility)
-            llm_response = client.chat.completions.create(
-                model="unsloth/Qwen3-30B-A3B-GGUF",
-                temperature=temperature,
-                messages=messages,
-                tools=available_tools,
-                tool_choice="auto",
-                max_tokens=2000
-            )
-            tool_calls = llm_response.choices[0].message.tool_calls
-            if tool_calls:
-                # Execute searches
-                search_tasks = []
-                for tool_call in tool_calls:
-                    if tool_call.function.name == "google_search":
-                        try:
-                            args = json.loads(tool_call.function.arguments)
-                            query = args.get("query", "").strip()
-                            if query:
-                                search_tasks.append(google_search_tool_async(query))
-                        except json.JSONDecodeError:
-                            continue
-                if search_tasks:
-                    search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
-                    all_results = []
-                    for results in search_results_list:
-                        if isinstance(results, list):
-                            all_results.extend(results)
-                            for result in results:
-                                source_links.append({
-                                    "title": result["source_title"],
-                                    "url": result["url"],
-                                    "domain": result["domain"]
-                                })
-                    if all_results:
-                        search_context = format_search_results_compact(all_results)
-                        search_messages = messages + [{
-                            "role": "system",
-                            "content": f"{search_context}\n\nPlease provide a comprehensive response based on the search results above."
-                        }]
-                        final_response = client.chat.completions.create(
-                            model="unsloth/Qwen3-30B-A3B-GGUF",
-                            temperature=temperature,
-                            messages=search_messages,
-                            max_tokens=2000
-                        )
-                        final_content = final_response.choices[0].message.content
-                    else:
-                        final_content = llm_response.choices[0].message.content
-                else:
-                    final_content = llm_response.choices[0].message.content
-            else:
-                final_content = llm_response.choices[0].message.content
-        else:
-            # No search - direct response
-            llm_response = client.chat.completions.create(
-                model="unsloth/Qwen3-30B-A3B-GGUF",
-                temperature=temperature,
-                messages=messages,
-                max_tokens=2000
-            )
-            final_content = llm_response.choices[0].message.content
-        return {
-            "response": final_content,
-            "sources": source_links,
-            "search_used": bool(source_links),
-            "temperature": temperature,
-            "timestamp": datetime.now().isoformat()
-        }
-    except Exception as e:
-        logger.error(f"Chat endpoint error: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
-# --- Health Check Endpoints ---
-@app.get("/")
-async def root():
-    return {
-        "message": "Streaming AI Chatbot API",
-        "version": "2.1.0",
-        "endpoints": ["/chat", "/chat/stream"],
-        "timestamp": datetime.now().isoformat()
-    }
-@app.get("/health")
-async def health_check():
-    return {
-        "status": "healthy",
-        "timestamp": datetime.now().isoformat(),
-        "services": {
-            "llm_client": client is not None,
-            "google_search": bool(GOOGLE_API_KEY and GOOGLE_CX)
-        }
-    }

 import json
 import asyncio
 import requests
+import re
 from datetime import datetime
 from typing import List, Dict, Optional
 from fastapi import FastAPI, Request, HTTPException, Depends
 LLM_API_KEY = os.getenv("LLM_API_KEY")
 LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
+# --- Enhanced System Prompts ---
 SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to real-time web search capabilities.
+IMPORTANT: When you need current information, recent events, or specific facts that might be outdated, you should explicitly request a search by including the phrase "SEARCH_NEEDED:" followed by your search query in your response.
+For example:
+- If asked about recent news: "SEARCH_NEEDED: latest news about [topic]"
+- If asked about current events: "SEARCH_NEEDED: current status of [event]"
+- If asked about recent developments: "SEARCH_NEEDED: recent developments in [field]"
 **Response Guidelines:**
+1. Use search for queries that need current, recent, or specific factual information
+2. Be proactive in identifying when search is needed
+3. Synthesize information from multiple sources when search results are provided
+4. Clearly indicate when information comes from search results
+5. Provide comprehensive, well-structured answers
+6. Cite sources appropriately
 Current date: {current_date}"""
 SYSTEM_PROMPT_NO_SEARCH = """You are an intelligent AI assistant. Provide helpful, accurate, and comprehensive responses based on your training data.
 Current date: {current_date}"""
 # --- Optimized Web Search Tool ---
     return "\n".join(formatted)
+# --- Check if query needs search ---
# Terms that usually signal a need for up-to-date information.
_CURRENT_INFO_KEYWORDS = (
    'today', 'recent', 'latest', 'current', 'now', 'this year', '2024', '2025',
    'news', 'happening', 'update', 'development', 'status', 'price', 'stock',
    'weather', 'score', 'result', 'election', 'covid', 'pandemic',
)

# Anchored at a word start only: inflected forms ("prices", "recently",
# "updated") still match, while mid-word hits ("know", "underscore") do not.
_CURRENT_INFO_RE = re.compile(
    r'\b(?:' + '|'.join(re.escape(kw) for kw in _CURRENT_INFO_KEYWORDS) + r')'
)

# Question shapes that usually ask about the state of a company, product, or
# event. Whole-word anchors keep "this"/"list" from counting as "is", etc.
_QUESTION_RE = re.compile(
    r'\bwhat\b.*\bhappened\b'
    r'|\bwhen\b.*\bdid\b'
    r'|\bhow\b.*\bis\b.*\bdoing\b'
    r'|\bwhat\b.*\bthe\b.*\bstatus\b'
    r'|\bis\b.*\bstill\b'
    r'|\bhas\b.*\bbeen\b'
)


def should_search(query: str, use_search: bool) -> Optional[str]:
    """Decide whether a query should trigger a proactive web search.

    Args:
        query: The user's message.
        use_search: Whether search is enabled for this request. When false,
            no search is ever suggested.

    Returns:
        The original ``query`` unchanged (it is used as the search string)
        when it contains a current-information keyword or matches one of the
        question patterns; otherwise ``None``.
    """
    if not use_search:
        return None

    query_lower = query.lower()

    if _CURRENT_INFO_RE.search(query_lower):
        return query

    if _QUESTION_RE.search(query_lower):
        return query

    return None
 # --- FastAPI Application Setup ---
 app = FastAPI(title="Streaming AI Chatbot", version="2.1.0")
     client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
     logger.info("OpenAI client initialized successfully")
+# --- Tool Definition (keeping for potential future use) ---
 available_tools = [
     {
         "type": "function",
     }
 ]
+# --- Enhanced Streaming Response Generator ---
+async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float, original_query: str):
+    """Generate streaming response with intelligent search triggering"""
     try:
+        source_links = []
+        search_performed = False
+        # Check if we should proactively search
+        proactive_search_query = should_search(original_query, use_search)
+        if proactive_search_query:
+            logger.info(f"Proactive search triggered for: {proactive_search_query}")
+            yield f"data: {json.dumps({'type': 'status', 'data': 'Searching for current information...'})}\n\n"
+            search_results = await google_search_tool_async(proactive_search_query, 4)
+            if search_results:
+                search_context = format_search_results_compact(search_results)
+                # Add search context to messages
+                enhanced_messages = messages + [{
+                    "role": "system",
+                    "content": f"Recent search results for your reference:\n\n{search_context}\n\nPlease use this information to provide a comprehensive and up-to-date response."
+                }]
+                for result in search_results:
+                    source_links.append({
+                        "title": result["source_title"],
+                        "url": result["url"],
+                        "domain": result["domain"]
+                    })
+                search_performed = True
+                messages = enhanced_messages
         # Initial LLM call with streaming
         llm_kwargs = {
             "model": "unsloth/Qwen3-30B-A3B-GGUF",
             "stream": True
         }
+        # Try function calling as backup (in case model supports it)
+        if use_search and not search_performed:
             llm_kwargs["tools"] = available_tools
             llm_kwargs["tool_choice"] = "auto"
         response_content = ""
         tool_calls_data = []
+        yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
+        # Stream the response
         stream = client.chat.completions.create(**llm_kwargs)
         for chunk in stream:
             if delta.content:
                 content_chunk = delta.content
                 response_content += content_chunk
+                # Check for search requests in the content
+                if use_search and not search_performed and "SEARCH_NEEDED:" in content_chunk:
+                    # Extract search query from the content
+                    search_match = re.search(r'SEARCH_NEEDED:\s*(.+?)(?:\n|$)', content_chunk)
+                    if search_match:
+                        search_query = search_match.group(1).strip()
+                        logger.info(f"Search requested by model: {search_query}")
+                        # Don't yield this chunk yet, we'll search first
+                        continue
                 yield f"data: {json.dumps({'type': 'content', 'data': content_chunk})}\n\n"
+            # Handle tool calls (backup method)
             if delta.tool_calls:
                 for tool_call in delta.tool_calls:
                     if len(tool_calls_data) <= tool_call.index:
                     if tool_call.function.arguments:
                         tool_calls_data[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
+        # Handle model-requested search
+        if use_search and not search_performed and "SEARCH_NEEDED:" in response_content:
+            search_matches = re.findall(r'SEARCH_NEEDED:\s*(.+?)(?:\n|$)', response_content)
+            if search_matches:
+                yield f"data: {json.dumps({'type': 'status', 'data': 'Performing requested search...'})}\n\n"
+                # Execute all requested searches
+                search_tasks = [google_search_tool_async(query.strip()) for query in search_matches]
+                search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
+                all_results = []
+                for results in search_results_list:
+                    if isinstance(results, list):
+                        all_results.extend(results)
+                if all_results:
+                    search_context = format_search_results_compact(all_results)
+                    for result in all_results:
+                        source_links.append({
+                            "title": result["source_title"],
+                            "url": result["url"],
+                            "domain": result["domain"]
+                        })
+                    # Generate new response with search results
+                    search_messages = messages + [{
+                        "role": "system",
+                        "content": f"Search Results:\n\n{search_context}\n\nPlease provide a comprehensive response based on these search results."
+                    }]
+                    final_stream = client.chat.completions.create(
+                        model="unsloth/Qwen3-30B-A3B-GGUF",
+                        temperature=temperature,
+                        messages=search_messages,
+                        max_tokens=2000,
+                        stream=True
+                    )
+                    for chunk in final_stream:
+                        if chunk.choices[0].delta.content:
+                            content = chunk.choices[0].delta.content
+                            yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
+                    search_performed = True
+        # Process function-based tool calls (backup method)
+        elif tool_calls_data and any(tc["function"]["name"] for tc in tool_calls_data):
+            yield f"data: {json.dumps({'type': 'status', 'data': 'Executing search tools...'})}\n\n"
             search_tasks = []
             for tool_call in tool_calls_data:
                 if tool_call["function"]["name"] == "google_search":
                         query = args.get("query", "").strip()
                         if query:
                             search_tasks.append(google_search_tool_async(query))
+                            logger.info(f"Function call search: {query}")
                     except json.JSONDecodeError:
                         continue
             if search_tasks:
                 search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
                 all_results = []
                 for results in search_results_list:
                     if isinstance(results, list):
                                 "domain": result["domain"]
                             })
                 if all_results:
                     search_context = format_search_results_compact(all_results)
                     search_messages = messages + [{
                         "role": "system",
                         "content": f"{search_context}\n\nPlease provide a comprehensive response based on the search results above."
                     }]
                     final_stream = client.chat.completions.create(
                         model="unsloth/Qwen3-30B-A3B-GGUF",
                         temperature=temperature,
                         if chunk.choices[0].delta.content:
                             content = chunk.choices[0].delta.content
                             yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
+                    search_performed = True
         # Send sources and completion
         if source_links:
             yield f"data: {json.dumps({'type': 'sources', 'data': source_links})}\n\n"
+        yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': search_performed}})}\n\n"
     except Exception as e:
         logger.error(f"Streaming error: {e}")
     try:
         data = await request.json()
         user_message = data.get("message", "").strip()
+        use_search = data.get("use_search", False)
+        temperature = max(0, min(2, data.get("temperature", 0.7)))
         conversation_history = data.get("history", [])
         if not user_message:
         system_content = (SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH).format(current_date=current_date)
         messages = [{"role": "system", "content": system_content}] + conversation_history + [{"role": "user", "content": user_message}]
+        logger.info(f"Stream request - search: {use_search}, temp: {temperature}, query: {user_message[:50]}...")
         return StreamingResponse(
+            generate_streaming_response(messages, use_search, temperature, user_message),
             media_type="text/plain",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
+                "X-Accel-Buffering": "no"
             }
         )
         raise HTTPException(status_code=400, detail="Invalid JSON")
     except Exception as e:
         logger.error(f"Stream endpoint error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))