Dmitry Beresnev committed
Commit 55e1aa1 · Parent(s): 110f827
fix app, dockerfile, pyproject.toml to add web search
Changed files:
- Dockerfile (+1 -1)
- app.py (+111 -1)
- pyproject.toml (+4 -1)
Dockerfile CHANGED

@@ -49,7 +49,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     && rm -rf /var/lib/apt/lists/*
 
 # Install Python packages
-RUN pip3 install --no-cache-dir fastapi uvicorn requests pydantic --break-system-packages
+RUN pip3 install --no-cache-dir fastapi uvicorn requests pydantic duckduckgo-search beautifulsoup4 lxml --break-system-packages
 
 # Create non-root user
 RUN useradd -m -u 1000 user && \
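A quick way to confirm the rebuilt image actually carries the new packages is an import smoke test; a minimal sketch (the import names are the only assumptions, and they match what app.py itself uses):

# Import smoke test for the packages added in this commit; run inside the
# container, e.g. via `docker run --rm <image> python3 smoke_test.py`.
from duckduckgo_search import DDGS  # search client used by app.py
import bs4                          # beautifulsoup4
import lxml                         # parser backend for bs4

print("web-search dependencies import cleanly")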
app.py CHANGED

@@ -6,6 +6,8 @@ import os
 import requests
 import time
 from typing import Optional
+from duckduckgo_search import DDGS
+from bs4 import BeautifulSoup
 
 app = FastAPI()
 
@@ -42,6 +44,13 @@ class ChatCompletionRequest(BaseModel):
     temperature: float = 0.7
 
 
+class WebChatRequest(BaseModel):
+    messages: list[dict]
+    max_tokens: int = 512
+    temperature: float = 0.7
+    max_search_results: int = 5
+
+
 def start_llama_server(model_id: str) -> subprocess.Popen:
     """Start llama-server with specified model (optimized for speed)."""
     cmd = [
@@ -193,4 +202,105 @@ async def chat_completions(request: ChatCompletionRequest):
         response.raise_for_status()
         return response.json()
     except requests.exceptions.RequestException as e:
-        raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
+
+
+def search_web(query: str, max_results: int = 5) -> list[dict]:
+    """Search the web using DuckDuckGo and return results."""
+    try:
+        with DDGS() as ddgs:
+            results = list(ddgs.text(query, max_results=max_results))
+        return results
+    except Exception as e:
+        print(f"Search error: {e}")
+        return []
+
+
+def format_search_context(query: str, search_results: list[dict]) -> str:
+    """Format search results into context for the LLM."""
+    if not search_results:
+        return f"No web results found for: {query}"
+
+    context = f"# Web Search Results for: {query}\n\n"
+
+    for i, result in enumerate(search_results, 1):
+        title = result.get("title", "No title")
+        body = result.get("body", "No description")
+        url = result.get("href", "")
+
+        context += f"## Result {i}: {title}\n"
+        context += f"{body}\n"
+        if url:
+            context += f"Source: {url}\n"
+        context += "\n"
+
+    return context
+
+
+@app.post("/v1/web-chat/completions")
+async def web_chat_completions(request: WebChatRequest):
+    """
+    Chat completions with web search augmentation.
+
+    The last user message is used as the search query.
+    Search results are injected into the context before sending to the LLM.
+    """
+    try:
+        # Get the last user message as search query
+        user_messages = [msg for msg in request.messages if msg.get("role") == "user"]
+        if not user_messages:
+            raise HTTPException(status_code=400, detail="No user message found")
+
+        search_query = user_messages[-1].get("content", "")
+
+        # Perform web search
+        print(f"Searching web for: {search_query}")
+        search_results = search_web(search_query, request.max_search_results)
+
+        # Format search results as context
+        web_context = format_search_context(search_query, search_results)
+
+        # Create augmented messages with web context
+        augmented_messages = request.messages.copy()
+
+        # Insert web context as a system message before the last user message
+        system_prompt = {
+            "role": "system",
+            "content": f"""You are a helpful assistant with access to current web information.
+
+{web_context}
+
+Use the above search results to provide accurate, up-to-date information in your response.
+Always cite sources when using information from the search results."""
+        }
+
+        # Insert system message before the last user message
+        augmented_messages.insert(-1, system_prompt)
+
+        # Forward to llama-server with augmented context
+        response = requests.post(
+            f"{LLAMA_SERVER_URL}/v1/chat/completions",
+            json={
+                "messages": augmented_messages,
+                "max_tokens": request.max_tokens,
+                "temperature": request.temperature,
+            },
+            timeout=300
+        )
+        response.raise_for_status()
+
+        result = response.json()
+
+        # Add metadata about search results
+        result["web_search"] = {
+            "query": search_query,
+            "results_count": len(search_results),
+            "sources": [r.get("href", "") for r in search_results if r.get("href")]
+        }
+
+        return result
+
+    except requests.exceptions.RequestException as e:
+        raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
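For reference, a sketch of how a client might exercise the new endpoint. The host and port are assumptions (7860 is the usual Hugging Face Spaces port), and reading `choices[0].message.content` assumes llama-server's OpenAI-compatible response shape passes through unchanged:

# Hypothetical client for the new web-augmented endpoint.
import requests

resp = requests.post(
    "http://localhost:7860/v1/web-chat/completions",  # assumed host/port
    json={
        "messages": [{"role": "user", "content": "Latest llama.cpp release?"}],
        "max_tokens": 256,
        "max_search_results": 3,
    },
    timeout=300,
)
resp.raise_for_status()
data = resp.json()
print(data["web_search"]["sources"])             # URLs injected into the context
print(data["choices"][0]["message"]["content"])  # assumes OpenAI-style reply shape

One detail worth noting: `augmented_messages.insert(-1, system_prompt)` places the system message before the last element of the list, so the search context lands immediately ahead of the newest user turn rather than at the top of the conversation.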
pyproject.toml CHANGED

@@ -10,5 +10,8 @@ dependencies = [
     "fastapi>=0.104.0",
     "uvicorn[standard]>=0.24.0",
     "llama-cpp-python>=0.2.0",
-    "huggingface-hub>=0.19.0"
+    "huggingface-hub>=0.19.0",
+    "duckduckgo-search>=4.0.0",
+    "beautifulsoup4>=4.12.0",
+    "lxml>=4.9.0"
 ]
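Outside Docker, the updated pins can be sanity-checked after reinstalling the project; a small sketch using only the standard library (package names as they appear in pyproject.toml; importlib.metadata normalizes dashes on Python 3.10+):

# Print installed versions of the dependencies this commit adds or re-pins.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("huggingface-hub", "duckduckgo-search", "beautifulsoup4", "lxml"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")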