Dmitry Beresnev committed

Commit 55e1aa1 · Parent: 110f827

fix app, dockerfile, pyproject.toml to add web search
Files changed (3)
  1. Dockerfile +1 -1
  2. app.py +114 -1
  3. pyproject.toml +4 -1
Dockerfile CHANGED

@@ -49,7 +49,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     && rm -rf /var/lib/apt/lists/*
 
 # Install Python packages
-RUN pip3 install --no-cache-dir fastapi uvicorn requests pydantic --break-system-packages
+RUN pip3 install --no-cache-dir fastapi uvicorn requests pydantic duckduckgo-search beautifulsoup4 lxml --break-system-packages
 
 # Create non-root user
 RUN useradd -m -u 1000 user && \
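The packages added here mirror the new pyproject.toml dependencies below. As a quick sanity check (a hypothetical snippet, not part of the commit), the imports can be verified inside the rebuilt image:

# smoke_test_deps.py - hypothetical check; run inside the container with python3
from duckduckgo_search import DDGS   # provided by duckduckgo-search
from bs4 import BeautifulSoup        # provided by beautifulsoup4
import lxml                          # parser backend BeautifulSoup can use
print("web-search dependencies import cleanly")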
app.py CHANGED

@@ -6,6 +6,8 @@ import os
 import requests
 import time
 from typing import Optional
+from duckduckgo_search import DDGS
+from bs4 import BeautifulSoup
 
 app = FastAPI()
 
@@ -42,6 +44,13 @@ class ChatCompletionRequest(BaseModel):
     temperature: float = 0.7
 
 
+class WebChatRequest(BaseModel):
+    messages: list[dict]
+    max_tokens: int = 512
+    temperature: float = 0.7
+    max_search_results: int = 5
+
+
 def start_llama_server(model_id: str) -> subprocess.Popen:
     """Start llama-server with specified model (optimized for speed)."""
     cmd = [
@@ -193,4 +202,108 @@ async def chat_completions(request: ChatCompletionRequest):
         response.raise_for_status()
         return response.json()
     except requests.exceptions.RequestException as e:
-        raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
+
+
+def search_web(query: str, max_results: int = 5) -> list[dict]:
+    """Search the web using DuckDuckGo and return results."""
+    try:
+        with DDGS() as ddgs:
+            results = list(ddgs.text(query, max_results=max_results))
+        return results
+    except Exception as e:
+        print(f"Search error: {e}")
+        return []
+
+
+def format_search_context(query: str, search_results: list[dict]) -> str:
+    """Format search results into context for the LLM."""
+    if not search_results:
+        return f"No web results found for: {query}"
+
+    context = f"# Web Search Results for: {query}\n\n"
+
+    for i, result in enumerate(search_results, 1):
+        title = result.get("title", "No title")
+        body = result.get("body", "No description")
+        url = result.get("href", "")
+
+        context += f"## Result {i}: {title}\n"
+        context += f"{body}\n"
+        if url:
+            context += f"Source: {url}\n"
+        context += "\n"
+
+    return context
+
+
+@app.post("/v1/web-chat/completions")
+async def web_chat_completions(request: WebChatRequest):
+    """
+    Chat completions with web search augmentation.
+
+    The last user message is used as the search query.
+    Search results are injected into the context before sending to the LLM.
+    """
+    try:
+        # Get the last user message as search query
+        user_messages = [msg for msg in request.messages if msg.get("role") == "user"]
+        if not user_messages:
+            raise HTTPException(status_code=400, detail="No user message found")
+
+        search_query = user_messages[-1].get("content", "")
+
+        # Perform web search
+        print(f"Searching web for: {search_query}")
+        search_results = search_web(search_query, request.max_search_results)
+
+        # Format search results as context
+        web_context = format_search_context(search_query, search_results)
+
+        # Create augmented messages with web context
+        augmented_messages = request.messages.copy()
+
+        # Insert web context as a system message before the last user message
+        system_prompt = {
+            "role": "system",
+            "content": f"""You are a helpful assistant with access to current web information.
+
+{web_context}
+
+Use the above search results to provide accurate, up-to-date information in your response.
+Always cite sources when using information from the search results."""
+        }
+
+        # Insert system message before the last user message
+        augmented_messages.insert(-1, system_prompt)
+
+        # Forward to llama-server with augmented context
+        response = requests.post(
+            f"{LLAMA_SERVER_URL}/v1/chat/completions",
+            json={
+                "messages": augmented_messages,
+                "max_tokens": request.max_tokens,
+                "temperature": request.temperature,
+            },
+            timeout=300
+        )
+        response.raise_for_status()
+
+        result = response.json()
+
+        # Add metadata about search results
+        result["web_search"] = {
+            "query": search_query,
+            "results_count": len(search_results),
+            "sources": [r.get("href", "") for r in search_results if r.get("href")]
+        }
+
+        return result
+
+    except HTTPException:
+        # re-raise the 400 above unchanged instead of converting it into a 500 below
+        raise
+    except requests.exceptions.RequestException as e:
+        raise HTTPException(status_code=500, detail=f"llama-server error: {str(e)}")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
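For reference, a minimal client call against the new endpoint could look like the sketch below. The base URL is an assumption (Hugging Face Spaces conventionally expose port 7860) and the question is a placeholder; the payload fields mirror WebChatRequest above.

import requests

BASE_URL = "http://localhost:7860"  # hypothetical local deployment URL

payload = {
    "messages": [{"role": "user", "content": "What is new in llama.cpp this month?"}],
    "max_tokens": 512,            # WebChatRequest default
    "temperature": 0.7,           # WebChatRequest default
    "max_search_results": 5,      # number of DuckDuckGo hits injected as context
}

resp = requests.post(f"{BASE_URL}/v1/web-chat/completions", json=payload, timeout=300)
resp.raise_for_status()
data = resp.json()

# the handler attaches search metadata alongside the llama-server response
print(data["web_search"]["sources"])
print(data["choices"][0]["message"]["content"])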
pyproject.toml CHANGED

@@ -10,5 +10,8 @@ dependencies = [
     "fastapi>=0.104.0",
     "uvicorn[standard]>=0.24.0",
     "llama-cpp-python>=0.2.0",
-    "huggingface-hub>=0.19.0"
+    "huggingface-hub>=0.19.0",
+    "duckduckgo-search>=4.0.0",
+    "beautifulsoup4>=4.12.0",
+    "lxml>=4.9.0"
 ]
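With the dependencies declared, the search layer can be exercised standalone before hitting the full endpoint. A minimal sketch, assuming only that duckduckgo-search is installed; DDGS().text() is the same call that search_web wraps, and the query string is an arbitrary example:

from duckduckgo_search import DDGS

# same client context manager that search_web in app.py uses
with DDGS() as ddgs:
    hits = list(ddgs.text("latest llama.cpp release", max_results=3))

# each hit is a dict with "title", "body", and "href" keys,
# the same fields format_search_context reads
for hit in hits:
    print(hit.get("title"), "->", hit.get("href"))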