tecuts committed
Commit 846f4fb · verified · 1 Parent(s): d8153f4

Update app.py

Files changed (1):
  1. app.py +284 -298

app.py CHANGED
--- app.py (old version; removed lines are marked "-")

@@ -1,13 +1,12 @@
  import os
  import json
  import requests
- import httpx
  from datetime import datetime
  from typing import List, Dict, Optional
- from fastapi import FastAPI, Request, HTTPException, Depends, status
- from fastapi.responses import StreamingResponse
  from fastapi.middleware.cors import CORSMiddleware
- from openai import AsyncOpenAI
  from openai import OpenAI
  import logging

@@ -48,150 +47,101 @@ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
  GOOGLE_CX = os.getenv("GOOGLE_CX")
  LLM_API_KEY = os.getenv("LLM_API_KEY")
  LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
- MODEL_NAME = os.getenv("MODEL_NAME", "unsloth/Qwen3-30B-A3B-GGUF")

- # --- Enhanced System Prompt ---
  SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to real-time web search capabilities.

- IMPORTANT: When search tools are available to you, you should USE them for any query that could benefit from current, recent, or specific factual information, even if you have some knowledge about the topic from your training data.
-
- **When to use search (be proactive about using search when available):**
- - Any mention of recent events, current affairs, or "latest" information
- - Specific facts that could have changed since your training
- - Statistics, prices, scores, or numerical data
- - News, announcements, or current status of anything
- - When the user explicitly asks for current information
- - Any factual query where fresh information would be valuable

  **Response Guidelines:**
- 1. Use search tools when they're available and relevant to the query
- 2. Synthesize information from multiple sources when possible
  3. Clearly indicate when information comes from search results
  4. Provide comprehensive, well-structured answers
  5. Cite sources appropriately
- 6. If search results are contradictory, mention the discrepancy
-
- **Current Context**: Today's date is {current_date}. Prioritize recent information when available.

- Remember: When in doubt about whether to search, lean towards using the search tool for more accurate and current information."""

- SYSTEM_PROMPT_NO_SEARCH = """You are an intelligent AI assistant. Provide helpful, accurate, and comprehensive responses based on your training data.

- When you don't have current information about recent events or changing data, acknowledge this limitation and suggest that the user might want to search for the most up-to-date information.

- **Current Context**: Today's date is {current_date}, but your knowledge has a cutoff date and may not include the most recent information."""
-
- # --- Enhanced Web Search Tool Implementation ---
- def google_search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
      """
-     Enhanced Google Custom Search with better error handling and result formatting
      """
-     if not GOOGLE_API_KEY or not GOOGLE_CX:
-         logger.error("GOOGLE_API_KEY or GOOGLE_CX environment variables not set.")
-         return []
-
-     if not queries or not queries[0].strip():
-         logger.warning("Empty search query provided")
          return []

-     query = queries[0].strip()
-     logger.info(f"Executing Google Custom Search for: '{query}'")

      search_url = "https://www.googleapis.com/customsearch/v1"
      params = {
          "key": GOOGLE_API_KEY,
          "cx": GOOGLE_CX,
-         "q": query,
-         "num": min(num_results, 10),  # Google API max is 10
-         "dateRestrict": "m6"  # Prioritize results from last 6 months for freshness
      }

      try:
-         response = requests.get(search_url, params=params, timeout=15)
          response.raise_for_status()
          search_results = response.json()

          if "items" not in search_results:
-             logger.warning(f"No search results found for query: '{query}'")
              return []

-         # Enhanced result parsing with better data validation
          parsed_results = []
-         for item in search_results.get("items", []):
              title = item.get("title", "").strip()
              url = item.get("link", "").strip()
              snippet = item.get("snippet", "").strip()

-             # Skip results with missing essential information
-             if not title or not url or not snippet:
-                 continue
-
-             # Extract publication date if available
-             pub_date = None
-             if "pagemap" in item and "metatags" in item["pagemap"]:
-                 for meta in item["pagemap"]["metatags"]:
-                     if "article:published_time" in meta:
-                         pub_date = meta["article:published_time"]
-                         break
-
-             parsed_results.append({
-                 "source_title": title,
-                 "url": url,
-                 "snippet": snippet,
-                 "published_date": pub_date,
-                 "domain": url.split('/')[2] if '/' in url else url
-             })

-         logger.info(f"Successfully parsed {len(parsed_results)} search results")
          return parsed_results

-     except requests.exceptions.Timeout:
-         logger.error("Google search request timed out")
-         return []
-     except requests.exceptions.RequestException as e:
-         logger.error(f"Error during Google search request: {e}")
-         return []
      except Exception as e:
-         logger.error(f"Unexpected error in google_search_tool: {e}")
          return []

- def format_search_results_for_llm(search_results: List[Dict]) -> str:
-     """
-     Format search results with enhanced context for better LLM understanding
-     """
      if not search_results:
-         return "No relevant search results were found for this query."
-
-     current_date = datetime.now().strftime("%Y-%m-%d")
-     formatted_results = [f"Search Results (Retrieved on {current_date}):\n"]

      for i, result in enumerate(search_results, 1):
-         formatted_result = f"\n--- Result {i} ---"
-         formatted_result += f"\nTitle: {result['source_title']}"
-         formatted_result += f"\nSource: {result['domain']}"
-         formatted_result += f"\nURL: {result['url']}"
-
-         if result.get('published_date'):
-             formatted_result += f"\nPublished: {result['published_date']}"
-
-         formatted_result += f"\nContent: {result['snippet']}"
-         formatted_results.append(formatted_result)
-
-     formatted_results.append(f"\n--- End of Search Results ---\n")
-     formatted_results.append("Please synthesize this information to provide a comprehensive answer to the user's question. If the search results contain conflicting information, please note the discrepancy. Always cite your sources when using information from the search results.")

-     return "\n".join(formatted_results)

  # --- FastAPI Application Setup ---
- app = FastAPI(title="AI Chatbot with Enhanced Search", version="2.0.0")

  app.add_middleware(
      CORSMiddleware,
      allow_origins=[
          "https://chrunos.com",
          "https://www.chrunos.com",
-         "http://localhost:3000",  # For local development
-         "http://localhost:8000",  # For local development
      ],
      allow_credentials=True,
      allow_methods=["GET", "POST", "OPTIONS"],
@@ -203,22 +153,22 @@ if not LLM_API_KEY or not LLM_BASE_URL:
      logger.error("LLM_API_KEY or LLM_BASE_URL not configured")
      client = None
  else:
-     client = AsyncOpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
      logger.info("OpenAI client initialized successfully")

- # --- Enhanced Tool Definition ---
  available_tools = [
      {
          "type": "function",
          "function": {
              "name": "google_search",
-             "description": "REQUIRED for current information: Performs a Google search for recent events, current data, latest news, statistics, prices, or any information that changes frequently. Use this tool proactively when the user's query could benefit from up-to-date information, even if you have some relevant knowledge from training data.",
              "parameters": {
                  "type": "object",
                  "properties": {
                      "query": {
                          "type": "string",
-                         "description": "The search query. Be specific and include relevant keywords. For recent events, include time-related terms like 'latest', '2024', 'recent', etc."
                      }
                  },
                  "required": ["query"]
@@ -227,246 +177,282 @@ available_tools = [
      }
  ]

- def should_use_search(message: str) -> bool:
-     """
-     Intelligent decision making for when to enable search based on message content
-     """
-     search_indicators = [
-         "latest", "recent", "current", "now", "today", "this year", "2024", "2025",
-         "news", "update", "what's happening", "status", "price", "stock",
-         "weather", "score", "results", "announcement", "release"
-     ]

-     factual_indicators = [
-         "who is", "what is", "where is", "when did", "how many", "statistics",
-         "data", "information about", "tell me about", "facts about"
-     ]
-
-     message_lower = message.lower()
-
-     # Strong indicators for search
-     if any(indicator in message_lower for indicator in search_indicators):
-         return True
-
-     # Moderate indicators for search (factual queries)
-     if any(indicator in message_lower for indicator in factual_indicators):
-         return True

-     return False

- # --- Enhanced Chatbot Endpoint ---
- @app.post("/chat")
- async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
      if not client:
          raise HTTPException(status_code=500, detail="LLM client not configured")

      try:
          data = await request.json()
          user_message = data.get("message", "").strip()

-         # Support both 'use_search' and 'user_search' parameter names for flexibility
-         use_search = data.get("use_search")
-         if use_search is None:
-             use_search = data.get("user_search")  # Alternative parameter name

-         # Allow client to specify temperature (with validation)
-         temperature = data.get("temperature", 0.7)  # Default to 0.7
-         if not isinstance(temperature, (int, float)) or temperature < 0 or temperature > 2:
-             logger.warning(f"Invalid temperature value: {temperature}, defaulting to 0.7")
-             temperature = 0.7

-         conversation_history = data.get("history", [])

-         # Debug logging for request parameters
-         logger.info(f"Request parameters - message length: {len(user_message)}, use_search: {use_search}, temperature: {temperature}, history length: {len(conversation_history)}")

          if not user_message:
              raise HTTPException(status_code=400, detail="No message provided")

-         # Auto-decide search usage if not specified
-         '''if use_search is None:
-             use_search = should_use_search(user_message)
-             logger.info(f"Auto-decided search usage: {use_search}")
-         else:
-             logger.info(f"Manual search setting: {use_search}")'''
-
-         # Prepare messages with appropriate system prompt based on search availability
          current_date = datetime.now().strftime("%Y-%m-%d")

-         if use_search:
-             system_content = SYSTEM_PROMPT_WITH_SEARCH.format(current_date=current_date)
-         else:
-             system_content = SYSTEM_PROMPT_NO_SEARCH.format(current_date=current_date)
-
-         system_message = {"role": "system", "content": system_content}
-         messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]
-
-         llm_kwargs = {
-             "model": MODEL_NAME,
-             "temperature": temperature,  # Use client-specified temperature
-             "messages": messages,
-             "max_tokens": 2000  # Ensure comprehensive responses
-         }

          if use_search:
-             logger.info("Search is ENABLED - tools will be available to the model")
-             llm_kwargs["tools"] = available_tools
-             llm_kwargs["tool_choice"] = "required"  # Consider using "required" for testing
-         else:
-             logger.info("Search is DISABLED - no tools available")
-
-         # First LLM call
-         logger.info(f"Making LLM request with tools: {bool(use_search)}, temperature: {temperature}")
-         llm_response = await client.chat.completions.create(**llm_kwargs)
-         tool_calls = llm_response.choices[0].message.tool_calls
-         source_links = []
-
-         # Debug: Log tool call information
-         if tool_calls:
-             logger.info(f"LLM made {len(tool_calls)} tool calls")
-             for i, call in enumerate(tool_calls):
-                 logger.info(f"Tool call {i+1}: {call.function.name} with args: {call.function.arguments}")
-         else:
-             logger.info("LLM did not make any tool calls")
-             if use_search:
-                 logger.warning("Search was enabled but LLM chose not to use search tools - this might indicate the query doesn't require current information")
-
-         if tool_calls:
-             logger.info(f"Processing {len(tool_calls)} tool calls")
-             tool_outputs = []

-             for tool_call in tool_calls:
-                 if tool_call.function.name == "google_search":
-                     try:
-                         function_args = json.loads(tool_call.function.arguments)
-                         search_query = function_args.get("query", "").strip()
-
-                         if search_query:
-                             logger.info(f"Executing search for: {search_query}")
-                             search_results = google_search_tool([search_query], num_results=5)
-
-                             # Collect source links for response
-                             for result in search_results:
                                  source_links.append({
                                      "title": result["source_title"],
                                      "url": result["url"],
                                      "domain": result["domain"]
                                  })
-
-                             # Format results for LLM
-                             formatted_results = format_search_results_for_llm(search_results)
-                             tool_outputs.append({
-                                 "tool_call_id": tool_call.id,
-                                 "output": formatted_results
-                             })
-                         else:
-                             logger.warning("Empty search query in tool call")
-                             tool_outputs.append({
-                                 "tool_call_id": tool_call.id,
-                                 "output": "Error: Empty search query provided."
-                             })
-
-                     except json.JSONDecodeError as e:
-                         logger.error(f"Failed to parse tool call arguments: {e}")
-                         tool_outputs.append({
-                             "tool_call_id": tool_call.id,
-                             "output": "Error: Failed to parse search parameters."
-                         })
-
-             # Continue conversation with search results
-             messages.append(llm_response.choices[0].message)
-             for output_item in tool_outputs:
-                 messages.append({
-                     "role": "tool",
-                     "tool_call_id": output_item["tool_call_id"],
-                     "content": output_item["output"]
-                 })
-
-         # Final response generation with search context
-         # Enhanced response structure
-         logger.info(f"Chat response generated successfully. Search used: {bool(tool_calls)}, Temperature: {temperature}")
-
-         async def stream_response():
-             if not client:
-                 yield f"data: {json.dumps({'chunk': 'Error: LLM client not configured', 'sources': [], 'search_used': bool(tool_calls), 'temperature': temperature, 'timestamp': datetime.now().isoformat()})}\n\n"
-                 yield "data: [DONE]\n\n"
-                 return
-
-             # Handle the streaming response manually with httpx
-             headers = {
-                 "Content-Type": "application/json",
-                 "Authorization": f"Bearer {LLM_API_KEY}"
-             }
-
-             payload = {
-                 "model": MODEL_NAME,
-                 "messages": messages,
-                 "temperature": temperature,
-                 "stream": True
-             }
-
-             async with httpx.AsyncClient() as http_client:
-                 try:
-                     response = await http_client.post(f"{LLM_BASE_URL}/chat/completions", headers=headers, json=payload)
-                     response.raise_for_status()  # Raise HTTP errors
-                     async for line in response.aiter_lines():
-                         if line.startswith("data: "):
-                             data = line[len("data: "):]
-                             if data.strip() == "[DONE]":
-                                 yield "data: [DONE]\n\n"
-                                 break
-                             try:
-                                 chunk = json.loads(data)
-                                 content = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
-                                 if content:
-                                     yield f"data: {json.dumps({'chunk': content, 'sources': source_links, 'search_used': bool(tool_calls), 'temperature': temperature, 'timestamp': datetime.now().isoformat()})}\n\n"
-                             except json.JSONDecodeError:
-                                 logger.warning(f"Failed to decode JSON chunk: {data}")
-                                 continue
-                 except httpx.HTTPError as e:
-                     logger.error(f"HTTP request failed: {str(e)}")
-                     yield f"data: {json.dumps({'error': 'Failed to generate response', 'details': str(e)})}\n\n"
-                     yield "data: [DONE]\n\n"
-                     return
-                 except Exception as e:
-                     logger.error(f"Unexpected error in streaming response: {str(e)}")
-                     yield f"data: {json.dumps({'error': 'An unexpected error occurred', 'details': str(e)})}\n\n"
-                     yield "data: [DONE]\n\n"
-                     return
-
-             # Send the end-of-stream signal
-             yield "data: [DONE]\n\n"
-
-         return StreamingResponse(stream_response(), media_type="text/event-stream")

-     except HTTPException:
-         raise
-     except json.JSONDecodeError:
-         logger.error("Invalid JSON in request body")
-         raise HTTPException(status_code=400, detail="Invalid JSON in request body")
      except Exception as e:
-         logger.error(f"Unexpected error in /chat endpoint: {e}")
-         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

- # --- Health Check Endpoint ---
  @app.get("/")
  async def root():
      return {
-         "message": "Enhanced AI Chatbot API is running",
-         "version": "2.0.0",
-         "features": ["Google Search Integration", "Intelligent Search Decision", "Enhanced Prompting"],
          "timestamp": datetime.now().isoformat()
      }

- # --- Health Check Endpoint ---
  @app.get("/health")
  async def health_check():
-     health_status = {
          "status": "healthy",
          "timestamp": datetime.now().isoformat(),
          "services": {
              "llm_client": client is not None,
              "google_search": bool(GOOGLE_API_KEY and GOOGLE_CX)
          }
-     }
-     return health_status
 
+++ app.py (new version; added lines are marked "+")

@@ -1,13 +1,12 @@
  import os
  import json
+ import asyncio
  import requests
  from datetime import datetime
  from typing import List, Dict, Optional
+ from fastapi import FastAPI, Request, HTTPException, Depends
  from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import StreamingResponse
  from openai import OpenAI
  import logging

@@ -48,150 +47,101 @@ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
  GOOGLE_CX = os.getenv("GOOGLE_CX")
  LLM_API_KEY = os.getenv("LLM_API_KEY")
  LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")

+ # --- Simplified System Prompts ---
  SYSTEM_PROMPT_WITH_SEARCH = """You are an intelligent AI assistant with access to real-time web search capabilities.

+ When search tools are available, use them for queries that need current, recent, or specific factual information.

  **Response Guidelines:**
+ 1. Use search tools when available and relevant
+ 2. Synthesize information from multiple sources
  3. Clearly indicate when information comes from search results
  4. Provide comprehensive, well-structured answers
  5. Cite sources appropriately

+ Current date: {current_date}"""

+ SYSTEM_PROMPT_NO_SEARCH = """You are an intelligent AI assistant. Provide helpful, accurate, and comprehensive responses based on your training data.

+ Current date: {current_date}"""

+ # --- Optimized Web Search Tool ---
+ async def google_search_tool_async(query: str, num_results: int = 3) -> List[Dict]:
      """
+     Async Google Custom Search - reduced results for faster response
      """
+     if not GOOGLE_API_KEY or not GOOGLE_CX or not query.strip():
          return []

+     logger.info(f"Executing search for: '{query}'")

      search_url = "https://www.googleapis.com/customsearch/v1"
      params = {
          "key": GOOGLE_API_KEY,
          "cx": GOOGLE_CX,
+         "q": query.strip(),
+         "num": min(num_results, 5),  # Reduced for speed
+         "dateRestrict": "m3"  # Last 3 months for freshness
      }

      try:
+         # Run in thread pool to avoid blocking
+         loop = asyncio.get_event_loop()
+         response = await loop.run_in_executor(
+             None,
+             lambda: requests.get(search_url, params=params, timeout=10)
+         )
          response.raise_for_status()
          search_results = response.json()

          if "items" not in search_results:
              return []

          parsed_results = []
+         for item in search_results.get("items", [])[:num_results]:  # Limit results
              title = item.get("title", "").strip()
              url = item.get("link", "").strip()
              snippet = item.get("snippet", "").strip()

+             if title and url and snippet:
+                 parsed_results.append({
+                     "source_title": title,
+                     "url": url,
+                     "snippet": snippet,
+                     "domain": url.split('/')[2] if '/' in url else url
+                 })

+         logger.info(f"Retrieved {len(parsed_results)} search results")
          return parsed_results

      except Exception as e:
+         logger.error(f"Search error: {e}")
          return []

+ def format_search_results_compact(search_results: List[Dict]) -> str:
+     """Compact formatting for faster processing"""
      if not search_results:
+         return "No search results found."

+     formatted = ["Search Results:"]
      for i, result in enumerate(search_results, 1):
+         formatted.append(f"\n{i}. {result['source_title']}")
+         formatted.append(f" Source: {result['domain']}")
+         formatted.append(f" Content: {result['snippet']}")

+     return "\n".join(formatted)
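For reference, a minimal sketch of how these two new helpers could be exercised on their own, assuming `app.py` is importable as `app` and that `GOOGLE_API_KEY` and `GOOGLE_CX` are set in the environment (the helper returns `[]` otherwise):

```python
# Standalone sketch: run the async search helper and print the compact block
# that the endpoints below inject into the follow-up LLM call.
# Assumes app.py is importable as `app` and the Google env vars are set.
import asyncio

from app import google_search_tool_async, format_search_results_compact

async def main() -> None:
    # google_search_tool_async returns [] on missing credentials or any request error.
    results = await google_search_tool_async("fastapi streaming response", num_results=3)
    print(format_search_results_compact(results))

if __name__ == "__main__":
    asyncio.run(main())
```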

  # --- FastAPI Application Setup ---
+ app = FastAPI(title="Streaming AI Chatbot", version="2.1.0")

  app.add_middleware(
      CORSMiddleware,
      allow_origins=[
          "https://chrunos.com",
          "https://www.chrunos.com",
+         "http://localhost:3000",
+         "http://localhost:8000",
      ],
      allow_credentials=True,
      allow_methods=["GET", "POST", "OPTIONS"],

@@ -203,22 +153,22 @@ if not LLM_API_KEY or not LLM_BASE_URL:
      logger.error("LLM_API_KEY or LLM_BASE_URL not configured")
      client = None
  else:
+     client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
      logger.info("OpenAI client initialized successfully")

+ # --- Tool Definition ---
  available_tools = [
      {
          "type": "function",
          "function": {
              "name": "google_search",
+             "description": "Search Google for current information, recent events, or specific facts.",
              "parameters": {
                  "type": "object",
                  "properties": {
                      "query": {
                          "type": "string",
+                         "description": "Search query with relevant keywords"
                      }
                  },
                  "required": ["query"]

@@ -227,246 +177,282 @@ available_tools = [
      }
  ]

+ # --- Streaming Response Generator ---
+ async def generate_streaming_response(messages: List[Dict], use_search: bool, temperature: float):
+     """Generate streaming response with optional search"""

+     try:
+         # Initial LLM call with streaming
+         llm_kwargs = {
+             "model": "unsloth/Qwen3-30B-A3B-GGUF",
+             "temperature": temperature,
+             "messages": messages,
+             "max_tokens": 2000,
+             "stream": True
+         }
+
+         if use_search:
+             llm_kwargs["tools"] = available_tools
+             llm_kwargs["tool_choice"] = "auto"
+
+         source_links = []
+         response_content = ""
+         tool_calls_data = []
+
+         # First streaming call
+         stream = client.chat.completions.create(**llm_kwargs)
+
+         for chunk in stream:
+             delta = chunk.choices[0].delta
+
+             # Handle content streaming
+             if delta.content:
+                 content_chunk = delta.content
+                 response_content += content_chunk
+                 yield f"data: {json.dumps({'type': 'content', 'data': content_chunk})}\n\n"
+
+             # Handle tool calls
+             if delta.tool_calls:
+                 for tool_call in delta.tool_calls:
+                     if len(tool_calls_data) <= tool_call.index:
+                         tool_calls_data.extend([{"id": "", "function": {"name": "", "arguments": ""}}
+                                                 for _ in range(tool_call.index + 1 - len(tool_calls_data))])
+
+                     if tool_call.id:
+                         tool_calls_data[tool_call.index]["id"] = tool_call.id
+                     if tool_call.function.name:
+                         tool_calls_data[tool_call.index]["function"]["name"] = tool_call.function.name
+                     if tool_call.function.arguments:
+                         tool_calls_data[tool_call.index]["function"]["arguments"] += tool_call.function.arguments
+
+         # Process tool calls if any
+         if tool_calls_data and any(tc["function"]["name"] for tc in tool_calls_data):
+             yield f"data: {json.dumps({'type': 'status', 'data': 'Searching...'})}\n\n"
+
+             # Execute searches concurrently for speed
+             search_tasks = []
+             for tool_call in tool_calls_data:
+                 if tool_call["function"]["name"] == "google_search":
+                     try:
+                         args = json.loads(tool_call["function"]["arguments"])
+                         query = args.get("query", "").strip()
+                         if query:
+                             search_tasks.append(google_search_tool_async(query))
+                     except json.JSONDecodeError:
+                         continue
+
+             # Run searches concurrently
+             if search_tasks:
+                 search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
+
+                 # Combine all search results
+                 all_results = []
+                 for results in search_results_list:
+                     if isinstance(results, list):
+                         all_results.extend(results)
+                         for result in results:
+                             source_links.append({
+                                 "title": result["source_title"],
+                                 "url": result["url"],
+                                 "domain": result["domain"]
+                             })
+
+                 # Format search results
+                 if all_results:
+                     search_context = format_search_results_compact(all_results)
+
+                     # Create new message with search context
+                     search_messages = messages + [{
+                         "role": "system",
+                         "content": f"{search_context}\n\nPlease provide a comprehensive response based on the search results above."
+                     }]
+
+                     yield f"data: {json.dumps({'type': 'status', 'data': 'Generating response...'})}\n\n"
+
+                     # Generate final response with search context
+                     final_stream = client.chat.completions.create(
+                         model="unsloth/Qwen3-30B-A3B-GGUF",
+                         temperature=temperature,
+                         messages=search_messages,
+                         max_tokens=2000,
+                         stream=True
+                     )
+
+                     for chunk in final_stream:
+                         if chunk.choices[0].delta.content:
+                             content = chunk.choices[0].delta.content
+                             yield f"data: {json.dumps({'type': 'content', 'data': content})}\n\n"
+
+         # Send sources and completion
+         if source_links:
+             yield f"data: {json.dumps({'type': 'sources', 'data': source_links})}\n\n"
+
+         yield f"data: {json.dumps({'type': 'done', 'data': {'search_used': bool(source_links)}})}\n\n"

+     except Exception as e:
+         logger.error(f"Streaming error: {e}")
+         yield f"data: {json.dumps({'type': 'error', 'data': str(e)})}\n\n"

+ # --- Streaming Chat Endpoint ---
+ @app.post("/chat/stream")
+ async def chat_stream_endpoint(request: Request, _: None = Depends(verify_origin)):
      if not client:
          raise HTTPException(status_code=500, detail="LLM client not configured")

      try:
          data = await request.json()
          user_message = data.get("message", "").strip()
+         use_search = data.get("use_search", False)  # Default: False
+         temperature = max(0, min(2, data.get("temperature", 0.7)))  # Clamp to valid range
+         conversation_history = data.get("history", [])

+         if not user_message:
+             raise HTTPException(status_code=400, detail="No message provided")
+
+         # Prepare messages
+         current_date = datetime.now().strftime("%Y-%m-%d")
+         system_content = (SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH).format(current_date=current_date)
+         messages = [{"role": "system", "content": system_content}] + conversation_history + [{"role": "user", "content": user_message}]

+         logger.info(f"Stream request - search: {use_search}, temp: {temperature}")

+         return StreamingResponse(
+             generate_streaming_response(messages, use_search, temperature),
+             media_type="text/plain",
+             headers={
+                 "Cache-Control": "no-cache",
+                 "Connection": "keep-alive",
+                 "X-Accel-Buffering": "no"  # Disable nginx buffering
+             }
+         )
+
+     except json.JSONDecodeError:
+         raise HTTPException(status_code=400, detail="Invalid JSON")
+     except Exception as e:
+         logger.error(f"Stream endpoint error: {e}")
+         raise HTTPException(status_code=500, detail=str(e))
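For reference, a minimal sketch of a client consuming this endpoint, assuming a local deployment on port 8000 and that `verify_origin` accepts the request; the `type` values (`content`, `status`, `sources`, `done`, `error`) come from `generate_streaming_response` above:

```python
# Sketch of a client reading /chat/stream. Each body line is "data: <json>\n\n".
import json
import requests

with requests.post(
    "http://localhost:8000/chat/stream",  # assumed local deployment
    json={"message": "What is FastAPI?", "use_search": False, "temperature": 0.7},
    stream=True,
) as resp:
    for raw_line in resp.iter_lines(decode_unicode=True):
        if not raw_line or not raw_line.startswith("data: "):
            continue
        event = json.loads(raw_line[len("data: "):])
        if event["type"] == "content":
            print(event["data"], end="", flush=True)
        elif event["type"] == "sources":
            print("\nSources:", [s["url"] for s in event["data"]])
        elif event["type"] in ("done", "error"):
            break
```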

+ # --- Regular Chat Endpoint (for backward compatibility) ---
+ @app.post("/chat")
+ async def chat_endpoint(request: Request, _: None = Depends(verify_origin)):
+     if not client:
+         raise HTTPException(status_code=500, detail="LLM client not configured")

+     try:
+         data = await request.json()
+         user_message = data.get("message", "").strip()
+         use_search = data.get("use_search", False)  # Default: False
+         temperature = max(0, min(2, data.get("temperature", 0.7)))
+         conversation_history = data.get("history", [])

          if not user_message:
              raise HTTPException(status_code=400, detail="No message provided")

+         # Prepare messages
          current_date = datetime.now().strftime("%Y-%m-%d")
+         system_content = (SYSTEM_PROMPT_WITH_SEARCH if use_search else SYSTEM_PROMPT_NO_SEARCH).format(current_date=current_date)
+         messages = [{"role": "system", "content": system_content}] + conversation_history + [{"role": "user", "content": user_message}]

+         source_links = []

          if use_search:
+             # Search-enabled flow (non-streaming for compatibility)
+             llm_response = client.chat.completions.create(
+                 model="unsloth/Qwen3-30B-A3B-GGUF",
+                 temperature=temperature,
+                 messages=messages,
+                 tools=available_tools,
+                 tool_choice="auto",
+                 max_tokens=2000
+             )

+             tool_calls = llm_response.choices[0].message.tool_calls
+
+             if tool_calls:
+                 # Execute searches
+                 search_tasks = []
+                 for tool_call in tool_calls:
+                     if tool_call.function.name == "google_search":
+                         try:
+                             args = json.loads(tool_call.function.arguments)
+                             query = args.get("query", "").strip()
+                             if query:
+                                 search_tasks.append(google_search_tool_async(query))
+                         except json.JSONDecodeError:
+                             continue
+
+                 if search_tasks:
+                     search_results_list = await asyncio.gather(*search_tasks, return_exceptions=True)
+                     all_results = []
+                     for results in search_results_list:
+                         if isinstance(results, list):
+                             all_results.extend(results)
+                             for result in results:
                                  source_links.append({
                                      "title": result["source_title"],
                                      "url": result["url"],
                                      "domain": result["domain"]
                                  })
+
+                     if all_results:
+                         search_context = format_search_results_compact(all_results)
+                         search_messages = messages + [{
+                             "role": "system",
+                             "content": f"{search_context}\n\nPlease provide a comprehensive response based on the search results above."
+                         }]
+
+                         final_response = client.chat.completions.create(
+                             model="unsloth/Qwen3-30B-A3B-GGUF",
+                             temperature=temperature,
+                             messages=search_messages,
+                             max_tokens=2000
+                         )
+                         final_content = final_response.choices[0].message.content
+                     else:
+                         final_content = llm_response.choices[0].message.content
+                 else:
+                     final_content = llm_response.choices[0].message.content
+             else:
+                 final_content = llm_response.choices[0].message.content
+         else:
+             # No search - direct response
+             llm_response = client.chat.completions.create(
+                 model="unsloth/Qwen3-30B-A3B-GGUF",
+                 temperature=temperature,
+                 messages=messages,
+                 max_tokens=2000
+             )
+             final_content = llm_response.choices[0].message.content
+
+         return {
+             "response": final_content,
+             "sources": source_links,
+             "search_used": bool(source_links),
+             "temperature": temperature,
+             "timestamp": datetime.now().isoformat()
+         }

      except Exception as e:
+         logger.error(f"Chat endpoint error: {e}")
+         raise HTTPException(status_code=500, detail=str(e))

+ # --- Health Check Endpoints ---
  @app.get("/")
  async def root():
      return {
+         "message": "Streaming AI Chatbot API",
+         "version": "2.1.0",
+         "endpoints": ["/chat", "/chat/stream"],
          "timestamp": datetime.now().isoformat()
      }

  @app.get("/health")
  async def health_check():
+     return {
          "status": "healthy",
          "timestamp": datetime.now().isoformat(),
          "services": {
              "llm_client": client is not None,
              "google_search": bool(GOOGLE_API_KEY and GOOGLE_CX)
          }
+     }
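And a matching sketch for the backward-compatible `/chat` endpoint, under the same local-deployment assumption; the response keys are taken from the return dict above:

```python
# Sketch of a non-streaming /chat call, again assuming a local deployment
# and that verify_origin accepts the request.
import requests

resp = requests.post(
    "http://localhost:8000/chat",  # assumed local deployment
    json={"message": "Summarize today's AI news", "use_search": True},
    timeout=120,
)
resp.raise_for_status()
body = resp.json()
print(body["response"])                     # model answer
print([s["url"] for s in body["sources"]])  # search sources, if any
```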