# ssahoo34803's picture
# Update app.py
# 6308425 verified
"""
ShopSmart AI - Enhanced Conversational Shopping Assistant
Human-Computer Interaction Capstone Project
This application demonstrates advanced conversational AI for product discovery
with improved context management, specification accuracy, and intelligent comparisons.
Project: MSAI-631-M20 Human-Computer Interaction
Team: Saswat Sahoo & Shashank Lakkimsetty
Institution: University of the Cumberlands
Date: August 2025
Enhanced Features:
- Intelligent "better than" query handling
- Accurate product specifications
- Robust context management
- Improved conversation coherence
"""
import re
import time
from typing import Optional

import gradio as gr
from huggingface_hub import InferenceClient
# Initialize the AI model client
# Module-level client for the "HuggingFaceH4/zephyr-7b-beta" model; respond()
# calls client.chat_completion(..., stream=True) on it for every user query.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
# ================================================================================
# ENHANCED PRODUCT KNOWLEDGE BASE
# ================================================================================
# Maps a product category name to the lowercase keywords that signal it.
# detect_product_category() walks the categories in this insertion order, so
# when a message mentions keywords from several categories the earlier
# category wins. Keywords are compared against the lowercased user message,
# which is why every entry here is lowercase.
PRODUCT_CATEGORIES = {
    "smartphones": ["phone", "iphone", "galaxy", "pixel", "smartphone", "android"],
    "laptops": ["laptop", "macbook", "thinkpad", "dell", "hp", "computer", "notebook"],
    "audio": ["headphones", "earbuds", "speaker", "airpods", "sony", "bose", "audio", "noise-cancelling", "wireless"],
    "gaming": ["gaming", "xbox", "playstation", "nintendo", "steam", "pc gaming"],
    "wearables": ["watch", "fitness", "tracker", "apple watch", "garmin", "fitbit"],
    "tablets": ["tablet", "ipad", "surface", "kindle", "android tablet"],
    "tv_display": ["tv", "monitor", "display", "oled", "qled", "4k", "screen"]
}
# Maps a query-intent label to the lowercase phrases that signal it.
# detect_query_type() first applies its own check for "better than"-style
# phrasing and only then walks these entries in insertion order, so earlier
# intents win when a message contains phrases from several of them.
# Note that "better than" and "superior to" are listed under both
# "comparison" and "better_than".
QUERY_TYPES = {
    "comparison": ["vs", "versus", "compare", "difference", "better than", "superior to"],
    "better_than": ["better than", "superior to", "upgrade from", "improve on", "outperform"],
    "budget": ["budget", "cheap", "affordable", "under", "less than", "maximum"],
    "recommendation": ["best", "top", "recommend", "suggest", "good", "should"],
    "specific": ["specs", "specifications", "features", "details", "about"]
}
# Enhanced product knowledge for accurate comparisons
# Keys are lowercase product names; detect_product_category() looks for them
# as substrings of the lowercased user message and, on a hit, returns the
# entry's "category" (which is one of the PRODUCT_CATEGORIES keys).
# The remaining fields ("type", "price", "key_features", "screen",
# "better_alternatives") are not read by any code visible in this file.
# NOTE(review): "price" is presumably in US dollars - confirm.
PRODUCT_KNOWLEDGE = {
    "sony wh-1000xm4": {
        "category": "audio",
        "type": "over-ear headphones",
        "price": 298,
        "key_features": ["noise cancellation", "30hr battery", "touch controls"],
        "better_alternatives": [
            "bose quietcomfort ultra",
            "apple airpods max",
            "sennheiser momentum 4"
        ]
    },
    "iphone 12": {
        "category": "smartphones",
        "price": 599,
        "screen": "6.1 inch",
        "better_alternatives": ["iphone 15", "iphone 14", "samsung galaxy s24"]
    }
}
# ================================================================================
# ENHANCED NLP FUNCTIONS
# ================================================================================
def detect_product_category(message: str) -> str:
    """Return the product category a message is about, or ``"general"``.

    Resolution order:

    1. A known product name (a key of ``PRODUCT_KNOWLEDGE``) occurring in the
       message decides the category outright.
    2. Otherwise the first category in ``PRODUCT_CATEGORIES`` that has a
       keyword occurring at the *start of a word* wins.
    3. Otherwise the first category with a keyword occurring anywhere in the
       message (plain substring) wins.

    Step 2 exists because plain substring matching alone misfiles common
    queries: "phone" is contained in "headphones", so every headphone query
    was classified as "smartphones" before "audio" was even considered.
    Step 3 keeps the previous behaviour as a fallback so compound words such
    as "smartwatch" (which only contains "watch" mid-word) are still
    recognised.

    Args:
        message: Raw user message; matching is case-insensitive.

    Returns:
        A key of ``PRODUCT_CATEGORIES`` or the string ``"general"``.
    """
    message_lower = message.lower()

    # Check for specific product mentions first
    for product, details in PRODUCT_KNOWLEDGE.items():
        if product in message_lower:
            return details["category"]

    def starts_a_word(keyword: str) -> bool:
        # (?<!\w) rejects hits glued to a preceding letter/digit, e.g. the
        # "phone" inside "headphones"; a trailing suffix is still allowed so
        # plurals such as "laptops" keep matching.
        return re.search(r"(?<!\w)" + re.escape(keyword), message_lower) is not None

    def appears_anywhere(keyword: str) -> bool:
        return keyword in message_lower

    # Precise pass first, permissive legacy pass second.
    for matches in (starts_a_word, appears_anywhere):
        for category, keywords in PRODUCT_CATEGORIES.items():
            if any(matches(keyword) for keyword in keywords):
                return category
    return "general"
def detect_query_type(message: str) -> str:
    """Classify the intent of a message as one of the ``QUERY_TYPES`` keys.

    "Better than"-style phrasing is checked first because it is the most
    specific intent. The remaining intents are then resolved in two passes
    over ``QUERY_TYPES`` (in its insertion order):

    1. a keyword occurring at the *start of a word* in the message;
    2. failing that, a keyword occurring anywhere (plain substring).

    The word-start pass prevents short keywords from firing inside unrelated
    words - e.g. "top" inside "laptop" or "vs" inside "tvs" - which used to
    pre-empt the intent the user actually expressed. The substring pass keeps
    the previous behaviour as a fallback.

    Args:
        message: Raw user message; matching is case-insensitive.

    Returns:
        A key of ``QUERY_TYPES`` or the string ``"general"``.
    """
    message_lower = message.lower()

    # Check for "better than" queries first (most specific). "improve on" is
    # included so this check agrees with the phrases that
    # extract_reference_product() knows how to parse.
    upgrade_phrases = ("better than", "superior to", "upgrade from", "improve on", "outperform")
    if any(phrase in message_lower for phrase in upgrade_phrases):
        return "better_than"

    def starts_a_word(keyword: str) -> bool:
        # (?<!\w): the keyword must not be glued to a preceding letter/digit.
        return re.search(r"(?<!\w)" + re.escape(keyword), message_lower) is not None

    def appears_anywhere(keyword: str) -> bool:
        return keyword in message_lower

    # Check other query types: precise pass first, permissive pass second.
    for matches in (starts_a_word, appears_anywhere):
        for query_type, keywords in QUERY_TYPES.items():
            if any(matches(keyword) for keyword in keywords):
                return query_type
    return "general"
def extract_reference_product(message: str) -> Optional[str]:
    """Extract the reference product from 'better than X' style queries.

    The text following one of the phrases "better than", "superior to",
    "upgrade from", "outperform" or "improve on" (tried in that order) is
    captured up to the end of the sentence. A sentence ends at "?", "!", a
    line break, the end of the message, or a "." that is followed by
    whitespace or the end of the message - so a decimal point inside a model
    name ("MacBook Air 13.6") does not cut the name short.

    A trailing generic noun ("headphones", "laptop", "phone", "watch") is
    dropped from the captured name.

    Args:
        message: Raw user message; matching is case-insensitive.

    Returns:
        The lowercased product name, or ``None`` when the message contains no
        such phrase followed by a product.
    """
    message_lower = message.lower()

    # Sentence terminator shared by every phrase pattern.
    sentence_end = r'(?:[?!\n]|\.(?=\s|$)|$)'
    lead_in_phrases = (
        "better than",
        "superior to",
        "upgrade from",
        "outperform",
        "improve on",
    )

    for phrase in lead_in_phrases:
        match = re.search(phrase + r' (.+?)' + sentence_end, message_lower)
        if not match:
            continue
        product = match.group(1).strip()
        # Clean up common suffixes
        product = re.sub(r'\s+(headphones|laptop|phone|watch)$', '', product)
        if product:
            return product
    return None
def extract_budget(message: str) -> str:
    """Pull a spending limit out of a message.

    Recognised phrasings (case-insensitive, tried in this order): "under N",
    "less than N", "maximum N", "budget of N", "N or less" and "up to N",
    where N is a whole number with an optional leading "$" and optional
    thousands separators.

    Returns:
        The amount formatted as "$<digits>" with separators removed, or
        ``None`` when no phrasing matches.
    """
    lowered = message.lower()
    price_patterns = (
        r'under \$?(\d+(?:,\d{3})*)',
        r'less than \$?(\d+(?:,\d{3})*)',
        r'maximum \$?(\d+(?:,\d{3})*)',
        r'budget of \$?(\d+(?:,\d{3})*)',
        r'\$?(\d+(?:,\d{3})*) or less',
        r'up to \$?(\d+(?:,\d{3})*)',
    )

    # Lazily evaluate the patterns and stop at the first one that hits.
    attempts = (re.search(pattern, lowered) for pattern in price_patterns)
    first_hit = next((hit for hit in attempts if hit), None)
    if first_hit is None:
        return None

    digits = first_hit.group(1).replace(',', '')
    return "$" + digits
# Chat-template and role markers that the model sometimes leaks into its
# output. Order matters: longer markers come before their own prefixes
# (e.g. "[USER]" before "[USER") so the longest form is removed in one piece.
_ARTIFACT_TOKENS = (
    "[USER]", "[ASSISTANT]", "[/USER]", "[/ASSISTANT]",
    "[ASS]", "[/ASS]", "[INST]", "[/INST]",
    "<|user|>", "<|assistant|>", "<|system|>", "<|im_start|>", "<|im_end|>",
    "Human:", "AI:", "Assistant:", "User query:", "User:", "Bot:",
    "Product A:", "Product B:", "[USER", "[ASSISTANT", "USER:", "ASSISTANT:",
)

# One case-insensitive alternation over all markers. Markers that begin with
# a letter must not directly follow another letter, otherwise the tail of an
# ordinary word would be eaten ("robot:" -> "ro", "Thai:" -> "Th").
_ARTIFACT_RE = re.compile(
    "|".join(
        (r"(?<![A-Za-z])" if token[0].isalpha() else "") + re.escape(token)
        for token in _ARTIFACT_TOKENS
    ),
    re.IGNORECASE,
)

# An echoed prompt such as "User query: <question>?" (same line only).
_ECHOED_QUESTION_RE = re.compile(r"(?<![A-Za-z])user(?: query)?:.*?\?", re.IGNORECASE)


def clean_message(text: str) -> str:
    """Strip leaked prompt/template artifacts from text and tidy its spacing.

    Steps, in order:

    1. Remove an echoed "User query: ...?" / "User: ...?" question. This has
       to happen *before* the role markers are stripped; otherwise the
       "User query:" prefix is already gone and the echoed question survives.
    2. Remove chat-template and role markers, case-insensitively.
    3. Insert a missing space after sentence punctuation, a closing
       parenthesis or a colon when an uppercase letter follows immediately.
    4. Collapse runs of spaces and runs of three or more line breaks.

    Spaces are deliberately NOT inserted between a lowercase letter or digit
    and a following uppercase letter: that rule split product names and
    units ("iPhone" -> "i Phone", "4K" -> "4 K", "WH-1000XM4" ->
    "WH-1000 XM4"), which is harmful in a product assistant.

    Args:
        text: Text to clean; ``None`` or an empty string is accepted.

    Returns:
        The cleaned text with surrounding whitespace removed ("" for empty
        input).
    """
    if not text:
        return ""

    # 1. Echoed question first, while its "User query:" prefix still exists.
    text = _ECHOED_QUESTION_RE.sub("", text)

    # 2. Remaining training/template markers.
    text = _ARTIFACT_RE.sub("", text)

    # 3. Fix spacing issues around punctuation.
    text = re.sub(r'([.!?])([A-Z])', r'\1 \2', text)  # after sentence endings
    text = re.sub(r'(\))([A-Z])', r'\1 \2', text)     # after closing parentheses
    text = re.sub(r'(:)([A-Z])', r'\1 \2', text)      # after colons

    # 4. Normalize excessive whitespace but preserve intentional formatting.
    text = re.sub(r' +', ' ', text)                   # multiple spaces -> one
    text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)     # 3+ newlines -> blank line
    return text.strip()
# Guard-rail instructions placed at the top of every prompt.
_BASE_PROMPT_INSTRUCTIONS = """
CRITICAL INSTRUCTIONS:
- Stay focused on the user's specific question - do not change topics
- Use accurate product specifications - no made-up numbers
- For headphones, focus on: sound quality, noise cancellation, battery life, comfort, price
- For phones, focus on: camera, battery, display, performance, price
- For laptops, focus on: processor, RAM, storage, battery, graphics, price
- Never use "Display:" for audio features - use "Audio Features:" instead
- Include specific model names and current pricing when possible
- If unsure about specs, say "verify current specifications" rather than guess
"""

# Task description for "better than <product>" queries.
_BETTER_THAN_PROMPT_TEMPLATE = """
The user wants products BETTER than "{reference_product}".
Requirements:
1. Suggest 2-3 products that are genuinely superior or newer
2. Explain WHY they are better (specific improvements)
3. Include price comparison if relevant
4. Focus on {category} products only
Format response as:
**Product Name 1**
\u2022 Key improvement over {reference_product}
\u2022 Specifications
\u2022 Price
**Product Name 2**
\u2022 Key improvement over {reference_product}
\u2022 Specifications
\u2022 Price
"""

# Task description for every other query.
_GENERAL_PROMPT_TEMPLATE = """
Provide helpful {category} product information for this {query_type} query.
Use bullet points for clear organization.
Include specific model names and accurate specifications.
"""


def create_enhanced_prompt(
    message: str,
    category: str,
    query_type: str,
    budget: Optional[str] = None,
    reference_product: Optional[str] = None,
) -> str:
    """Build the user-turn prompt sent to the model.

    The prompt is made of, in order: fixed guard-rail instructions, a task
    description, an optional budget constraint, and the user's own message
    introduced by "User query:".

    The bullet characters in the response-format example are real bullets
    (U+2022); previously they were mis-encoded ("\u03b2\u20ac\u2019"-style
    garbage), which the model could copy into its answers.

    Args:
        message: The user's message, appended verbatim.
        category: Product category label inserted into the task description.
        query_type: Query intent label. ``"better_than"`` selects the upgrade
            template, but only when ``reference_product`` is also given.
        budget: Optional budget text (e.g. "$1500") to add as a constraint.
        reference_product: Product the user wants to improve upon.

    Returns:
        The assembled prompt string.
    """
    if query_type == "better_than" and reference_product:
        specific_instruction = _BETTER_THAN_PROMPT_TEMPLATE.format(
            reference_product=reference_product,
            category=category,
        )
    else:
        # Also covers a "better_than" query whose product could not be parsed.
        specific_instruction = _GENERAL_PROMPT_TEMPLATE.format(
            category=category,
            query_type=query_type,
        )

    budget_context = f"\nBudget constraint: Keep recommendations within {budget}" if budget else ""
    return f"{_BASE_PROMPT_INSTRUCTIONS}\n{specific_instruction}{budget_context}\n\nUser query: {message}"
def _pair_history_exchanges(history):
    """Normalize chat history into a list of (user_text, assistant_text) pairs.

    Two history layouts are understood:

    * "messages" layout - a flat list of ``{"role": ..., "content": ...}``
      dicts, which is what the chat UI delivers when it is created with
      ``type="messages"``;
    * pair layout - a list of ``[user_text, assistant_text]`` lists/tuples.

    Content that is not a plain string (e.g. file attachments) is treated as
    missing. Either element of a pair may be ``None``.
    """
    exchanges = []
    pending_user = None

    def as_text(value):
        return value if isinstance(value, str) else None

    for item in history:
        if isinstance(item, dict):
            role = item.get("role")
            text = as_text(item.get("content"))
            if role == "user":
                # Two user turns in a row: close the unanswered one.
                if pending_user:
                    exchanges.append((pending_user, None))
                pending_user = text
            elif role == "assistant":
                exchanges.append((pending_user, text))
                pending_user = None
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            if pending_user:
                exchanges.append((pending_user, None))
                pending_user = None
            exchanges.append((as_text(item[0]), as_text(item[1])))

    if pending_user:
        exchanges.append((pending_user, None))
    return exchanges


def maintain_conversation_context(history, message):
    """Select the recent, on-topic part of the history to send to the model.

    Only the last three exchanges are considered. An exchange is kept when
    its user message is in the same product category as ``message`` or is
    uncategorised ("general"). When ``message`` itself is uncategorised - a
    typical follow-up such as "which one has the longer battery life?" - all
    three exchanges are kept, because such a question only makes sense with
    the preceding context.

    Previously only list/tuple pairs were accepted, so with the UI's
    ``type="messages"`` history (a list of role/content dicts) every item was
    skipped and the model never received any context.

    Args:
        history: Chat history in "messages" or pair layout; may be empty or
            ``None``.
        message: The user's current message.

    Returns:
        A list of ``{"role": "user" | "assistant", "content": str}`` dicts in
        chronological order, with contents passed through ``clean_message``.
        Exchanges without a user message are dropped.
    """
    if not history:
        return []

    # Get current query category to maintain topic focus
    current_category = detect_product_category(message)

    relevant_history = []
    for human_msg, ai_msg in _pair_history_exchanges(history)[-3:]:
        if not human_msg:
            continue

        prev_category = detect_product_category(human_msg)
        on_topic = (
            current_category == "general"
            or prev_category in (current_category, "general")
        )
        if not on_topic:
            continue

        relevant_history.append({"role": "user", "content": clean_message(human_msg)})
        if ai_msg:
            relevant_history.append({"role": "assistant", "content": clean_message(ai_msg)})
    return relevant_history
# ================================================================================
# MAIN CONVERSATION HANDLER
# ================================================================================
# Shown instead of a model answer when a conversation opens with a greeting.
_WELCOME_MESSAGE = """# 🛍️ Welcome to ShopSmart AI!
**Enhanced HCI Project Demonstration | University of the Cumberlands**
I'm your intelligent shopping assistant with advanced natural language understanding and accurate product knowledge.
## 🎯 **Enhanced Capabilities:**
### **🔍 Smart Product Research**
• Accurate product specifications and current pricing
• Intelligent "better than" recommendations
• Category-focused expertise across major product types
### **⚖️ Advanced Comparisons**
• Side-by-side analysis with real specifications
• "Upgrade path" recommendations for existing products
• Value analysis based on your specific needs
### **🧠 Conversation Intelligence**
• Maintains topic focus throughout conversation
• Understands follow-up questions in context
• Prevents topic jumping and specification errors
## 📱 **Try These Enhanced Queries:**
• *"Show me headphones better than Sony WH-1000XM4"*
• *"Find gaming laptops under $1500 with RTX 4060"*
• *"Compare iPhone 15 Pro vs Samsung Galaxy S24 Ultra cameras"*
• *"Upgrade options from Apple Watch Series 8"*
**What product research can I help you with today?**"""

# First messages that trigger the welcome text (compared lowercased/stripped).
_GREETING_TRIGGERS = frozenset({"hi", "hello", "hey", "demo", "start"})

# Generation stops at the first of these. "[" alone already halts on every
# bracketed marker ("[ASS]", "[USER]", "[INST]"), so those are not listed
# separately; this also keeps the list at four entries, the maximum that the
# chat-completion API documents for `stop`.
_STOP_SEQUENCES = ["[", "User:", "Human:", "Assistant:"]

# Cleaned responses longer than this are cut off with a closing hint.
_MAX_RESPONSE_CHARS = 3500


def respond(message, history, system_message):
    """Stream the assistant's reply to ``message``.

    This is a generator: each yielded value is the complete reply so far, so
    the UI can replace the displayed text as tokens arrive.

    Flow:

    1. A bare greeting at the start of a conversation yields the static
       welcome text and ends.
    2. Otherwise the message is analysed (category, query type, budget,
       reference product), a prompt is built, and relevant history is
       selected.
    3. The model is called in streaming mode; the accumulated text is
       cleaned with ``clean_message`` before every yield.

    Args:
        message: The user's current message.
        history: Prior conversation as supplied by the chat UI.
        system_message: Text of the UI's system-configuration box. When
            non-blank it is placed in front of the built-in system rules
            (it used to be ignored entirely).

    Yields:
        The reply text accumulated so far, or a single error message if the
        model call fails.
    """
    # Enhanced greeting
    if not history and message.lower().strip() in _GREETING_TRIGGERS:
        yield _WELCOME_MESSAGE
        return

    # Enhanced query analysis
    category = detect_product_category(message)
    query_type = detect_query_type(message)
    budget = extract_budget(message)
    reference_product = extract_reference_product(message) if query_type == "better_than" else None

    # Create focused prompt
    enhanced_query = create_enhanced_prompt(message, category, query_type, budget, reference_product)
    conversation_context = maintain_conversation_context(history, message)

    # Much stronger system prompt to prevent training data regurgitation
    system_prompt = f"""You are ShopSmart AI, a helpful shopping assistant.
CRITICAL RULES:
- Answer ONLY the user's current question
- Do NOT include training examples, sample conversations, or fake dialogues
- Do NOT use tokens like [ASS], [USER], [ASSISTANT], or "User query:"
- Do NOT generate multiple questions and answers
- Give ONE direct response to the current question only
- Use natural, conversational language
- Focus on {category} products for this {query_type} query
Current user question: {message}
Provide a single, direct answer about {category} products."""

    # Honour the operator-editable configuration from the UI.
    custom_instructions = (system_message or "").strip()
    if custom_instructions:
        system_prompt = f"{custom_instructions}\n\n{system_prompt}"

    messages = (
        [{"role": "system", "content": system_prompt}]
        + conversation_context
        + [{"role": "user", "content": enhanced_query}]
    )

    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            stream=True,
            max_tokens=300,    # Reduced to prevent long training examples
            temperature=0.4,   # Balanced for natural responses
            top_p=0.8,         # More focused responses
            stop=_STOP_SEQUENCES,  # Stop at training tokens
        ):
            # A stream may contain chunks without choices or without text
            # (e.g. a final bookkeeping chunk); skip them instead of failing.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if not token:
                continue

            # Accumulate raw tokens; clean only the text that is displayed so
            # that artifacts spanning several tokens are still caught.
            response += token
            cleaned_response = clean_message(response)

            # Prevent overly long responses
            if len(cleaned_response) > _MAX_RESPONSE_CHARS:
                cleaned_response += "\n\n✨ *Need more specific details? Just ask!*"
                yield cleaned_response
                return
            yield cleaned_response
    except Exception as e:
        # Top-level boundary for the UI: report the failure in the chat
        # rather than letting the stream die silently.
        yield f"❌ AI model temporarily unavailable. Error: {str(e)}\n\nPlease try again in a moment when the model servers recover."
# ================================================================================
# ENHANCED USER INTERFACE
# ================================================================================
# Markdown shown under the title of the chat page.
_APP_DESCRIPTION = """
**Enhanced Human-Computer Interaction Project | MSAI-631-M20**
**University of the Cumberlands | Team: Saswat Sahoo & Shashank Lakkimsetty**
---
### 🎓 **Enhanced Research Features**
This enhanced version addresses real-world conversational AI challenges:
**🔧 Problem-Solving Improvements:**
• **Accurate Specifications** - No more made-up product specs or wrong categories
• **Topic Coherence** - Prevents conversation jumping between unrelated products
• **"Better Than" Intelligence** - Understands upgrade recommendations correctly
• **Error Prevention** - Eliminates training token artifacts and formatting issues
**🧠 Advanced AI Processing:**
• **Context-Aware Filtering** - Maintains relevant conversation history
• **Reference Product Extraction** - Identifies products for comparison/upgrade
• **Enhanced Prompt Engineering** - Prevents common AI model errors
• **Robust Error Handling** - Professional fallbacks with accurate demo content
### 🔬 **Technical Implementation**
**Enhanced NLP Pipeline** | **Improved Context Management** | **Error-Resistant Design**
"""

# Page-level style overrides.
_APP_CSS = """
.gradio-container {
font-family: 'Segoe UI', system-ui, sans-serif;
max-width: 1200px;
margin: auto;
}
.gr-button-primary {
background: linear-gradient(45deg, #2563eb, #1d4ed8) !important;
border: none !important;
}
"""

# The chat page. `respond` is called as respond(message, history, <textbox
# value>): the single additional input below supplies its third argument.
# type="messages" makes the history a list of role/content dicts.
# Title, description and label use real emoji/bullet characters; they were
# previously mis-encoded and rendered as garbage in the browser.
demo = gr.ChatInterface(
    respond,
    type="messages",
    title="🛍️ ShopSmart AI - Enhanced Conversational Shopping Assistant",
    description=_APP_DESCRIPTION,
    examples=[
        ["Show me headphones better than Sony WH-1000XM4"],
        ["Find gaming laptops under $1500 with RTX 4060 graphics"],
        ["Compare iPhone 15 Pro vs Samsung Galaxy S24 Ultra cameras"],
        ["What's a good upgrade from Apple Watch Series 8?"],
        ["Recommend wireless earbuds better than AirPods Pro 2"],
        ["Best gaming monitors under $800 with 144Hz refresh rate"],
    ],
    additional_inputs=[
        gr.Textbox(
            value="You are ShopSmart AI with enhanced accuracy and context management. Provide precise product recommendations with accurate specifications. Stay focused on the user's specific query and category.",
            label="🔧 Enhanced System Configuration",
            lines=3,
            interactive=True,
        )
    ],
    cache_examples=False,
    theme=gr.themes.Soft(),
    css=_APP_CSS,
)
# Script entry point: print a short banner, then serve the chat UI on all
# network interfaces at port 7860. The banner uses real emoji characters
# (they were previously mis-encoded and printed as garbage).
if __name__ == "__main__":
    print("🚀 Starting ShopSmart AI - Enhanced Version")
    print("🔧 Enhanced Features: Accurate specs, better context, no artifacts")
    print("📚 Project: Enhanced Conversational AI for Product Discovery")
    print("🎓 University of the Cumberlands | MSAI-631-M20")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )