|
|
""" |
|
|
ShopSmart AI - Enhanced Conversational Shopping Assistant |
|
|
Human-Computer Interaction Capstone Project |
|
|
|
|
|
This application demonstrates advanced conversational AI for product discovery |
|
|
with improved context management, specification accuracy, and intelligent comparisons. |
|
|
|
|
|
Project: MSAI-631-M20 Human-Computer Interaction |
|
|
Team: Saswat Sahoo & Shashank Lakkimsetty |
|
|
Institution: University of the Cumberlands |
|
|
Date: August 2025 |
|
|
|
|
|
Enhanced Features: |
|
|
- Intelligent "better than" query handling |
|
|
- Accurate product specifications |
|
|
- Robust context management |
|
|
- Improved conversation coherence |
|
|
""" |
|
|
|
|
|
import re
import time
from typing import Optional

import gradio as gr
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
# Shared Hugging Face inference client; respond() streams its chat completions
# through this instance.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Category name -> keywords that are looked up as substrings of the lower-cased
# user message. Order is significant: detect_product_category() scans the
# categories in insertion order.
PRODUCT_CATEGORIES = {
    "smartphones": [
        "phone", "iphone", "galaxy", "pixel", "smartphone", "android",
    ],
    "laptops": [
        "laptop", "macbook", "thinkpad", "dell", "hp", "computer", "notebook",
    ],
    "audio": [
        "headphones", "earbuds", "speaker", "airpods", "sony", "bose",
        "audio", "noise-cancelling", "wireless",
    ],
    "gaming": [
        "gaming", "xbox", "playstation", "nintendo", "steam", "pc gaming",
    ],
    "wearables": [
        "watch", "fitness", "tracker", "apple watch", "garmin", "fitbit",
    ],
    "tablets": [
        "tablet", "ipad", "surface", "kindle", "android tablet",
    ],
    "tv_display": [
        "tv", "monitor", "display", "oled", "qled", "4k", "screen",
    ],
}
|
|
|
|
|
# Query intent -> trigger phrases searched for in the lower-cased user message.
# detect_query_type() scans these in insertion order.
QUERY_TYPES = {
    "comparison": [
        "vs", "versus", "compare", "difference", "better than", "superior to",
    ],
    "better_than": [
        "better than", "superior to", "upgrade from", "improve on", "outperform",
    ],
    "budget": [
        "budget", "cheap", "affordable", "under", "less than", "maximum",
    ],
    "recommendation": [
        "best", "top", "recommend", "suggest", "good", "should",
    ],
    "specific": [
        "specs", "specifications", "features", "details", "about",
    ],
}
|
|
|
|
|
|
|
|
# Hand-curated facts about specific products. Keys are lower-case product names;
# detect_product_category() looks them up as substrings of the lower-cased
# message and returns the entry's "category".
PRODUCT_KNOWLEDGE = {
    "sony wh-1000xm4": {
        "category": "audio",
        "type": "over-ear headphones",
        "price": 298,
        "key_features": ["noise cancellation", "30hr battery", "touch controls"],
        "better_alternatives": [
            "bose quietcomfort ultra",
            "apple airpods max",
            "sennheiser momentum 4",
        ],
    },
    "iphone 12": {
        "category": "smartphones",
        "price": 599,
        "screen": "6.1 inch",
        "better_alternatives": [
            "iphone 15",
            "iphone 14",
            "samsung galaxy s24",
        ],
    },
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def detect_product_category(message: str) -> str:
    """Return the product category that ``message`` refers to.

    Products listed in ``PRODUCT_KNOWLEDGE`` take priority. Otherwise the
    first category in ``PRODUCT_CATEGORIES`` (insertion order) that has a
    keyword occurring in the lower-cased message wins, with one refinement:
    a keyword hit is ignored when it only occurs inside a longer keyword that
    also matched. Without that rule "headphones" was classified as
    smartphones (it contains "phone") and multi-word keywords such as
    "android tablet" could never win over the shorter "android".

    Args:
        message: Raw user text; ``None`` or empty is treated as no match.

    Returns:
        A key of ``PRODUCT_CATEGORIES``, or ``"general"`` when nothing matches.
    """
    if not message:
        return "general"

    message_lower = message.lower()

    # Exact product knowledge beats generic keyword matching.
    for product, details in PRODUCT_KNOWLEDGE.items():
        if product in message_lower:
            return details["category"]

    # Every (category, keyword) hit, in category priority order.
    matches = [
        (category, keyword)
        for category, keywords in PRODUCT_CATEGORIES.items()
        for keyword in keywords
        if keyword in message_lower
    ]

    for category, keyword in matches:
        # Blank out longer matched keywords that contain this one; if the
        # keyword no longer appears, it was only a fragment of those.
        remainder = message_lower
        for _, other in matches:
            if len(other) > len(keyword) and keyword in other:
                remainder = remainder.replace(other, " ")
        if keyword in remainder:
            return category

    return "general"
|
|
|
|
|
def detect_query_type(message: str) -> str:
    """Classify the intent of ``message`` using the ``QUERY_TYPES`` phrases.

    "Better than" phrasing is checked first so that upgrade requests are not
    swallowed by the broader "comparison" type. The remaining types are then
    tried in ``QUERY_TYPES`` insertion order.

    A phrase only counts when it starts at a word boundary. Plain substring
    matching classified every "laptop" query as a recommendation ("top"),
    "tvs" as a comparison ("vs") and "thunderbolt" as a budget query
    ("under"). Inflected forms such as "recommended" or "cheaper" still match
    because only the start of the phrase is anchored.

    Args:
        message: Raw user text; ``None`` or empty yields ``"general"``.

    Returns:
        A key of ``QUERY_TYPES``, or ``"general"`` when no phrase matches.
    """
    if not message:
        return "general"

    message_lower = message.lower()

    def mentions(phrase: str) -> bool:
        # (?<!\w) rejects hits that begin in the middle of a word.
        return re.search(r"(?<!\w)" + re.escape(phrase), message_lower) is not None

    # Single source of truth for the upgrade phrases (includes "improve on").
    if any(mentions(phrase) for phrase in QUERY_TYPES.get("better_than", ())):
        return "better_than"

    for query_type, keywords in QUERY_TYPES.items():
        if any(mentions(keyword) for keyword in keywords):
            return query_type

    return "general"
|
|
|
|
|
def extract_reference_product(message: str) -> Optional[str]:
    """Extract the product named after a "better than X"-style phrase.

    The trigger phrases are tried in a fixed priority order. The product name
    runs until a question mark, a sentence-ending period, a line break or the
    end of the message. A trailing generic noun ("headphones", "laptop",
    "phone", "watch") is dropped from the result.

    Args:
        message: Raw user text; ``None`` or empty yields ``None``.

    Returns:
        The lower-cased product name, or ``None`` when no trigger phrase is
        followed by a non-empty name.
    """
    if not message:
        return None

    message_lower = message.lower()

    trigger_phrases = (
        "better than",
        "superior to",
        "upgrade from",
        "outperform",
        "improve on",
    )
    # A "." ends the name only when followed by whitespace or end of text, so
    # decimals inside the name ("ipad 10.9 inch") are kept. "\n" lets a
    # multi-line message terminate the name at the end of its line.
    terminator = r"(?:\?|\.(?=\s|$)|\n|$)"

    for phrase in trigger_phrases:
        match = re.search(re.escape(phrase) + r" (.+?)" + terminator, message_lower)
        if not match:
            continue

        product = match.group(1).strip()
        # Drop a trailing generic category noun: "sony headphones" -> "sony".
        product = re.sub(r"\s+(headphones|laptop|phone|watch)$", "", product)
        if product:
            return product

    return None
|
|
|
|
|
def extract_budget(message: str) -> Optional[str]:
    """Extract a spending limit such as "under $1,500" from ``message``.

    Recognised forms: "under N", "less than N", "maximum N", "budget of N",
    "N or less" and "up to N". N may carry a leading "$", thousands
    separators, a decimal part and a "k" suffix ("1.5k" means 1500). The
    patterns are tried in that order and the first hit wins.

    Args:
        message: Raw user text; ``None`` or empty yields ``None``.

    Returns:
        The amount formatted as ``"$<number>"`` without thousands separators,
        or ``None`` when no budget phrase is found.
    """
    if not message:
        return None

    text = message.lower()

    # Group 1: the number. Group 2: optional "k" multiplier, which must end
    # the token so that "5 kg" or "5km" are not read as thousands.
    amount = r"\$?(\d+(?:,\d{3})*(?:\.\d+)?)(k\b)?"
    budget_patterns = [
        rf"\bunder {amount}",
        rf"\bless than {amount}",
        rf"\bmaximum {amount}",
        rf"\bbudget of {amount}",
        rf"{amount} or less",
        rf"\bup to {amount}",
    ]

    for pattern in budget_patterns:
        match = re.search(pattern, text)
        if not match:
            continue

        number = match.group(1).replace(",", "")
        if match.group(2):
            # round() absorbs float noise such as 1.1 * 1000 = 1100.0000000000002.
            scaled = round(float(number) * 1000, 2)
            number = str(int(scaled)) if scaled.is_integer() else f"{scaled:.2f}"
        return f"${number}"

    return None
|
|
|
|
|
# Chat-template and role markers that must never reach the UI or be fed back
# to the model as conversation history.
_ARTIFACT_TOKENS = (
    "[USER]", "[ASSISTANT]", "[/USER]", "[/ASSISTANT]",
    "[ASS]", "[/ASS]", "[INST]", "[/INST]",
    "<|user|>", "<|assistant|>", "<|system|>", "<|im_start|>", "<|im_end|>",
    "Human:", "AI:", "Assistant:", "User query:", "User:", "Bot:",
    "Product A:", "Product B:", "[USER", "[ASSISTANT", "USER:", "ASSISTANT:",
)

# One or more consecutive markers, case-insensitive. Longer markers come first
# in the alternation so "[ASSISTANT]" is consumed whole rather than as
# "[ASSISTANT" followed by a stray "]".
_ARTIFACT_RUN_RE = re.compile(
    "(?:"
    + "|".join(
        re.escape(token)
        for token in sorted(_ARTIFACT_TOKENS, key=len, reverse=True)
    )
    + ")+",
    re.IGNORECASE,
)

# An echoed prompt such as "User query: which one is best?".
_ECHOED_QUESTION_RE = re.compile(r"User(?: query)?:.*?\?", re.IGNORECASE)


def _artifact_gap(match):
    """Return what replaces a removed marker run: a space if it glued two words."""
    text = match.string
    start, end = match.span()
    glued = (
        start > 0
        and end < len(text)
        and text[start - 1].isalnum()
        and text[end].isalnum()
    )
    return " " if glued else ""


def clean_message(text: str) -> str:
    """Strip chat-template artifacts from ``text`` and tidy its whitespace.

    Steps, in order:
      1. Remove echoed questions ("User query: ...?" / "User: ...?"). This
         runs before marker stripping; in the opposite order the "User:"
         prefix is already gone and the echo can never be found.
      2. Remove role/template markers case-insensitively. When a marker sat
         directly between two words, one space is left in its place.
      3. Insert a missing space after ".", "!", "?", ")" or ":" when an
         upper-case letter follows immediately.
      4. Collapse runs of spaces and runs of three or more line breaks.

    Lower-to-upper and digit-to-upper transitions are deliberately left
    alone: splitting them corrupts product names ("iPhone" -> "i Phone",
    "4K" -> "4 K", "WH-1000XM4" -> "WH-1000 XM4").

    Args:
        text: Model output or a history message; ``None``/empty yields "".

    Returns:
        The cleaned text, stripped of leading and trailing whitespace.
    """
    if not text:
        return ""

    text = _ECHOED_QUESTION_RE.sub("", text)
    text = _ARTIFACT_RUN_RE.sub(_artifact_gap, text)

    # Sentence glue: "Done.Next" -> "Done. Next", "Price:High" -> "Price: High".
    text = re.sub(r"([.!?):])([A-Z])", r"\1 \2", text)

    text = re.sub(r" +", " ", text)
    text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text)

    return text.strip()
|
|
|
|
|
def create_enhanced_prompt(
    message: str,
    category: str,
    query_type: str,
    budget: Optional[str] = None,
    reference_product: Optional[str] = None,
) -> str:
    """Build the user-turn prompt sent to the model for ``message``.

    The prompt is made of fixed accuracy instructions, then either a
    structured "better than" brief (when ``query_type`` is ``"better_than"``
    and a reference product is known) or a generic brief, then an optional
    budget line, then the user's query.

    Args:
        message: The user's original text, appended verbatim at the end.
        category: Product category interpolated into the instructions.
        query_type: Detected intent; ``"better_than"`` selects the upgrade brief.
        budget: Formatted limit such as ``"$1500"``, or ``None`` for no limit.
        reference_product: Product the user wants to improve on, if any.

    Returns:
        The assembled prompt text.
    """
    base_instructions = """
CRITICAL INSTRUCTIONS:
- Stay focused on the user's specific question - do not change topics
- Use accurate product specifications - no made-up numbers
- For headphones, focus on: sound quality, noise cancellation, battery life, comfort, price
- For phones, focus on: camera, battery, display, performance, price
- For laptops, focus on: processor, RAM, storage, battery, graphics, price
- Never use "Display:" for audio features - use "Audio Features:" instead
- Include specific model names and current pricing when possible
- If unsure about specs, say "verify current specifications" rather than guess
"""

    if query_type == "better_than" and reference_product:
        # Bullets are real "•" characters; the template previously carried the
        # mis-decoded sequence "β’", which the model was asked to reproduce.
        specific_instruction = f"""
The user wants products BETTER than "{reference_product}".

Requirements:
1. Suggest 2-3 products that are genuinely superior or newer
2. Explain WHY they are better (specific improvements)
3. Include price comparison if relevant
4. Focus on {category} products only

Format response as:
**Product Name 1**
• Key improvement over {reference_product}
• Specifications
• Price

**Product Name 2**
• Key improvement over {reference_product}
• Specifications
• Price
"""
    else:
        specific_instruction = f"""
Provide helpful {category} product information for this {query_type} query.
Use bullet points for clear organization.
Include specific model names and accurate specifications.
"""

    budget_context = (
        f"\nBudget constraint: Keep recommendations within {budget}" if budget else ""
    )

    return f"{base_instructions}\n{specific_instruction}{budget_context}\n\nUser query: {message}"
|
|
|
|
|
def _history_to_exchanges(history):
    """Normalize chat history into ``(user_text, assistant_text)`` pairs.

    Accepts both history shapes: role/content dicts (what a chat UI created
    with ``type="messages"`` supplies) and legacy ``[user, assistant]`` pairs.
    Dict entries whose content is not plain text are skipped. A user turn
    without a reply is paired with ``None``.
    """
    exchanges = []
    pending_user = None

    for item in history:
        if isinstance(item, dict):
            role, content = item.get("role"), item.get("content")
            if not isinstance(content, str):
                continue
            if role == "user":
                if pending_user is not None:
                    exchanges.append((pending_user, None))
                pending_user = content
            elif role == "assistant":
                exchanges.append((pending_user, content))
                pending_user = None
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            exchanges.append((item[0], item[1]))

    if pending_user is not None:
        exchanges.append((pending_user, None))

    return exchanges


def maintain_conversation_context(history, message: str) -> list:
    """Select the recent history that is on-topic for ``message``.

    Only the last three exchanges are considered. An exchange is kept when
    its user turn falls in the same product category as ``message`` or in
    "general"; its assistant reply is kept along with it. Kept texts are
    passed through ``clean_message``.

    The history may be role/content dicts or ``[user, assistant]`` pairs.
    Handling only pairs left the context permanently empty, because the chat
    UI in this file is configured with ``type="messages"``.

    Args:
        history: Prior conversation as provided by the chat UI; may be empty.
        message: The user's current text.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in chronological
        order; empty when there is no usable history.
    """
    if not history:
        return []

    current_category = detect_product_category(message)
    relevant_history = []

    for human_msg, ai_msg in _history_to_exchanges(history)[-3:]:
        # Skip assistant-only entries and non-text user content (e.g. files).
        if not human_msg or not isinstance(human_msg, str):
            continue

        prev_category = detect_product_category(human_msg)
        if prev_category not in (current_category, "general"):
            continue

        relevant_history.append({
            "role": "user",
            "content": clean_message(human_msg),
        })
        if ai_msg and isinstance(ai_msg, str):
            relevant_history.append({
                "role": "assistant",
                "content": clean_message(ai_msg),
            })

    return relevant_history
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Greeting shown for an opening "hi"/"hello"/"demo" message.
# NOTE(review): the emoji below were restored from mis-decoded text; the exact
# glyphs are inferred from context - confirm against the original file.
_WELCOME_MESSAGE = """# 🛍️ Welcome to ShopSmart AI!

**Enhanced HCI Project Demonstration | University of the Cumberlands**

I'm your intelligent shopping assistant with advanced natural language understanding and accurate product knowledge.

## 🎯 **Enhanced Capabilities:**

### **🔍 Smart Product Research**
• Accurate product specifications and current pricing
• Intelligent "better than" recommendations
• Category-focused expertise across major product types

### **⚖️ Advanced Comparisons**
• Side-by-side analysis with real specifications
• "Upgrade path" recommendations for existing products
• Value analysis based on your specific needs

### **🧠 Conversation Intelligence**
• Maintains topic focus throughout conversation
• Understands follow-up questions in context
• Prevents topic jumping and specification errors

## 📱 **Try These Enhanced Queries:**
• *"Show me headphones better than Sony WH-1000XM4"*
• *"Find gaming laptops under $1500 with RTX 4060"*
• *"Compare iPhone 15 Pro vs Samsung Galaxy S24 Ultra cameras"*
• *"Upgrade options from Apple Watch Series 8"*

**What product research can I help you with today?**"""


def respond(message, history, system_message):
    """Stream the assistant's reply to ``message`` for the chat UI.

    An opening greeting ("hi", "hello", "hey", "demo", "start") with no
    history yields a static welcome text. Otherwise the message is analysed
    (category, query type, budget, reference product), a prompt is built, and
    the model's streamed output is yielded as progressively longer cleaned
    text.

    Args:
        message: The user's current text.
        history: Prior conversation as supplied by the chat UI.
        system_message: Persona text from the UI's configuration textbox. It
            becomes the opening line of the system prompt; a built-in persona
            is used when it is empty.

    Yields:
        The cleaned response accumulated so far, or an error notice if the
        model call fails.
    """
    message = message or ""

    if not history and message.lower().strip() in ["hi", "hello", "hey", "demo", "start"]:
        yield _WELCOME_MESSAGE
        return

    category = detect_product_category(message)
    query_type = detect_query_type(message)
    budget = extract_budget(message)
    reference_product = (
        extract_reference_product(message) if query_type == "better_than" else None
    )

    enhanced_query = create_enhanced_prompt(
        message, category, query_type, budget, reference_product
    )
    conversation_context = maintain_conversation_context(history, message)

    # The UI textbox was previously accepted but never used.
    persona = (system_message or "").strip() or (
        "You are ShopSmart AI, a helpful shopping assistant."
    )

    system_prompt = f"""{persona}

CRITICAL RULES:
- Answer ONLY the user's current question
- Do NOT include training examples, sample conversations, or fake dialogues
- Do NOT use tokens like [ASS], [USER], [ASSISTANT], or "User query:"
- Do NOT generate multiple questions and answers
- Give ONE direct response to the current question only
- Use natural, conversational language
- Focus on {category} products for this {query_type} query

Current user question: {message}
Provide a single, direct answer about {category} products."""

    messages = (
        [{"role": "system", "content": system_prompt}]
        + conversation_context
        + [{"role": "user", "content": enhanced_query}]
    )

    response = ""
    try:
        stream = client.chat_completion(
            messages,
            stream=True,
            max_tokens=300,
            temperature=0.4,
            top_p=0.8,
            # "[" already stops on "[ASS]", "[USER]" and "[INST]", so those
            # are not repeated. NOTE(review): some serving backends cap the
            # number of stop sequences (reportedly 4 by default in TGI).
            stop=["[", "User:", "Human:", "Assistant:"],
        )
        for chunk in stream:
            # Guard against stream chunks that carry no choices.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if not token:
                continue

            response += token
            cleaned_response = clean_message(response)

            # Cut off runaway generations and invite a follow-up instead.
            if len(cleaned_response) > 3500:
                cleaned_response += "\n\n✨ *Need more specific details? Just ask!*"
                yield cleaned_response
                return

            yield cleaned_response

    except Exception as e:
        # UI boundary: report the failure in the chat instead of raising.
        yield f"❌ AI model temporarily unavailable. Error: {str(e)}\n\nPlease try again in a moment when the model servers recover."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Chat UI definition. respond() is the handler; the textbox under
# additional_inputs is passed to it as its third argument.
# NOTE(review): emoji in the title, description and label were restored from
# mis-decoded text; the exact glyphs are inferred from context - confirm
# against the original file.
demo = gr.ChatInterface(
    respond,
    type="messages",
    title="🛍️ ShopSmart AI - Enhanced Conversational Shopping Assistant",
    description="""
**Enhanced Human-Computer Interaction Project | MSAI-631-M20**
**University of the Cumberlands | Team: Saswat Sahoo & Shashank Lakkimsetty**

---

### 🚀 **Enhanced Research Features**

This enhanced version addresses real-world conversational AI challenges:

**🔧 Problem-Solving Improvements:**
• **Accurate Specifications** - No more made-up product specs or wrong categories
• **Topic Coherence** - Prevents conversation jumping between unrelated products
• **"Better Than" Intelligence** - Understands upgrade recommendations correctly
• **Error Prevention** - Eliminates training token artifacts and formatting issues

**🧠 Advanced AI Processing:**
• **Context-Aware Filtering** - Maintains relevant conversation history
• **Reference Product Extraction** - Identifies products for comparison/upgrade
• **Enhanced Prompt Engineering** - Prevents common AI model errors
• **Robust Error Handling** - Professional fallbacks with accurate demo content

### 🔬 **Technical Implementation**
**Enhanced NLP Pipeline** | **Improved Context Management** | **Error-Resistant Design**
""",
    examples=[
        ["Show me headphones better than Sony WH-1000XM4"],
        ["Find gaming laptops under $1500 with RTX 4060 graphics"],
        ["Compare iPhone 15 Pro vs Samsung Galaxy S24 Ultra cameras"],
        ["What's a good upgrade from Apple Watch Series 8?"],
        ["Recommend wireless earbuds better than AirPods Pro 2"],
        ["Best gaming monitors under $800 with 144Hz refresh rate"],
    ],
    additional_inputs=[
        gr.Textbox(
            value="You are ShopSmart AI with enhanced accuracy and context management. Provide precise product recommendations with accurate specifications. Stay focused on the user's specific query and category.",
            label="🔧 Enhanced System Configuration",
            lines=3,
            interactive=True,
        )
    ],
    cache_examples=False,
    theme=gr.themes.Soft(),
    css="""
.gradio-container {
font-family: 'Segoe UI', system-ui, sans-serif;
max-width: 1200px;
margin: auto;
}
.gr-button-primary {
background: linear-gradient(45deg, #2563eb, #1d4ed8) !important;
border: none !important;
}
""",
)
|
|
|
|
|
if __name__ == "__main__":
    # Startup banner.
    # NOTE(review): emoji restored from mis-decoded text; glyphs are inferred
    # from context - confirm against the original file.
    print("🚀 Starting ShopSmart AI - Enhanced Version")
    print("🔧 Enhanced Features: Accurate specs, better context, no artifacts")
    print("📊 Project: Enhanced Conversational AI for Product Discovery")
    print("🎓 University of the Cumberlands | MSAI-631-M20")

    # Listen on all interfaces at port 7860 and surface handler errors in the UI.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )