makeup_residency_project

Sleeping

App Files Files Community

makeup_residency_project / app.py

ssahoo34803

Update app.py

6308425 verified 4 months ago

raw

history blame contribute delete

17.9 kB

	"""
	ShopSmart AI - Enhanced Conversational Shopping Assistant
	Human-Computer Interaction Capstone Project

	This application demonstrates advanced conversational AI for product discovery
	with improved context management, specification accuracy, and intelligent comparisons.

	Project: MSAI-631-M20 Human-Computer Interaction
	Team: Saswat Sahoo & Shashank Lakkimsetty
	Institution: University of the Cumberlands
	Date: August 2025

	Enhanced Features:
	- Intelligent "better than" query handling
	- Accurate product specifications
	- Robust context management
	- Improved conversation coherence
	"""

	import re
	import time
	from typing import Optional

	import gradio as gr
	from huggingface_hub import InferenceClient

	# Initialize the AI model client.
	# One module-level InferenceClient bound to the "HuggingFaceH4/zephyr-7b-beta"
	# model id; respond() streams its replies through client.chat_completion().
	client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

	# ================================================================================
	# ENHANCED PRODUCT KNOWLEDGE BASE
	# ================================================================================

	# Maps each product category name to the lowercase keywords that signal it.
	# detect_product_category() scans the categories in insertion order and the
	# first category with a matching keyword wins, so the order of the entries
	# below acts as a priority order.
	PRODUCT_CATEGORIES = {
	"smartphones": ["phone", "iphone", "galaxy", "pixel", "smartphone", "android"],
	"laptops": ["laptop", "macbook", "thinkpad", "dell", "hp", "computer", "notebook"],
	"audio": ["headphones", "earbuds", "speaker", "airpods", "sony", "bose", "audio", "noise-cancelling", "wireless"],
	"gaming": ["gaming", "xbox", "playstation", "nintendo", "steam", "pc gaming"],
	"wearables": ["watch", "fitness", "tracker", "apple watch", "garmin", "fitbit"],
	"tablets": ["tablet", "ipad", "surface", "kindle", "android tablet"],
	"tv_display": ["tv", "monitor", "display", "oled", "qled", "4k", "screen"]
	}

	# Maps each query type name to the lowercase phrases that signal it.
	# detect_query_type() checks the "better than" style phrases first and then
	# walks this mapping in insertion order, returning the first type that matches.
	# Note that "better than" and "superior to" are listed under both
	# "comparison" and "better_than".
	QUERY_TYPES = {
	"comparison": ["vs", "versus", "compare", "difference", "better than", "superior to"],
	"better_than": ["better than", "superior to", "upgrade from", "improve on", "outperform"],
	"budget": ["budget", "cheap", "affordable", "under", "less than", "maximum"],
	"recommendation": ["best", "top", "recommend", "suggest", "good", "should"],
	"specific": ["specs", "specifications", "features", "details", "about"]
	}

	# Enhanced product knowledge for accurate comparisons.
	# Keys are lowercase product names; detect_product_category() looks for them
	# as substrings of the lowercased user message and reads the "category" field.
	# The remaining fields differ per entry ("type"/"key_features" vs "screen")
	# and are not read by the code visible in this file.
	# NOTE(review): "price" is presumably in US dollars - confirm.
	PRODUCT_KNOWLEDGE = {
	"sony wh-1000xm4": {
	"category": "audio",
	"type": "over-ear headphones",
	"price": 298,
	"key_features": ["noise cancellation", "30hr battery", "touch controls"],
	"better_alternatives": [
	"bose quietcomfort ultra",
	"apple airpods max",
	"sennheiser momentum 4"
	]
	},
	"iphone 12": {
	"category": "smartphones",
	"price": 599,
	"screen": "6.1 inch",
	"better_alternatives": ["iphone 15", "iphone 14", "samsung galaxy s24"]
	}
	}

	# ================================================================================
	# ENHANCED NLP FUNCTIONS
	# ================================================================================

	def detect_product_category(message: str) -> str:
	    """Return the product category a message refers to, or ``"general"``.

	    Detection runs in three steps, most specific first:

	    1. A known product name from ``PRODUCT_KNOWLEDGE`` found in the message
	       yields that product's category.
	    2. A category keyword that starts on a word boundary yields its category.
	       This keeps "headphones" from being classified as "smartphones" merely
	       because it contains the substring "phone".
	    3. Plain substring matching is the fallback, so compound words such as
	       "smartwatch" still resolve to a category.

	    Args:
	        message: Raw user message; matching is case-insensitive.

	    Returns:
	        The matched category name, or ``"general"`` when nothing matches or
	        the message is empty.
	    """
	    if not message:
	        return "general"

	    message_lower = message.lower()

	    # Check for specific product mentions first
	    for product, details in PRODUCT_KNOWLEDGE.items():
	        if product in message_lower:
	            return details["category"]

	    # Prefer keywords that begin a word; categories are tried in dict order.
	    for category, keywords in PRODUCT_CATEGORIES.items():
	        if any(
	            re.search(r"\b" + re.escape(keyword), message_lower)
	            for keyword in keywords
	        ):
	            return category

	    # Fall back to substring matching for keywords embedded in longer words.
	    for category, keywords in PRODUCT_CATEGORIES.items():
	        if any(keyword in message_lower for keyword in keywords):
	            return category

	    return "general"

	def detect_query_type(message: str) -> str:
	    """Classify a message as one of the ``QUERY_TYPES`` keys, or ``"general"``.

	    "Better than" style phrases are checked first because they are the most
	    specific; the remaining types are then tried in ``QUERY_TYPES`` order and
	    the first match wins.

	    Keywords must start on a word boundary, so "top" no longer fires on
	    "laptop" and "vs" no longer fires on "tvs". Longer word forms such as
	    "recommendations" or "compared" still match their keyword.

	    Args:
	        message: Raw user message; matching is case-insensitive.

	    Returns:
	        The detected query type name, or ``"general"`` when nothing matches
	        or the message is empty.
	    """
	    if not message:
	        return "general"

	    message_lower = message.lower()

	    def mentions(keyword):
	        # True when the keyword occurs at the start of a word in the message.
	        return re.search(r"\b" + re.escape(keyword), message_lower) is not None

	    # Check for "better than" queries first (most specific). The phrases come
	    # from QUERY_TYPES so this check cannot drift from the table.
	    if any(mentions(phrase) for phrase in QUERY_TYPES.get("better_than", ())):
	        return "better_than"

	    # Check other query types
	    for query_type, keywords in QUERY_TYPES.items():
	        if any(mentions(keyword) for keyword in keywords):
	            return query_type

	    return "general"

	def extract_reference_product(message: str) -> Optional[str]:
	    """Extract the reference product from a "better than X" style query.

	    The lead-in phrases are tried in a fixed priority order; for the first one
	    found, the text following it is taken as the product name.

	    Args:
	        message: Raw user message; matching is case-insensitive.

	    Returns:
	        The lowercased product name with a trailing generic noun
	        ("headphones", "laptop", "phone", "watch") removed, or ``None`` when
	        the message is empty or contains no lead-in phrase followed by text.
	    """
	    if not message:
	        return None

	    message_lower = message.lower()

	    # The product name runs until a question mark, a line break, the end of
	    # the message, or a sentence-ending period. A period followed directly by
	    # a non-space character is kept so names like "macbook air 13.6" survive.
	    terminator = r'(?:[?\n]|$|\.(?=\s|$))'

	    lead_ins = (
	        "better than",
	        "superior to",
	        "upgrade from",
	        "outperform",
	        "improve on",
	    )

	    for lead_in in lead_ins:
	        match = re.search(re.escape(lead_in) + r' (.+?)' + terminator, message_lower)
	        if match:
	            product = match.group(1).strip()
	            # Clean up common suffixes
	            product = re.sub(r'\s+(headphones|laptop|phone|watch)$', '', product)
	            return product or None

	    return None

	def extract_budget(message: str) -> Optional[str]:
	    """Extract a spending limit from phrases such as "under $1,500".

	    Recognized forms are "under N", "less than N", "maximum N",
	    "budget of N", "N or less" and "up to N", tried in that order. The dollar
	    sign is optional, thousands separators are removed, and an optional cents
	    part (".99") is kept.

	    Args:
	        message: Raw user message; matching is case-insensitive.

	    Returns:
	        The amount formatted as ``"$<amount>"`` (for example ``"$1500"``), or
	        ``None`` when the message is empty or states no budget.
	    """
	    if not message:
	        return None

	    # Digits with optional ",ddd" groups and an optional one/two digit fraction.
	    amount = r'(\d+(?:,\d{3})*(?:\.\d{1,2})?)'
	    budget_patterns = [
	        r'under \$?' + amount,
	        r'less than \$?' + amount,
	        r'maximum \$?' + amount,
	        r'budget of \$?' + amount,
	        r'\$?' + amount + r' or less',
	        r'up to \$?' + amount,
	    ]

	    # Lowercase once instead of once per pattern.
	    message_lower = message.lower()

	    for pattern in budget_patterns:
	        match = re.search(pattern, message_lower)
	        if match:
	            return "$" + match.group(1).replace(',', '')

	    return None

	def clean_message(text: str) -> str:
	    """Strip chat-template artifacts from model or user text and tidy spacing.

	    Steps, in order:

	    1. Remove echoed prompts of the form "User query: ...?" / "User: ...?".
	       This runs before the token removal below; otherwise the "User query:"
	       prefix would already be gone and these patterns could never match.
	    2. Remove known role markers and template tokens, ignoring case.
	    3. Insert a missing space after sentence punctuation, a closing
	       parenthesis or a colon when an uppercase letter follows directly.
	    4. Collapse runs of spaces and runs of three or more newlines.

	    Lowercase-to-uppercase and digit-to-uppercase transitions are deliberately
	    left alone so product names such as "iPhone", "MacBook", "WH-1000XM4" and
	    "144Hz" are preserved.

	    Args:
	        text: Text to clean; ``None`` or empty input is allowed.

	    Returns:
	        The cleaned text with surrounding whitespace removed, or ``""`` for
	        empty input.
	    """
	    if not text:
	        return ""

	    # Remove ALL problematic training tokens and artifacts
	    bad_tokens = [
	        "[USER]", "[ASSISTANT]", "[/USER]", "[/ASSISTANT]",
	        "[ASS]", "[/ASS]", "[INST]", "[/INST]",
	        "<|user|>", "<|assistant|>", "<|system|>", "<|im_start|>", "<|im_end|>",
	        "Human:", "AI:", "Assistant:", "User query:", "User:", "Bot:",
	        "Product A:", "Product B:", "[USER", "[ASSISTANT", "USER:", "ASSISTANT:",
	    ]

	    # Remove pattern of "User query: [question]" that appears in training data
	    text = re.sub(r'User query:.*?\?', '', text, flags=re.IGNORECASE)
	    text = re.sub(r'User:.*?\?', '', text, flags=re.IGNORECASE)

	    # Remove training artifacts case-insensitively. Longest tokens come first
	    # in the alternation so "[USER]" is consumed whole rather than as "[USER".
	    token_pattern = "|".join(
	        re.escape(token) for token in sorted(bad_tokens, key=len, reverse=True)
	    )
	    text = re.sub(token_pattern, "", text, flags=re.IGNORECASE)

	    # Fix spacing issues after punctuation
	    text = re.sub(r'([.!?])([A-Z])', r'\1 \2', text)  # Add space after sentence endings
	    text = re.sub(r'(\))([A-Z])', r'\1 \2', text)  # Add space after closing parentheses
	    text = re.sub(r'(:)([A-Z])', r'\1 \2', text)  # Add space after colons

	    # Normalize excessive whitespace but preserve intentional formatting
	    text = re.sub(r' +', ' ', text)  # Multiple spaces to single space
	    text = re.sub(r'\n(?:[ \t]*\n){2,}', '\n\n', text)  # 3+ newlines to double newline

	    return text.strip()

	def create_enhanced_prompt(message: str, category: str, query_type: str, budget: str = None, reference_product: str = None) -> str:
	    """Assemble the user-turn prompt sent to the model.

	    The prompt is built from three parts followed by the user's own text:
	    fixed guardrail instructions, a task section (an upgrade-recommendation
	    template when a "better_than" query names a reference product, otherwise
	    a generic request for the category and query type), and an optional
	    budget line.

	    Args:
	        message: The user's original question.
	        category: Product category the question is about.
	        query_type: Detected query type, e.g. "better_than".
	        budget: Formatted budget such as "$500"; omitted from the prompt when falsy.
	        reference_product: Product the user wants to improve on, if any.

	    Returns:
	        The complete prompt string ending in "User query: <message>".
	    """
	    # Guardrails shared by every prompt to curb common model mistakes.
	    guardrails = """
	CRITICAL INSTRUCTIONS:
	- Stay focused on the user's specific question - do not change topics
	- Use accurate product specifications - no made-up numbers
	- For headphones, focus on: sound quality, noise cancellation, battery life, comfort, price
	- For phones, focus on: camera, battery, display, performance, price
	- For laptops, focus on: processor, RAM, storage, battery, graphics, price
	- Never use "Display:" for audio features - use "Audio Features:" instead
	- Include specific model names and current pricing when possible
	- If unsure about specs, say "verify current specifications" rather than guess
	"""

	    # The upgrade template needs both the query type and a product to compare to.
	    wants_upgrade = query_type == "better_than" and bool(reference_product)

	    if not wants_upgrade:
	        task_block = f"""
	Provide helpful {category} product information for this {query_type} query.
	Use bullet points for clear organization.
	Include specific model names and accurate specifications.
	"""
	    else:
	        task_block = f"""
	The user wants products BETTER than "{reference_product}".

	Requirements:
	1. Suggest 2-3 products that are genuinely superior or newer
	2. Explain WHY they are better (specific improvements)
	3. Include price comparison if relevant
	4. Focus on {category} products only

	Format response as:
	Product Name 1
	• Key improvement over {reference_product}
	• Specifications
	• Price

	Product Name 2
	• Key improvement over {reference_product}
	• Specifications
	• Price
	"""

	    budget_line = ""
	    if budget:
	        budget_line = f"\nBudget constraint: Keep recommendations within {budget}"

	    instructions = f"{guardrails}\n{task_block}{budget_line}"
	    return f"{instructions}\n\nUser query: {message}"

	def _history_to_exchanges(history):
	    """Normalize chat history into a list of ``(user_text, assistant_text)`` pairs.

	    Accepts both history shapes: a list of ``{"role", "content"}`` dicts
	    (used when the chat interface is created with ``type="messages"``) and a
	    list of ``[user, assistant]`` pairs. Non-text content is ignored, and a
	    user message without a reply is paired with ``None``.
	    """
	    exchanges = []
	    pending_user = None

	    for item in history:
	        if isinstance(item, dict):
	            content = item.get("content")
	            if not isinstance(content, str):
	                continue  # skip non-text payloads
	            role = item.get("role")
	            if role == "user":
	                if pending_user is not None:
	                    exchanges.append((pending_user, None))
	                pending_user = content
	            elif role == "assistant" and pending_user is not None:
	                exchanges.append((pending_user, content))
	                pending_user = None
	        elif isinstance(item, (list, tuple)) and len(item) >= 2:
	            human_msg = item[0] if isinstance(item[0], str) else None
	            ai_msg = item[1] if isinstance(item[1], str) else None
	            exchanges.append((human_msg, ai_msg))

	    if pending_user is not None:
	        exchanges.append((pending_user, None))

	    return exchanges


	def maintain_conversation_context(history, message):
	    """Select recent, on-topic history to send along with the new message.

	    Only the last three exchanges are considered. An exchange is kept when its
	    user message is in the same product category as the new message, or when
	    either of the two is "general". A general follow-up such as
	    "which one is cheaper?" therefore keeps the context it refers to.

	    Args:
	        history: Prior conversation, either as role/content dicts or as
	            ``[user, assistant]`` pairs; may be empty or ``None``.
	        message: The new user message.

	    Returns:
	        A list of ``{"role": ..., "content": ...}`` dicts in chronological
	        order with cleaned text; empty when there is no usable history.
	    """
	    if not history:
	        return []

	    # Get current query category to maintain topic focus
	    current_category = detect_product_category(message)

	    relevant_history = []
	    for human_msg, ai_msg in _history_to_exchanges(history)[-3:]:  # Last 3 exchanges
	        if not human_msg:
	            continue

	        prev_category = detect_product_category(human_msg)
	        on_topic = (
	            current_category == "general"
	            or prev_category == current_category
	            or prev_category == "general"
	        )
	        if not on_topic:
	            continue

	        relevant_history.append({
	            "role": "user",
	            "content": clean_message(human_msg)
	        })
	        if ai_msg:
	            relevant_history.append({
	                "role": "assistant",
	                "content": clean_message(ai_msg)
	            })

	    return relevant_history

	# ================================================================================
	# MAIN CONVERSATION HANDLER
	# ================================================================================

	def respond(message, history, system_message):
	    """Stream the assistant's reply for one chat turn.

	    A bare greeting at the start of a conversation gets a fixed welcome
	    message. Otherwise the message is analyzed (category, query type, budget,
	    reference product), a prompt is assembled, and the model's streamed reply
	    is yielded after each received token as the cleaned text so far.

	    Args:
	        message: The user's new message.
	        history: Prior conversation as supplied by the chat interface.
	        system_message: Persona text from the UI's system configuration box.
	            It opens the system prompt; a built-in default is used when blank.

	    Yields:
	        The cleaned response accumulated so far, or a single error message if
	        the model call fails.
	    """

	    # Enhanced greeting
	    if not history and (message or "").lower().strip() in ["hi", "hello", "hey", "demo", "start"]:
	        yield """# 🛍️ Welcome to ShopSmart AI!

	Enhanced HCI Project Demonstration | University of the Cumberlands

	I'm your intelligent shopping assistant with advanced natural language understanding and accurate product knowledge.

	## 🎯 Enhanced Capabilities:

	### 🔍 Smart Product Research
	• Accurate product specifications and current pricing
	• Intelligent "better than" recommendations
	• Category-focused expertise across major product types

	### ⚖️ Advanced Comparisons
	• Side-by-side analysis with real specifications
	• "Upgrade path" recommendations for existing products
	• Value analysis based on your specific needs

	### 🧠 Conversation Intelligence
	• Maintains topic focus throughout conversation
	• Understands follow-up questions in context
	• Prevents topic jumping and specification errors

	## 📱 Try These Enhanced Queries:
	• "Show me headphones better than Sony WH-1000XM4"
	• "Find gaming laptops under $1500 with RTX 4060"
	• "Compare iPhone 15 Pro vs Samsung Galaxy S24 Ultra cameras"
	• "Upgrade options from Apple Watch Series 8"

	What product research can I help you with today?"""
	        return

	    # Enhanced query analysis
	    category = detect_product_category(message)
	    query_type = detect_query_type(message)
	    budget = extract_budget(message)
	    reference_product = extract_reference_product(message) if query_type == "better_than" else None

	    # Create focused prompt
	    enhanced_query = create_enhanced_prompt(message, category, query_type, budget, reference_product)
	    conversation_context = maintain_conversation_context(history, message)

	    # Honor the persona configured in the UI; fall back to the default line.
	    persona = (system_message or "").strip() or "You are ShopSmart AI, a helpful shopping assistant."

	    # Much stronger system prompt to prevent training data regurgitation
	    system_prompt = f"""{persona}

	CRITICAL RULES:
	- Answer ONLY the user's current question
	- Do NOT include training examples, sample conversations, or fake dialogues
	- Do NOT use tokens like [ASS], [USER], [ASSISTANT], or "User query:"
	- Do NOT generate multiple questions and answers
	- Give ONE direct response to the current question only
	- Use natural, conversational language
	- Focus on {category} products for this {query_type} query

	Current user question: {message}
	Provide a single, direct answer about {category} products."""

	    messages = [
	        {"role": "system", "content": system_prompt}
	    ] + conversation_context + [
	        {"role": "user", "content": enhanced_query}
	    ]

	    response = ""
	    try:
	        for chunk in client.chat_completion(
	            messages,
	            stream=True,
	            max_tokens=300,  # Reduced to prevent long training examples
	            temperature=0.4,  # Balanced for natural responses
	            top_p=0.8,  # More focused responses
	            stop=["[", "User:", "Human:", "Assistant:", "[ASS]", "[USER]", "[INST]"]  # Stop at training tokens
	        ):
	            # Skip chunks that carry no choices instead of indexing into them.
	            if not chunk.choices:
	                continue

	            token = chunk.choices[0].delta.content
	            if not token:
	                continue

	            # Add token directly without aggressive cleaning during streaming
	            response += token

	            # Clean the accumulated response for display
	            cleaned_response = clean_message(response)

	            # Prevent overly long responses
	            if len(cleaned_response) > 3500:
	                cleaned_response += "\n\n✨ Need more specific details? Just ask!"
	                yield cleaned_response
	                return

	            yield cleaned_response

	    except Exception as e:
	        # Deliberate catch-all at the UI boundary: show a simple error message
	        # without fallback content rather than crash the chat.
	        yield f"❌ AI model temporarily unavailable. Error: {str(e)}\n\nPlease try again in a moment when the model servers recover."

	# ================================================================================
	# ENHANCED USER INTERFACE
	# ================================================================================

	# Chat UI definition. respond() is the chat handler; the textbox listed under
	# additional_inputs is passed to it as its third argument (system_message).
	# The description uses plain "|" separators: "\|" is an invalid escape
	# sequence in a regular Python string literal.
	demo = gr.ChatInterface(
	    respond,
	    type="messages",
	    title="🛍️ ShopSmart AI - Enhanced Conversational Shopping Assistant",
	    description="""
	Enhanced Human-Computer Interaction Project | MSAI-631-M20
	University of the Cumberlands | Team: Saswat Sahoo & Shashank Lakkimsetty

	---

	### 🎓 Enhanced Research Features

	This enhanced version addresses real-world conversational AI challenges:

	🔧 Problem-Solving Improvements:
	• Accurate Specifications - No more made-up product specs or wrong categories
	• Topic Coherence - Prevents conversation jumping between unrelated products
	• "Better Than" Intelligence - Understands upgrade recommendations correctly
	• Error Prevention - Eliminates training token artifacts and formatting issues

	🧠 Advanced AI Processing:
	• Context-Aware Filtering - Maintains relevant conversation history
	• Reference Product Extraction - Identifies products for comparison/upgrade
	• Enhanced Prompt Engineering - Prevents common AI model errors
	• Robust Error Handling - Professional fallbacks with accurate demo content

	### 🔬 Technical Implementation
	Enhanced NLP Pipeline | Improved Context Management | Error-Resistant Design
	""",

	    examples=[
	        ["Show me headphones better than Sony WH-1000XM4"],
	        ["Find gaming laptops under $1500 with RTX 4060 graphics"],
	        ["Compare iPhone 15 Pro vs Samsung Galaxy S24 Ultra cameras"],
	        ["What's a good upgrade from Apple Watch Series 8?"],
	        ["Recommend wireless earbuds better than AirPods Pro 2"],
	        ["Best gaming monitors under $800 with 144Hz refresh rate"]
	    ],

	    additional_inputs=[
	        gr.Textbox(
	            value="You are ShopSmart AI with enhanced accuracy and context management. Provide precise product recommendations with accurate specifications. Stay focused on the user's specific query and category.",
	            label="🔧 Enhanced System Configuration",
	            lines=3,
	            interactive=True
	        )
	    ],

	    cache_examples=False,
	    theme=gr.themes.Soft(),

	    css="""
	.gradio-container {
	font-family: 'Segoe UI', system-ui, sans-serif;
	max-width: 1200px;
	margin: auto;
	}
	.gr-button-primary {
	background: linear-gradient(45deg, #2563eb, #1d4ed8) !important;
	border: none !important;
	}
	"""
	)

	# Script entry point: print a startup banner, then serve the chat UI on all
	# network interfaces at port 7860 with error details shown in the browser.
	if __name__ == "__main__":
	    print("🚀 Starting ShopSmart AI - Enhanced Version")
	    print("🔧 Enhanced Features: Accurate specs, better context, no artifacts")
	    print("📚 Project: Enhanced Conversational AI for Product Discovery")
	    # Plain "|" here: "\|" is an invalid escape and would print a backslash.
	    print("🎓 University of the Cumberlands | MSAI-631-M20")

	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        show_error=True
	    )