"""
ShopSmart AI - Enhanced Conversational Shopping Assistant
Human-Computer Interaction Capstone Project

This application demonstrates advanced conversational AI for product discovery
with improved context management, specification accuracy, and intelligent comparisons.

Project: MSAI-631-M20 Human-Computer Interaction
Team: Saswat Sahoo & Shashank Lakkimsetty
Institution: University of the Cumberlands
Date: August 2025

Enhanced Features:
- Intelligent "better than" query handling
- Accurate product specifications
- Robust context management
- Improved conversation coherence
"""

import re
import time
from typing import Optional

import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the AI model client.
# One module-level InferenceClient bound to the "HuggingFaceH4/zephyr-7b-beta"
# model; respond() streams chat completions through it.
# NOTE(review): no token is passed here — presumably credentials, if needed,
# come from the environment; confirm for the deployment target.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# ================================================================================
# ENHANCED PRODUCT KNOWLEDGE BASE
# ================================================================================

# Maps each product category to the lowercase keywords that
# detect_product_category() looks for as substrings of the lowercased message.
# Declaration order is significant: when keywords from several categories are
# present, the category declared first here is the one returned.
PRODUCT_CATEGORIES = {
    "smartphones": ["phone", "iphone", "galaxy", "pixel", "smartphone", "android"],
    "laptops": ["laptop", "macbook", "thinkpad", "dell", "hp", "computer", "notebook"],
    "audio": ["headphones", "earbuds", "speaker", "airpods", "sony", "bose", "audio", "noise-cancelling", "wireless"],
    "gaming": ["gaming", "xbox", "playstation", "nintendo", "steam", "pc gaming"],
    "wearables": ["watch", "fitness", "tracker", "apple watch", "garmin", "fitbit"],
    "tablets": ["tablet", "ipad", "surface", "kindle", "android tablet"],
    "tv_display": ["tv", "monitor", "display", "oled", "qled", "4k", "screen"]
}

# Maps each query type to the lowercase trigger phrases checked by
# detect_query_type(). Types are tried in declaration order and the first one
# with a matching phrase wins, so "comparison" takes precedence over "budget",
# "recommendation" and "specific".
# NOTE: "better than" / "superior to" are listed under both "comparison" and
# "better_than"; detect_query_type() tests the better-than phrases before it
# walks this table, so those phrases resolve to "better_than".
QUERY_TYPES = {
    "comparison": ["vs", "versus", "compare", "difference", "better than", "superior to"],
    "better_than": ["better than", "superior to", "upgrade from", "improve on", "outperform"],
    "budget": ["budget", "cheap", "affordable", "under", "less than", "maximum"],
    "recommendation": ["best", "top", "recommend", "suggest", "good", "should"],
    "specific": ["specs", "specifications", "features", "details", "about"]
}

# Enhanced product knowledge for accurate comparisons.
# Keys are lowercase product names because detect_product_category() searches
# for them inside the lowercased user message. Of the fields below, only
# "category" is read by the code in this file; the remaining fields are
# descriptive data.
# NOTE(review): "price" looks like a USD list price — confirm before relying on it.
PRODUCT_KNOWLEDGE = {
    "sony wh-1000xm4": {
        "category": "audio",
        "type": "over-ear headphones",
        "price": 298,
        "key_features": ["noise cancellation", "30hr battery", "touch controls"],
        "better_alternatives": [
            "bose quietcomfort ultra",
            "apple airpods max", 
            "sennheiser momentum 4"
        ]
    },
    "iphone 12": {
        "category": "smartphones",
        "price": 599,
        "screen": "6.1 inch",
        "better_alternatives": ["iphone 15", "iphone 14", "samsung galaxy s24"]
    }
}

# ================================================================================
# ENHANCED NLP FUNCTIONS
# ================================================================================

def detect_product_category(message: str) -> str:
    """Classify a user message into one of the PRODUCT_CATEGORIES keys.

    Detection runs in two stages:

    1. If the message names a product listed in PRODUCT_KNOWLEDGE, that
       product's category is returned.
    2. Otherwise the message is scanned for PRODUCT_CATEGORIES keywords.
       Longer keywords claim their text first, so a short keyword that only
       occurs inside a longer one ("phone" inside "headphones", "android"
       inside "android tablet") does not decide the category. When several
       categories still match, the one declared first in PRODUCT_CATEGORIES
       wins.

    Args:
        message: Raw user text; matching is case-insensitive.

    Returns:
        A PRODUCT_CATEGORIES key, or "general" when nothing matches.
    """
    text = message.lower()

    # Stage 1: an explicit product mention is the most reliable signal.
    for product, details in PRODUCT_KNOWLEDGE.items():
        if product in text:
            return details["category"]

    # Stage 2: keyword matching, longest keyword first.
    ranked = sorted(
        (
            (keyword, category)
            for category, keywords in PRODUCT_CATEGORIES.items()
            for keyword in keywords
        ),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )
    matched = set()
    for keyword, category in ranked:
        if keyword in text:
            matched.add(category)
            # Blank out the claimed text so shorter keywords embedded in it
            # cannot match as well; "\0" never appears in a keyword.
            text = text.replace(keyword, "\0")

    # Declaration order breaks ties between the categories that matched.
    for category in PRODUCT_CATEGORIES:
        if category in matched:
            return category

    return "general"

def detect_query_type(message: str) -> str:
    """Classify the intent of a user message using the QUERY_TYPES table.

    A trigger phrase only counts when it starts at a word boundary, so
    fragments inside other words ("vs" in "tvs", "top" in "laptop", "under"
    in "thunder") are ignored, while inflected forms such as "recommended"
    or "cheaper" still match.

    "Better than" style phrases are checked before everything else because
    they are the most specific and overlap with the "comparison" phrases.

    Args:
        message: Raw user text; matching is case-insensitive.

    Returns:
        "better_than", another QUERY_TYPES key, or "general" when no phrase
        matches.
    """
    text = message.lower()

    def mentions(phrases) -> bool:
        # (?<!\w) anchors the phrase to the start of a word without requiring
        # the phrase itself to begin with a word character.
        return any(
            re.search(r"(?<!\w)" + re.escape(phrase), text) for phrase in phrases
        )

    # Most specific intent first; the phrase list is shared with QUERY_TYPES
    # so it cannot drift from the table (e.g. "improve on").
    if mentions(QUERY_TYPES.get("better_than", ())):
        return "better_than"

    # Remaining types in declaration order; first match wins.
    for query_type, phrases in QUERY_TYPES.items():
        if mentions(phrases):
            return query_type

    return "general"

def extract_reference_product(message: str) -> Optional[str]:
    """Extract the reference product from a "better than X" style query.

    The product is the text that follows one of the lead-in phrases
    ("better than", "superior to", "upgrade from", "outperform",
    "improve on") up to the first "?", sentence-ending period, or end of
    line. Lead-ins are tried in that order, not by position in the message.

    Args:
        message: Raw user text; matching is case-insensitive.

    Returns:
        The lowercased product text with a trailing generic noun
        ("headphones", "laptop", "phone", "watch") removed, or None when no
        lead-in phrase with a non-empty product is found.
    """
    text = message.lower()

    # A period followed by a digit belongs to a number ("ipad pro 12.9") and
    # must not end the product name. With re.MULTILINE, "$" also ends the
    # name at a line break, so multi-line messages are handled.
    terminator = r'(?:\?|\.(?!\d)|$)'
    lead_ins = ("better than", "superior to", "upgrade from", "outperform", "improve on")

    for lead_in in lead_ins:
        match = re.search(rf'{lead_in} (.+?){terminator}', text, re.MULTILINE)
        if not match:
            continue
        product = match.group(1).strip()
        # Clean up common suffixes
        product = re.sub(r'\s+(headphones|laptop|phone|watch)$', '', product)
        if product:
            return product

    return None

def extract_budget(message: str) -> Optional[str]:
    """Extract a spending limit from phrases such as "under $1500".

    Recognised forms, tried in this order: "under N", "less than N",
    "maximum N", "budget of N", "N or less", "up to N". N may carry a
    leading "$", thousands separators ("1,500"), a decimal part ("99.99")
    and a "k" suffix meaning thousands ("2k", "1.5k").

    Args:
        message: Raw user text; matching is case-insensitive.

    Returns:
        The amount formatted as "$<number>" with separators removed and any
        "k" suffix expanded, or None when no budget phrase is present.
    """
    text = message.lower()

    # Group 1: the number; group 2: an optional "k" that ends the word, so
    # "2kg" is not read as two thousand.
    amount = r'\$?(\d+(?:,\d{3})*(?:\.\d+)?)(k\b)?'
    templates = (
        'under {}',
        'less than {}',
        'maximum {}',
        'budget of {}',
        '{} or less',
        'up to {}',
    )

    for template in templates:
        match = re.search(template.format(amount), text)
        if not match:
            continue
        number, thousands = match.groups()
        number = number.replace(',', '')
        if thousands:
            # Rounding to whole dollars keeps float noise out of the result.
            number = str(round(float(number) * 1000))
        return f"${number}"

    return None

def clean_message(text: str) -> str:
    """Strip chat-template artifacts from text and tidy its spacing.

    Steps, in order:
      1. Remove known role/template tokens as written, in lower case and in
         upper case.
      2. Remove any remaining "User query: ...?" / "User: ...?" echoes.
      3. Insert a missing space after sentence punctuation, a closing
         parenthesis or a colon when an uppercase letter follows directly.
      4. Collapse runs of spaces and runs of three or more blank lines.

    Spaces are deliberately NOT inserted between a digit and an uppercase
    letter or between a lowercase and an uppercase letter: doing so corrupts
    product names and units ("iPhone", "MacBook", "WH-1000XM4", "4K", "16GB").

    Args:
        text: Text to clean; may be None or empty.

    Returns:
        The cleaned text with surrounding whitespace stripped, or "" for
        falsy input.
    """
    if not text:
        return ""

    # Role markers and template tokens that leak from the model's chat format.
    bad_tokens = [
        "[USER]", "[ASSISTANT]", "[/USER]", "[/ASSISTANT]", 
        "[ASS]", "[/ASS]", "[INST]", "[/INST]",
        "<|user|>", "<|assistant|>", "<|system|>", "<|im_start|>", "<|im_end|>",
        "Human:", "AI:", "Assistant:", "User query:", "User:", "Bot:",
        "Product A:", "Product B:", "[USER", "[ASSISTANT", "USER:", "ASSISTANT:"
    ]

    # Remove each token in its listed, lower-case and upper-case spellings.
    for token in bad_tokens:
        for variant in (token, token.lower(), token.upper()):
            text = text.replace(variant, "")

    # Remove echoed "User query: [question]?" patterns in any other casing.
    text = re.sub(r'User query:.*?\?', '', text, flags=re.IGNORECASE)
    text = re.sub(r'User:.*?\?', '', text, flags=re.IGNORECASE)

    # Re-insert spaces lost after punctuation; never split inside a word or
    # model number (see docstring).
    text = re.sub(r'([.!?])([A-Z])', r'\1 \2', text)  # after sentence endings
    text = re.sub(r'(\))([A-Z])', r'\1 \2', text)     # after closing parentheses
    text = re.sub(r'(:)([A-Z])', r'\1 \2', text)      # after colons

    # Normalize excessive whitespace but preserve intentional formatting.
    text = re.sub(r' +', ' ', text)                # multiple spaces -> one
    text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)  # 3+ newlines -> blank line

    return text.strip()

def create_enhanced_prompt(message: str, category: str, query_type: str, budget: str = None, reference_product: str = None) -> str:
    """Assemble the instruction-laden user prompt for one query.

    The prompt is built from four parts, in order: fixed guardrail
    instructions, a task section (an upgrade-recommendation template when the
    query is a "better_than" query with a known reference product, otherwise
    a generic product-information request), an optional budget line, and the
    user's original message prefixed with "User query:".

    Args:
        message: The user's original text.
        category: Product category to focus on.
        query_type: Detected query type.
        budget: Budget string such as "$1500"; omitted from the prompt when falsy.
        reference_product: Product the user wants to improve on, if any.

    Returns:
        The complete prompt string.
    """
    # Guardrails shared by every prompt.
    guardrails = """
    CRITICAL INSTRUCTIONS:
    - Stay focused on the user's specific question - do not change topics
    - Use accurate product specifications - no made-up numbers
    - For headphones, focus on: sound quality, noise cancellation, battery life, comfort, price
    - For phones, focus on: camera, battery, display, performance, price
    - For laptops, focus on: processor, RAM, storage, battery, graphics, price
    - Never use "Display:" for audio features - use "Audio Features:" instead
    - Include specific model names and current pricing when possible
    - If unsure about specs, say "verify current specifications" rather than guess
    """

    # Task section: generic request unless this is an upgrade query with a
    # usable reference product.
    if not (query_type == "better_than" and reference_product):
        task = f"""
        Provide helpful {category} product information for this {query_type} query.
        Use bullet points for clear organization.
        Include specific model names and accurate specifications.
        """
    else:
        task = f"""
        The user wants products BETTER than "{reference_product}".
        
        Requirements:
        1. Suggest 2-3 products that are genuinely superior or newer
        2. Explain WHY they are better (specific improvements)
        3. Include price comparison if relevant
        4. Focus on {category} products only
        
        Format response as:
        **Product Name 1**
        • Key improvement over {reference_product}
        • Specifications
        • Price
        
        **Product Name 2** 
        • Key improvement over {reference_product}
        • Specifications  
        • Price
        """

    # Optional budget line.
    if budget:
        budget_note = f"\nBudget constraint: Keep recommendations within {budget}"
    else:
        budget_note = ""

    sections = (guardrails, "\n", task, budget_note, f"\n\nUser query: {message}")
    return "".join(sections)

def maintain_conversation_context(history, message):
    """Select recent, on-topic exchanges to send to the model as context.

    Only the last three user/assistant exchanges are considered. An exchange
    is kept when its user message falls in the same product category as the
    current message, or when either of the two is uncategorised ("general").
    The latter keeps context for follow-ups such as "which one is cheaper?"
    that name no product themselves.

    Args:
        history: Chat history in either Gradio format: a list of
            {"role", "content"} dicts (as used with type="messages") or a
            list of (user, assistant) pairs.
        message: The current user message.

    Returns:
        A list of {"role", "content"} dicts with cleaned text, oldest first;
        empty when there is no usable history.
    """
    if not history:
        return []

    # Get current query category to maintain topic focus
    current_category = detect_product_category(message)

    relevant_history = []
    for human_msg, ai_msg in _pair_exchanges(history)[-3:]:  # Last 3 exchanges
        if not human_msg:
            continue

        prev_category = detect_product_category(human_msg)
        on_topic = (
            prev_category == current_category
            or prev_category == "general"
            or current_category == "general"
        )
        if not on_topic:
            continue

        relevant_history.append({
            "role": "user",
            "content": clean_message(human_msg)
        })
        if ai_msg:
            relevant_history.append({
                "role": "assistant",
                "content": clean_message(ai_msg)
            })

    return relevant_history


def _pair_exchanges(history):
    """Normalize chat history into a list of [user_text, assistant_text] pairs.

    Accepts role/content dicts or (user, assistant) pairs. Non-text content
    is ignored; assistant_text is None when a user message has no text reply.
    """
    exchanges = []
    for item in history:
        if isinstance(item, dict):
            role, content = item.get("role"), item.get("content")
            if not isinstance(content, str):
                continue
            if role == "user":
                exchanges.append([content, None])
            elif role == "assistant" and exchanges:
                # Several assistant messages may answer one user message.
                previous = exchanges[-1][1]
                exchanges[-1][1] = content if previous is None else f"{previous}\n\n{content}"
        elif isinstance(item, (list, tuple)) and len(item) >= 2:
            human_msg = item[0] if isinstance(item[0], str) else None
            ai_msg = item[1] if isinstance(item[1], str) else None
            exchanges.append([human_msg, ai_msg])
    return exchanges

# ================================================================================
# MAIN CONVERSATION HANDLER
# ================================================================================

def respond(message, history, system_message):
    """Stream the assistant's reply for one chat turn.

    This is a generator: every yielded string is the full reply so far.

    Flow:
      1. On the first turn, a bare greeting ("hi", "hello", "hey", "demo",
         "start") returns a canned welcome message without calling the model.
      2. Otherwise the message is analysed (category, query type, budget,
         reference product), a focused prompt and filtered history are built,
         and the reply is streamed from the module-level ``client``.

    Args:
        message: The current user message.
        history: Prior conversation as supplied by the chat UI.
        system_message: Persona text from the UI's system-configuration box.
            It opens the system prompt; a built-in persona is used when it is
            empty. The critical rules are always appended.

    Yields:
        The cleaned reply accumulated so far, or a single error / fallback
        message when the model call fails or produces no text.
    """

    # Enhanced greeting
    if not history and message.lower().strip() in ["hi", "hello", "hey", "demo", "start"]:
        yield """# 🛍️ Welcome to ShopSmart AI!

**Enhanced HCI Project Demonstration | University of the Cumberlands**

I'm your intelligent shopping assistant with advanced natural language understanding and accurate product knowledge.

## 🎯 **Enhanced Capabilities:**

### **🔍 Smart Product Research**
• Accurate product specifications and current pricing
• Intelligent "better than" recommendations
• Category-focused expertise across major product types

### **⚖️ Advanced Comparisons**  
• Side-by-side analysis with real specifications
• "Upgrade path" recommendations for existing products
• Value analysis based on your specific needs

### **🧠 Conversation Intelligence**
• Maintains topic focus throughout conversation
• Understands follow-up questions in context
• Prevents topic jumping and specification errors

## 📱 **Try These Enhanced Queries:**
• *"Show me headphones better than Sony WH-1000XM4"*
• *"Find gaming laptops under $1500 with RTX 4060"*
• *"Compare iPhone 15 Pro vs Samsung Galaxy S24 Ultra cameras"*
• *"Upgrade options from Apple Watch Series 8"*

**What product research can I help you with today?**"""
        return

    # Enhanced query analysis
    category = detect_product_category(message)
    query_type = detect_query_type(message)
    budget = extract_budget(message)
    reference_product = extract_reference_product(message) if query_type == "better_than" else None

    # Create focused prompt
    enhanced_query = create_enhanced_prompt(message, category, query_type, budget, reference_product)
    conversation_context = maintain_conversation_context(history, message)

    # The persona comes from the UI textbox so that editing it has an effect;
    # the rules below stay fixed to prevent training data regurgitation.
    persona = (system_message or "").strip() or "You are ShopSmart AI, a helpful shopping assistant."

    system_prompt = f"""{persona}

CRITICAL RULES:
- Answer ONLY the user's current question
- Do NOT include training examples, sample conversations, or fake dialogues
- Do NOT use tokens like [ASS], [USER], [ASSISTANT], or "User query:"
- Do NOT generate multiple questions and answers
- Give ONE direct response to the current question only
- Use natural, conversational language
- Focus on {category} products for this {query_type} query

Current user question: {message}
Provide a single, direct answer about {category} products."""

    messages = [
        {"role": "system", "content": system_prompt}
    ] + conversation_context + [
        {"role": "user", "content": enhanced_query}
    ]

    response = ""
    cleaned_response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            stream=True,
            max_tokens=300,        # Reduced to prevent long training examples
            temperature=0.4,       # Balanced for natural responses
            top_p=0.8,            # More focused responses
            # The client documents at most four stop strings. "[" already
            # covers every bracketed token ("[ASS]", "[USER]", "[INST]").
            stop=["[", "User:", "Human:", "Assistant:"]
        ):
            # Some stream chunks may carry no choices or no text; skip them.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if not token:
                continue

            # Accumulate raw tokens; clean the whole text for display so that
            # artifacts split across tokens are still caught.
            response += token
            cleaned_response = clean_message(response)

            # Prevent overly long responses
            if len(cleaned_response) > 3500:
                cleaned_response += "\n\n✨ *Need more specific details? Just ask!*"
                yield cleaned_response
                return

            yield cleaned_response

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        yield f"❌ AI model temporarily unavailable. Error: {str(e)}\n\nPlease try again in a moment when the model servers recover."
        return

    # The model can stop before producing any visible text (for example when
    # a stop string is generated first); never leave the reply empty.
    if not cleaned_response:
        yield "I couldn't generate a response for that. Please try rephrasing your question."

# ================================================================================
# ENHANCED USER INTERFACE
# ================================================================================

# Chat UI definition: wires respond() into a Gradio ChatInterface together
# with the page text, example prompts, one extra input and custom CSS.
demo = gr.ChatInterface(
    respond,
    # NOTE(review): "messages" selects Gradio's role/content history format
    # per the Gradio docs — verify against the installed Gradio version.
    type="messages",
    title="🛍️ ShopSmart AI - Enhanced Conversational Shopping Assistant",
    # Markdown shown under the title.
    # NOTE(review): the text below promises "Professional fallbacks with
    # accurate demo content", while respond()'s error path yields only an
    # error message — confirm which is intended.
    description="""
**Enhanced Human-Computer Interaction Project | MSAI-631-M20**  
**University of the Cumberlands | Team: Saswat Sahoo & Shashank Lakkimsetty**

---

### 🎓 **Enhanced Research Features**

This enhanced version addresses real-world conversational AI challenges:

**🔧 Problem-Solving Improvements:**
• **Accurate Specifications** - No more made-up product specs or wrong categories
• **Topic Coherence** - Prevents conversation jumping between unrelated products  
• **"Better Than" Intelligence** - Understands upgrade recommendations correctly
• **Error Prevention** - Eliminates training token artifacts and formatting issues

**🧠 Advanced AI Processing:**
• **Context-Aware Filtering** - Maintains relevant conversation history
• **Reference Product Extraction** - Identifies products for comparison/upgrade
• **Enhanced Prompt Engineering** - Prevents common AI model errors
• **Robust Error Handling** - Professional fallbacks with accurate demo content

### 🔬 **Technical Implementation**
**Enhanced NLP Pipeline** | **Improved Context Management** | **Error-Resistant Design**
    """,
    
    # Example prompts; each inner list holds the message text only.
    examples=[
        ["Show me headphones better than Sony WH-1000XM4"],
        ["Find gaming laptops under $1500 with RTX 4060 graphics"],
        ["Compare iPhone 15 Pro vs Samsung Galaxy S24 Ultra cameras"],
        ["What's a good upgrade from Apple Watch Series 8?"],
        ["Recommend wireless earbuds better than AirPods Pro 2"],
        ["Best gaming monitors under $800 with 144Hz refresh rate"]
    ],
    
    # Extra input component; presumably its value reaches respond() as the
    # third argument (system_message) — confirm against Gradio's docs.
    additional_inputs=[
        gr.Textbox(
            value="You are ShopSmart AI with enhanced accuracy and context management. Provide precise product recommendations with accurate specifications. Stay focused on the user's specific query and category.",
            label="🔧 Enhanced System Configuration",
            lines=3,
            interactive=True
        )
    ],
    
    # Example caching is switched off.
    cache_examples=False,
    theme=gr.themes.Soft(),
    
    # Page-level styling: container font/width and primary button colours.
    css="""
    .gradio-container {
        font-family: 'Segoe UI', system-ui, sans-serif;
        max-width: 1200px;
        margin: auto;
    }
    .gr-button-primary {
        background: linear-gradient(45deg, #2563eb, #1d4ed8) !important;
        border: none !important;
    }
    """
)

if __name__ == "__main__":
    # Startup banner, printed one line at a time.
    startup_banner = (
        "🚀 Starting ShopSmart AI - Enhanced Version",
        "🔧 Enhanced Features: Accurate specs, better context, no artifacts",
        "📚 Project: Enhanced Conversational AI for Product Discovery",
        "🎓 University of the Cumberlands | MSAI-631-M20",
    )
    for banner_line in startup_banner:
        print(banner_line)

    # Serve on all network interfaces at port 7860 with show_error enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)