Spaces:

MCP-1st-Birthday
/

rewardpilot-web-ui

Running

File size: 5,911 Bytes

ceed1e7
 
 
 
 
 
 
 
 
 
 
 
 
 
f7a5fec
ceed1e7

"""
ElevenLabs Voice Assistant for RewardPilot
Converts AI text responses to natural speech
"""

import os
import logging
from typing import Optional, List, Dict
import io

logger = logging.getLogger(__name__)

# Optional dependency guard: try to import the ElevenLabs SDK and record the
# outcome in ELEVENLABS_AVAILABLE. When the import fails, Voice, VoiceSettings
# and ElevenLabs are left undefined, so code below must check the flag before
# using any of them.
try:
    from elevenlabs.types import Voice, VoiceSettings
    from elevenlabs.client import ElevenLabs
    ELEVENLABS_AVAILABLE = True
except ImportError:
    ELEVENLABS_AVAILABLE = False
    logger.warning("ElevenLabs not installed. Voice features will be disabled.")


class VoiceAssistant:
    """Handle text-to-speech conversion using ElevenLabs.

    The assistant is ``enabled`` only when the ElevenLabs library imported
    successfully and the ``ELEVENLABS_API_KEY`` environment variable is set.
    When it is disabled, ``text_to_speech`` returns ``None`` instead of
    raising, so callers can treat voice output as optional.
    """

    # Voice used when the requested name is not present in ``self.voices``.
    DEFAULT_VOICE = "Rachel"
    # Longest text (in characters) sent to the API; longer input is truncated.
    MAX_TEXT_LENGTH = 2500

    def __init__(self):
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        self.enabled = ELEVENLABS_AVAILABLE and bool(self.api_key)
        # Always define the attribute so it exists even when disabled.
        self.client = None

        if self.enabled:
            self.client = ElevenLabs(api_key=self.api_key)
            logger.info("✅ ElevenLabs Voice Assistant initialized")
        else:
            logger.warning("⚠️ ElevenLabs disabled (missing API key or library)")

        # Voice configurations: display name -> ElevenLabs voice id + blurb.
        self.voices = {
            "Rachel": {
                "voice_id": "21m00Tcm4TlvDq8ikWAM",  # Professional female
                "description": "Clear, professional female voice",
            },
            "Adam": {
                "voice_id": "pNInz6obpgDQGcFmaJgB",  # Deep male
                "description": "Deep, authoritative male voice",
            },
            "Bella": {
                "voice_id": "EXAVITQu4vr4xnSDxMaL",  # Friendly female
                "description": "Warm, friendly female voice",
            },
            "Antoni": {
                "voice_id": "ErXwobaYiN019PkySvjV",  # Well-rounded male
                "description": "Well-rounded, versatile male voice",
            },
            "Elli": {
                "voice_id": "MF3mGyEYCl7XYWbV9V6O",  # Young female
                "description": "Young, energetic female voice",
            },
        }

    def text_to_speech(
        self,
        text: str,
        voice_name: str = "Rachel",
        model: str = "eleven_turbo_v2"
    ) -> Optional[bytes]:
        """
        Convert text to speech audio

        Args:
            text: Text to convert. Input longer than ``MAX_TEXT_LENGTH``
                characters is truncated and suffixed with "...".
            voice_name: Name of voice to use (a key of ``self.voices``).
                Unknown names fall back to ``DEFAULT_VOICE``.
            model: ElevenLabs model (eleven_turbo_v2 is fastest)

        Returns:
            Audio bytes, or None if the assistant is disabled, the text is
            empty/blank, or the API call failed (the failure is logged).
        """
        if not self.enabled:
            logger.warning("Voice generation skipped (ElevenLabs not enabled)")
            return None

        if not text or not text.strip():
            logger.warning("Empty text provided for voice generation")
            return None

        # Limit text length to avoid API errors
        if len(text) > self.MAX_TEXT_LENGTH:
            text = text[:self.MAX_TEXT_LENGTH] + "..."
            logger.info(
                "Text truncated to %d characters for voice generation",
                self.MAX_TEXT_LENGTH,
            )

        try:
            # Resolve the voice up front so the log line below names the
            # voice that is actually used, not merely the one requested.
            if voice_name not in self.voices:
                logger.warning(
                    "Unknown voice %r, falling back to %s",
                    voice_name,
                    self.DEFAULT_VOICE,
                )
                voice_name = self.DEFAULT_VOICE
            voice_id = self.voices[voice_name]["voice_id"]

            logger.info(
                "🎤 Generating speech with %s (%d chars)", voice_name, len(text)
            )

            audio = self._synthesize(text, voice_id, model)

            # The SDK normally yields audio chunks; tolerate a plain bytes
            # result too (joining bytes directly would raise TypeError).
            if isinstance(audio, (bytes, bytearray)):
                audio_bytes = bytes(audio)
            else:
                audio_bytes = b"".join(audio)

            logger.info("✅ Generated %d bytes of audio", len(audio_bytes))
            return audio_bytes

        except Exception as e:
            # Boundary handler: voice output is best-effort, so never raise,
            # but keep the traceback in the log for diagnosis.
            logger.error("❌ Voice generation failed: %s", e, exc_info=True)
            return None

    def _synthesize(self, text: str, voice_id: str, model: str):
        """Call the ElevenLabs API and return the audio (chunks or bytes)."""
        settings = VoiceSettings(
            stability=0.5,          # Balance between consistency and expressiveness
            similarity_boost=0.75,  # How closely to match the original voice
            style=0.0,              # Exaggeration level
            use_speaker_boost=True  # Enhance clarity
        )

        generate = getattr(self.client, "generate", None)
        if callable(generate):
            return generate(
                text=text,
                voice=Voice(voice_id=voice_id, settings=settings),
                model=model,
            )

        # Newer SDK releases no longer expose ``client.generate``; use the
        # ``text_to_speech.convert`` endpoint with the same parameters.
        return self.client.text_to_speech.convert(
            voice_id=voice_id,
            text=text,
            model_id=model,
            voice_settings=settings,
        )

    def get_voice_list(self) -> List[Dict[str, str]]:
        """Get list of available voices as ``{"name", "description"}`` dicts"""
        return [
            {"name": name, "description": config["description"]}
            for name, config in self.voices.items()
        ]

    @staticmethod
    def _to_amount(value) -> float:
        """Coerce a rewards value to float, using 0.0 when it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return 0.0

    @staticmethod
    def _first_sentence(text: str) -> str:
        """Return text up to the first sentence-ending period, stripped.

        A period only ends the sentence when it is followed by whitespace or
        the end of the text, so decimals such as "4.5" are not split.
        """
        last = len(text) - 1
        for index, char in enumerate(text):
            if char == '.' and (index == last or text[index + 1].isspace()):
                return text[:index].strip()
        return text.strip()

    def create_audio_summary(self, recommendation_data: dict) -> str:
        """
        Create a concise audio-friendly summary of recommendation

        Args:
            recommendation_data: Normalized recommendation data. Keys read:
                'recommended_card', 'rewards_earned', 'rewards_rate',
                'merchant', 'reasoning' and 'warnings'; all are optional.

        Returns:
            Audio-optimized text
        """
        card = recommendation_data.get('recommended_card', 'Unknown Card')
        rate = recommendation_data.get('rewards_rate', 'N/A')
        merchant = recommendation_data.get('merchant', 'this merchant')
        reasoning = recommendation_data.get('reasoning', '')
        # None or a numeric string would make the ':.2f' format raise.
        rewards = self._to_amount(recommendation_data.get('rewards_earned', 0))

        # Create concise, natural-sounding summary
        summary = f"For your purchase at {merchant}, I recommend using your {card}. "
        summary += f"You'll earn {rewards:.2f} dollars in rewards at a rate of {rate}. "

        # Add simplified reasoning (first sentence only); very short
        # fragments are skipped because they rarely stand alone when spoken.
        if reasoning and isinstance(reasoning, str):
            first_sentence = self._first_sentence(reasoning)
            if first_sentence and len(first_sentence) > 20:
                summary += f"{first_sentence}. "

        # Add the first warning if present. A bare string is treated as a
        # single warning rather than being indexed character by character.
        warnings = recommendation_data.get('warnings') or []
        if isinstance(warnings, str):
            warnings = [warnings]
        if isinstance(warnings, (list, tuple)) and warnings:
            summary += f"Important note: {warnings[0]}"

        return summary


# Global instance, created once when this module is imported. The constructor
# reads ELEVENLABS_API_KEY from the environment at that moment.
voice_assistant = VoiceAssistant()


def get_voice_assistant() -> VoiceAssistant:
    """Return the shared module-level ``VoiceAssistant`` instance.

    Every call returns the same object; no new assistant is constructed.
    """
    return voice_assistant