File size: 5,911 Bytes
ceed1e7
 
 
 
 
 
 
 
 
 
 
 
 
 
f7a5fec
ceed1e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""
ElevenLabs Voice Assistant for RewardPilot
Converts AI text responses to natural speech
"""

import os
import logging
from typing import Optional, List, Dict
import io

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Optional dependency guard: try to import the ElevenLabs SDK and record the
# outcome in ELEVENLABS_AVAILABLE. When the import fails, the names Voice,
# VoiceSettings and ElevenLabs are NOT defined in this module, so any code
# that uses them must check ELEVENLABS_AVAILABLE first.
try:
    from elevenlabs.types import Voice, VoiceSettings
    from elevenlabs.client import ElevenLabs
    ELEVENLABS_AVAILABLE = True
except ImportError:
    ELEVENLABS_AVAILABLE = False
    logger.warning("ElevenLabs not installed. Voice features will be disabled.")


class VoiceAssistant:
    """Handle text-to-speech conversion using ElevenLabs.

    The assistant is enabled only when the ``elevenlabs`` package imported
    successfully and the ``ELEVENLABS_API_KEY`` environment variable is set.
    When disabled, :meth:`text_to_speech` returns ``None`` instead of raising.

    Attributes:
        api_key: Value of ``ELEVENLABS_API_KEY`` (``None`` when unset).
        enabled: Whether speech generation can be attempted.
        client: ``ElevenLabs`` client when enabled, otherwise ``None``.
        voices: Mapping of voice name to its ``voice_id`` and ``description``.
    """

    # Voice used when the requested name is missing from ``self.voices``.
    DEFAULT_VOICE = "Rachel"

    # Maximum number of characters sent per request, ellipsis included.
    MAX_TEXT_CHARS = 2500

    def __init__(self):
        """Read the API key from the environment and build the client if possible."""
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        self.enabled = ELEVENLABS_AVAILABLE and bool(self.api_key)
        # Always define the attribute so a disabled assistant never raises
        # AttributeError when ``self.client`` is inspected.
        self.client = None

        if self.enabled:
            self.client = ElevenLabs(api_key=self.api_key)
            logger.info("✅ ElevenLabs Voice Assistant initialized")
        else:
            logger.warning("⚠️ ElevenLabs disabled (missing API key or library)")

        # Voice configurations
        self.voices = {
            "Rachel": {
                "voice_id": "21m00Tcm4TlvDq8ikWAM",  # Professional female
                "description": "Clear, professional female voice",
            },
            "Adam": {
                "voice_id": "pNInz6obpgDQGcFmaJgB",  # Deep male
                "description": "Deep, authoritative male voice",
            },
            "Bella": {
                "voice_id": "EXAVITQu4vr4xnSDxMaL",  # Friendly female
                "description": "Warm, friendly female voice",
            },
            "Antoni": {
                "voice_id": "ErXwobaYiN019PkySvjV",  # Well-rounded male
                "description": "Well-rounded, versatile male voice",
            },
            "Elli": {
                "voice_id": "MF3mGyEYCl7XYWbV9V6O",  # Young female
                "description": "Young, energetic female voice",
            },
        }

    def text_to_speech(
        self,
        text: str,
        voice_name: str = "Rachel",
        model: str = "eleven_turbo_v2"
    ) -> Optional[bytes]:
        """
        Convert text to speech audio

        Args:
            text: Text to convert. Text longer than ``MAX_TEXT_CHARS`` is
                truncated (with a trailing ``...``) to fit the limit.
            voice_name: Name of voice to use. Unknown names fall back to
                ``DEFAULT_VOICE``.
            model: ElevenLabs model (eleven_turbo_v2 is fastest)

        Returns:
            Audio bytes or None if failed (assistant disabled, empty text,
            or any error raised while generating the audio)
        """
        if not self.enabled:
            logger.warning("Voice generation skipped (ElevenLabs not enabled)")
            return None

        if not isinstance(text, str) or not text.strip():
            logger.warning("Empty text provided for voice generation")
            return None

        # Limit text length to avoid API errors. The ellipsis counts towards
        # the limit, so the text actually sent never exceeds MAX_TEXT_CHARS.
        limit = self.MAX_TEXT_CHARS
        if len(text) > limit:
            ellipsis = "..."
            text = text[:limit - len(ellipsis)] + ellipsis
            logger.info("Text truncated to %d characters for voice generation", limit)

        try:
            if voice_name not in self.voices:
                # Make the fallback visible so the log names the voice that
                # is really used, not the one that was requested.
                logger.warning(
                    "Unknown voice %r; falling back to %s",
                    voice_name,
                    self.DEFAULT_VOICE,
                )
                voice_name = self.DEFAULT_VOICE
            voice_id = self.voices[voice_name]["voice_id"]

            logger.info(f"🎤 Generating speech with {voice_name} ({len(text)} chars)")

            # Generate audio using ElevenLabs.
            # NOTE(review): `client.generate` appears to be the SDK 1.x entry
            # point; newer SDK releases expose `client.text_to_speech.convert`
            # instead - confirm against the pinned elevenlabs version.
            audio = self.client.generate(
                text=text,
                voice=Voice(
                    voice_id=voice_id,
                    settings=VoiceSettings(
                        stability=0.5,          # Balance between consistency and expressiveness
                        similarity_boost=0.75,  # How closely to match the original voice
                        style=0.0,              # Exaggeration level
                        use_speaker_boost=True  # Enhance clarity
                    )
                ),
                model=model
            )

            # The result may be an iterator of chunks or an already complete
            # bytes object; joining a bytes object would iterate ints and fail.
            if isinstance(audio, (bytes, bytearray)):
                audio_bytes = bytes(audio)
            else:
                audio_bytes = b"".join(audio)

            logger.info(f"✅ Generated {len(audio_bytes)} bytes of audio")
            return audio_bytes

        except Exception as e:
            # Deliberate best-effort boundary: voice output is optional, so
            # any failure is logged and reported to the caller as None.
            logger.error(f"❌ Voice generation failed: {e}")
            return None

    def get_voice_list(self) -> List[Dict[str, str]]:
        """Get list of available voices as ``{"name", "description"}`` dicts"""
        return [
            {"name": name, "description": config["description"]}
            for name, config in self.voices.items()
        ]

    @staticmethod
    def _parse_rewards(rewards) -> Optional[float]:
        """Return *rewards* as a float, or None when it is not numeric."""
        if isinstance(rewards, str):
            # Tolerate display-formatted amounts such as "$1,234.50".
            rewards = rewards.replace("$", "").replace(",", "").strip()
        try:
            return float(rewards)
        except (TypeError, ValueError, OverflowError):
            return None

    def create_audio_summary(self, recommendation_data: dict) -> str:
        """
        Create a concise audio-friendly summary of recommendation

        Args:
            recommendation_data: Normalized recommendation data. Keys read:
                ``recommended_card``, ``rewards_earned``, ``rewards_rate``,
                ``merchant``, ``reasoning`` and ``warnings``; all optional.

        Returns:
            Audio-optimized text
        """
        data = recommendation_data or {}
        card = data.get('recommended_card', 'Unknown Card')
        rate = data.get('rewards_rate', 'N/A')
        merchant = data.get('merchant', 'this merchant')
        reasoning = data.get('reasoning', '')

        # Create concise, natural-sounding summary
        summary = f"For your purchase at {merchant}, I recommend using your {card}. "

        # A missing or non-numeric amount (None, "N/A", ...) must not crash the
        # summary; in that case the dollar figure is simply left out.
        rewards = self._parse_rewards(data.get('rewards_earned', 0))
        if rewards is None:
            summary += f"You'll earn rewards at a rate of {rate}. "
        else:
            summary += f"You'll earn {rewards:.2f} dollars in rewards at a rate of {rate}. "

        # Add simplified reasoning (first sentence only). Splitting on a period
        # followed by whitespace keeps decimals such as "4.5" intact.
        if reasoning:
            normalized = " ".join(str(reasoning).split())
            first_sentence = normalized.split('. ')[0].strip().rstrip('.')
            if first_sentence and len(first_sentence) > 20:
                summary += f"{first_sentence}. "

        # Add warnings if present (only the first one is spoken)
        warnings = data.get('warnings', [])
        if isinstance(warnings, str):
            # A bare string is a single warning, not a sequence of characters.
            warnings = [warnings]
        if isinstance(warnings, (list, tuple)) and warnings and warnings[0]:
            summary += "Important note: " + str(warnings[0])

        return summary


# Global instance: constructed once, when this module is imported, and
# returned by get_voice_assistant().
voice_assistant = VoiceAssistant()


def get_voice_assistant() -> VoiceAssistant:
    """Return the shared module-level ``VoiceAssistant``.

    Returns:
        The ``voice_assistant`` instance defined in this module.
    """
    shared_instance = voice_assistant
    return shared_instance