| | import uuid |
| | import base64 |
| | from pathlib import Path |
| | from config import GROQ_TTS_API_KEY, GROQ_TTS_MODEL |
| | from gtts import gTTS |
| | from fastapi import HTTPException |
| |
|
def text_to_speech(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> str:
    """Convert text to speech with gTTS and save the result as an MP3 file.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        voice: Language code forwarded to gTTS as ``lang``. Falls back to
            ``"en"`` when empty or ``None``.
        fmt: Output format. Only ``"mp3"`` is accepted.

    Returns:
        Path (as a string) of the generated file inside ``temp_audio``.
        The file is left on disk; removing it is the caller's job.

    Raises:
        ValueError: If ``text`` is empty/blank or ``fmt`` is not ``"mp3"``.
        Exception: If creating the directory, synthesizing, or saving fails.
            The underlying error is attached as ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    # Tracked outside the try so the error handler knows whether there is a
    # (possibly half-written) file to clean up.
    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)

        # Short random suffix keeps concurrent requests from sharing a file.
        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))

        return str(output_path)

    except Exception as e:
        # A failed save may leave a partial file behind that no caller will
        # ever receive a path for, so remove it here.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup: never let it mask the real failure.
                pass
        # "from e" keeps the root cause (network error, bad language code,
        # ...) reachable via __cause__ for logging and debugging.
        raise Exception(f"Unexpected error in text_to_speech: {str(e)}") from e
| |
|
| |
|
def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """Convert text to speech with gTTS and return the audio as Base64.

    The audio is written to a temporary file under ``temp_audio``, read
    back, and the file is removed again before returning, whether or not
    the conversion succeeded.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        voice: Language code forwarded to gTTS as ``lang``. Falls back to
            ``"en"`` when empty or ``None``.
        fmt: Output format. Only ``"mp3"`` is accepted.

    Returns:
        Dict with the keys ``audio_base64`` (Base64 text of the audio),
        ``mime_type`` (always ``"audio/mpeg"``), ``format``, ``filename``
        (name the temporary file had), ``size_bytes`` (raw audio length)
        and ``size_base64`` (length of the Base64 text).

    Raises:
        ValueError: If ``text`` is empty/blank or ``fmt`` is not ``"mp3"``.
        Exception: If creating the directory, synthesizing, saving, or
            reading the audio fails. The underlying error is attached as
            ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    # Short random suffix keeps concurrent requests from sharing a file.
    output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
    output_path = Path("temp_audio") / output_filename

    try:
        output_path.parent.mkdir(exist_ok=True)

        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))

        with open(output_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

    except Exception as e:
        # "from e" keeps the root cause reachable via __cause__.
        raise Exception(
            f"Unexpected error in text_to_speech_base64: {str(e)}"
        ) from e

    finally:
        # The file is only a staging area; remove it on success AND on
        # failure so failed requests do not pile up files in temp_audio.
        try:
            output_path.unlink(missing_ok=True)
        except OSError:
            # Best-effort cleanup: a leftover temp file must not mask the
            # real error or fail an otherwise successful conversion.
            pass

    return {
        "audio_base64": audio_base64,
        "mime_type": "audio/mpeg",
        "format": fmt,
        "filename": output_filename,
        "size_bytes": len(audio_bytes),
        "size_base64": len(audio_base64),
    }