Spaces:
Sleeping
Sleeping
Ko-TTS-Arena Contributors
committed on
Commit
·
c82ad44
1
Parent(s):
50a02f7
fix: Use REST API v1beta1 for Gemini TTS, disable google-wavenet/neural2
Browse files
- models.py +3 -3
- requirements.txt +1 -2
- tts.py +29 -34
models.py
CHANGED
|
@@ -596,13 +596,13 @@ def insert_initial_models():
|
|
| 596 |
is_active=has_openai,
|
| 597 |
model_url="https://platform.openai.com/docs/guides/text-to-speech",
|
| 598 |
),
|
| 599 |
-
# Google Cloud TTS -
|
| 600 |
Model(
|
| 601 |
id="google-wavenet",
|
| 602 |
name="Google Wavenet (ko-KR)",
|
| 603 |
model_type=ModelType.TTS,
|
| 604 |
is_open=False,
|
| 605 |
-
is_active=
|
| 606 |
model_url="https://cloud.google.com/text-to-speech",
|
| 607 |
),
|
| 608 |
Model(
|
|
@@ -610,7 +610,7 @@ def insert_initial_models():
|
|
| 610 |
name="Google Neural2 (ko-KR)",
|
| 611 |
model_type=ModelType.TTS,
|
| 612 |
is_open=False,
|
| 613 |
-
is_active=
|
| 614 |
model_url="https://cloud.google.com/text-to-speech",
|
| 615 |
),
|
| 616 |
# CLOVA TTS (네이버 클라우드 - 한국어 특화) - API 키 있을 때만 활성화
|
|
|
|
| 596 |
is_active=has_openai,
|
| 597 |
model_url="https://platform.openai.com/docs/guides/text-to-speech",
|
| 598 |
),
|
| 599 |
+
# Google Cloud TTS - 비활성화 (Gemini TTS 사용)
|
| 600 |
Model(
|
| 601 |
id="google-wavenet",
|
| 602 |
name="Google Wavenet (ko-KR)",
|
| 603 |
model_type=ModelType.TTS,
|
| 604 |
is_open=False,
|
| 605 |
+
is_active=False, # Gemini TTS로 대체
|
| 606 |
model_url="https://cloud.google.com/text-to-speech",
|
| 607 |
),
|
| 608 |
Model(
|
|
|
|
| 610 |
name="Google Neural2 (ko-KR)",
|
| 611 |
model_type=ModelType.TTS,
|
| 612 |
is_open=False,
|
| 613 |
+
is_active=False, # Gemini TTS로 대체
|
| 614 |
model_url="https://cloud.google.com/text-to-speech",
|
| 615 |
),
|
| 616 |
# CLOVA TTS (네이버 클라우드 - 한국어 특화) - API 키 있을 때만 활성화
|
requirements.txt
CHANGED
|
@@ -14,5 +14,4 @@ huggingface-hub
|
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
pydub
|
| 17 |
-
typecast-python
|
| 18 |
-
google-cloud-texttospeech
|
|
|
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
pydub
|
| 17 |
+
typecast-python
|
|
|
tts.py
CHANGED
|
@@ -55,12 +55,10 @@ HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-syn
|
|
| 55 |
# Typecast TTS
|
| 56 |
TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
|
| 57 |
|
| 58 |
-
# Gemini TTS (Google Cloud) - API Key
|
| 59 |
GEMINI_TTS_API_KEY = os.getenv("GEMINI_TTS_API_KEY")
|
| 60 |
if GEMINI_TTS_API_KEY:
|
| 61 |
-
|
| 62 |
-
os.environ["GOOGLE_API_KEY"] = GEMINI_TTS_API_KEY
|
| 63 |
-
print("[Gemini TTS] API Key loaded and set as GOOGLE_API_KEY")
|
| 64 |
|
| 65 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 66 |
"""
|
|
@@ -458,46 +456,43 @@ def predict_typecast_tts(text: str, voice_id: str = "tc_612ed01c7eb720fddd3ddedf
|
|
| 458 |
|
| 459 |
|
| 460 |
def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
|
| 461 |
-
"""Gemini TTS API 호출 (API Key
|
| 462 |
if not GEMINI_TTS_API_KEY:
|
| 463 |
-
raise ValueError(
|
| 464 |
-
"GEMINI_TTS_API_KEY 환경 변수가 설정되지 않았습니다."
|
| 465 |
-
)
|
| 466 |
|
| 467 |
try:
|
| 468 |
-
|
| 469 |
-
from google.cloud import texttospeech_v1beta1 as texttospeech
|
| 470 |
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
language_code="ko-kr",
|
| 478 |
-
model_name=model,
|
| 479 |
-
)
|
| 480 |
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
prompt="친절하고 자연스러운 톤으로 말해주세요",
|
| 485 |
-
),
|
| 486 |
-
voice=voice_params,
|
| 487 |
-
audio_config=texttospeech.AudioConfig(
|
| 488 |
-
audio_encoding=texttospeech.AudioEncoding.LINEAR16,
|
| 489 |
-
sample_rate_hertz=24000,
|
| 490 |
-
),
|
| 491 |
-
)
|
| 492 |
|
|
|
|
| 493 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
| 494 |
-
f.write(
|
| 495 |
return f.name
|
| 496 |
|
| 497 |
-
except
|
| 498 |
-
raise ValueError(
|
| 499 |
-
"google-cloud-texttospeech 패키지가 설치되지 않았습니다. requirements.txt를 확인하세요."
|
| 500 |
-
)
|
| 501 |
except Exception as e:
|
| 502 |
raise ValueError(f"Gemini TTS API 오류: {str(e)}")
|
| 503 |
|
|
|
|
| 55 |
# Typecast TTS
|
| 56 |
TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
|
| 57 |
|
| 58 |
+
# Gemini TTS (Google Cloud) - REST API v1beta1 with API Key
|
| 59 |
GEMINI_TTS_API_KEY = os.getenv("GEMINI_TTS_API_KEY")
|
| 60 |
if GEMINI_TTS_API_KEY:
|
| 61 |
+
print("[Gemini TTS] API Key loaded")
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 64 |
"""
|
|
|
|
| 456 |
|
| 457 |
|
| 458 |
def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
|
| 459 |
+
"""Gemini TTS API 호출 (REST API v1beta1 with API Key)"""
|
| 460 |
if not GEMINI_TTS_API_KEY:
|
| 461 |
+
raise ValueError("GEMINI_TTS_API_KEY 환경 변수가 설정되지 않았습니다.")
|
|
|
|
|
|
|
| 462 |
|
| 463 |
try:
|
| 464 |
+
url = f"https://texttospeech.googleapis.com/v1beta1/text:synthesize?key={GEMINI_TTS_API_KEY}"
|
|
|
|
| 465 |
|
| 466 |
+
payload = {
|
| 467 |
+
"input": {
|
| 468 |
+
"text": text,
|
| 469 |
+
"prompt": "친절하고 자연스러운 톤으로 말해주세요"
|
| 470 |
+
},
|
| 471 |
+
"voice": {
|
| 472 |
+
"languageCode": "ko-kr",
|
| 473 |
+
"name": voice,
|
| 474 |
+
"modelName": model
|
| 475 |
+
},
|
| 476 |
+
"audioConfig": {
|
| 477 |
+
"audioEncoding": "LINEAR16",
|
| 478 |
+
"sampleRateHertz": 24000
|
| 479 |
+
}
|
| 480 |
+
}
|
| 481 |
|
| 482 |
+
response = requests.post(url, json=payload, timeout=60)
|
| 483 |
+
response.raise_for_status()
|
|
|
|
|
|
|
|
|
|
| 484 |
|
| 485 |
+
audio_content = response.json().get("audioContent")
|
| 486 |
+
if not audio_content:
|
| 487 |
+
raise ValueError("Gemini TTS API가 오디오를 반환하지 않았습니다.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
+
audio_bytes = base64.b64decode(audio_content)
|
| 490 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
| 491 |
+
f.write(audio_bytes)
|
| 492 |
return f.name
|
| 493 |
|
| 494 |
+
except requests.exceptions.RequestException as e:
|
| 495 |
+
raise ValueError(f"Gemini TTS API 요청 오류: {str(e)}")
|
|
|
|
|
|
|
| 496 |
except Exception as e:
|
| 497 |
raise ValueError(f"Gemini TTS API 오류: {str(e)}")
|
| 498 |
|