# Edge_TTS / app.py
# Hugging Face file-viewer header (page residue, not part of the program):
# uploaded by jafrilalam - commit "Create app.py" (5d169a1, verified) - 7.09 kB
import gradio as gr
import edge_tts
import asyncio
import tempfile
import os
import time
async def get_voices():
    """Return the selectable voices as a ``{display label: ShortName}`` dict.

    The catalogue is a fixed, hard-coded list of four Bengali voices; nothing
    is fetched at runtime. Each label has the form
    ``"<ShortName> - <Locale> (<Gender>)"``.
    """
    # Hard-coded stand-in for a real voice listing.
    catalog = (
        {"ShortName": "bn-IN-TanishaaNeural", "Locale": "bn-IN", "Gender": "Female"},
        {"ShortName": "bn-IN-BashkarNeural", "Locale": "bn-IN", "Gender": "Male"},
        {"ShortName": "bn-BD-PradeepNeural", "Locale": "bn-BD", "Gender": "Male"},
        {"ShortName": "bn-BD-NabanitaNeural", "Locale": "bn-BD", "Gender": "Female"},
    )

    labelled = {}
    for entry in catalog:
        short_name = entry["ShortName"]
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* into an MP3 file using Edge TTS.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only input is
            rejected with a user-facing message instead of raising.
        voice: Voice label of the form ``"<ShortName> - ..."``; only the part
            before the first ``" - "`` is passed to Edge TTS.
        rate: Speech-rate adjustment in percent (formatted as e.g. ``+10%``).
        pitch: Pitch adjustment in Hz (formatted as e.g. ``-5Hz``).

    Returns:
        ``(path, None)`` with the path of the generated MP3 on success, or
        ``(None, message)`` when the text or the voice is missing.

    Raises:
        Whatever ``edge_tts`` raises while synthesizing; the partially
        written temporary file is removed before the error propagates.
    """
    if not text or not text.strip():
        return None, "বাংলা লেখা সংযুক্ত করুন"
    if not voice:
        return None, "বাচনভঙ্গি এবং কণ্ঠস্বর নির্বাচন করুন"

    voice_short_name = voice.split(" - ")[0]
    # The 'd' format code rejects floats, and the value may arrive as a float
    # (e.g. 10.0) depending on the caller, so coerce to int first.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a uniquely named mp3 path; delete=False keeps the file on disk
    # after the handle is closed so the caller can use it.
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    with tempfile.NamedTemporaryFile(delete=False, suffix=f"_tts_{timestamp}.mp3") as tmp_file:
        tmp_path = tmp_file.name

    # The handle is closed before edge_tts writes to the path.
    try:
        await communicate.save(tmp_path)
    except BaseException:
        # Includes task cancellation: do not leave an orphaned temp file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path, None
async def tts_interface(text, voice, rate, pitch):
    """Adapt ``text_to_speech`` to the three outputs of the generate button.

    Returns a 3-tuple ``(audio, download, warning)``: on success the generated
    file path is returned twice (player and download) with ``None`` as the
    warning; when ``text_to_speech`` reports a message, the download slot is
    ``None`` and the third slot is the result of ``gr.Warning(message)``.
    """
    audio_path, problem = await text_to_speech(text, voice, rate, pitch)
    if not problem:
        # One file feeds both the audio player and the download component.
        return audio_path, audio_path, None
    # NOTE(review): gr.Warning presumably shows a notification in the UI and
    # returns None - confirm against the Gradio docs. Whatever it returns is
    # forwarded as the value for the third output.
    return audio_path, None, gr.Warning(problem)
def reset_fields():
    """Return the cleared values for the seven components wired to reset.

    Order: text, voice, rate, pitch, audio, download, warning.
    """
    blank = ""
    neutral = 0
    nothing = None
    return (blank, blank, neutral, neutral, nothing, nothing, blank)
async def create_demo():
    """Build and return the Gradio Blocks interface.

    The layout consists of a title, an input group (text box, voice dropdown,
    rate and pitch sliders, generate/reset buttons) and an output group (audio
    player, downloadable file, hidden warning area). The generate button is
    wired to ``tts_interface`` and the reset button to ``reset_fields``.
    """
    voice_labels = await get_voices()

    # Stylesheet handed to gr.Blocks; content kept verbatim.
    page_css = """
body {
font-family: 'Noto Sans Bengali', sans-serif;
background: linear-gradient(135deg, #e0f7fa, #b2ebf2);
}
.gr-button-primary {
background: linear-gradient(45deg, #0288d1, #4fc3f7) !important;
border: none !important;
color: white !important;
padding: 12px 24px !important;
border-radius: 8px !important;
font-weight: bold !important;
transition: all 0.3s ease !important;
}
.gr-button-primary:hover {
background: linear-gradient(45deg, #0277bd, #29b6f6) !important;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2) !important;
}
.gr-button-secondary {
background: linear-gradient(45deg, #e57373, #f06292) !important;
border: none !important;
color: white !important;
padding: 12px 24px !important;
border-radius: 8px !important;
font-weight: bold !important;
transition: all 0.3s ease !important;
}
.gr-button-secondary:hover {
background: linear-gradient(45deg, #d32f2f, #e91e63) !important;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2) !important;
}
.gr-textbox, .gr-dropdown, .gr-slider {
border-radius: 8px !important;
border: 1px solid #b0bec5 !important;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1) !important;
}
.gr-group {
background: white !important;
border-radius: 12px !important;
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1) !important;
padding: 20px !important;
margin-bottom: 20px !important;
}
.gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
color: #01579b !important;
}
.gr-audio, .gr-file {
border-radius: 8px !important;
background: #f5f5f5 !important;
padding: 10px !important;
}
.container {
max-width: 800px !important;
margin: auto !important;
}
"""

    with gr.Blocks(css=page_css, analytics_enabled=False) as app:
        # Page title and introduction.
        gr.Markdown(
            """
# 🎙️ Edge TTS For Bangla Language
লেখা থেকে উচ্চ-মানের কণ্ঠস্বরে রূপান্তর করুন। বাংলা ভাষায় স্বাভাবিক এবং সুন্দর কণ্ঠস্বর উপভোগ করুন।
""",
            elem_classes=["container"],
        )

        # Input group: text, voice and prosody settings plus action buttons.
        with gr.Group():
            gr.Markdown("### লেখা ইনপুট এবং সেটিংস")
            text_box = gr.Textbox(
                label="প্রদত্ত লেখা",
                lines=5,
                placeholder="এখানে আপনার বাংলা লেখা লিখুন, যেমন: 'আমি বাংলায় কথা বলি।'",
                show_copy_button=True,
            )
            # The leading empty choice is the initial "nothing selected" value.
            voice_picker = gr.Dropdown(
                choices=["", *voice_labels],
                label="বাচনভঙ্গি এবং কণ্ঠস্বর",
                value="",
                info="একটি কণ্ঠস্বর নির্বাচন করুন।",
            )
            rate_control = gr.Slider(
                minimum=-50,
                maximum=50,
                value=0,
                label="Speech Rate Adjustment (%)",
                step=1,
                info="কথার গতি সামঞ্জস্য করুন: 0% ডিফল্ট, +50% দ্রুত, -50% ধীর।",
            )
            pitch_control = gr.Slider(
                minimum=-20,
                maximum=20,
                value=0,
                label="Pitch Adjustment (Hz)",
                step=1,
                info="কণ্ঠের স্বর সামঞ্জস্য করুন: 0 Hz ডিফল্ট, +20 Hz উচ্চ, -20 Hz নিম্ন।",
            )
            with gr.Row():
                generate_button = gr.Button("লেখা থেকে কণ্ঠস্বরে রূপান্তর করুন", variant="primary")
                reset_button = gr.Button("রিসেট করুন", variant="secondary")

        # Output group: player, download link and a hidden warning area.
        with gr.Group():
            gr.Markdown("### আউটপুট")
            audio_player = gr.Audio(label="Generated Audio", type="filepath")
            download_file = gr.File(label="অডিও ফাইল ডাউনলোড করুন")
            warning_area = gr.Markdown(label="Warning", visible=False)

        # Event wiring.
        synthesis_inputs = [text_box, voice_picker, rate_control, pitch_control]
        synthesis_outputs = [audio_player, download_file, warning_area]

        generate_button.click(
            fn=tts_interface,
            inputs=synthesis_inputs,
            outputs=synthesis_outputs,
            show_progress=True,
        )
        # Reset clears every input and every output, in reset_fields() order.
        reset_button.click(
            fn=reset_fields,
            inputs=[],
            outputs=[
                text_box,
                voice_picker,
                rate_control,
                pitch_control,
                audio_player,
                download_file,
                warning_area,
            ],
        )

    return app
async def main():
    """Build the interface, enable its request queue and launch it.

    The queue is configured with a default concurrency limit of 50 and the
    app is launched with ``show_api=False``.
    """
    app = await create_demo()
    app.queue(default_concurrency_limit=50)
    app.launch(show_api=False)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())