Spaces:

Athspi-ai
/

AutoSubGen

Running

App Files Files Community

Athspi committed on Feb 18

Commit

91f8d48

verified ·

1 Parent(s): 9b199cb

Create app.py

Browse files

Files changed (1) hide show

app.py +172 -0

app.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import gradio as gr
+import torch
+import os
+from faster_whisper import WhisperModel
+from moviepy.video.io.VideoFileClip import VideoFileClip
+import logging
+import google.generativeai as genai
+# Keep moviepy quiet: only records at ERROR level or above pass its logger
+logging.getLogger("moviepy").setLevel(logging.ERROR)
+# Hand the Gemini client its API key; a missing GEMINI_API_KEY environment
+# variable raises KeyError here, at import time
+genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+# Generation settings passed to the Gemini model used for subtitle translation
+generation_config = dict(
+    temperature=1,
+    top_p=0.95,
+    top_k=40,
+    max_output_tokens=8192,
+    response_mime_type="text/plain",
+)
+model = genai.GenerativeModel(
+    model_name="gemini-2.0-flash-exp",
+    generation_config=generation_config,
+)
+# Whisper checkpoint used for transcription
+MODEL_NAME = "Systran/faster-whisper-large-v3"
+# Pick the device first, then the compute type that goes with it:
+# float32 when CUDA is available, int8 otherwise
+if torch.cuda.is_available():
+    device = "cuda"
+    compute_type = "float32"
+else:
+    device = "cpu"
+    compute_type = "int8"
+# Instantiate the Whisper model once at module import so every request reuses it
+whisper_model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
+# Display names offered in the language dropdowns. The first entry,
+# "Auto Detect", is a sentinel rather than a language: generate_subtitles()
+# maps it to language=None, and the "Translate To" dropdown skips it by
+# slicing from index 1, so it must stay at position 0.
+SUPPORTED_LANGUAGES = [
+    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
+    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
+    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
+    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
+    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
+    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
+    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
+    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
+    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
+    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
+    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
+    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
+    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
+    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
+    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
+    "Sundanese"
+]
+def extract_audio_from_video(video_file):
+    """Extract the audio track of a video into a 16 kHz WAV file.
+
+    Args:
+        video_file: Path to the video file to read.
+
+    Returns:
+        Path to a newly created WAV file. The file lives in the system
+        temporary directory under a unique name, so concurrent requests do
+        not overwrite each other's audio. The caller owns the file and is
+        responsible for deleting it.
+
+    Raises:
+        ValueError: If the video has no audio track.
+    """
+    import tempfile
+
+    video = VideoFileClip(video_file)
+    try:
+        if video.audio is None:
+            raise ValueError("The video file does not contain an audio track.")
+        # Reserve a unique path; only the name is needed, so close the handle
+        # right away and let moviepy overwrite the empty file.
+        fd, audio_file = tempfile.mkstemp(prefix="extracted_audio_", suffix=".wav")
+        os.close(fd)
+        try:
+            video.audio.write_audiofile(audio_file, fps=16000, logger=None)  # Suppress logs
+        except Exception:
+            # Do not leave a partial WAV behind when the export fails.
+            try:
+                os.remove(audio_file)
+            except OSError:
+                pass
+            raise
+    finally:
+        # Always release the ffmpeg readers held by the clip.
+        video.close()
+    return audio_file
+# Lower-cased UI display name -> Whisper language code. faster-whisper expects
+# codes ("en", "fr", ...), not names, for its `language` argument.
+_WHISPER_LANGUAGE_CODES = {
+    "english": "en", "chinese": "zh", "german": "de", "spanish": "es",
+    "russian": "ru", "korean": "ko", "french": "fr", "japanese": "ja",
+    "portuguese": "pt", "turkish": "tr", "polish": "pl", "catalan": "ca",
+    "dutch": "nl", "arabic": "ar", "swedish": "sv", "italian": "it",
+    "indonesian": "id", "hindi": "hi", "finnish": "fi", "vietnamese": "vi",
+    "hebrew": "he", "ukrainian": "uk", "greek": "el", "malay": "ms",
+    "czech": "cs", "romanian": "ro", "danish": "da", "hungarian": "hu",
+    "tamil": "ta", "norwegian": "no", "thai": "th", "urdu": "ur",
+    "croatian": "hr", "bulgarian": "bg", "lithuanian": "lt", "latin": "la",
+    "maori": "mi", "malayalam": "ml", "welsh": "cy", "slovak": "sk",
+    "telugu": "te", "persian": "fa", "latvian": "lv", "bengali": "bn",
+    "serbian": "sr", "azerbaijani": "az", "slovenian": "sl", "kannada": "kn",
+    "estonian": "et", "macedonian": "mk", "breton": "br", "basque": "eu",
+    "icelandic": "is", "armenian": "hy", "nepali": "ne", "mongolian": "mn",
+    "bosnian": "bs", "kazakh": "kk", "albanian": "sq", "swahili": "sw",
+    "galician": "gl", "marathi": "mr", "punjabi": "pa", "sinhala": "si",
+    "khmer": "km", "shona": "sn", "yoruba": "yo", "somali": "so",
+    "afrikaans": "af", "occitan": "oc", "georgian": "ka", "belarusian": "be",
+    "tajik": "tg", "sindhi": "sd", "gujarati": "gu", "amharic": "am",
+    "yiddish": "yi", "lao": "lo", "uzbek": "uz", "faroese": "fo",
+    "haitian creole": "ht", "pashto": "ps", "turkmen": "tk", "nynorsk": "nn",
+    "maltese": "mt", "sanskrit": "sa", "luxembourgish": "lb", "burmese": "my",
+    "tibetan": "bo", "tagalog": "tl", "malagasy": "mg", "assamese": "as",
+    "tatar": "tt", "hawaiian": "haw", "lingala": "ln", "hausa": "ha",
+    "bashkir": "ba", "javanese": "jw", "sundanese": "su",
+}
+def generate_subtitles(audio_file, language="Auto Detect"):
+    """Generate SRT subtitles from an audio file using Whisper.
+
+    Args:
+        audio_file: Path to the audio file to transcribe.
+        language: Display name of the spoken language (e.g. "English"), a
+            Whisper language code (e.g. "en"), or "Auto Detect" / None to let
+            the model detect the language.
+
+    Returns:
+        A tuple ``(srt_text, detected_language)`` where ``srt_text`` is the
+        full SRT document and ``detected_language`` is the language reported
+        by the transcription info.
+    """
+    if language is None or language == "Auto Detect":
+        language_code = None
+    else:
+        # Translate the UI display name into the code Whisper expects; values
+        # that are not known names are passed through unchanged so callers
+        # that already supply a code keep working.
+        key = language.strip().lower()
+        language_code = _WHISPER_LANGUAGE_CODES.get(key, key)
+    # Transcribe the audio
+    segments, info = whisper_model.transcribe(
+        audio_file,
+        task="transcribe",
+        language=language_code,
+        word_timestamps=True
+    )
+    # Build one SRT cue per segment: index, time range, text, blank line.
+    # Iterating `segments` is what actually drives the transcription.
+    cues = [
+        f"{index}\n{format_timestamp(segment.start)} --> "
+        f"{format_timestamp(segment.end)}\n{segment.text.strip()}\n\n"
+        for index, segment in enumerate(segments, start=1)
+    ]
+    return "".join(cues), info.language
+def format_timestamp(seconds):
+    """Convert seconds to SRT timestamp format (HH:MM:SS,mmm).
+
+    The value is rounded to the nearest millisecond once and then split with
+    integer arithmetic. Truncating the float fraction instead loses a
+    millisecond on values such as 1.001 or 4.35, whose binary representation
+    falls just below the intended value.
+
+    Args:
+        seconds: Time offset in seconds. Negative values are clamped to 0,
+            since SRT has no negative timestamps.
+
+    Returns:
+        The timestamp as a string, e.g. ``"01:01:01,500"``.
+    """
+    total_ms = max(0, int(round(seconds * 1000)))
+    hours, remainder = divmod(total_ms, 3_600_000)
+    minutes, remainder = divmod(remainder, 60_000)
+    secs, milliseconds = divmod(remainder, 1000)
+    return f"{hours:02}:{minutes:02}:{secs:02},{milliseconds:03}"
+def translate_srt(srt_text, target_language):
+    """Ask Gemini to translate SRT subtitle text into another language.
+
+    The prompt instructs the model to keep the SRT structure and timestamps
+    and to translate only the subtitle text. The model's reply is returned
+    as-is, without validation.
+
+    Args:
+        srt_text: The SRT document to translate.
+        target_language: Name of the language to translate into.
+
+    Returns:
+        The text of the Gemini response.
+    """
+    instructions = (
+        f"Translate the following SRT subtitles into {target_language}. "
+        "Preserve the SRT format (timestamps and structure). "
+        "Translate only the text after the timestamp. "
+        "Do not add explanations or extra text."
+    )
+    # Instructions first, then a blank line, then the subtitles themselves
+    request = f"{instructions}\n\n{srt_text}"
+    return model.generate_content(request).text
+def process_video(video_file, language="Auto Detect", translate_to=None):
+    """Process a video file to generate and optionally translate subtitles.
+
+    Args:
+        video_file: Path to the uploaded video.
+        language: Spoken language of the video, or "Auto Detect".
+        translate_to: Target language for translation. ``None``, an empty
+            value or the string "None" skips translation.
+
+    Returns:
+        A tuple ``(original_srt_file, translated_srt_file, detected_language)``.
+        ``translated_srt_file`` is ``None`` when no translation was requested.
+        Both SRT files are written into a per-request temporary directory so
+        concurrent requests never overwrite each other's results.
+
+    Raises:
+        gr.Error: If no video file was provided.
+    """
+    import tempfile
+
+    if not video_file:
+        raise gr.Error("Please upload a video file first.")
+    # Extract audio from the video
+    audio_file = extract_audio_from_video(video_file)
+    try:
+        # Generate subtitles; the audio is fully consumed by the time this returns
+        subtitles, detected_language = generate_subtitles(audio_file, language)
+    finally:
+        # Clean up the extracted audio even when transcription fails
+        if os.path.exists(audio_file):
+            os.remove(audio_file)
+    # Unique directory per request; the file names inside stay stable so the
+    # downloads keep their familiar names
+    output_dir = tempfile.mkdtemp(prefix="autosubgen_")
+    # Save original subtitles to an SRT file
+    original_srt_file = os.path.join(output_dir, "original_subtitles.srt")
+    with open(original_srt_file, "w", encoding="utf-8") as f:
+        f.write(subtitles)
+    # Translate subtitles if a target language is provided
+    translated_srt_file = None
+    if translate_to and translate_to != "None":
+        translated_subtitles = translate_srt(subtitles, translate_to)
+        translated_srt_file = os.path.join(output_dir, "translated_subtitles.srt")
+        with open(translated_srt_file, "w", encoding="utf-8") as f:
+            f.write(translated_subtitles)
+    return original_srt_file, translated_srt_file, detected_language
+# Define the Gradio interface
+# Components are laid out in the order they are created inside each context
+# manager, so the statement order below is the on-screen order.
+with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
+    # Header
+    with gr.Column():
+        gr.Markdown("# 🎥 AutoSubGen")
+        gr.Markdown("### AI-Powered Video Subtitle Generator")
+        gr.Markdown("Automatically generate and translate subtitles for your videos in **SRT format**. Supports **100+ languages** and **auto-detection**.")
+    # Main content
+    with gr.Tab("Generate Subtitles"):
+        gr.Markdown("### Upload a video file to generate subtitles.")
+        # Inputs: the video plus the two language selectors, side by side
+        with gr.Row():
+            video_input = gr.Video(label="Upload Video File", scale=2)
+            # Spoken language of the video; defaults to automatic detection
+            language_dropdown = gr.Dropdown(
+                choices=SUPPORTED_LANGUAGES,
+                label="Select Language",
+                value="Auto Detect",
+                scale=1
+            )
+            # Translation target; the literal "None" choice is what
+            # process_video() checks for to skip translation
+            translate_to_dropdown = gr.Dropdown(
+                choices=["None"] + SUPPORTED_LANGUAGES[1:],  # Exclude "Auto Detect"
+                label="Translate To",
+                value="None",
+                scale=1
+            )
+        generate_button = gr.Button("Generate Subtitles", variant="primary")
+        # Outputs: downloadable SRT files and the detected language
+        with gr.Row():
+            original_subtitle_output = gr.File(label="Download Original Subtitles (SRT)")
+            translated_subtitle_output = gr.File(label="Download Translated Subtitles (SRT)")
+        detected_language_output = gr.Textbox(label="Detected Language")
+    # Link button to function
+    # The order of inputs/outputs matches process_video's parameters and its
+    # returned (original_srt, translated_srt, detected_language) tuple
+    generate_button.click(
+        process_video,
+        inputs=[video_input, language_dropdown, translate_to_dropdown],
+        outputs=[original_subtitle_output, translated_subtitle_output, detected_language_output]
+    )
+# Launch the Gradio interface with a public link
+# NOTE(review): this runs at import time with no __main__ guard — confirm that
+# is intended for the deployment target.
+demo.launch(share=True)