Spaces:

jpjp9292
/

Speech-to-Text

Runtime error

App Files Files Community

jpjp9292 committed on May 24, 2024

Commit

50c4728

verified ·

1 Parent(s): 40eb44b

Create app.py

Browse files

Files changed (1) hide show

app.py +69 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import os
+import gradio as gr
+import time
+from moviepy.editor import VideoFileClip
+from faster_whisper import WhisperModel
+# Convert a video file to MP3
+def convert_mp4_to_mp3(video_file_path, output_dir):
+    video = VideoFileClip(video_file_path)
+    audio = video.audio
+    output_path = os.path.join(output_dir, os.path.splitext(os.path.basename(video_file_path))[0] + ".mp3")
+    audio.write_audiofile(output_path)
+    audio.close()
+    video.close()
+    return output_path
+# Transcribe an MP3 file to text using a Whisper model
+def transcribe_audio(model_size, audio_file):
+    model = WhisperModel(model_size, device="cpu", compute_type="int8")
+    start_time = time.time()
+    try:
+        segments, info = model.transcribe(audio_file, beam_size=5)
+        detected_language = "Detected language '%s' with probability %f" % (info.language, info.language_probability)
+        result = []
+        for segment in segments:
+            result.append("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+        result_text = "\n".join(result)
+    except PermissionError as e:
+        return f"PermissionError: {e}"
+    except ValueError as e:
+        return f"ValueError: {e}"
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    return f"{detected_language}\n\nTranscription:\n{result_text}\n\nElapsed time: {elapsed_time:.2f} seconds"
+# Main function used by the Gradio interface
+def process_video(model_size, video_file=None, video_url=None):
+    if video_url:
+        video_file_path = gr.processing_utils.download_url(video_url, dir='/tmp')
+    elif video_file:
+        video_file_path = video_file.name
+    else:
+        return "Please upload a video file or provide a video URL."
+    save_path = "/tmp"
+    mp3_file_path = convert_mp4_to_mp3(video_file_path, save_path)
+    transcription = transcribe_audio(model_size, mp3_file_path)
+    return transcription
+# Gradio interface definition
+iface = gr.Interface(
+    fn=process_video,
+    inputs=[
+        gr.Dropdown(["tiny", "base", "small", "medium", "large"], label="Model Size"),
+        gr.File(label="Upload Video File", optional=True),
+        gr.Textbox(label="Video URL", optional=True)
+    ],
+    outputs="text",
+    title="Video to Text Converter using Whisper",
+    description="Upload a video file or provide a video URL, select the Whisper model size, and get the transcribed text."
+)
+if __name__ == "__main__":
+    iface.launch()