# Library setup
import os
import re
import time

import gradio as gr
from moviepy.editor import VideoFileClip
from faster_whisper import WhisperModel
# Function to convert a video file to MP3 and return the path of the extracted audio
def convert_mp4_to_mp3(video_file_path, output_dir):
    video = VideoFileClip(video_file_path)
    audio = video.audio
    output_path = os.path.join(
        output_dir,
        os.path.splitext(os.path.basename(video_file_path))[0] + ".mp3",
    )
    audio.write_audiofile(output_path)
    audio.close()
    video.close()
    return output_path
# Function to transcribe an MP3 file to text using the Whisper model
def transcribe_audio(model_size, audio_file):
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    start_time = time.time()
    try:
        segments, info = model.transcribe(audio_file, beam_size=5)
        detected_language = "Detected language '%s' with probability %f" % (
            info.language,
            info.language_probability,
        )
        result = []
        for segment in segments:
            result.append("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        result_text = "\n".join(result)
    except PermissionError as e:
        return f"PermissionError: {e}"
    except ValueError as e:
        return f"ValueError: {e}"
    end_time = time.time()
    elapsed_time = end_time - start_time
    return f"{detected_language}\n\nTranscription:\n{result_text}\n\nElapsed time: {elapsed_time:.2f} seconds"
# Main function driven by the Gradio interface
def process_video(model_size, video_file=None):
    if not video_file:
        return "Please upload a video file."
    # gr.File may return a plain path or a file wrapper depending on the Gradio version
    video_file_path = video_file if isinstance(video_file, str) else video_file.name
    print(f"Using uploaded video file: {video_file_path}")
    save_path = "/tmp"
    mp3_file_path = convert_mp4_to_mp3(video_file_path, save_path)
    print(f"Converted video to MP3: {mp3_file_path}")
    transcription = transcribe_audio(model_size, mp3_file_path)
    print("Transcription complete")
    return transcription
# Gradio interface definition
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Dropdown(["tiny", "base", "small", "medium", "large"], label="Model Size"),
        gr.File(label="Upload Video File"),
    ],
    outputs="text",
    title="Video to Text Converter using Whisper",
    description="Upload a video file, select the Whisper model size, and get the transcribed text.",
    live=True,
)
if __name__ == "__main__":
    iface.launch()
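
# A minimal local smoke test of the same pipeline, bypassing the Gradio UI.
# This is only a sketch: "sample.mp4" is a hypothetical file name and is not
# part of this Space; point it at a real video before calling the function.
def _local_smoke_test(video_path="sample.mp4"):
    mp3_path = convert_mp4_to_mp3(video_path, "/tmp")
    print(transcribe_audio("tiny", mp3_path))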