import os
import tempfile
from typing import Any, Dict, Optional

import whisper


class WhisperTranscriber:
    """Thin convenience wrapper around a loaded Whisper speech-to-text model.

    The model is loaded once in ``__init__`` and reused by every method, so a
    single instance can serve many transcription requests.
    """

    def __init__(self, model_size: str = "base") -> None:
        """
        Load the Whisper model that all other methods will use

        Args:
            model_size (str): Name of the model to load.
                Available models: tiny, base, small, medium, large
        """
        self.model = whisper.load_model(model_size)

    def transcribe_audio(
        self, audio_path: str, language: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Transcribe audio using Whisper model

        Args:
            audio_path (str): Path to audio file
            language (str, optional): Language code (e.g., 'en').
                If None, auto-detect

        Returns:
            dict: Transcription result containing text, segments,
                language, etc.
        """
        return self.model.transcribe(audio_path, language=language)

    def transcribe_bytes(
        self,
        audio_bytes: bytes,
        temp_prefix: str = "whisper_temp",
        language: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Transcribe raw audio bytes by saving to temporary file

        Args:
            audio_bytes (bytes): Raw audio data
            temp_prefix (str): Prefix for temporary file
            language (str, optional): Language code (e.g., 'en').
                If None, auto-detect

        Returns:
            dict: Transcription result
        """
        # delete=False + explicit removal: the file has to be *closed* before
        # it is reopened by name. Reopening a still-open NamedTemporaryFile
        # works on Unix but fails on Windows.
        temp_file = tempfile.NamedTemporaryFile(prefix=temp_prefix, delete=False)
        try:
            with temp_file:  # closes (and thereby flushes) the handle on exit
                temp_file.write(audio_bytes)
            return self.transcribe_audio(temp_file.name, language=language)
        finally:
            # Always clean up, whether transcription succeeded or raised.
            os.remove(temp_file.name)

    def detect_language(self, audio_path: str) -> str:
        """
        Detect the language of the audio

        Args:
            audio_path (str): Path to audio file

        Returns:
            str: Language code (e.g., 'en')
        """
        # Load audio and pad/trim it to fit 30 seconds
        audio = whisper.load_audio(audio_path)
        audio = whisper.pad_or_trim(audio)

        # Make log-Mel spectrogram and move to device. The number of mel bins
        # is taken from the loaded model rather than the library default,
        # because not every checkpoint uses the default count.
        mel = whisper.log_mel_spectrogram(
            audio, n_mels=self.model.dims.n_mels
        ).to(self.model.device)

        # Detect language: pick the code with the highest probability
        _, probs = self.model.detect_language(mel)
        return max(probs, key=probs.get)

    def transcribe_with_timestamps(
        self, audio_path: str, language: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Get transcription with word-level timestamps

        Args:
            audio_path (str): Path to audio file
            language (str, optional): Language code (e.g., 'en').
                If None, auto-detect

        Returns:
            dict: Transcription with word-level timestamps
        """
        return self.model.transcribe(
            audio_path, language=language, word_timestamps=True
        )