#!/usr/bin/env python3
"""Whisper Server for Hebrew Transcription - Hugging Face Spaces Version."""

import logging
import os
import tempfile
from typing import Optional

import librosa
import soundfile as sf
import torch
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from faster_whisper import WhisperModel

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize FastAPI app
app = FastAPI(title="Hebrew Whisper Transcription API")

# Configure CORS (wide open — this Space is meant to be called from any origin)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class WhisperService:
    """Wraps a faster-whisper CT2 model tuned for Hebrew (ivrit-ai) transcription.

    Auto-selects GPU (float16) when CUDA is available, otherwise CPU (int8).
    """

    def __init__(self) -> None:
        logger.info("🚀 Initializing Whisper model...")

        # Auto-detect GPU; compute type and thread counts differ per device.
        if torch.cuda.is_available():
            logger.info("🎮 Using GPU: %s", torch.cuda.get_device_name())
            self.device = "cuda"
            self.compute_type = "float16"
            self.cpu_threads = 8
            self.num_workers = 2
        else:
            logger.info("💻 Using CPU (slower but free!)")
            self.device = "cpu"
            self.compute_type = "int8"
            self.cpu_threads = 4
            self.num_workers = 1

        # Load model (downloads from the HF Hub on first run)
        self.model = WhisperModel(
            "ivrit-ai/whisper-large-v3-ct2",
            device=self.device,
            compute_type=self.compute_type,
            cpu_threads=self.cpu_threads,
            num_workers=self.num_workers,
        )
        logger.info("✅ Whisper model loaded successfully!")

    def transcribe(self, audio_path: str, initial_prompt: Optional[str] = None) -> dict:
        """Transcribe an audio file to Hebrew text.

        Args:
            audio_path: Path to a local audio file readable by faster-whisper.
            initial_prompt: Optional priming text; defaults to a railway
                announcement phrase that biases decoding toward that domain.

        Returns:
            dict with keys ``text``, ``language``, ``duration``, ``success``.

        Raises:
            Exception: re-raises any transcription error after logging it.
        """
        try:
            if not initial_prompt:
                initial_prompt = "נוסעים יקרים, רכבת ישראל"

            # Simple transcribe like hebrew_live_simple.py
            segments, info = self.model.transcribe(
                audio_path,
                language="he",
                initial_prompt=initial_prompt,
            )
            full_text = " ".join(segment.text.strip() for segment in segments)

            # Common mis-recognitions of the announcement opening phrase.
            corrections = {
                "נושאים יקרים": "נוסעים יקרים",
                "נושאים עיקריים": "נוסעים יקרים",
            }
            for wrong, correct in corrections.items():
                full_text = full_text.replace(wrong, correct)

            return {
                "text": full_text,
                "language": info.language,
                "duration": info.duration,
                "success": True,
            }
        except Exception as e:
            logger.error("Transcription error: %s", e)
            raise


# Initialize service once at import time (model stays resident for all requests)
whisper_service = WhisperService()


@app.get("/")
def root():
    """Health/info endpoint reporting the active device configuration."""
    return {
        "status": "Hebrew Whisper API is running! 🚀",
        "device": whisper_service.device,
        "compute_type": whisper_service.compute_type,
    }


@app.post("/transcribe")
async def transcribe_audio(
    audio: UploadFile = File(...),
    prompt: Optional[str] = None,
):
    """Accept an uploaded audio file and return its Hebrew transcription.

    Compressed formats (.webm/.ogg/.m4a/.mp3) are first resampled to a
    16 kHz mono WAV via librosa; on conversion failure the original file
    is used as-is (best effort).
    """
    if not audio.filename:
        raise HTTPException(status_code=400, detail="No file uploaded")

    tmp_file_path = None  # explicit sentinel so cleanup never needs locals()
    try:
        # Persist the upload to a temp file, preserving its extension.
        suffix = os.path.splitext(audio.filename)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(await audio.read())
            tmp_file_path = tmp_file.name

        logger.info("📝 Transcribing: %s", audio.filename)

        # Convert to WAV if needed (best effort — fall back to original on error)
        try:
            if audio.filename.endswith((".webm", ".ogg", ".m4a", ".mp3")):
                logger.info("Converting to 16kHz WAV...")
                audio_data, _ = librosa.load(tmp_file_path, sr=16000, mono=True)
                with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
                    sf.write(wav_file.name, audio_data, 16000)
                old_path = tmp_file_path
                tmp_file_path = wav_file.name
                os.unlink(old_path)
        except Exception as e:
            logger.warning("Could not convert, using original: %s", e)

        result = whisper_service.transcribe(tmp_file_path, initial_prompt=prompt)

        logger.info("✅ Transcription complete!")
        return JSONResponse(content=result)
    except Exception as e:
        logger.error("Error: %s", e)
        raise HTTPException(status_code=500, detail=f"Transcription failed: {str(e)}")
    finally:
        # Always remove whichever temp file we ended up with.
        if tmp_file_path and os.path.exists(tmp_file_path):
            os.unlink(tmp_file_path)


@app.post("/transcribe-blob")
async def transcribe_blob(
    audio: UploadFile = File(...),
    prompt: Optional[str] = None,
):
    """Alias endpoint for browser blob uploads; delegates to /transcribe."""
    return await transcribe_audio(audio, prompt)


# No need for uvicorn.run() - Hugging Face runs it automatically