sakshigpatil's picture
Remove extra target accents so that only American, British, Indian, and Australian are offered
edf056d verified
```python
import os
import tempfile
import wave
from typing import Dict, Optional, Tuple

import librosa
import numpy as np
import openai
import requests
import soundfile as sf
import torch
from dotenv import load_dotenv
from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
from pydub import AudioSegment
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# Load environment variables (OPENAI_API_KEY, GOOGLE_TTS_API_KEY) from a .env file
load_dotenv()

app = Flask(__name__)
CORS(app)  # allow cross-origin requests from the browser frontend

# Initialize services
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load accent models (placeholder - in a real app you'd have actual models)
# NOTE(review): values are None stubs; nothing in this file loads or reads them yet.
ACCENT_MODELS = {
    "american_general": None,
    "british_rp": None,
    # Add other accent models here
}
@app.route("/detect_accent", methods=["POST"])
def detect_accent():
    """Detect the accent from an uploaded audio file.

    Expects a multipart form upload under the "audio" key.
    Returns JSON {"accent": str, "confidence": float} on success,
    or {"error": str} with status 400/500 on failure.

    NOTE: detection is a stub — a random accent is returned. A real
    implementation would run a model such as wav2vec2 on the clip.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400
    audio_file = request.files["audio"]
    tmp_path = None
    try:
        # Save temporarily for processing
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
            audio_file.save(tmp_path)
        # For demo purposes, we just return a random accent
        accents = ["american_general", "british_rp", "australian_general"]
        detected_accent = np.random.choice(accents)
        return jsonify({
            # cast numpy scalars (np.str_ / np.float64) to plain Python
            # types so JSON serialization cannot choke on them
            "accent": str(detected_accent),
            "confidence": float(np.random.uniform(0.7, 0.95))
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Original leaked the temp file whenever an exception fired
        # before os.unlink; always clean it up here instead.
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@app.route("/convert_accent", methods=["POST"])
def convert_accent():
    """Convert an uploaded audio clip to a target accent.

    Form fields:
        audio         -- required multipart audio file
        target_accent -- accent identifier (default "american_general")

    Pipeline: Whisper transcription -> (stub) text transformation ->
    Google TTS synthesis. Returns JSON containing a URL served by
    /get_audio/<filename>, or {"error": str} with status 400/500.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400
    target_accent = request.form.get("target_accent", "american_general")
    audio_file = request.files["audio"]
    tmp_path = None
    try:
        # 1. Transcribe the audio using OpenAI Whisper
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
            audio_file.save(tmp_path)
        with open(tmp_path, "rb") as f:
            transcript = openai.Audio.transcribe("whisper-1", f)
        text = transcript["text"]

        # 2. Convert text to target accent (simplified for demo).
        # A real app would apply accent-specific text transformation here.
        converted_text = f"[Converted to {target_accent}] {text}"

        # 3. Synthesize speech with the target accent via Google TTS
        # (requires GOOGLE_TTS_API_KEY in production)
        tts_url = "https://texttospeech.googleapis.com/v1/text:synthesize"
        headers = {
            "Authorization": f"Bearer {os.getenv('GOOGLE_TTS_API_KEY')}",
            "Content-Type": "application/json"
        }
        payload = {
            "input": {"text": converted_text},
            "voice": {"languageCode": get_locale_for_accent(target_accent)},
            "audioConfig": {"audioEncoding": "MP3"}
        }
        response = requests.post(tts_url, headers=headers, json=payload)
        if response.status_code != 200:
            # Surface the HTTP status so failures are diagnosable
            raise Exception(f"TTS API error (status {response.status_code})")

        # Save the converted audio. Deliberately NOT deleted here --
        # it is served later by /get_audio/<filename>.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_out:
            tmp_out.write(response.content)
            converted_audio_path = tmp_out.name

        return jsonify({
            "status": "success",
            "converted_audio_url": f"/get_audio/{os.path.basename(converted_audio_path)}"
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Original only unlinked the input temp file on the success path,
        # leaking it on any exception; clean up unconditionally.
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@app.route("/get_audio/<filename>", methods=["GET"])
def get_audio(filename: str):
    """Serve a previously converted audio file from the temp directory.

    Security: rejects any filename containing path components so a
    crafted "../" value cannot read files outside tempfile.gettempdir().
    (The original also called send_file without importing it, so every
    request died with a NameError masked as a generic 500.)
    """
    try:
        # Strip path components; mismatch means a traversal attempt
        if os.path.basename(filename) != filename:
            return jsonify({"error": "Invalid filename"}), 400
        filepath = os.path.join(tempfile.gettempdir(), filename)
        if not os.path.exists(filepath):
            return jsonify({"error": "File not found"}), 404
        # send_file is imported from flask at the top of the file
        return send_file(filepath, mimetype="audio/mp3")
    except Exception as e:
        return jsonify({"error": str(e)}), 500
def get_locale_for_accent(accent: str) -> str:
    """Return the TTS locale code for *accent*.

    Unknown accent identifiers fall back to US English ("en-US").
    """
    locales = {
        "american_general": "en-US",
        "british_rp": "en-GB",
        "australian_general": "en-AU",
        "indian_general": "en-IN",
    }
    try:
        return locales[accent]
    except KeyError:
        return "en-US"
if __name__ == "__main__":
    # Development server only: debug=True enables the interactive debugger
    # and host="0.0.0.0" binds all interfaces — do not expose this in
    # production; use a WSGI server (gunicorn/uwsgi) instead.
    app.run(debug=True, host="0.0.0.0", port=5000)
```
These files provide:
1. `requirements.txt` - All necessary Python libraries for the project
2. `app.py` - A Flask API server that handles:
- Accent detection
- Audio conversion
- Text-to-speech synthesis
- File serving
Note that in a production environment, you would:
- Use proper authentication
- Set up proper error handling
- Implement actual ML models for accent detection/conversion
- Use proper storage for audio files
- Add rate limiting
- Set up proper logging
___METADATA_START___
{"repoId":"sakshigpatil/accentify-the-global-tongue-twister","isNew":false,"userName":"sakshigpatil"}
___METADATA_END___