|
|
```python
|
|
import base64
import os
import tempfile
import wave
from typing import Dict, Optional, Tuple

import librosa
import numpy as np
import openai
import requests
import soundfile as sf
import torch
from dotenv import load_dotenv
from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
from pydub import AudioSegment
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
|
|
|
|
|
|
# Load variables from a local .env file into the process environment
# before any configuration value is read from it.
load_dotenv()

# Hand the OpenAI client its key; this is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# The Flask application, with flask-cors applied using its default settings.
app = Flask(__name__)
CORS(app)
|
|
|
|
|
|
|
|
# Registry keyed by accent identifier. Every slot is currently None;
# presumably these are placeholders for per-accent models -- TODO confirm.
ACCENT_MODELS = dict.fromkeys(("american_general", "british_rp"))
|
|
|
|
|
@app.route("/detect_accent", methods=["POST"])
def detect_accent():
    """Detect the accent of an uploaded audio file.

    Expects a multipart upload with the recording in the ``audio`` field.

    NOTE: detection is still a placeholder. The upload is written to a
    temporary file, but the reported accent and confidence are drawn at
    random rather than inferred from the audio.

    Returns:
        A JSON response ``{"accent": str, "confidence": float}`` on success;
        ``{"error": ...}`` with status 400 when no ``audio`` file was sent;
        ``{"error": ...}`` with status 500 when processing fails.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files["audio"]
    tmp_path = None

    try:
        # Reserve a path and close the handle before saving: writing by name
        # into a still-open NamedTemporaryFile fails on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
        audio_file.save(tmp_path)

        # Placeholder result until a real accent classifier is wired in.
        accents = ["american_general", "british_rp", "australian_general"]
        # Convert NumPy scalars to built-in types so JSON encoding never
        # depends on how the encoder treats NumPy objects.
        detected_accent = str(np.random.choice(accents))
        confidence = float(np.random.uniform(0.7, 0.95))

        return jsonify({
            "accent": detected_accent,
            "confidence": confidence,
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500

    finally:
        # Remove the upload on every path, including failures, so temporary
        # files do not accumulate.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
|
|
|
|
@app.route("/convert_accent", methods=["POST"])
def convert_accent():
    """Convert an uploaded recording into speech in the requested accent.

    Steps: save the upload to a temporary WAV file, transcribe it with
    ``openai.Audio.transcribe("whisper-1", ...)``, build the text to speak,
    synthesize it through the Google Text-to-Speech REST endpoint, and store
    the resulting MP3 in the temporary directory.

    Form fields:
        audio: the recording to convert (required file field).
        target_accent: accent identifier; defaults to ``"american_general"``.

    Returns:
        A JSON response ``{"status": "success", "converted_audio_url": ...}``
        on success; ``{"error": ...}`` with status 400 when no ``audio`` file
        was sent; ``{"error": ...}`` with status 500 when any step fails.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    target_accent = request.form.get("target_accent", "american_general")
    audio_file = request.files["audio"]
    upload_path = None

    try:
        # Reserve a path and close the handle before saving: writing by name
        # into a still-open NamedTemporaryFile fails on Windows.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            upload_path = tmp.name
        audio_file.save(upload_path)

        with open(upload_path, "rb") as f:
            transcript = openai.Audio.transcribe("whisper-1", f)
        text = transcript["text"]

        # Placeholder for real accent conversion: the transcript is only
        # tagged with the target accent before being synthesized.
        converted_text = f"[Converted to {target_accent}] {text}"

        tts_url = "https://texttospeech.googleapis.com/v1/text:synthesize"
        # NOTE(review): a Bearer header normally carries an OAuth access
        # token, while the variable name suggests an API key -- confirm which
        # credential GOOGLE_TTS_API_KEY actually holds.
        headers = {
            "Authorization": f"Bearer {os.getenv('GOOGLE_TTS_API_KEY')}",
            "Content-Type": "application/json",
        }
        payload = {
            "input": {"text": converted_text},
            "voice": {"languageCode": get_locale_for_accent(target_accent)},
            "audioConfig": {"audioEncoding": "MP3"},
        }

        # Bound the wait so a stalled TTS call cannot hang the request forever.
        response = requests.post(
            tts_url, headers=headers, json=payload, timeout=30
        )
        if response.status_code != 200:
            raise RuntimeError("TTS API error")

        # The endpoint answers with JSON whose "audioContent" field holds the
        # audio as base64 text; decode it instead of writing the JSON body.
        audio_bytes = base64.b64decode(response.json()["audioContent"])

        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_out:
            tmp_out.write(audio_bytes)
            converted_audio_path = tmp_out.name

        return jsonify({
            "status": "success",
            "converted_audio_url": f"/get_audio/{os.path.basename(converted_audio_path)}",
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500

    finally:
        # Remove the upload on every path; previously a failed transcription
        # skipped the unlink and left the file behind.
        if upload_path is not None and os.path.exists(upload_path):
            os.unlink(upload_path)
|
|
|
|
|
@app.route("/get_audio/<filename>", methods=["GET"])
def get_audio(filename: str):
    """Serve a converted MP3 file from the system temporary directory.

    Args:
        filename: bare file name of the MP3 (no directory components).

    Returns:
        The file contents with MIME type ``audio/mpeg`` on success;
        ``{"error": "File not found"}`` with status 404 when the name is not
        a plain ``*.mp3`` file name or no such file exists;
        ``{"error": ...}`` with status 500 on any other failure.
    """
    try:
        # Only a bare "*.mp3" name is accepted, so the route cannot be used
        # to read other files that happen to live in the temp directory.
        is_bare_name = os.path.basename(filename) == filename
        if not is_bare_name or not filename.lower().endswith(".mp3"):
            return jsonify({"error": "File not found"}), 404

        filepath = os.path.join(tempfile.gettempdir(), filename)
        if not os.path.isfile(filepath):
            return jsonify({"error": "File not found"}), 404

        # "audio/mpeg" is the registered media type for MP3 audio.
        return send_file(filepath, mimetype="audio/mpeg")

    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
def get_locale_for_accent(accent: str) -> str:
    """Return the TTS locale code that corresponds to *accent*.

    Accent identifiers that are not in the table fall back to ``"en-US"``.
    """
    fallback_locale = "en-US"
    locales_by_accent = dict(
        american_general="en-US",
        british_rp="en-GB",
        australian_general="en-AU",
        indian_general="en-IN",
    )
    try:
        return locales_by_accent[accent]
    except KeyError:
        return fallback_locale
|
|
|
|
|
if __name__ == "__main__":
    # The interactive debugger lets anyone who can reach the server run
    # arbitrary code, and this server listens on all interfaces. Debug mode
    # is therefore off unless explicitly requested via FLASK_DEBUG.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, host="0.0.0.0", port=5000)
|
|
```
|
|
|
|
|
These files provide:

1. `requirements.txt` - All necessary Python libraries for the project
2. `app.py` - A Flask API server that handles:
   - Accent detection
   - Audio conversion
   - Text-to-speech synthesis
   - File serving
|
|
|
|
|
Note that in a production environment, you would also need to:

- Use proper authentication
- Set up proper error handling
- Implement actual ML models for accent detection/conversion
- Use proper storage for audio files
- Add rate limiting
- Set up proper logging
|
|
___METADATA_START___ |
|
|
{"repoId":"sakshigpatil/accentify-the-global-tongue-twister","isNew":false,"userName":"sakshigpatil"} |
|
|
___METADATA_END___ |