sakshigpatil's picture
Remove extra target accents so that only American, British, Indian, and Australian are offered
edf056d verified
```python
import os
import tempfile
import wave
from typing import Dict, Optional, Tuple

import librosa
import numpy as np
import openai
import requests
import soundfile as sf
import torch
from dotenv import load_dotenv
from flask import Flask, jsonify, request, send_file
from flask_cors import CORS
from pydub import AudioSegment
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
# Load environment variables (OPENAI_API_KEY, GOOGLE_TTS_API_KEY) from a .env file
load_dotenv()

app = Flask(__name__)
CORS(app)  # allow cross-origin requests from the browser frontend

# Initialize services
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load accent models (placeholder - in a real app you'd have actual models)
# NOTE(review): values are None stubs; nothing in this file loads or reads them yet.
ACCENT_MODELS = {
    "american_general": None,
    "british_rp": None,
    # Add other accent models here
}
@app.route("/detect_accent", methods=["POST"])
def detect_accent():
    """Detect the accent from an uploaded audio file.

    Expects a multipart form upload under the "audio" key.
    Returns JSON {"accent": str, "confidence": float} on success,
    or {"error": str} with status 400/500 on failure.

    NOTE: detection is a stub — a random accent is returned. A real
    implementation would run a model such as wav2vec2 on the clip.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400
    audio_file = request.files["audio"]
    tmp_path = None
    try:
        # Save temporarily for processing
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
            audio_file.save(tmp_path)
        # For demo purposes, we just return a random accent
        accents = ["american_general", "british_rp", "australian_general"]
        detected_accent = np.random.choice(accents)
        return jsonify({
            # cast numpy scalars (np.str_ / np.float64) to plain Python
            # types so JSON serialization cannot choke on them
            "accent": str(detected_accent),
            "confidence": float(np.random.uniform(0.7, 0.95))
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Original leaked the temp file whenever an exception fired
        # before os.unlink; always clean it up here instead.
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@app.route("/convert_accent", methods=["POST"])
def convert_accent():
    """Convert an uploaded audio clip to a target accent.

    Form fields:
        audio         -- required multipart audio file
        target_accent -- accent identifier (default "american_general")

    Pipeline: Whisper transcription -> (stub) text transformation ->
    Google TTS synthesis. Returns JSON containing a URL served by
    /get_audio/<filename>, or {"error": str} with status 400/500.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400
    target_accent = request.form.get("target_accent", "american_general")
    audio_file = request.files["audio"]
    tmp_path = None
    try:
        # 1. Transcribe the audio using OpenAI Whisper
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
            audio_file.save(tmp_path)
        with open(tmp_path, "rb") as f:
            transcript = openai.Audio.transcribe("whisper-1", f)
        text = transcript["text"]

        # 2. Convert text to target accent (simplified for demo).
        # A real app would apply accent-specific text transformation here.
        converted_text = f"[Converted to {target_accent}] {text}"

        # 3. Synthesize speech with the target accent via Google TTS
        # (requires GOOGLE_TTS_API_KEY in production)
        tts_url = "https://texttospeech.googleapis.com/v1/text:synthesize"
        headers = {
            "Authorization": f"Bearer {os.getenv('GOOGLE_TTS_API_KEY')}",
            "Content-Type": "application/json"
        }
        payload = {
            "input": {"text": converted_text},
            "voice": {"languageCode": get_locale_for_accent(target_accent)},
            "audioConfig": {"audioEncoding": "MP3"}
        }
        response = requests.post(tts_url, headers=headers, json=payload)
        if response.status_code != 200:
            # Surface the HTTP status so failures are diagnosable
            raise Exception(f"TTS API error (status {response.status_code})")

        # Save the converted audio. Deliberately NOT deleted here --
        # it is served later by /get_audio/<filename>.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_out:
            tmp_out.write(response.content)
            converted_audio_path = tmp_out.name

        return jsonify({
            "status": "success",
            "converted_audio_url": f"/get_audio/{os.path.basename(converted_audio_path)}"
        })
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Original only unlinked the input temp file on the success path,
        # leaking it on any exception; clean up unconditionally.
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
@app.route("/get_audio/<filename>", methods=["GET"])
def get_audio(filename: str):
    """Serve a previously converted audio file from the temp directory.

    Security: rejects any filename containing path components so a
    crafted "../" value cannot read files outside tempfile.gettempdir().
    (The original also called send_file without importing it, so every
    request died with a NameError masked as a generic 500.)
    """
    try:
        # Strip path components; mismatch means a traversal attempt
        if os.path.basename(filename) != filename:
            return jsonify({"error": "Invalid filename"}), 400
        filepath = os.path.join(tempfile.gettempdir(), filename)
        if not os.path.exists(filepath):
            return jsonify({"error": "File not found"}), 404
        # send_file is imported from flask at the top of the file
        return send_file(filepath, mimetype="audio/mp3")
    except Exception as e:
        return jsonify({"error": str(e)}), 500
def get_locale_for_accent(accent: str) -> str:
    """Return the TTS locale code for *accent*.

    Unknown accent identifiers fall back to US English ("en-US").
    """
    locales = {
        "american_general": "en-US",
        "british_rp": "en-GB",
        "australian_general": "en-AU",
        "indian_general": "en-IN",
    }
    try:
        return locales[accent]
    except KeyError:
        return "en-US"
if __name__ == "__main__":
    # Development server only: debug=True enables the interactive debugger
    # and host="0.0.0.0" binds all interfaces — do not expose this in
    # production; use a WSGI server (gunicorn/uwsgi) instead.
    app.run(debug=True, host="0.0.0.0", port=5000)
```
These files provide:
1. `requirements.txt` - All necessary Python libraries for the project
2. `app.py` - A Flask API server that handles:
- Accent detection
- Audio conversion
- Text-to-speech synthesis
- File serving
Note that in a production environment, you would:
- Use proper authentication
- Set up proper error handling
- Implement actual ML models for accent detection/conversion
- Use proper storage for audio files
- Add rate limiting
- Set up proper logging
___METADATA_START___
{"repoId":"sakshigpatil/accentify-the-global-tongue-twister","isNew":false,"userName":"sakshigpatil"}
___METADATA_END___