Spaces:

sakshigpatil
/

accentify-the-global-tongue-twister

Running

App Files Files Community

accentify-the-global-tongue-twister / backend /accent_conversion.py

sakshigpatil

Improve my site: it does not identify the accent and does not convert it

51d9d90 verified 10 days ago

raw

history blame contribute delete

5.96 kB

	```python
	import os
	import re
	import tempfile

	import torch
	import torchaudio
	from gtts import gTTS
	from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

	class AccentConverter:
	    """Speech-to-speech accent conversion pipeline.

	    The pipeline is: load audio -> (optionally) detect the source accent ->
	    transcribe with wav2vec2 -> rewrite accent-specific vocabulary ->
	    synthesize the result with gTTS using the target accent's voice.

	    Attributes:
	        models: Maps each supported accent name to a dict with the keys
	            ``'model'`` and ``'processor'``. Every accent currently points
	            at the same wav2vec2 checkpoint, so the objects are shared.
	    """

	    # Order matters: the index of each name is the class index that the
	    # accent classifier is expected to emit.
	    SUPPORTED_ACCENTS = ('american', 'british', 'australian', 'indian')
	    DEFAULT_ACCENT = 'american'
	    ASR_CHECKPOINT = "facebook/wav2vec2-large-960h-lv60-self"
	    ASR_SAMPLE_RATE = 16000
	    CLASSIFIER_PATH = 'models/accent_classifier.pt'

	    # gTTS selects a regional English voice through the Google Translate
	    # host (``tld``), not through the language code. The old 'en-us' /
	    # 'en-gb' / ... codes are deprecated and all collapse to plain 'en',
	    # which made every target accent sound identical.
	    _TTS_TLDS = {
	        'american': 'com',
	        'british': 'co.uk',
	        'australian': 'com.au',
	        'indian': 'co.in',
	    }

	    # Example vocabulary rules, compiled once instead of on every call.
	    _VOCABULARY_RULES = {
	        'american_to_british': [
	            (re.compile(r'\btruck\b', re.IGNORECASE), 'lorry'),
	            (re.compile(r'\belevator\b', re.IGNORECASE), 'lift'),
	            (re.compile(r'\bapartment\b', re.IGNORECASE), 'flat'),
	        ],
	        'british_to_american': [
	            (re.compile(r'\blorry\b', re.IGNORECASE), 'truck'),
	            (re.compile(r'\blift\b', re.IGNORECASE), 'elevator'),
	            (re.compile(r'\bflat\b', re.IGNORECASE), 'apartment'),
	        ],
	        # Add more conversion rules here
	    }

	    # Lazily loaded accent classifier; see _get_classifier().
	    _classifier = None

	    def __init__(self):
	        """Load the speech recognition model and register it per accent."""
	        # All accents use the same checkpoint, so load it once and share
	        # the instances rather than holding four copies in memory.
	        model = Wav2Vec2ForCTC.from_pretrained(self.ASR_CHECKPOINT)
	        processor = Wav2Vec2Processor.from_pretrained(self.ASR_CHECKPOINT)
	        self.models = {
	            accent: {'model': model, 'processor': processor}
	            for accent in self.SUPPORTED_ACCENTS
	        }
	        self._classifier = None

	    @staticmethod
	    def _to_mono(waveform):
	        """Average the channel axis of a (channels, time) tensor to 1-D."""
	        if waveform.dim() > 1:
	            return waveform.mean(dim=0)
	        return waveform

	    def _get_classifier(self):
	        """Return the accent classifier, loading it on first use."""
	        if self._classifier is None:
	            # SECURITY: this unpickles a complete model object, which can
	            # execute arbitrary code. Only ever load a trusted local file.
	            # weights_only=False is spelled out because newer PyTorch
	            # releases default to True and would refuse a pickled module.
	            classifier = torch.load(
	                self.CLASSIFIER_PATH,
	                map_location='cpu',
	                weights_only=False,
	            )
	            classifier.eval()
	            self._classifier = classifier
	        return self._classifier

	    def load_audio(self, file_path):
	        """Read an audio file.

	        Args:
	            file_path: Path of the audio file to read.

	        Returns:
	            The ``(waveform, sample_rate)`` pair from ``torchaudio.load``.
	        """
	        waveform, sample_rate = torchaudio.load(file_path)
	        return waveform, sample_rate

	    def transcribe_audio(self, waveform, sample_rate, accent='american'):
	        """Transcribe a waveform to text with the model for ``accent``.

	        Args:
	            waveform: Audio tensor as returned by ``load_audio``.
	            sample_rate: Sampling rate of ``waveform`` in Hz.
	            accent: Key into ``self.models``. Unknown values (including the
	                ``"unknown"`` result of ``detect_accent``) fall back to the
	                default accent instead of raising ``KeyError``.

	        Returns:
	            The decoded transcription string.
	        """
	        if accent not in self.models:
	            accent = self.DEFAULT_ACCENT
	        bundle = self.models[accent]

	        # Downmix first: a stereo (2, time) tensor would otherwise be
	        # interpreted as a batch of two separate utterances.
	        mono = self._to_mono(waveform)

	        # Resample if needed
	        if sample_rate != self.ASR_SAMPLE_RATE:
	            resampler = torchaudio.transforms.Resample(
	                sample_rate, self.ASR_SAMPLE_RATE
	            )
	            mono = resampler(mono)

	        # Process with wav2vec2
	        inputs = bundle['processor'](
	            mono.numpy(),
	            sampling_rate=self.ASR_SAMPLE_RATE,
	            return_tensors="pt",
	        )

	        with torch.no_grad():
	            logits = bundle['model'](inputs.input_values).logits

	        predicted_ids = torch.argmax(logits, dim=-1)
	        return bundle['processor'].batch_decode(predicted_ids)[0]

	    def text_to_speech(self, text, accent='american', output_file='output.mp3'):
	        """Synthesize ``text`` to an MP3 file in the requested accent.

	        Args:
	            text: Text to speak.
	            accent: One of the supported accent names; anything else uses
	                the American voice.
	            output_file: Destination path of the MP3 file.

	        Returns:
	            ``output_file``.
	        """
	        tld = self._TTS_TLDS.get(accent, self._TTS_TLDS[self.DEFAULT_ACCENT])
	        tts = gTTS(text=text, lang='en', tld=tld)
	        tts.save(output_file)
	        return output_file

	    def detect_accent(self, audio_path):
	        """Classify the accent of an audio file from MFCC statistics.

	        Args:
	            audio_path: Path of the audio file to analyse.

	        Returns:
	            One of the supported accent names, or ``"unknown"`` when
	            detection fails for any reason (the error is printed).
	        """
	        try:
	            waveform, sample_rate = self.load_audio(audio_path)

	            # Collapse to one channel so the feature vector always has the
	            # same length regardless of the file's channel count.
	            mono = self._to_mono(waveform).unsqueeze(0)

	            # NOTE(review): features are extracted at the file's native
	            # sample rate, as before. The 400/160 window suggests 16 kHz
	            # audio - confirm against how the classifier was trained.
	            mfcc = torchaudio.transforms.MFCC(
	                sample_rate=sample_rate,
	                n_mfcc=13,
	                melkwargs={'n_fft': 400, 'hop_length': 160, 'n_mels': 23},
	            )(mono)

	            # Mean and std of each coefficient over time: (1, 26).
	            features = torch.cat(
	                [mfcc.mean(dim=-1), mfcc.std(dim=-1)], dim=1
	            ).float()

	            # Predict accent
	            classifier = self._get_classifier()
	            with torch.no_grad():
	                outputs = classifier(features)
	            predicted = int(torch.argmax(outputs, dim=1).item())

	            # Map prediction index to accent names
	            accent_names = ['american', 'british', 'australian', 'indian']
	            return accent_names[predicted]

	        except Exception as e:
	            # Deliberately best-effort: callers receive "unknown".
	            print(f"Accent detection error: {e}")
	            return "unknown"

	    def convert_accent(self, input_file, source_accent, target_accent):
	        """Convert the speech in ``input_file`` to ``target_accent``.

	        Args:
	            input_file: Path of the source audio file.
	            source_accent: Accent of the speaker, or ``"auto"`` to detect it.
	            target_accent: Accent to synthesize the output in.

	        Returns:
	            ``(output_file, converted_text)``: the path of a temporary MP3
	            file (the caller is responsible for deleting it) and the
	            annotated, converted transcription.
	        """
	        waveform, sample_rate = self.load_audio(input_file)

	        # First detect source accent if not provided
	        if source_accent == "auto":
	            source_accent = self.detect_accent(input_file)
	            print(f"Detected source accent: {source_accent}")

	        # Transcribe with source accent model
	        transcription = self.transcribe_audio(waveform, sample_rate, source_accent)

	        # Apply accent conversion rules
	        converted_text = self.apply_accent_rules(
	            transcription, source_accent, target_accent
	        )

	        # The "[Converted from ...]" banner is an annotation for display;
	        # do not have the synthesizer read it aloud. If nothing was
	        # transcribed, keep the full text so gTTS still has input.
	        banner = f"[Converted from {source_accent} to {target_accent}] "
	        spoken_text = converted_text
	        if converted_text.startswith(banner):
	            without_banner = converted_text[len(banner):]
	            if without_banner.strip():
	                spoken_text = without_banner

	        # Reserve a temp path, then let gTTS write to it.
	        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
	            output_file = tmp.name

	        try:
	            self.text_to_speech(spoken_text, target_accent, output_file)
	        except Exception:
	            # Do not leave an empty temp file behind when synthesis fails.
	            if os.path.exists(output_file):
	                os.remove(output_file)
	            raise
	        return output_file, converted_text

	    def apply_accent_rules(self, text, source_accent, target_accent):
	        """Apply accent-specific transformation rules to text.

	        Args:
	            text: Transcription to rewrite.
	            source_accent: Accent the text was spoken in.
	            target_accent: Accent to rewrite the vocabulary for.

	        Returns:
	            The rewritten text prefixed with a
	            ``[Converted from <source> to <target>]`` banner. Accent pairs
	            without rules leave the text itself unchanged.
	        """
	        # Example rules for demonstration - in reality this would be more
	        # sophisticated.
	        conversion_key = f"{source_accent}_to_{target_accent}"
	        for pattern, replacement in self._VOCABULARY_RULES.get(conversion_key, ()):
	            text = pattern.sub(replacement, text)

	        return f"[Converted from {source_accent} to {target_accent}] {text}"
	```