sakshigpatil's picture
Improve my site: it does not identify the accent and does not convert it
51d9d90 verified
```python
import os
import re
import tempfile

import torch
import torchaudio
from gtts import gTTS
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
class AccentConverter:
    """Re-speak recorded English audio in a different regional accent.

    Pipeline: audio -> text (Wav2Vec2 CTC) -> vocabulary rewrite ->
    audio (gTTS with a regional voice).
    """

    # Checkpoint used for transcription, regardless of accent.
    _ASR_CHECKPOINT = "facebook/wav2vec2-large-960h-lv60-self"
    # Sample rate the recogniser is fed; other rates are resampled to this.
    _ASR_SAMPLE_RATE = 16000
    # Accent used when the requested or detected accent is not recognised.
    _DEFAULT_ACCENT = 'american'
    # Index i is the label for output class i of the accent classifier.
    _ACCENT_NAMES = ['american', 'british', 'australian', 'indian']
    # gTTS picks a regional English voice from the Google Translate
    # top-level domain (used together with lang='en'), not from a
    # region-suffixed language code such as 'en-gb'.
    _TTS_TLDS = {
        'american': 'com',
        'british': 'co.uk',
        'australian': 'com.au',
        'indian': 'co.in',
    }
    # Word-level substitutions keyed by "<source>_to_<target>".
    # Example rules for demonstration; extend with more pairs as needed.
    _VOCAB_RULES = {
        'american_to_british': [
            (r'\btruck\b', 'lorry'),
            (r'\belevator\b', 'lift'),
            (r'\bapartment\b', 'flat'),
        ],
        'british_to_american': [
            (r'\blorry\b', 'truck'),
            (r'\blift\b', 'elevator'),
            (r'\bflat\b', 'apartment'),
        ],
    }
    # Lazily loaded accent classifier; populated on first detect_accent call.
    _classifier = None

    def __init__(self):
        """Load the recogniser once and register it under every accent name.

        Every accent uses the same checkpoint, so the weights are loaded a
        single time and shared instead of being instantiated per accent.
        ``self.models[accent]['model']`` / ``['processor']`` stay available
        for each accent name.
        """
        model = Wav2Vec2ForCTC.from_pretrained(self._ASR_CHECKPOINT)
        processor = Wav2Vec2Processor.from_pretrained(self._ASR_CHECKPOINT)
        self.models = {
            accent: {'model': model, 'processor': processor}
            for accent in self._ACCENT_NAMES
        }

    @staticmethod
    def _conversion_label(source_accent, target_accent):
        """Return the display prefix that marks a converted transcript."""
        return f"[Converted from {source_accent} to {target_accent}] "

    @staticmethod
    def _to_mono(waveform):
        """Average the leading (channel) dimension away, if there is one."""
        if waveform.dim() > 1:
            return waveform.mean(dim=0)
        return waveform

    def load_audio(self, file_path):
        """Read an audio file and return ``(waveform, sample_rate)``."""
        waveform, sample_rate = torchaudio.load(file_path)
        return waveform, sample_rate

    def transcribe_audio(self, waveform, sample_rate, accent='american'):
        """Transcribe a waveform to text.

        Args:
            waveform: Audio tensor, either ``(samples,)`` or with a leading
                channel dimension; multi-channel audio is downmixed.
            sample_rate: Sample rate of ``waveform`` in Hz.
            accent: Key into ``self.models``; an unrecognised key falls back
                to the default accent instead of raising ``KeyError``.

        Returns:
            The decoded transcription string.
        """
        entry = self.models.get(accent)
        if entry is None:
            entry = self.models[self._DEFAULT_ACCENT]
        processor, model = entry['processor'], entry['model']

        # Downmix first: without it a stereo tensor is interpreted by the
        # processor as a batch of two separate utterances.
        mono = self._to_mono(waveform)
        if sample_rate != self._ASR_SAMPLE_RATE:
            resampler = torchaudio.transforms.Resample(
                sample_rate, self._ASR_SAMPLE_RATE
            )
            mono = resampler(mono)

        inputs = processor(
            mono.numpy(),
            sampling_rate=self._ASR_SAMPLE_RATE,
            return_tensors="pt",
        )
        with torch.no_grad():
            logits = model(inputs.input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        return processor.batch_decode(predicted_ids)[0]

    def text_to_speech(self, text, accent='american', output_file='output.mp3'):
        """Synthesise ``text`` into an MP3 spoken with the requested accent.

        Args:
            text: Text to speak.
            accent: One of the known accent names; anything else uses the
                default accent's voice.
            output_file: Path the MP3 is written to.

        Returns:
            ``output_file``.
        """
        tld = self._TTS_TLDS.get(accent, self._TTS_TLDS[self._DEFAULT_ACCENT])
        tts = gTTS(text=text, lang='en', tld=tld)
        tts.save(output_file)
        return output_file

    def detect_accent(self, audio_path):
        """Classify the accent of an audio file from MFCC statistics.

        Args:
            audio_path: Path of the audio file to analyse.

        Returns:
            One of the known accent names, or ``"unknown"`` when anything in
            the detection pipeline fails (the error is printed).
        """
        try:
            waveform, sample_rate = self.load_audio(audio_path)
            mono = self._to_mono(waveform)
            mfcc = torchaudio.transforms.MFCC(
                sample_rate=sample_rate,
                n_mfcc=13,
                melkwargs={'n_fft': 400, 'hop_length': 160, 'n_mels': 23},
            )(mono)
            # Per-coefficient mean and std over time, as a batch of one.
            features = torch.cat(
                [mfcc.mean(dim=-1), mfcc.std(dim=-1)]
            ).unsqueeze(0).float()

            if self._classifier is None:
                # SECURITY: torch.load unpickles the file; only ship a
                # classifier file from a trusted source.
                # NOTE(review): this expects a fully pickled module; recent
                # torch releases may require weights_only=False - confirm.
                classifier = torch.load('models/accent_classifier.pt')
                classifier.eval()
                # Cache so the file is not re-read on every request.
                self._classifier = classifier

            with torch.no_grad():
                outputs = self._classifier(features)
            index = int(torch.argmax(outputs, dim=1).item())
            return self._ACCENT_NAMES[index]
        except Exception as e:
            print(f"Accent detection error: {e}")
            return "unknown"

    def convert_accent(self, input_file, source_accent, target_accent):
        """Convert a recording to the target accent.

        Args:
            input_file: Path of the source audio file.
            source_accent: Accent name, or ``"auto"`` to detect it.
            target_accent: Accent name for the synthesised output.

        Returns:
            ``(output_file, converted_text)``: the path of a new temporary
            MP3 and the labelled, rewritten transcript.

        Raises:
            ValueError: If no speech could be transcribed from the input.
        """
        waveform, sample_rate = self.load_audio(input_file)

        if source_accent == "auto":
            source_accent = self.detect_accent(input_file)
            print(f"Detected source accent: {source_accent}")
        if source_accent not in self.models:
            # Detection reports failure as "unknown"; recover with the
            # default recogniser rather than failing on the lookup.
            print(
                f"Unrecognised source accent '{source_accent}'; "
                f"using '{self._DEFAULT_ACCENT}'"
            )
            source_accent = self._DEFAULT_ACCENT

        transcription = self.transcribe_audio(waveform, sample_rate, source_accent)
        converted_text = self.apply_accent_rules(
            transcription, source_accent, target_accent
        )

        # The label is for display only; it must not be read out loud.
        label = self._conversion_label(source_accent, target_accent)
        spoken_text = converted_text
        if spoken_text.startswith(label):
            spoken_text = spoken_text[len(label):]
        if not spoken_text.strip():
            raise ValueError("No speech could be transcribed from the input audio")

        # Reserve a path, then close the handle before gTTS writes to it.
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
            output_file = tmp.name
        try:
            self.text_to_speech(spoken_text, target_accent, output_file)
        except Exception:
            # Do not leave an empty temp file behind when synthesis fails.
            if os.path.exists(output_file):
                os.remove(output_file)
            raise
        return output_file, converted_text

    def apply_accent_rules(self, text, source_accent, target_accent):
        """Apply accent-specific vocabulary substitutions to ``text``.

        Args:
            text: Transcript to rewrite.
            source_accent: Accent the transcript was spoken in.
            target_accent: Accent to rewrite the vocabulary for.

        Returns:
            ``text`` with every matching rule applied (case-insensitive,
            whole words only), prefixed with
            ``"[Converted from <source> to <target>] "``. Accent pairs
            without rules leave the text itself unchanged.
        """
        conversion_key = f"{source_accent}_to_{target_accent}"
        for pattern, replacement in self._VOCAB_RULES.get(conversion_key, ()):
            text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
        return self._conversion_label(source_accent, target_accent) + text
```