```python
import re
import tempfile

import torch
import torchaudio
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
from gtts import gTTS


class AccentConverter:
    def __init__(self):
        # Load a model/processor pair per accent.
        # Note: every accent currently points at the same English checkpoint;
        # replace these with accent-specific fine-tuned checkpoints when available.
        checkpoint = "facebook/wav2vec2-large-960h-lv60-self"
        self.models = {
            accent: {
                'model': Wav2Vec2ForCTC.from_pretrained(checkpoint),
                'processor': Wav2Vec2Processor.from_pretrained(checkpoint),
            }
            for accent in ('american', 'british', 'australian', 'indian')
        }

    def load_audio(self, file_path):
        waveform, sample_rate = torchaudio.load(file_path)
        return waveform, sample_rate

    def transcribe_audio(self, waveform, sample_rate, accent='american'):
        # Resample to the 16 kHz rate wav2vec2 expects
        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(sample_rate, 16000)
            waveform = resampler(waveform)

        # Downmix multi-channel audio to mono; wav2vec2 expects a 1-D waveform
        if waveform.dim() > 1 and waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Process with wav2vec2
        inputs = self.models[accent]['processor'](
            waveform.squeeze(0),
            sampling_rate=16000,
            return_tensors="pt"
        )

        with torch.no_grad():
            logits = self.models[accent]['model'](inputs.input_values).logits

        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = self.models[accent]['processor'].batch_decode(predicted_ids)[0]
        return transcription

    def text_to_speech(self, text, accent='american', output_file='output.mp3'):
        # gTTS approximates English accents via the Google Translate top-level
        # domain (its `lang` parameter only accepts bare language codes like 'en')
        accent_map = {
            'american': 'com',
            'british': 'co.uk',
            'australian': 'com.au',
            'indian': 'co.in'
        }

        tts = gTTS(text=text, lang='en', tld=accent_map.get(accent, 'com'))
        tts.save(output_file)
        return output_file

    def detect_accent(self, audio_path):
        """Improved accent detection using acoustic features and ML"""
        try:
            waveform, sample_rate = self.load_audio(audio_path)

            # Extract MFCC features using torchaudio (assumes mono audio)
            mfcc = torchaudio.transforms.MFCC(
                sample_rate=sample_rate,
                n_mfcc=13,
                melkwargs={'n_fft': 400, 'hop_length': 160, 'n_mels': 23}
            )(waveform)

            # Summarize each coefficient over time (mean, std) -> 26-dim feature vector
            features = torch.cat([
                mfcc.mean(dim=-1),
                mfcc.std(dim=-1)
            ], dim=1).squeeze().numpy()

            # Load our trained accent classifier model
            classifier = torch.load('models/accent_classifier.pt')
            classifier.eval()

            # Predict accent
            with torch.no_grad():
                inputs = torch.from_numpy(features).unsqueeze(0).float()
                outputs = classifier(inputs)
                _, predicted = torch.max(outputs, 1)

            # Map prediction index to accent names
            accent_names = ['american', 'british', 'australian', 'indian']
            detected_accent = accent_names[predicted.item()]

            return detected_accent
        except Exception as e:
            print(f"Accent detection error: {e}")
            return "unknown"

    def convert_accent(self, input_file, source_accent, target_accent):
        waveform, sample_rate = self.load_audio(input_file)

        # Detect the source accent if not provided
        if source_accent == "auto":
            source_accent = self.detect_accent(input_file)
            print(f"Detected source accent: {source_accent}")

        # Transcribe with the source-accent model
        transcription = self.transcribe_audio(waveform, sample_rate, source_accent)

        # Apply accent conversion rules
        converted_text = self.apply_accent_rules(transcription, source_accent, target_accent)

        # Convert to speech with the target accent
        with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
            output_file = tmp.name
        self.text_to_speech(converted_text, target_accent, output_file)

        return output_file, converted_text

    def apply_accent_rules(self, text, source_accent, target_accent):
        """Apply accent-specific transformation rules to text"""
        # Example rules for demonstration; a real system would be far more sophisticated
        rules = {
            'american_to_british': [
                (r'\btruck\b', 'lorry'),
                (r'\belevator\b', 'lift'),
                (r'\bapartment\b', 'flat')
            ],
            'british_to_american': [
                (r'\blorry\b', 'truck'),
                (r'\blift\b', 'elevator'),
                (r'\bflat\b', 'apartment')
            ],
            # Add more conversion rules here
        }

        conversion_key = f"{source_accent}_to_{target_accent}"
        if conversion_key in rules:
            for pattern, replacement in rules[conversion_key]:
                text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)

        return f"[Converted from {source_accent} to {target_accent}] {text}"
```
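
`detect_accent` expects a trained classifier saved at `models/accent_classifier.pt`, which is not defined above. Below is a minimal sketch of a compatible model, assuming a small feed-forward network over the 26-dimensional feature vector (13 MFCC means plus 13 standard deviations) and the four accent classes; the architecture and the training step are placeholders, not the actual classifier.

```python
import torch
import torch.nn as nn

# Hypothetical classifier compatible with detect_accent():
# input  = 26 features (13 MFCC means + 13 MFCC standard deviations)
# output = 4 logits, one per accent in ['american', 'british', 'australian', 'indian']
accent_classifier = nn.Sequential(
    nn.Linear(26, 64),
    nn.ReLU(),
    nn.Linear(64, 4),
)

# After training on labeled recordings (not shown here), save the whole module
# so detect_accent() can restore it with torch.load():
torch.save(accent_classifier, 'models/accent_classifier.pt')
```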
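
With a classifier checkpoint in place, end-to-end use of the class might look like the sketch below. The input path is a placeholder, and the output quality depends entirely on the rule set and the gTTS voice.

```python
converter = AccentConverter()

# Convert a recording to British vocabulary and voice;
# "auto" triggers detect_accent() on the input file.
output_path, converted_text = converter.convert_accent(
    "samples/speaker.wav",   # hypothetical input file
    source_accent="auto",
    target_accent="british",
)

print(f"Converted text: {converted_text}")
print(f"Synthesized speech saved to: {output_path}")
```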