Update app.py
app.py CHANGED
@@ -1,21 +1,24 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import pipeline, Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
+import torch
 import pandas as pd
 import plotly.express as px
+import soundfile as sf
 
 # ------------------------------
 # Load pretrained models
 # ------------------------------
+# Text classifier
 text_classifier = pipeline(
     "text-classification",
     model="j-hartmann/emotion-english-distilroberta-base",
     top_k=None  # returns all scores
 )
 
-
-
-
-)
+# Audio classifier (Wav2Vec2)
+audio_model_name = "Dpngtm/wav2vec2-emotion-recognition"
+audio_processor = Wav2Vec2Processor.from_pretrained(audio_model_name)
+audio_model = Wav2Vec2ForSequenceClassification.from_pretrained(audio_model_name)
 
 # ------------------------------
 # Map emotion to emoji
@@ -27,11 +30,7 @@ EMOJI_MAP = {
     "joy": "😄",
     "neutral": "😐",
     "sadness": "😢",
-    "surprise": "😲",
-    "hap": "😄",  # for audio model
-    "neu": "😐",
-    "sad": "😢",
-    "ang": "😡"
+    "surprise": "😲"
 }
 
 # ------------------------------
@@ -62,7 +61,7 @@ def fuse_predictions(text_preds=None, audio_preds=None, w_text=0.5, w_audio=0.5)
     return {"fused_label": best[0], "fused_score": round(best[1], 3), "all_scores": scores}
 
 # ------------------------------
-#
+# Bar chart function
 # ------------------------------
 def make_bar_chart(scores_dict, title="Emotion Scores"):
     df = pd.DataFrame({
@@ -77,22 +76,37 @@ def make_bar_chart(scores_dict, title="Emotion Scores"):
     return fig
 
 # ------------------------------
-#
+# Audio prediction helper
+# ------------------------------
+def predict_audio(audio_file):
+    # Read the waveform, then score it with the Wav2Vec2 classifier.
+    speech, sr = sf.read(audio_file)
+    inputs = audio_processor(speech, sampling_rate=sr, return_tensors="pt", padding=True)
+    with torch.no_grad():
+        logits = audio_model(**inputs).logits
+    # Turn logits into probabilities and pair each with its label name.
+    probs = torch.nn.functional.softmax(logits, dim=-1).squeeze().tolist()
+    labels = [audio_model.config.id2label[i] for i in range(len(probs))]
+    return [{"label": l, "score": s} for l, s in zip(labels, probs)]
+
+# ------------------------------
+# Gradio prediction function
 # ------------------------------
 def predict(text, audio, w_text, w_audio):
     text_preds, audio_preds = None, None
+
     if text:
-        text_preds = text_classifier(text)
+        text_preds = text_classifier(text)
     if audio:
-        audio_preds =
+        audio_preds = predict_audio(audio)
+
     fused = fuse_predictions(text_preds, audio_preds, w_text, w_audio)
 
-    #
+    # Final emotion with animated emoji
    label = fused['fused_label']
-    emoji = EMOJI_MAP.get(label, "")
-    final_emotion = f"###
+    emoji = EMOJI_MAP.get(label, "❓")
+    final_emotion = f"<h3>{label.upper()} {emoji}<br>Score: {fused['fused_score']}</h3>"
+    animation = f"<div style='font-size:80px; animation: bounce 1s infinite;'>{emoji}</div>"
 
-    #
+    # Charts
     charts = []
     if text_preds:
         charts.append(make_bar_chart({p['label']: p['score'] for p in text_preds}, "Text Emotion Scores"))
@@ -100,10 +114,10 @@ def predict(text, audio, w_text, w_audio):
         charts.append(make_bar_chart({p['label']: p['score'] for p in audio_preds}, "Audio Emotion Scores"))
     charts.append(make_bar_chart(fused['all_scores'], "Fused Emotion Scores"))
 
-    return final_emotion, charts
+    return final_emotion + animation, charts[-1]  # gr.Plot shows one figure; surface the fused scores
 
 # ------------------------------
-# Build Gradio
+# Build Gradio app
 # ------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("## 🎭 Multimodal Emotion Classification (Text + Speech)")
@@ -112,13 +126,13 @@ with gr.Blocks() as demo:
     with gr.Column():
         txt = gr.Textbox(label="Text input", placeholder="Type something emotional...")
         aud = gr.Audio(type="filepath", label="Upload speech (wav/mp3)")
-        w1 = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Text weight")
-        w2 = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Audio weight")
+        w1 = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Text weight")
+        w2 = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Audio weight")
         btn = gr.Button("Predict")
     with gr.Column():
-        final_label = gr.
+        final_label = gr.HTML(label="Predicted Emotion")
         chart_output = gr.Plot(label="Emotion Scores")
 
-    btn.click(fn=predict, inputs=[txt, aud, w1, w2], outputs=[final_label, chart_output])
+    btn.click(fn=predict, inputs=[txt, aud, w1, w2], outputs=[final_label, chart_output])
 
 demo.launch()
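For reference, with top_k=None the text pipeline returns a score for every emotion label on a single input string, which is the shape the chart and fusion code iterate over. Roughly (the scores here are invented for illustration):

```python
text_classifier("I can't believe this worked!")
# [{'label': 'surprise', 'score': 0.62}, {'label': 'joy', 'score': 0.31}, ...]
```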
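The body of fuse_predictions is untouched by this commit, so it only ever appears as hunk context. A minimal sketch consistent with its signature and return statement, assuming simple weighted late fusion (a reconstruction, not the committed code):

```python
def fuse_predictions(text_preds=None, audio_preds=None, w_text=0.5, w_audio=0.5):
    # Accumulate each modality's per-label scores, weighted by its slider value.
    scores = {}
    for preds, w in ((text_preds, w_text), (audio_preds, w_audio)):
        for p in preds or []:
            scores[p["label"]] = scores.get(p["label"], 0.0) + w * p["score"]
    best = max(scores.items(), key=lambda kv: kv[1])
    return {"fused_label": best[0], "fused_score": round(best[1], 3), "all_scores": scores}
```

Fusion keyed on label strings only merges scores when both models emit the same vocabulary. Dropping the abbreviated audio labels ("hap", "neu", "sad", "ang") from EMOJI_MAP is consistent with the new Wav2Vec2 checkpoint emitting full-word labels; if it does not, audio and text scores would never combine, and unmatched labels would fall back to the ❓ emoji.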
"surprise": "π²"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
}
|
| 35 |
|
| 36 |
# ------------------------------
|
|
|
|
| 61 |
return {"fused_label": best[0], "fused_score": round(best[1], 3), "all_scores": scores}
|
| 62 |
|
| 63 |
# ------------------------------
|
| 64 |
+
# Bar chart function
|
| 65 |
# ------------------------------
|
| 66 |
def make_bar_chart(scores_dict, title="Emotion Scores"):
|
| 67 |
df = pd.DataFrame({
|
|
|
|
| 76 |
return fig
|
| 77 |
|
| 78 |
# ------------------------------
|
| 79 |
+
# Audio prediction helper
|
| 80 |
+
# ------------------------------
|
| 81 |
+
def predict_audio(audio_file):
|
| 82 |
+
speech, sr = sf.read(audio_file)
|
| 83 |
+
inputs = audio_processor(speech, sampling_rate=sr, return_tensors="pt", padding=True)
|
| 84 |
+
with torch.no_grad():
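One caveat in predict_audio: Wav2Vec2 checkpoints expect 16 kHz mono input, while sf.read returns whatever sample rate and channel count the file has. A defensive preprocessing step could look like the sketch below (torchaudio is an assumed extra dependency, and to_16k_mono is a hypothetical helper):

```python
import torch
import torchaudio.functional as AF

def to_16k_mono(speech, sr, target_sr=16000):
    # soundfile returns a (frames, channels) array for multi-channel files.
    wav = torch.as_tensor(speech, dtype=torch.float32)
    if wav.ndim == 2:
        wav = wav.mean(dim=1)  # downmix to mono
    if sr != target_sr:
        wav = AF.resample(wav, orig_freq=sr, new_freq=target_sr)
    return wav.numpy(), target_sr
```

predict_audio could call this right after sf.read and pass the resampled waveform (with sampling_rate=16000) to the processor.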
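Finally, the animated div relies on a CSS animation named bounce, but no @keyframes rule is defined anywhere in the app, so the emoji will sit still. One way to supply it is through the css argument of gr.Blocks (the keyframe values here are an illustrative guess):

```python
BOUNCE_CSS = """
@keyframes bounce {
  0%, 100% { transform: translateY(0); }
  50%      { transform: translateY(-20px); }
}
"""

with gr.Blocks(css=BOUNCE_CSS) as demo:
    ...  # same layout as in the diff
```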