sachin7777777 committed
Commit 43e9a81 · verified · 1 Parent(s): b1b1487

Update app.py

Files changed (1):
  1. app.py +38 -24

app.py CHANGED
@@ -1,21 +1,24 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import pipeline, Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
+import torch
 import pandas as pd
 import plotly.express as px
+import soundfile as sf
 
 # ------------------------------
 # Load pretrained models
 # ------------------------------
+# Text classifier
 text_classifier = pipeline(
     "text-classification",
     model="j-hartmann/emotion-english-distilroberta-base",
     top_k=None  # returns all scores
 )
 
-audio_classifier = pipeline(
-    "audio-classification",
-    model="Dpngtm/wav2vec2-emotion-recognition"
-)
+# Audio classifier (Wav2Vec2)
+audio_model_name = "Dpngtm/wav2vec2-emotion-recognition"
+audio_processor = Wav2Vec2Processor.from_pretrained(audio_model_name)
+audio_model = Wav2Vec2ForSequenceClassification.from_pretrained(audio_model_name)
 
 # ------------------------------
 # Map emotion to emoji
@@ -27,11 +30,7 @@ EMOJI_MAP = {
     "joy": "😄",
     "neutral": "😐",
     "sadness": "😒",
-    "surprise": "😲",
-    "hap": "😄",  # for audio model
-    "neu": "😐",
-    "sad": "😒",
-    "ang": "😑"
+    "surprise": "😲"
 }
 
 # ------------------------------
@@ -62,7 +61,7 @@ def fuse_predictions(text_preds=None, audio_preds=None, w_text=0.5, w_audio=0.5)
     return {"fused_label": best[0], "fused_score": round(best[1], 3), "all_scores": scores}
 
 # ------------------------------
-# Create bar chart
+# Bar chart function
 # ------------------------------
 def make_bar_chart(scores_dict, title="Emotion Scores"):
     df = pd.DataFrame({
@@ -77,22 +76,37 @@ def make_bar_chart(scores_dict, title="Emotion Scores"):
     return fig
 
 # ------------------------------
-# Prediction function
+# Audio prediction helper
+# ------------------------------
+def predict_audio(audio_file):
+    speech, sr = sf.read(audio_file)
+    inputs = audio_processor(speech, sampling_rate=sr, return_tensors="pt", padding=True)
+    with torch.no_grad():
+        logits = audio_model(**inputs).logits
+    probs = torch.nn.functional.softmax(logits, dim=-1).squeeze().tolist()
+    labels = [audio_model.config.id2label[i] for i in range(len(probs))]
+    return [{"label": l, "score": s} for l, s in zip(labels, probs)]
+
+# ------------------------------
+# Gradio prediction function
 # ------------------------------
 def predict(text, audio, w_text, w_audio):
     text_preds, audio_preds = None, None
+
     if text:
-        text_preds = text_classifier(text)[0]
+        text_preds = text_classifier(text)
     if audio:
-        audio_preds = audio_classifier(audio)
+        audio_preds = predict_audio(audio)
+
     fused = fuse_predictions(text_preds, audio_preds, w_text, w_audio)
 
-    # Display final predicted emotion with emoji
+    # Final emotion with animated emoji
    label = fused['fused_label']
-    emoji = EMOJI_MAP.get(label, "")
-    final_emotion = f"### Final Predicted Emotion: {label.upper()} {emoji} (score: {fused['fused_score']})"
+    emoji = EMOJI_MAP.get(label, "❓")
+    final_emotion = f"### {label.upper()} {emoji} \nScore: {fused['fused_score']}"
+    animation = f"<div style='font-size:80px; animation: bounce 1s infinite;'>{emoji}</div>"
 
-    # Bar charts
+    # Charts
     charts = []
     if text_preds:
         charts.append(make_bar_chart({p['label']: p['score'] for p in text_preds}, "Text Emotion Scores"))
@@ -100,10 +114,10 @@ def predict(text, audio, w_text, w_audio):
         charts.append(make_bar_chart({p['label']: p['score'] for p in audio_preds}, "Audio Emotion Scores"))
     charts.append(make_bar_chart(fused['all_scores'], "Fused Emotion Scores"))
 
-    return final_emotion, charts
+    return final_emotion + animation, charts
 
 # ------------------------------
-# Build Gradio interface
+# Build Gradio app
 # ------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("## 🎭 Multimodal Emotion Classification (Text + Speech)")
@@ -112,13 +126,13 @@ with gr.Blocks() as demo:
         with gr.Column():
             txt = gr.Textbox(label="Text input", placeholder="Type something emotional...")
            aud = gr.Audio(type="filepath", label="Upload speech (wav/mp3)")
-            w1 = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Text weight (w_text)")
-            w2 = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Audio weight (w_audio)")
+            w1 = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Text weight")
+            w2 = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, label="Audio weight")
             btn = gr.Button("Predict")
         with gr.Column():
-            final_label = gr.Markdown(label="Predicted Emotion")
+            final_label = gr.HTML(label="Predicted Emotion")
             chart_output = gr.Plot(label="Emotion Scores")
 
-    btn.click(fn=predict, inputs=[txt, aud, w1, w2], outputs=[final_label, chart_output])
+    btn.click(fn=predict, inputs=[txt, aud, w1, w2], outputs=[final_label, chart_output]*3)
 
 demo.launch()
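
The body of fuse_predictions sits outside every hunk in this diff, so only its signature and return line are visible. As a reading aid, here is a minimal sketch consistent with that signature and with how predict calls it; this is an assumed reconstruction, not the committed code.

    # Assumed reconstruction of fuse_predictions (body not shown in this diff).
    # Late fusion: weighted sum of per-label scores from the two modalities.
    def fuse_predictions(text_preds=None, audio_preds=None, w_text=0.5, w_audio=0.5):
        scores = {}
        # Both inputs look like [{"label": ..., "score": ...}, ...]
        for preds, weight in ((text_preds, w_text), (audio_preds, w_audio)):
            if preds:
                for p in preds:
                    scores[p["label"]] = scores.get(p["label"], 0.0) + weight * p["score"]
        best = max(scores.items(), key=lambda kv: kv[1])  # highest fused score wins
        return {"fused_label": best[0], "fused_score": round(best[1], 3), "all_scores": scores}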
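
One side effect of the EMOJI_MAP hunk: the commit deletes the short audio labels ("hap", "neu", "sad", "ang") that the old audio pipeline produced, but the new predict_audio still returns whatever audio_model.config.id2label contains. If that checkpoint still emits the short codes, audio scores land in separate buckets from the text labels during fusion and the emoji lookup falls back to ❓. A small normalization step before fusing would reconcile the two label sets; the mapping below is inferred from the deleted entries, not part of the commit.

    # Assumed mapping from the audio model's short codes to the text labels,
    # inferred from the EMOJI_MAP entries this commit removes.
    AUDIO_TO_TEXT_LABEL = {"hap": "joy", "neu": "neutral", "sad": "sadness", "ang": "anger"}

    def normalize_audio_preds(audio_preds):
        # Rewrite each audio prediction's label into the text classifier's scheme
        return [{"label": AUDIO_TO_TEXT_LABEL.get(p["label"], p["label"]),
                 "score": p["score"]} for p in audio_preds]

    # Usage in predict(), before fusing:
    #     audio_preds = normalize_audio_preds(predict_audio(audio))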
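
Also note that predict_audio forwards the file's native sampling rate from sf.read to the processor, but Wav2Vec2 checkpoints are generally trained on 16 kHz mono audio and the processor does not resample by itself. If users upload 44.1 kHz or stereo files, downmixing and resampling first is the safer path; a sketch using librosa, an extra dependency not imported by the committed app.py:

    import soundfile as sf
    import librosa  # assumed extra dependency, not in the commit

    TARGET_SR = 16000  # wav2vec2-style models usually expect 16 kHz

    def load_audio_16k(audio_file):
        speech, sr = sf.read(audio_file)
        if speech.ndim > 1:   # stereo: average channels down to mono
            speech = speech.mean(axis=1)
        if sr != TARGET_SR:   # resample to the model's expected rate
            speech = librosa.resample(speech, orig_sr=sr, target_sr=TARGET_SR)
        return speech, TARGET_SR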
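
Finally, two output-wiring details in the last hunk are worth flagging: outputs=[final_label, chart_output]*3 expands to six components while predict returns only two values, and charts is a Python list handed to a single gr.Plot. One minimal way to make the shapes line up, given that predict always appends the fused chart last, is to show just that chart; the wrapper below is a hedged sketch, not part of the commit.

    # Hypothetical wrapper: return exactly one HTML string and one figure,
    # matching a two-component outputs list.
    def predict_fixed(text, audio, w_text, w_audio):
        html, charts = predict(text, audio, w_text, w_audio)
        return html, charts[-1]  # fused chart is appended last in predict()

    # Inside the Blocks context:
    #     btn.click(fn=predict_fixed, inputs=[txt, aud, w1, w2],
    #               outputs=[final_label, chart_output])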