# openWakeWord / app.py
import gradio as gr
import collections
import scipy.signal
import numpy as np
import plotly.graph_objects as go
from functools import partial

import openwakeword
from openwakeword.model import Model

# Download models first
openwakeword.utils.download_models()

# Load openWakeWord models
model = Model(inference_framework="onnx")
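
# Note: with no wake word list, Model() loads every pre-trained model bundled with
# openWakeWord. As a hedged sketch (verify the argument name against the installed
# openwakeword version), a smaller subset could be loaded instead, e.g.:
#
#   model = Model(wakeword_models=["hey_jarvis"], inference_framework="onnx")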

# Define function to process streaming audio and compute wake word scores
def process_audio(audio, state=collections.defaultdict(partial(collections.deque, maxlen=10))):
    # Resample audio to 16 kHz if needed (openWakeWord expects 16 kHz mono audio)
    if audio[0] != 16000:
        data = scipy.signal.resample(audio[1], int(float(audio[1].shape[0]) / audio[0] * 16000))
    else:
        data = audio[1]

    # Get predictions on successive 80 ms frames (1280 samples at 16 kHz)
    for i in range(0, data.shape[0], 1280):
        if data.ndim == 2:
            chunk = data[i:i + 1280][:, 0]  # stereo input: just get one channel of audio
        else:
            chunk = data[i:i + 1280]
        if chunk.shape[0] == 1280:
            # predict() returns a dict of {model name: score}; keep the most recent scores per model
            prediction = model.predict(chunk)
            for key in prediction:
                state[key].append(prediction[key])
    # Create bar chart with average of recent predictions
    model_names = []
    scores = []
    for key in state.keys():
        if len(state[key]) > 0:
            model_names.append(key.replace('_', ' ').title())
            # Average last few frames for smoother display
            scores.append(np.mean(list(state[key])))

    # Sort by score for better visibility
    if len(scores) > 0:
        sorted_indices = np.argsort(scores)[::-1]
        model_names = [model_names[i] for i in sorted_indices]
        scores = [scores[i] for i in sorted_indices]
    # Create Plotly figure with horizontal bar chart
    fig = go.Figure()

    # Add horizontal bar trace
    fig.add_trace(go.Bar(
        y=model_names,
        x=scores,
        orientation='h',
        marker=dict(
            color=scores,
            colorscale='Blues',
            cmin=0,
            cmax=1,
            line=dict(color='rgba(58, 71, 80, 0.6)', width=1)
        ),
        text=[f'{score:.3f}' for score in scores],
        textposition='outside',
        hovertemplate='<b>%{y}</b><br>Score: %{x:.3f}<extra></extra>'
    ))
    # Update layout
    fig.update_layout(
        title={
            'text': 'Real-time Wake Word Detection',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 18, 'color': '#2c3e50'}
        },
        xaxis=dict(
            title='Detection Score',
            range=[0, 1.1],
            gridcolor='rgba(200, 200, 200, 0.3)',
            showgrid=True
        ),
        yaxis=dict(
            title='',
            autorange='reversed'  # keep highest scores at top
        ),
        height=500,
        margin=dict(l=150, r=50, t=80, b=50),
        plot_bgcolor='rgba(240, 242, 245, 0.5)',
        paper_bgcolor='white',
        showlegend=False
    )

    return fig, state
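
# Hedged smoke test (left commented out so only the Gradio app runs on the Space):
# gr.Audio(type="numpy") delivers a (sample_rate, samples) tuple, so one second of
# 16 kHz silence exercises the whole pipeline. The "_"-prefixed names below are
# illustrative locals, not part of the app.
#
#   _fig, _state = process_audio((16000, np.zeros(16000, dtype=np.int16)))
#   print({k: len(v) for k, v in _state.items()})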

# Create Gradio interface and launch
desc = """This is a demo of the pre-trained models included in the latest release
of the [openWakeWord](https://github.com/dscripka/openWakeWord) library.

Click on the "record from microphone" button below to start capturing.
The real-time scores from each model will be shown in the interactive bar chart (higher bars = stronger detection).
Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details).

**Try these phrases:**

- **alexa** - "alexa"
- **hey mycroft** - "hey mycroft"
- **hey jarvis** - "hey jarvis"
- **hey rhasspy** - "hey rhasspy"
- **weather** - "what's the weather", "tell me today's weather"
- **timer** - "set a timer for 1 minute", "create 1 hour alarm"
"""
gr_int = gr.Interface(
    title="openWakeWord Live Demo",
    description=desc,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
    fn=process_audio,
    inputs=[
        gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False),
        "state"
    ],
    outputs=[
        gr.Plot(show_label=False),
        "state"
    ],
    live=True
)

gr_int.launch()