RonaldCeballos committed
Commit c5e79e0 · 1 Parent(s): 54985cb

Update app.py

Files changed (1)
  1. app.py +156 -64
app.py CHANGED
@@ -113,8 +113,63 @@ def load_model_from_hub(species_type, model_version):
         print(f"Error loading {species_type} - {model_version}: {str(e)}")
         return None, None
 
-def predict_species(species_type, model_version, audio_file):
-    """Main prediction function"""
+def predict_with_model(spec, model, classes):
+    """Predict species from spectrogram - Adapted from notebook"""
+    # Ensure correct dimensions (1025, 313)
+    if spec.shape != (1025, 313):
+        # Resize if needed
+        spec = resize_spectrogram(spec, (1025, 313))
+
+    # Preprocess for model - exactly as in notebook
+    arr = np.expand_dims(spec[..., np.newaxis], axis=0).astype('float32')
+    X = arr / np.max(arr)
+
+    # Predict
+    pred = model.predict(X, verbose=0)
+    pred_class_idx = np.argmax(pred)
+    pred_class = str(classes[pred_class_idx])
+    prob = float(pred[0][pred_class_idx])
+
+    return pred_class, prob
+
+def extract_chunks(audio_clean, sr, time=5):
+    """Extract audio chunks - Adapted from notebook's ext_chunks function"""
+    n_samples = sr * time
+    chunks = []
+    for i in range(0, len(audio_clean), n_samples):
+        start = i
+        end = i + n_samples
+
+        if end <= len(audio_clean):
+            chunk = audio_clean[start:end]
+        else:
+            # Circular padding - exactly as in notebook
+            faltan = end - len(audio_clean)
+            padding = audio_clean[:faltan]
+            chunk = np.concatenate([audio_clean[start:], padding])
+
+        chunks.append(chunk)
+    return np.array(chunks)
+
+def create_spectrogram(array_audio, n_fft=2048):
+    """Create spectrogram from audio array - Adapted from notebook's spectogram function"""
+    if isinstance(array_audio, np.ndarray):
+        dta = np.abs(librosa.stft(array_audio, n_fft=n_fft))
+        D = librosa.amplitude_to_db(dta, ref=np.max)
+    else:
+        dta = np.abs(librosa.stft(array_audio.numpy()))
+        D = librosa.amplitude_to_db(dta, ref=np.max)
+    return D
+
+def resize_spectrogram(spec, target_shape):
+    """Resize spectrogram to target shape"""
+    from scipy import ndimage
+    zoom_factors = (target_shape[0] / spec.shape[0], target_shape[1] / spec.shape[1])
+    resized = ndimage.zoom(spec, zoom_factors, order=1)
+    return resized
+
+def predict_species_all_chunks(species_type, model_version, audio_file):
+    """Main prediction function that processes all chunks"""
     global current_audio_path, current_audio_name
 
     if audio_file is None:
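A note on the shapes these helpers assume: `n_fft=2048` gives `2048 // 2 + 1 = 1025` frequency bins, and the `(1025, 313)` target in `predict_with_model` corresponds to 160,000-sample chunks at librosa's default hop of `n_fft // 4 = 512` (`1 + 160000 // 512 = 313` frames), i.e. 5-second clips at 32 kHz. The sample rate is inferred from the shape, not stated in the commit; a quick self-contained check of the chunking and shape logic:

```python
import numpy as np
import librosa

sr = 32000                                          # inferred rate, see note above
audio = np.random.randn(12 * sr).astype('float32')  # 12 s of noise

# Same logic as the committed extract_chunks: fixed 5 s windows,
# with the last window wrapped around to the start (circular padding).
n_samples = 5 * sr
chunks = []
for i in range(0, len(audio), n_samples):
    end = i + n_samples
    if end <= len(audio):
        chunks.append(audio[i:end])
    else:
        chunks.append(np.concatenate([audio[i:], audio[:end - len(audio)]]))

print(np.array(chunks).shape)                    # (3, 160000)

S = np.abs(librosa.stft(chunks[0], n_fft=2048))  # default hop_length = 512
print(S.shape)                                   # (1025, 313)
```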
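For other sample rates the frame count changes — a 5-second chunk at 44.1 kHz gives `1 + 220500 // 512 = 431` frames — and `predict_with_model` falls back to `resize_spectrogram`, which rescales by bilinear interpolation via `scipy.ndimage.zoom`. An isolated illustration of that fallback:

```python
import numpy as np
from scipy import ndimage

spec = np.random.randn(1025, 431).astype('float32')   # e.g. 5 s at 44.1 kHz

# order=1 selects bilinear interpolation, as in the committed helper
zoom_factors = (1025 / spec.shape[0], 313 / spec.shape[1])
resized = ndimage.zoom(spec, zoom_factors, order=1)

print(resized.shape)                                  # (1025, 313)
```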
@@ -123,7 +178,6 @@ def predict_species(species_type, model_version, audio_file):
     try:
         # Store the current audio path and name for feedback
         current_audio_path = audio_file
-        # Extract the original audio file name
         current_audio_name = os.path.basename(audio_file)
 
         # Load model
@@ -131,24 +185,23 @@ def predict_species(species_type, model_version, audio_file):
         if model is None or classes is None:
             return pd.DataFrame({"Error": [f"Could not load {species_type} - {model_version} model"]})
 
-        # Process audio
+        # Process audio - using notebook approach
         wav, sr = torchaudio.load(audio_file)
         wav = wav.mean(dim=0)  # Convert to mono
 
-        # Extract 5-second chunks
+        # Extract 5-second chunks using notebook function
         chunks = extract_chunks(wav.numpy(), sr, time=5)
 
         results = []
 
         for i, chunk in enumerate(chunks):
-            # Create spectrogram
+            # Create spectrogram using notebook function
             spectrogram = create_spectrogram(chunk)
 
-            # Normalize
-            if np.std(spectrogram) > 0:
-                spectrogram = (spectrogram - np.mean(spectrogram)) / np.std(spectrogram)
+            # Normalize exactly as in notebook
+            spectrogram = (spectrogram - np.mean(spectrogram)) / np.std(spectrogram)
 
-            # Predict
+            # Predict using adapted notebook function
            species, confidence = predict_with_model(spectrogram, model, classes)
 
            time_start = i * 5
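One behavioral change in this hunk: the old `if np.std(spectrogram) > 0:` guard is gone, so a perfectly constant chunk (e.g. digital silence) now divides by zero and fills the spectrogram with NaNs, which then propagate through `predict_with_model`. If that edge case matters, a guarded variant could look like this (a sketch, not part of the commit):

```python
import numpy as np

def normalize_spectrogram(spectrogram, eps=1e-8):
    """Z-score a spectrogram, falling back to zeros for constant input."""
    std = np.std(spectrogram)
    if std < eps:
        return np.zeros_like(spectrogram)
    return (spectrogram - np.mean(spectrogram)) / std
```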
@@ -174,56 +227,85 @@ def predict_species(species_type, model_version, audio_file):
         print(f"Prediction error: {str(e)}")
         return pd.DataFrame({"Error": [f"Error during analysis: {str(e)}"]})
 
-def extract_chunks(audio_clean, sr, time):
-    """Extract audio chunks"""
-    n_samples = sr * time
-    chunks = []
-    for i in range(0, len(audio_clean), n_samples):
-        start = i
-        end = i + n_samples
-
-        if end <= len(audio_clean):
-            chunk = audio_clean[start:end]
-        else:
-            # Circular padding
-            missing = end - len(audio_clean)
-            padding = audio_clean[:missing]
-            chunk = np.concatenate([audio_clean[start:], padding])
-
-        chunks.append(chunk)
-    return np.array(chunks)
-
-def create_spectrogram(array_audio, n_fft=2048):
-    """Create spectrogram from audio array"""
-    dta = np.abs(librosa.stft(array_audio, n_fft=n_fft))
-    D = librosa.amplitude_to_db(dta, ref=np.max)
-    return D
-
-def predict_with_model(spec, model, classes):
-    """Predict species from spectrogram"""
-    # Ensure correct dimensions (1025, 313)
-    if spec.shape != (1025, 313):
-        # Resize if needed
-        spec = resize_spectrogram(spec, (1025, 313))
-
-    # Preprocess for model
-    arr = np.expand_dims(spec[..., np.newaxis], axis=0).astype('float32')
-    X = arr / np.max(arr)
-
-    # Predict
-    pred = model.predict(X, verbose=0)
-    pred_class_idx = np.argmax(pred)
-    pred_class = str(classes[pred_class_idx])
-    prob = float(pred[0][pred_class_idx])
-
-    return pred_class, prob
-
-def resize_spectrogram(spec, target_shape):
-    """Resize spectrogram to target shape"""
-    from scipy import ndimage
-    zoom_factors = (target_shape[0] / spec.shape[0], target_shape[1] / spec.shape[1])
-    resized = ndimage.zoom(spec, zoom_factors, order=1)
-    return resized
+def predict_species_final(species_type, model_version, audio_file):
+    """Enhanced prediction with voting system across chunks"""
+    global current_audio_path, current_audio_name
+
+    if audio_file is None:
+        return pd.DataFrame({"Info": ["Please upload an audio file"]})
+
+    try:
+        current_audio_path = audio_file
+        current_audio_name = os.path.basename(audio_file)
+
+        # Load model
+        model, classes = load_model_from_hub(species_type, model_version)
+        if model is None or classes is None:
+            return pd.DataFrame({"Error": [f"Could not load {species_type} - {model_version} model"]})
+
+        # Process audio
+        wav, sr = torchaudio.load(audio_file)
+        wav = wav.mean(dim=0)
+
+        # Extract chunks
+        chunks = extract_chunks(wav.numpy(), sr, time=5)
+
+        results = []
+        species_votes = {}
+
+        for i, chunk in enumerate(chunks):
+            # Create and normalize spectrogram
+            spectrogram = create_spectrogram(chunk)
+            spectrogram = (spectrogram - np.mean(spectrogram)) / np.std(spectrogram)
+
+            # Predict
+            species, confidence = predict_with_model(spectrogram, model, classes)
+
+            # Count votes for final prediction
+            if species in species_votes:
+                species_votes[species] += confidence
+            else:
+                species_votes[species] = confidence
+
+            time_start = i * 5
+            time_end = (i + 1) * 5
+
+            results.append({
+                'Segment': f'{i+1}',
+                'Time': f'{time_start}s - {time_end}s',
+                'Species': species,
+                'Confidence': f'{confidence:.1%}'
+            })
+
+        # Determine final prediction
+        if species_votes:
+            final_species = max(species_votes, key=species_votes.get)
+            final_confidence = species_votes[final_species] / len(chunks)
+
+            # Add final prediction row
+            final_row = pd.DataFrame({
+                'Segment': ['FINAL'],
+                'Time': ['Full Audio'],
+                'Species': [final_species],
+                'Confidence': [f'{final_confidence:.1%}']
+            })
+
+            results_df = pd.concat([pd.DataFrame(results), final_row], ignore_index=True)
+        else:
+            results_df = pd.DataFrame(results)
+
+        # Clean memory
+        del model
+        gc.collect()
+
+        if results_df.empty:
+            return pd.DataFrame({"Info": ["No valid segments detected in the audio"]})
+
+        return results_df
+
+    except Exception as e:
+        print(f"Prediction error: {str(e)}")
+        return pd.DataFrame({"Error": [f"Error during analysis: {str(e)}"]})
 
 def save_feedback_to_dataset(audio_file_path, original_audio_name, feedback_text, consent_given, species_type, model_version, results_df):
     """Save audio and feedback to private Hugging Face dataset"""
@@ -341,6 +423,7 @@ with gr.Blocks(
     css="""
     .gradio-container { max-width: 1200px; margin: auto; }
     .consent-text { font-size: 0.9em; color: #666; }
+    .final-prediction { background-color: #e8f5e8 !important; font-weight: bold; }
     """
 ) as demo:
 
@@ -348,10 +431,16 @@ with gr.Blocks(
     current_results = gr.State(value=pd.DataFrame())
 
     gr.Markdown("""
-    #Species Audio Classifier
+    ## Species Audio Classifier
     **Upload an audio file to identify species using AI models**
 
-    *Models are loaded from: [RonaldCeballos/SpeciesClassifiers](https://huggingface.co/RonaldCeballos/SpeciesClassifiers)*
+    *Based on your notebook implementation - Models are loaded from: [RonaldCeballos/SpeciesClassifiers](https://huggingface.co/RonaldCeballos/SpeciesClassifiers)*
+
+    🔍 **How it works:**
+    - Audio is split into 5-second segments
+    - Each segment is converted to a spectrogram
+    - AI model predicts species for each segment
+    - Final prediction is based on voting across all segments
     """)
 
     with gr.Row():
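In terms of the helpers added earlier in this commit, the pipeline in those bullets reduces to roughly the following (a sketch: it assumes the committed helpers are in scope, `model` and `classes` come from `load_model_from_hub`, and `example.wav` is a placeholder path):

```python
import numpy as np
import torchaudio

wav, sr = torchaudio.load("example.wav")   # placeholder path
wav = wav.mean(dim=0)                      # stereo -> mono

votes = {}
for chunk in extract_chunks(wav.numpy(), sr, time=5):     # 5 s segments
    spec = create_spectrogram(chunk)                      # STFT -> dB spectrogram
    spec = (spec - np.mean(spec)) / np.std(spec)          # per-chunk z-score
    species, conf = predict_with_model(spec, model, classes)
    votes[species] = votes.get(species, 0.0) + conf       # confidence-weighted vote

final_species = max(votes, key=votes.get)
```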
@@ -386,10 +475,11 @@ with gr.Blocks(
             gr.Markdown("""
             ### 💡 Instructions:
             1. Select species category
-            2. Choose model version
+            2. Choose model version
             3. Upload audio file (WAV, MP3, etc.)
             4. Click "Analyze Audio"
             5. Review results by 5-second segments
+            6. Final prediction shown at the bottom
             """)
 
         with gr.Column(scale=2):
@@ -399,12 +489,14 @@ with gr.Blocks(
                 label="🎧 Analyzed Chunks",
                 headers=["Chunks", "Time", "Species", "Confidence"],
                 wrap=True,
-                max_height=400
+                max_height=500,
+                datatype=["str", "str", "str", "str"]
             )
 
             with gr.Accordion("💬 Submit Feedback for Model Improvement", open=False):
                 gr.Markdown("""
                 **Help us improve!** Submit your audio and feedback to our private dataset for model training.
+                *Using the same approach as your notebook implementation*
                 """)
 
                 consent_checkbox = gr.Checkbox(
@@ -426,7 +518,7 @@ with gr.Blocks(
 
     # Event handlers
     predict_btn.click(
-        fn=predict_species,
+        fn=predict_species_final,  # Using the enhanced version with voting
         inputs=[species_selector, model_selector, audio_input],
         outputs=results_display
     ).then(