Fixes evaluation instructions and updates WER scores
#2
by
andreagasparini
- opened
README.md
CHANGED
|
@@ -24,7 +24,7 @@ model-index:
|
|
| 24 |
metrics:
|
| 25 |
- name: Test WER
|
| 26 |
type: wer
|
| 27 |
-
value: 1.9
|
| 28 |
- task:
|
| 29 |
name: Automatic Speech Recognition
|
| 30 |
type: automatic-speech-recognition
|
|
@@ -38,7 +38,7 @@ model-index:
|
|
| 38 |
metrics:
|
| 39 |
- name: Test WER
|
| 40 |
type: wer
|
| 41 |
-
value: 3.9
|
| 42 |
---
|
| 43 |
|
| 44 |
# Wav2Vec2-Large-960h-Lv60 + Self-Training
|
|
@@ -85,9 +85,9 @@ To transcribe audio files the model can be used as a standalone acoustic model a
|
|
| 85 |
transcription = processor.batch_decode(predicted_ids)
|
| 86 |
```
|
| 87 |
|
| 88 |
-
|
| 89 |
|
| 90 |
-
|
| 91 |
|
| 92 |
```python
|
| 93 |
from datasets import load_dataset
|
|
@@ -110,7 +110,7 @@ def map_to_pred(batch):
|
|
| 110 |
logits = model(input_values, attention_mask=attention_mask).logits
|
| 111 |
|
| 112 |
predicted_ids = torch.argmax(logits, dim=-1)
|
| 113 |
-
transcription = processor.batch_decode(predicted_ids)
|
| 114 |
batch["transcription"] = transcription
|
| 115 |
return batch
|
| 116 |
|
|
@@ -123,4 +123,4 @@ print("WER:", wer(result["text"], result["transcription"]))
|
|
| 123 |
|
| 124 |
| "clean" | "other" |
|
| 125 |
|---|---|
|
| 126 |
-
| 1.9 | 3.9 |
|
|
|
|
| 24 |
metrics:
|
| 25 |
- name: Test WER
|
| 26 |
type: wer
|
| 27 |
+
value: 1.86
|
| 28 |
- task:
|
| 29 |
name: Automatic Speech Recognition
|
| 30 |
type: automatic-speech-recognition
|
|
|
|
| 38 |
metrics:
|
| 39 |
- name: Test WER
|
| 40 |
type: wer
|
| 41 |
+
value: 3.88
|
| 42 |
---
|
| 43 |
|
| 44 |
# Wav2Vec2-Large-960h-Lv60 + Self-Training
|
|
|
|
| 85 |
transcription = processor.batch_decode(predicted_ids)
|
| 86 |
```
|
| 87 |
|
| 88 |
+
## Evaluation
|
| 89 |
|
| 90 |
+
This code snippet shows how to evaluate **facebook/wav2vec2-large-960h-lv60-self** on LibriSpeech's "clean" and "other" test data.
|
| 91 |
|
| 92 |
```python
|
| 93 |
from datasets import load_dataset
|
|
|
|
| 110 |
logits = model(input_values, attention_mask=attention_mask).logits
|
| 111 |
|
| 112 |
predicted_ids = torch.argmax(logits, dim=-1)
|
| 113 |
+
transcription = processor.batch_decode(predicted_ids)[0]
|
| 114 |
batch["transcription"] = transcription
|
| 115 |
return batch
|
| 116 |
|
|
|
|
| 123 |
|
| 124 |
| "clean" | "other" |
|
| 125 |
|---|---|
|
| 126 |
+
| 1.86 | 3.88 |
|