Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -42,7 +42,7 @@ try:
|
|
| 42 |
except Exception:
|
| 43 |
build_ctcdecoder = None
|
| 44 |
|
| 45 |
-
# --- Robust alphabet build + dedupe ---
|
| 46 |
try:
|
| 47 |
vocab = processor.tokenizer.get_vocab()
|
| 48 |
max_id = max(vocab.values())
|
|
@@ -56,19 +56,24 @@ try:
|
|
| 56 |
if pad_id is None:
|
| 57 |
pad_id = vocab.get("<pad>", None)
|
| 58 |
|
| 59 |
-
# Build preliminary alphabet: only pad_id -> "" (blank). keep other tokens as-is
|
|
|
|
| 60 |
preliminary = []
|
| 61 |
for idx, tok in enumerate(tokens_by_id):
|
| 62 |
if pad_id is not None and idx == pad_id:
|
| 63 |
preliminary.append("") # blank token only for pad
|
| 64 |
else:
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
|
| 71 |
# Now ensure alphabet entries are unique (pyctcdecode forbids duplicates).
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
seen = {}
|
| 73 |
alphabet_for_decoder = []
|
| 74 |
duplicates = []
|
|
@@ -84,6 +89,7 @@ try:
|
|
| 84 |
continue
|
| 85 |
|
| 86 |
if sym in seen:
|
|
|
|
| 87 |
uniq = f"{sym}#{idx}"
|
| 88 |
alphabet_for_decoder.append(uniq)
|
| 89 |
duplicates.append((idx, sym, uniq))
|
|
@@ -147,32 +153,27 @@ def _build_decoder_kwargs_from_signature(beam_width, alpha, beta, decode_fn):
|
|
| 147 |
"""
|
| 148 |
sig = inspect.signature(decode_fn)
|
| 149 |
params = list(sig.parameters.keys())
|
| 150 |
-
#
|
| 151 |
-
# Build kwargs by probing known candidate names
|
| 152 |
beam_candidates = ["beam_width", "beam", "beam_size", "beamWidth", "beamSize"]
|
| 153 |
alpha_candidates = ["alpha", "lm_weight", "lm_alpha", "lm_scale", "alpha_score"]
|
| 154 |
beta_candidates = ["beta", "word_score", "word_insertion", "word_bonus", "word_insertion_score"]
|
| 155 |
|
| 156 |
kwargs = {}
|
| 157 |
-
# beam: prefer named if present
|
| 158 |
for name in beam_candidates:
|
| 159 |
if name in params:
|
| 160 |
kwargs[name] = int(beam_width)
|
| 161 |
break
|
| 162 |
|
| 163 |
-
# alpha (lm weight)
|
| 164 |
for name in alpha_candidates:
|
| 165 |
if name in params:
|
| 166 |
kwargs[name] = float(alpha)
|
| 167 |
break
|
| 168 |
|
| 169 |
-
# beta (word insertion)
|
| 170 |
for name in beta_candidates:
|
| 171 |
if name in params:
|
| 172 |
kwargs[name] = float(beta)
|
| 173 |
break
|
| 174 |
|
| 175 |
-
# If beam wasn't found as named param, we will use positional second argument
|
| 176 |
args = []
|
| 177 |
if not any(name in params for name in beam_candidates):
|
| 178 |
args = [int(beam_width)]
|
|
@@ -215,21 +216,17 @@ def decode_with_lm_np(logits_np: np.ndarray, beam_width=50, alpha=0.8, beta=1.0)
|
|
| 215 |
used = None
|
| 216 |
# Try with probs first, then fall back to log-probs
|
| 217 |
try:
|
| 218 |
-
# attempt call
|
| 219 |
text = decode_fn(probs, *args, **kwargs)
|
| 220 |
used = "probs"
|
| 221 |
except TypeError as e1:
|
| 222 |
-
# maybe kwargs names were wrong or function expects different args; try alternative: try without kwargs (positional)
|
| 223 |
try:
|
| 224 |
print("decode(probs, *args, **kwargs) failed:", e1)
|
| 225 |
-
# if args empty, try with only probs (some decoders default)
|
| 226 |
if args:
|
| 227 |
text = decode_fn(probs, *args)
|
| 228 |
else:
|
| 229 |
text = decode_fn(probs)
|
| 230 |
used = "probs_positional_fallback"
|
| 231 |
except Exception as e2:
|
| 232 |
-
# try log-probs with same strategy
|
| 233 |
try:
|
| 234 |
print("probs positional fallback failed:", e2, " — trying log-probs")
|
| 235 |
text = decode_fn(lp, *args, **kwargs)
|
|
@@ -449,8 +446,8 @@ def create_audio_component():
|
|
| 449 |
return gr.Textbox(label="Fallback - paste path to audio file")
|
| 450 |
|
| 451 |
# --- Build UI ---
|
| 452 |
-
title = "ASR
|
| 453 |
-
desc = "
|
| 454 |
|
| 455 |
with gr.Blocks() as demo:
|
| 456 |
gr.Markdown(f"## {title}\n\n{desc}\n\nDevice: **{DEVICE}**")
|
|
@@ -464,15 +461,14 @@ with gr.Blocks() as demo:
|
|
| 464 |
btn = gr.Button("Transcribe")
|
| 465 |
with gr.Column(scale=3):
|
| 466 |
out_txt = gr.Textbox(label="Transcription (raw)", lines=4)
|
| 467 |
-
out_norm = gr.Textbox(label="Transcription (normalized)", lines=2)
|
| 468 |
out_conf = gr.Textbox(label="Confidence")
|
| 469 |
def _run(a, use_lm, beam, a_w, b_w):
|
| 470 |
if a is None:
|
| 471 |
-
return "No audio", ""
|
| 472 |
res = transcribe(a, use_lm=use_lm, beam_width=beam, alpha=a_w, beta=b_w)
|
| 473 |
-
return res["transcription"],
|
| 474 |
btn.click(_run, inputs=[audio_in, use_lm_checkbox, beam_slider, alpha_slider, beta_slider],
|
| 475 |
-
outputs=[out_txt,
|
| 476 |
|
| 477 |
# Launch the demo (share=True yields a public link in Spaces/Colab)
|
| 478 |
if __name__ == "__main__":
|
|
|
|
| 42 |
except Exception:
|
| 43 |
build_ctcdecoder = None
|
| 44 |
|
| 45 |
+
# --- Robust alphabet build + dedupe, with '|' -> ' ' mapping enforced ---
|
| 46 |
try:
|
| 47 |
vocab = processor.tokenizer.get_vocab()
|
| 48 |
max_id = max(vocab.values())
|
|
|
|
| 56 |
if pad_id is None:
|
| 57 |
pad_id = vocab.get("<pad>", None)
|
| 58 |
|
| 59 |
+
# Build preliminary alphabet: only pad_id -> "" (blank). keep other tokens as-is,
|
| 60 |
+
# but map '|' to space ' ' as requested.
|
| 61 |
preliminary = []
|
| 62 |
for idx, tok in enumerate(tokens_by_id):
|
| 63 |
if pad_id is not None and idx == pad_id:
|
| 64 |
preliminary.append("") # blank token only for pad
|
| 65 |
else:
|
| 66 |
+
# map '|' to actual space
|
| 67 |
+
if tok == "|":
|
| 68 |
+
preliminary.append(" ")
|
| 69 |
+
else:
|
| 70 |
+
preliminary.append(tok)
|
| 71 |
|
| 72 |
# Now ensure alphabet entries are unique (pyctcdecode forbids duplicates).
|
| 73 |
+
# If duplicates appear (e.g. multiple token ids mapped to same symbol), we keep the *first*
|
| 74 |
+
# occurrence of that symbol unchanged and make subsequent ones unique markers.
|
| 75 |
+
# Note: the ideal solution is to make the ARPA LM and the tokenizer use the same single symbol for space;
|
| 76 |
+
# here we force '|'->' ' and then disambiguate duplicates to satisfy pyctcdecode's uniqueness requirement.
|
| 77 |
seen = {}
|
| 78 |
alphabet_for_decoder = []
|
| 79 |
duplicates = []
|
|
|
|
| 89 |
continue
|
| 90 |
|
| 91 |
if sym in seen:
|
| 92 |
+
# duplicate symbol: make unique marker so list entries remain unique
|
| 93 |
uniq = f"{sym}#{idx}"
|
| 94 |
alphabet_for_decoder.append(uniq)
|
| 95 |
duplicates.append((idx, sym, uniq))
|
|
|
|
| 153 |
"""
|
| 154 |
sig = inspect.signature(decode_fn)
|
| 155 |
params = list(sig.parameters.keys())
|
| 156 |
+
# candidates for param names
|
|
|
|
| 157 |
beam_candidates = ["beam_width", "beam", "beam_size", "beamWidth", "beamSize"]
|
| 158 |
alpha_candidates = ["alpha", "lm_weight", "lm_alpha", "lm_scale", "alpha_score"]
|
| 159 |
beta_candidates = ["beta", "word_score", "word_insertion", "word_bonus", "word_insertion_score"]
|
| 160 |
|
| 161 |
kwargs = {}
|
|
|
|
| 162 |
for name in beam_candidates:
|
| 163 |
if name in params:
|
| 164 |
kwargs[name] = int(beam_width)
|
| 165 |
break
|
| 166 |
|
|
|
|
| 167 |
for name in alpha_candidates:
|
| 168 |
if name in params:
|
| 169 |
kwargs[name] = float(alpha)
|
| 170 |
break
|
| 171 |
|
|
|
|
| 172 |
for name in beta_candidates:
|
| 173 |
if name in params:
|
| 174 |
kwargs[name] = float(beta)
|
| 175 |
break
|
| 176 |
|
|
|
|
| 177 |
args = []
|
| 178 |
if not any(name in params for name in beam_candidates):
|
| 179 |
args = [int(beam_width)]
|
|
|
|
| 216 |
used = None
|
| 217 |
# Try with probs first, then fall back to log-probs
|
| 218 |
try:
|
|
|
|
| 219 |
text = decode_fn(probs, *args, **kwargs)
|
| 220 |
used = "probs"
|
| 221 |
except TypeError as e1:
|
|
|
|
| 222 |
try:
|
| 223 |
print("decode(probs, *args, **kwargs) failed:", e1)
|
|
|
|
| 224 |
if args:
|
| 225 |
text = decode_fn(probs, *args)
|
| 226 |
else:
|
| 227 |
text = decode_fn(probs)
|
| 228 |
used = "probs_positional_fallback"
|
| 229 |
except Exception as e2:
|
|
|
|
| 230 |
try:
|
| 231 |
print("probs positional fallback failed:", e2, " — trying log-probs")
|
| 232 |
text = decode_fn(lp, *args, **kwargs)
|
|
|
|
| 446 |
return gr.Textbox(label="Fallback - paste path to audio file")
|
| 447 |
|
| 448 |
# --- Build UI ---
|
| 449 |
+
title = "ASR Bagui (LM ON)"
|
| 450 |
+
desc = "Le tout premier ASR de la langue fulfulde Cameroun. By ABDOUL-BAGUI, M2 in UMa"
|
| 451 |
|
| 452 |
with gr.Blocks() as demo:
|
| 453 |
gr.Markdown(f"## {title}\n\n{desc}\n\nDevice: **{DEVICE}**")
|
|
|
|
| 461 |
btn = gr.Button("Transcribe")
|
| 462 |
with gr.Column(scale=3):
|
| 463 |
out_txt = gr.Textbox(label="Transcription (raw)", lines=4)
|
|
|
|
| 464 |
out_conf = gr.Textbox(label="Confidence")
|
| 465 |
def _run(a, use_lm, beam, a_w, b_w):
|
| 466 |
if a is None:
|
| 467 |
+
return "No audio", ""
|
| 468 |
res = transcribe(a, use_lm=use_lm, beam_width=beam, alpha=a_w, beta=b_w)
|
| 469 |
+
return res["transcription"], str(res["confidence"])
|
| 470 |
btn.click(_run, inputs=[audio_in, use_lm_checkbox, beam_slider, alpha_slider, beta_slider],
|
| 471 |
+
outputs=[out_txt, out_conf])
|
| 472 |
|
| 473 |
# Launch the demo (share=True yields a public link in Spaces/Colab)
|
| 474 |
if __name__ == "__main__":
|