IbnAoudi committed on
Commit
9d86cca
·
verified ·
1 Parent(s): 54b41ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -23
app.py CHANGED
@@ -42,7 +42,7 @@ try:
42
  except Exception:
43
  build_ctcdecoder = None
44
 
45
- # --- Robust alphabet build + dedupe ---
46
  try:
47
  vocab = processor.tokenizer.get_vocab()
48
  max_id = max(vocab.values())
@@ -56,19 +56,24 @@ try:
56
  if pad_id is None:
57
  pad_id = vocab.get("<pad>", None)
58
 
59
- # Build preliminary alphabet: only pad_id -> "" (blank). keep other tokens as-is.
 
60
  preliminary = []
61
  for idx, tok in enumerate(tokens_by_id):
62
  if pad_id is not None and idx == pad_id:
63
  preliminary.append("") # blank token only for pad
64
  else:
65
- preliminary.append(tok)
66
-
67
- # OPTIONAL: if you *know* your ARPA uses spaces instead of '|' you can map '|' -> ' '
68
- # Do this only if ARPA/tokenization require it:
69
- # preliminary = [(" " if t == "|" else t) for t in preliminary]
70
 
71
  # Now ensure alphabet entries are unique (pyctcdecode forbids duplicates).
 
 
 
 
72
  seen = {}
73
  alphabet_for_decoder = []
74
  duplicates = []
@@ -84,6 +89,7 @@ try:
84
  continue
85
 
86
  if sym in seen:
 
87
  uniq = f"{sym}#{idx}"
88
  alphabet_for_decoder.append(uniq)
89
  duplicates.append((idx, sym, uniq))
@@ -147,32 +153,27 @@ def _build_decoder_kwargs_from_signature(beam_width, alpha, beta, decode_fn):
147
  """
148
  sig = inspect.signature(decode_fn)
149
  params = list(sig.parameters.keys())
150
- # skip first param name (usually 'self' or 'probs' positional), we will pass probs as first positional
151
- # Build kwargs by probing known candidate names
152
  beam_candidates = ["beam_width", "beam", "beam_size", "beamWidth", "beamSize"]
153
  alpha_candidates = ["alpha", "lm_weight", "lm_alpha", "lm_scale", "alpha_score"]
154
  beta_candidates = ["beta", "word_score", "word_insertion", "word_bonus", "word_insertion_score"]
155
 
156
  kwargs = {}
157
- # beam: prefer named if present
158
  for name in beam_candidates:
159
  if name in params:
160
  kwargs[name] = int(beam_width)
161
  break
162
 
163
- # alpha (lm weight)
164
  for name in alpha_candidates:
165
  if name in params:
166
  kwargs[name] = float(alpha)
167
  break
168
 
169
- # beta (word insertion)
170
  for name in beta_candidates:
171
  if name in params:
172
  kwargs[name] = float(beta)
173
  break
174
 
175
- # If beam wasn't found as named param, we will use positional second argument
176
  args = []
177
  if not any(name in params for name in beam_candidates):
178
  args = [int(beam_width)]
@@ -215,21 +216,17 @@ def decode_with_lm_np(logits_np: np.ndarray, beam_width=50, alpha=0.8, beta=1.0)
215
  used = None
216
  # Try with probs first, then fall back to log-probs
217
  try:
218
- # attempt call
219
  text = decode_fn(probs, *args, **kwargs)
220
  used = "probs"
221
  except TypeError as e1:
222
- # maybe kwargs names were wrong or function expects different args; try alternative: try without kwargs (positional)
223
  try:
224
  print("decode(probs, *args, **kwargs) failed:", e1)
225
- # if args empty, try with only probs (some decoders default)
226
  if args:
227
  text = decode_fn(probs, *args)
228
  else:
229
  text = decode_fn(probs)
230
  used = "probs_positional_fallback"
231
  except Exception as e2:
232
- # try log-probs with same strategy
233
  try:
234
  print("probs positional fallback failed:", e2, " — trying log-probs")
235
  text = decode_fn(lp, *args, **kwargs)
@@ -449,8 +446,8 @@ def create_audio_component():
449
  return gr.Textbox(label="Fallback - paste path to audio file")
450
 
451
  # --- Build UI ---
452
- title = "ASR Live (LM {})".format("ON" if decoder else "OFF")
453
- desc = "Live transcription use LM if available. Beam/alpha/beta active only when LM built."
454
 
455
  with gr.Blocks() as demo:
456
  gr.Markdown(f"## {title}\n\n{desc}\n\nDevice: **{DEVICE}**")
@@ -464,15 +461,14 @@ with gr.Blocks() as demo:
464
  btn = gr.Button("Transcribe")
465
  with gr.Column(scale=3):
466
  out_txt = gr.Textbox(label="Transcription (raw)", lines=4)
467
- out_norm = gr.Textbox(label="Transcription (normalized)", lines=2)
468
  out_conf = gr.Textbox(label="Confidence")
469
  def _run(a, use_lm, beam, a_w, b_w):
470
  if a is None:
471
- return "No audio", "", ""
472
  res = transcribe(a, use_lm=use_lm, beam_width=beam, alpha=a_w, beta=b_w)
473
- return res["transcription"], res["transcription_norm"], str(res["confidence"])
474
  btn.click(_run, inputs=[audio_in, use_lm_checkbox, beam_slider, alpha_slider, beta_slider],
475
- outputs=[out_txt, out_norm, out_conf])
476
 
477
  # Launch the demo (share=True yields a public link in Spaces/Colab)
478
  if __name__ == "__main__":
 
42
  except Exception:
43
  build_ctcdecoder = None
44
 
45
+ # --- Robust alphabet build + dedupe, with '|' -> ' ' mapping enforced ---
46
  try:
47
  vocab = processor.tokenizer.get_vocab()
48
  max_id = max(vocab.values())
 
56
  if pad_id is None:
57
  pad_id = vocab.get("<pad>", None)
58
 
59
+ # Build preliminary alphabet: only pad_id -> "" (blank). keep other tokens as-is,
60
+ # but map '|' to space ' ' as requested.
61
  preliminary = []
62
  for idx, tok in enumerate(tokens_by_id):
63
  if pad_id is not None and idx == pad_id:
64
  preliminary.append("") # blank token only for pad
65
  else:
66
+ # map '|' to actual space
67
+ if tok == "|":
68
+ preliminary.append(" ")
69
+ else:
70
+ preliminary.append(tok)
71
 
72
  # Now ensure alphabet entries are unique (pyctcdecode forbids duplicates).
73
+ # If duplicates appear (e.g. multiple token ids mapped to same symbol), we keep the *first*
74
+ # occurrence of that symbol unchanged and make subsequent ones unique markers.
75
+ # Note: ideal solution is to make ARPA/tokenizer use the same single symbol for space;
76
+ # here we force '|'->' ' and then disambiguate duplicates to satisfy pyctcdecode's uniqueness requirement.
77
  seen = {}
78
  alphabet_for_decoder = []
79
  duplicates = []
 
89
  continue
90
 
91
  if sym in seen:
92
+ # duplicate symbol: make unique marker so list entries remain unique
93
  uniq = f"{sym}#{idx}"
94
  alphabet_for_decoder.append(uniq)
95
  duplicates.append((idx, sym, uniq))
 
153
  """
154
  sig = inspect.signature(decode_fn)
155
  params = list(sig.parameters.keys())
156
+ # candidates for param names
 
157
  beam_candidates = ["beam_width", "beam", "beam_size", "beamWidth", "beamSize"]
158
  alpha_candidates = ["alpha", "lm_weight", "lm_alpha", "lm_scale", "alpha_score"]
159
  beta_candidates = ["beta", "word_score", "word_insertion", "word_bonus", "word_insertion_score"]
160
 
161
  kwargs = {}
 
162
  for name in beam_candidates:
163
  if name in params:
164
  kwargs[name] = int(beam_width)
165
  break
166
 
 
167
  for name in alpha_candidates:
168
  if name in params:
169
  kwargs[name] = float(alpha)
170
  break
171
 
 
172
  for name in beta_candidates:
173
  if name in params:
174
  kwargs[name] = float(beta)
175
  break
176
 
 
177
  args = []
178
  if not any(name in params for name in beam_candidates):
179
  args = [int(beam_width)]
 
216
  used = None
217
  # Try with probs first, then fall back to log-probs
218
  try:
 
219
  text = decode_fn(probs, *args, **kwargs)
220
  used = "probs"
221
  except TypeError as e1:
 
222
  try:
223
  print("decode(probs, *args, **kwargs) failed:", e1)
 
224
  if args:
225
  text = decode_fn(probs, *args)
226
  else:
227
  text = decode_fn(probs)
228
  used = "probs_positional_fallback"
229
  except Exception as e2:
 
230
  try:
231
  print("probs positional fallback failed:", e2, " — trying log-probs")
232
  text = decode_fn(lp, *args, **kwargs)
 
446
  return gr.Textbox(label="Fallback - paste path to audio file")
447
 
448
  # --- Build UI ---
449
+ title = "ASR Bagui (LM ON)"
450
+ desc = "Le tout premier ASR de la langue fulfulde Cameroun. By ABDOUL-BAGUI, M2 in UMa"
451
 
452
  with gr.Blocks() as demo:
453
  gr.Markdown(f"## {title}\n\n{desc}\n\nDevice: **{DEVICE}**")
 
461
  btn = gr.Button("Transcribe")
462
  with gr.Column(scale=3):
463
  out_txt = gr.Textbox(label="Transcription (raw)", lines=4)
 
464
  out_conf = gr.Textbox(label="Confidence")
465
  def _run(a, use_lm, beam, a_w, b_w):
466
  if a is None:
467
+ return "No audio", ""
468
  res = transcribe(a, use_lm=use_lm, beam_width=beam, alpha=a_w, beta=b_w)
469
+ return res["transcription"], str(res["confidence"])
470
  btn.click(_run, inputs=[audio_in, use_lm_checkbox, beam_slider, alpha_slider, beta_slider],
471
+ outputs=[out_txt, out_conf])
472
 
473
  # Launch the demo (share=True yields a public link in Spaces/Colab)
474
  if __name__ == "__main__":