Spaces:

IbnAoudi
/

ASR_Gradio

Sleeping

App Files Files Community

IbnAoudi committed 21 days ago

Commit

9d86cca

verified ·

1 Parent(s): 54b41ee

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -23

app.py CHANGED Viewed

@@ -42,7 +42,7 @@ try:
 except Exception:
     build_ctcdecoder = None
-# --- Robust alphabet build + dedupe ---
 try:
     vocab = processor.tokenizer.get_vocab()
     max_id = max(vocab.values())
@@ -56,19 +56,24 @@ try:
     if pad_id is None:
         pad_id = vocab.get("<pad>", None)
-    # Build preliminary alphabet: only pad_id -> "" (blank). keep other tokens as-is.
     preliminary = []
     for idx, tok in enumerate(tokens_by_id):
         if pad_id is not None and idx == pad_id:
             preliminary.append("")   # blank token only for pad
         else:
-            preliminary.append(tok)
-    # OPTIONAL: if you *know* your ARPA uses spaces instead of '|' you can map '|' -> ' '
-    # Do this only if ARPA/tokenization require it:
-    # preliminary = [(" " if t == "|" else t) for t in preliminary]
     # Now ensure alphabet entries are unique (pyctcdecode forbids duplicates).
     seen = {}
     alphabet_for_decoder = []
     duplicates = []
@@ -84,6 +89,7 @@ try:
             continue
         if sym in seen:
             uniq = f"{sym}#{idx}"
             alphabet_for_decoder.append(uniq)
             duplicates.append((idx, sym, uniq))
@@ -147,32 +153,27 @@ def _build_decoder_kwargs_from_signature(beam_width, alpha, beta, decode_fn):
     """
     sig = inspect.signature(decode_fn)
     params = list(sig.parameters.keys())
-    # skip first param name (usually 'self' or 'probs' positional), we will pass probs as first positional
-    # Build kwargs by probing known candidate names
     beam_candidates = ["beam_width", "beam", "beam_size", "beamWidth", "beamSize"]
     alpha_candidates = ["alpha", "lm_weight", "lm_alpha", "lm_scale", "alpha_score"]
     beta_candidates = ["beta", "word_score", "word_insertion", "word_bonus", "word_insertion_score"]
     kwargs = {}
-    # beam: prefer named if present
     for name in beam_candidates:
         if name in params:
             kwargs[name] = int(beam_width)
             break
-    # alpha (lm weight)
     for name in alpha_candidates:
         if name in params:
             kwargs[name] = float(alpha)
             break
-    # beta (word insertion)
     for name in beta_candidates:
         if name in params:
             kwargs[name] = float(beta)
             break
-    # If beam wasn't found as named param, we will use positional second argument
     args = []
     if not any(name in params for name in beam_candidates):
         args = [int(beam_width)]
@@ -215,21 +216,17 @@ def decode_with_lm_np(logits_np: np.ndarray, beam_width=50, alpha=0.8, beta=1.0)
     used = None
     # Try with probs first, then fallback to log-probs
     try:
-        # attempt call
         text = decode_fn(probs, *args, **kwargs)
         used = "probs"
     except TypeError as e1:
-        # maybe kwargs names were wrong or function expects different args; try alternative: try without kwargs (positional)
         try:
             print("decode(probs, *args, **kwargs) failed:", e1)
-            # if args empty, try with only probs (some decoders default)
             if args:
                 text = decode_fn(probs, *args)
             else:
                 text = decode_fn(probs)
             used = "probs_positional_fallback"
         except Exception as e2:
-            # try log-probs with same strategy
             try:
                 print("probs positional fallback failed:", e2, " — trying log-probs")
                 text = decode_fn(lp, *args, **kwargs)
@@ -449,8 +446,8 @@ def create_audio_component():
                 return gr.Textbox(label="Fallback - paste path to audio file")
 # --- Build UI ---
-title = "ASR Live (LM {})".format("ON" if decoder else "OFF")
-desc = "Live transcription — use LM if available. Beam/alpha/beta active only when LM built."
 with gr.Blocks() as demo:
     gr.Markdown(f"## {title}\n\n{desc}\n\nDevice: **{DEVICE}**")
@@ -464,15 +461,14 @@ with gr.Blocks() as demo:
             btn = gr.Button("Transcribe")
         with gr.Column(scale=3):
             out_txt = gr.Textbox(label="Transcription (raw)", lines=4)
-            out_norm = gr.Textbox(label="Transcription (normalized)", lines=2)
             out_conf = gr.Textbox(label="Confidence")
     def _run(a, use_lm, beam, a_w, b_w):
         if a is None:
-            return "No audio", "", ""
         res = transcribe(a, use_lm=use_lm, beam_width=beam, alpha=a_w, beta=b_w)
-        return res["transcription"], res["transcription_norm"], str(res["confidence"])
     btn.click(_run, inputs=[audio_in, use_lm_checkbox, beam_slider, alpha_slider, beta_slider],
-              outputs=[out_txt, out_norm, out_conf])
 # Launch the demo (share=True yields a public link in Spaces/Colab)
 if __name__ == "__main__":

 except Exception:
     build_ctcdecoder = None
+# --- Robust alphabet build + dedupe, with '|' -> ' ' mapping enforced ---
 try:
     vocab = processor.tokenizer.get_vocab()
     max_id = max(vocab.values())
     if pad_id is None:
         pad_id = vocab.get("<pad>", None)
+    # Build preliminary alphabet: only pad_id -> "" (blank). keep other tokens as-is,
+    # but map '|' to space ' ' as requested.
     preliminary = []
     for idx, tok in enumerate(tokens_by_id):
         if pad_id is not None and idx == pad_id:
             preliminary.append("")   # blank token only for pad
         else:
+            # map '|' to actual space
+            if tok == "|":
+                preliminary.append(" ")
+            else:
+                preliminary.append(tok)
     # Now ensure alphabet entries are unique (pyctcdecode forbids duplicates).
+    # If duplicates appear (e.g. multiple token ids mapped to same symbol), we keep the *first*
+    # occurrence of that symbol unchanged and make subsequent ones unique markers.
+    # Note: ideal solution is to make ARPA/tokenizer use the same single symbol for space;
+    # here we force '|'->' ' and then disambiguate duplicates to satisfy pyctcdecode's uniqueness requirement.
     seen = {}
     alphabet_for_decoder = []
     duplicates = []
             continue
         if sym in seen:
+            # duplicate symbol: make unique marker so list entries remain unique
             uniq = f"{sym}#{idx}"
             alphabet_for_decoder.append(uniq)
             duplicates.append((idx, sym, uniq))
     """
     sig = inspect.signature(decode_fn)
     params = list(sig.parameters.keys())
+    # candidates for param names
     beam_candidates = ["beam_width", "beam", "beam_size", "beamWidth", "beamSize"]
     alpha_candidates = ["alpha", "lm_weight", "lm_alpha", "lm_scale", "alpha_score"]
     beta_candidates = ["beta", "word_score", "word_insertion", "word_bonus", "word_insertion_score"]
     kwargs = {}
     for name in beam_candidates:
         if name in params:
             kwargs[name] = int(beam_width)
             break
     for name in alpha_candidates:
         if name in params:
             kwargs[name] = float(alpha)
             break
     for name in beta_candidates:
         if name in params:
             kwargs[name] = float(beta)
             break
     args = []
     if not any(name in params for name in beam_candidates):
         args = [int(beam_width)]
     used = None
     # Try with probs first, then fallback to log-probs
     try:
         text = decode_fn(probs, *args, **kwargs)
         used = "probs"
     except TypeError as e1:
         try:
             print("decode(probs, *args, **kwargs) failed:", e1)
             if args:
                 text = decode_fn(probs, *args)
             else:
                 text = decode_fn(probs)
             used = "probs_positional_fallback"
         except Exception as e2:
             try:
                 print("probs positional fallback failed:", e2, " — trying log-probs")
                 text = decode_fn(lp, *args, **kwargs)
                 return gr.Textbox(label="Fallback - paste path to audio file")
 # --- Build UI ---
+title = "ASR Bagui (LM ON)"
+desc = "Le tout premier ASR de la langue fulfulde Cameroun. By ABDOUL-BAGUI, M2 in UMa"
 with gr.Blocks() as demo:
     gr.Markdown(f"## {title}\n\n{desc}\n\nDevice: **{DEVICE}**")
             btn = gr.Button("Transcribe")
         with gr.Column(scale=3):
             out_txt = gr.Textbox(label="Transcription (raw)", lines=4)
             out_conf = gr.Textbox(label="Confidence")
     def _run(a, use_lm, beam, a_w, b_w):
         if a is None:
+            return "No audio", ""
         res = transcribe(a, use_lm=use_lm, beam_width=beam, alpha=a_w, beta=b_w)
+        return res["transcription"], str(res["confidence"])
     btn.click(_run, inputs=[audio_in, use_lm_checkbox, beam_slider, alpha_slider, beta_slider],
+              outputs=[out_txt, out_conf])
 # Launch the demo (share=True yields a public link in Spaces/Colab)
 if __name__ == "__main__":