Spaces:

marcsixtysix
/

Speech-recognition-pl-small

Sleeping

marcsixtysix committed on Jan 24

Commit

0b59fd9

verified ·

1 Parent(s): 1c47828

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,11 +1,32 @@
 from transformers import pipeline
 import gradio as gr
 pipe = pipeline(model="marcsixtysix/whisper-base-pl")
 def transcribe(audio):
     text = pipe(audio)["text"]
-    return text
 demo = gr.Interface(
     fn=transcribe,

 from transformers import pipeline
 import gradio as gr
+import requests
+def correct_polish_text(text):
+    """Return *text* with LanguageTool's first suggestion applied to each match.
+
+    The text is posted to the LanguageTool ``/v2/check`` endpoint with
+    ``language="pl"``. Correction is best-effort: if the request fails,
+    times out, returns a non-200 status, or the body is not valid JSON,
+    the original text is returned unchanged.
+    """
+    if not text:
+        # Nothing to correct; skip the network round trip.
+        return text
+    api_url = "https://api.languagetoolplus.com/v2/check"
+    params = {
+        "text": text,
+        "language": "pl",
+    }
+    try:
+        # Bound the wait so a stalled API cannot hang the transcription.
+        response = requests.post(api_url, data=params, timeout=10)
+        if response.status_code != 200:
+            return text
+        matches = response.json().get("matches", [])
+    except (requests.RequestException, ValueError):
+        # Network failure or malformed JSON: fall back to the raw text.
+        return text
+    corrected_text = text
+    # Apply edits from the end of the string backwards so the offsets of
+    # the matches still to be processed remain valid.
+    untouched_end = len(text)
+    for match in sorted(matches, key=lambda m: m["offset"], reverse=True):
+        start = match["offset"]
+        end = start + match["length"]
+        replacements = match.get("replacements")
+        if not replacements or end > untouched_end:
+            # No suggestion, or the span overlaps an edit already applied.
+            continue
+        corrected_text = corrected_text[:start] + replacements[0]["value"] + corrected_text[end:]
+        untouched_end = start
+    return corrected_text
 pipe = pipeline(model="marcsixtysix/whisper-base-pl")
 def transcribe(audio):
     text = pipe(audio)["text"]
+    # Run the raw transcription through the Polish correction step.
+    return correct_polish_text(text)
 demo = gr.Interface(
     fn=transcribe,